From 5216a245901e221ee0e41f065412b5cd1c68071b Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 3 Jan 2016 18:31:03 +0100 Subject: [PATCH] Back to work on the PSP renderer --- GPU/GPU.cpp | 2 +- GPU/Vulkan/DrawEngineVulkan.cpp | 130 ++++++++++++++++++- GPU/Vulkan/DrawEngineVulkan.h | 58 ++++++--- GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp | 39 +----- GPU/Vulkan/GPU_Vulkan.cpp | 2 +- GPU/Vulkan/GPU_Vulkan.h | 2 +- GPU/Vulkan/PipelineManagerVulkan.cpp | 72 +++++++--- GPU/Vulkan/PipelineManagerVulkan.h | 39 ++++-- GPU/Vulkan/ShaderManagerVulkan.h | 19 ++- GPU/Vulkan/VertexShaderGeneratorVulkan.cpp | 2 +- ext/native/thin3d/VulkanContext.cpp | 13 ++ ext/native/thin3d/VulkanContext.h | 100 ++++++++++++++ ext/native/thin3d/thin3d_vulkan.cpp | 108 +-------------- 13 files changed, 385 insertions(+), 201 deletions(-) diff --git a/GPU/GPU.cpp b/GPU/GPU.cpp index 36e9564291..5088b1108c 100644 --- a/GPU/GPU.cpp +++ b/GPU/GPU.cpp @@ -59,7 +59,7 @@ bool GPU_Init(GraphicsContext *ctx) { #endif break; case GPU_VULKAN: - SetGPU(new GPU_Vulkan()); + SetGPU(new GPU_Vulkan(nullptr)); break; } diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 3facb84fab..e5e7e4a966 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -18,6 +18,8 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. +#include + #include "base/logging.h" #include "base/timeutil.h" @@ -59,8 +61,10 @@ enum { TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof(TransformedVertex) }; -DrawEngineVulkan::DrawEngineVulkan() - : decodedVerts_(0), +DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan) + : + vulkan_(vulkan), + decodedVerts_(0), prevPrim_(GE_PRIM_INVALID), lastVType_(-1), pipelineManager_(nullptr), @@ -71,7 +75,8 @@ DrawEngineVulkan::DrawEngineVulkan() decodeCounter_(0), dcid_(0), fboTexNeedBind_(false), - fboTexBound_(false) { + fboTexBound_(false), + curFrame_(0) { memset(&decOptions_, 0, sizeof(decOptions_)); decOptions_.expandAllUVtoFloat = true; @@ -88,6 +93,82 @@ DrawEngineVulkan::DrawEngineVulkan() transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE); indexGen.Setup(decIndex); + + VkDescriptorSetLayoutBinding bindings[5]; + bindings[0].descriptorCount = 1; + bindings[0].pImmutableSamplers = nullptr; + bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[0].binding = 1; + bindings[1].descriptorCount = 1; + bindings[1].pImmutableSamplers = nullptr; + bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[1].binding = 1; + bindings[2].descriptorCount = 1; + bindings[2].pImmutableSamplers = nullptr; + bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + bindings[2].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[2].binding = 2; + bindings[3].descriptorCount = 1; + bindings[3].pImmutableSamplers = nullptr; + bindings[3].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + bindings[3].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[3].binding = 3; + bindings[4].descriptorCount = 1; + bindings[4].pImmutableSamplers = nullptr; + bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + bindings[4].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[4].binding = 3; + + VkDevice device = vulkan_->GetDevice(); + + VkDescriptorSetLayoutCreateInfo dsl; + dsl.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + dsl.pNext = nullptr; + dsl.bindingCount = 5; + dsl.pBindings = bindings; + VkResult res = vkCreateDescriptorSetLayout(device, &dsl, nullptr, &descriptorSetLayout_); + + VkDescriptorPoolSize dpTypes[2]; + dpTypes[0].descriptorCount = 200; + dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + dpTypes[1].descriptorCount = 200; + dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + + VkDescriptorPoolCreateInfo dp; + dp.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + dp.pNext = nullptr; + dp.flags = 0; // Don't want to mess around with individually freeing these, let's go dynamic each frame. + dp.maxSets = 200; // 200 textures per frame should be enough for the UI... + dp.pPoolSizes = dpTypes; + dp.poolSizeCount = ARRAY_SIZE(dpTypes); + res = vkCreateDescriptorPool(device, &dp, nullptr, &frame_[0].descPool); + assert(VK_SUCCESS == res); + res = vkCreateDescriptorPool(device, &dp, nullptr, &frame_[1].descPool); + assert(VK_SUCCESS == res); + + // We are going to use one-shot descriptors in the initial implementation. Might look into caching them + // if creating and updating them turns out to be expensive. + for (int i = 0; i < 2; i++) { + VkResult res = vkCreateDescriptorPool(vulkan_->GetDevice(), &dp, nullptr, &frame_[i].descPool); + assert(VK_SUCCESS == res); + } + + VkPipelineLayoutCreateInfo pl; + pl.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pl.pNext = nullptr; + pl.pPushConstantRanges = nullptr; + pl.pushConstantRangeCount = 0; + pl.setLayoutCount = 1; + pl.pSetLayouts = &descriptorSetLayout_; + res = vkCreatePipelineLayout(device, &pl, nullptr, &pipelineLayout_); + + assert(VK_SUCCESS == res); +} + +void DrawEngineVulkan::EndFrame() { + curFrame_++; } DrawEngineVulkan::~DrawEngineVulkan() { @@ -282,12 +363,32 @@ inline u32 ComputeMiniHashRange(const void *ptr, size_t sz) { } } +/* +struct DescriptorSetKey { + void *texture_; + void *secondaryTexture_; + + bool operator < (const DescriptorSetKey &other) const { + if (texture_ < other.texture_) return true; else if (texture_ > other.texture_) return false; + if (secondaryTexture_ < other.secondaryTexture_) return true; else if (secondaryTexture_ > other.secondaryTexture_) return false; + return false; + } +}; +*/ + +VkDescriptorSet DrawEngineVulkan::GetDescriptorSet() { + return nullptr; +} + // The inline wrapper in the header checks for numDrawCalls == 0 void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { gpuStats.numFlushes++; + FrameData *frame = &frame_[curFrame_ & 1]; + // This is not done on every drawcall, we should collect vertex data // until critical state changes. That's when we draw (flush). + VkDescriptorSet ds = GetDescriptorSet(); GEPrimitiveType prim = prevPrim_; // ApplyDrawState(prim); @@ -296,6 +397,13 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { VulkanFragmentShader *fshader; shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader); + uint32_t baseUBOOffset = 0; + uint32_t lightUBOOffset = 0; + uint32_t boneUBOOffset = 0; + + uint32_t ibOffset = 0; + uint32_t vbOffset = 0; + if (vshader->UseHWTransform()) { int vertexCount = 0; int maxIndex = 0; @@ -319,12 +427,20 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255); } - VkBuffer buf[1] = {}; - VkDeviceSize offsets[1] = { 0 }; + VkBuffer buf[1] = {frame->pushData->GetVkBuffer()}; + uint32_t dynamicUBOOffsets[3] = { + baseUBOOffset, lightUBOOffset, boneUBOOffset, + }; + vkCmdBindDescriptorSets(cmd_, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout_, 0, 1, &ds, 3, dynamicUBOOffsets); + + ibOffset = (uint32_t)(frame->pushData->Push(decIndex, 2 * indexGen.VertexCount())); + // vbOffset = frame->pushData->Push(decoded, ) + + VkDeviceSize offsets[1] = { vbOffset }; if (useElements) { // TODO: Avoid rebinding if the vertex size stays the same by using the offset arguments vkCmdBindVertexBuffers(cmd_, 0, 1, buf, offsets); - vkCmdBindIndexBuffer(cmd_, buf[0], 0, VK_INDEX_TYPE_UINT16); + vkCmdBindIndexBuffer(cmd_, buf[0], ibOffset, VK_INDEX_TYPE_UINT16); vkCmdDrawIndexed(cmd_, maxIndex + 1, 1, 0, 0, 0); } else { vkCmdBindVertexBuffers(cmd_, 0, 1, buf, offsets); @@ -383,7 +499,7 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) { } } else if (result.action == SW_CLEAR) { // TODO: Support clearing only color and not alpha, or vice versa. This is not supported (probably for good reason) by vkCmdClearColorAttachment - // so we will have to simply draw a rectangle instead. Accordingly, + // so we will have to simply draw a rectangle instead. int mask = gstate.isClearModeColorMask() ? 1 : 0; if (gstate.isClearModeAlphaMask()) mask |= 2; diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index a484700443..ebb81dadc9 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -17,25 +17,18 @@ #pragma once -#include "GPU/Common/DrawEngineCommon.h" -// Copyright (c) 2012- PPSSPP Project. +// The Descriptor Set used for the majority of PSP rendering looks like this: +// +// * binding 0: Texture/Sampler (the PSP texture) +// * binding 1: Secondary texture sampler for shader blending or depal palettes +// * binding 2: Base Uniform Buffer (includes fragment state) +// * binding 3: Light uniform buffer +// * binding 4: Bone uniform buffer +// +// All shaders conform to this layout, so they are all compatible with the same descriptor set. +// The format of the various uniform buffers may vary though - vertex shaders that don't skin +// won't get any bone data, etc. -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0 or later versions. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. -// If not, see http://www.gnu.org/licenses/ - -// Official git repository and contact information can be found at -// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. - -#pragma once #include @@ -66,7 +59,7 @@ typedef u32 ReliableHashType; // Handles transform, lighting and drawing. class DrawEngineVulkan : public DrawEngineCommon { public: - DrawEngineVulkan(); + DrawEngineVulkan(VulkanContext *vulkan); virtual ~DrawEngineVulkan(); void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead); @@ -143,13 +136,36 @@ public: cmd_ = cmd; } + VkPipelineLayout GetPipelineLayout() const { + return pipelineLayout_; + } + + void EndFrame(); + private: void DecodeVerts(); void DecodeVertsStep(); void DoFlush(VkCommandBuffer cmd); + VkDescriptorSet GetDescriptorSet(); + VertexDecoder *GetVertexDecoder(u32 vtype); + VulkanContext *vulkan_; + + // We use a single descriptor set layout for all PSP draws. + VkDescriptorSetLayout descriptorSetLayout_; + VkPipelineLayout pipelineLayout_; + + // We alternate between these. + struct FrameData { + VkDescriptorPool descPool; + VulkanPushBuffer *pushData; + }; + + int curFrame_; + FrameData frame_[2]; + // Defer all vertex decoding to a "Flush" (except when software skinning) struct DeferredDrawCall { void *verts; @@ -162,6 +178,10 @@ private: u16 indexUpperBound; }; + // This is always set to the current main command buffer of the VulkanContext. + // In the future, we may support flushing mid-frame and more fine grained command buffer usage, + // but for now, let's just submit a whole frame at a time. This is not compatible with some games + // that do mid frame read-backs. VkCommandBuffer cmd_; // Vertex collector state diff --git a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp index 8a1a5d1e60..7be678c0b4 100644 --- a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp @@ -35,6 +35,7 @@ #include "GPU/Common/ShaderId.h" #include "GPU/Vulkan/FragmentShaderGeneratorVulkan.h" #include "GPU/Vulkan/FramebufferVulkan.h" +#include "GPU/Vulkan/ShaderManagerVulkan.h" #include "GPU/Vulkan/PipelineManagerVulkan.h" #include "GPU/ge_constants.h" @@ -42,31 +43,6 @@ #define WRITE p+=sprintf -// #define DEBUG_SHADER - -// TODO: Which binding number? -static const char *vulkan_uniform_buffer = R"( -layout(set=0, binding=3) uniform constants { - // Blend function replacement - vec3 u_blendFixA; - vec3 u_blendFixB; - - // Texture clamp emulation - vec4 u_texclamp; - vec2 u_texclampoff; - - // Alpha/Color test emulation - vec4 u_alphacolorref; - ivec4 u_alphacolormask; - - // Stencil replacement - float u_stencilReplaceValue; - vec3 u_texenv; - - vec3 u_fogcolor; -)"; - - // Missing: Z depth range bool GenerateVulkanGLSLFragmentShader(const ShaderID &id, char *buffer) { char *p = buffer; @@ -116,15 +92,14 @@ bool GenerateVulkanGLSLFragmentShader(const ShaderID &id, char *buffer) { const char *shading = doFlatShading ? "flat" : ""; - WRITE(p, "%s\n", vulkan_uniform_buffer); - + WRITE(p, "layout (binding = 2) uniform %s\n", ub_baseStr); if (doTexture) { - WRITE(p, "layout (binding = 1) uniform sampler2D tex;\n"); + WRITE(p, "layout (binding = 0) uniform sampler2D tex;\n"); } if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) { if (replaceBlend == REPLACE_BLEND_COPY_FBO) { - WRITE(p, "layout (binding = 2) uniform sampler2D fbotex;\n"); + WRITE(p, "layout (binding = 1) uniform sampler2D fbotex;\n"); } } @@ -132,13 +107,13 @@ bool GenerateVulkanGLSLFragmentShader(const ShaderID &id, char *buffer) { if (lmode) WRITE(p, "layout (location = 2) %s in vec3 v_color1;\n", shading); if (enableFog) { - WRITE(p, "layout (location = 3) %s float v_fogdepth;\n", highpFog ? "highp" : "mediump"); + WRITE(p, "layout (location = 3) %s in float v_fogdepth;\n", highpFog ? "highp" : "mediump"); } if (doTexture) { if (doTextureProjection) - WRITE(p, "layout (location = 0) %s vec3 v_texcoord;\n", highpTexcoord ? "highp" : "mediump"); + WRITE(p, "layout (location = 0) %s in vec3 v_texcoord;\n", highpTexcoord ? "highp" : "mediump"); else - WRITE(p, "layout (location = 0) %s vec2 v_texcoord;\n", highpTexcoord ? "highp" : "mediump"); + WRITE(p, "layout (location = 0) %s in vec2 v_texcoord;\n", highpTexcoord ? "highp" : "mediump"); } if (!g_Config.bFragmentTestCache) { diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index f58f41230e..0bfb67d41c 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -17,7 +17,7 @@ #include "GPU/Vulkan/GPU_Vulkan.h" -GPU_Vulkan::GPU_Vulkan() { +GPU_Vulkan::GPU_Vulkan(VulkanContext *vulkan) : transformDraw_(vulkan) { } diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h index 69efc4c78f..e691cbbb61 100644 --- a/GPU/Vulkan/GPU_Vulkan.h +++ b/GPU/Vulkan/GPU_Vulkan.h @@ -31,7 +31,7 @@ class LinkedShader; class GPU_Vulkan : public GPUCommon { public: - GPU_Vulkan(); + GPU_Vulkan(VulkanContext *vulkan); ~GPU_Vulkan(); // This gets called on startup and when we get back from settings. diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp index 9ef2abce4a..58920f4fcc 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.cpp +++ b/GPU/Vulkan/PipelineManagerVulkan.cpp @@ -3,6 +3,15 @@ #include "Common/Log.h" #include "GPU/Vulkan/VulkanUtil.h" #include "GPU/Vulkan/PipelineManagerVulkan.h" +#include "thin3d/VulkanContext.h" + +PipelineManagerVulkan::PipelineManagerVulkan(VulkanContext *vulkan) : vulkan_(vulkan) { + pipelineCache_ = vulkan->CreatePipelineCache(); +} + +PipelineManagerVulkan::~PipelineManagerVulkan() { + vkDestroyPipelineCache(vulkan_->GetDevice(), pipelineCache_, nullptr); +} struct DeclTypeInfo { VkFormat type; @@ -87,7 +96,7 @@ static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pip blend0.dstAlphaBlendFactor = key.destAlpha; } blend0.colorWriteMask = key.colorWriteMask; - + VkPipelineColorBlendStateCreateInfo cbs; cbs.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; cbs.pNext = nullptr; @@ -117,17 +126,6 @@ static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pip dss.depthWriteEnable = key.depthWriteEnable; } - VkGraphicsPipelineCreateInfo pipe; - pipe.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - pipe.pNext = nullptr; - - pipe.pColorBlendState = &cbs; - if (key.depthTestEnable || key.stencilTestEnable) { - pipe.pDepthStencilState = &dss; - } else { - pipe.pDepthStencilState = nullptr; - } - VkDynamicState dynamicStates[8]; int numDyn = 0; if (key.blendEnable) { @@ -147,8 +145,6 @@ static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pip ds.pDynamicStates = dynamicStates; ds.dynamicStateCount = numDyn; - pipe.pDynamicState = &ds; - VkPipelineRasterizationStateCreateInfo rs; rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; rs.pNext = nullptr; @@ -160,10 +156,12 @@ static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pip rs.polygonMode = VK_POLYGON_MODE_FILL; rs.depthClampEnable = false; - pipe.pRasterizationState = &rs; - - // We will use dynamic viewport state. - pipe.pViewportState = nullptr; + VkPipelineMultisampleStateCreateInfo ms; + memset(&ms, 0, sizeof(ms)); + ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + ms.pNext = nullptr; + ms.pSampleMask = nullptr; + ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; VkPipelineShaderStageCreateInfo ss[2]; ss[0].sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; @@ -180,9 +178,12 @@ static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pip ss[1].module = fshader; ss[1].pName = "main"; ss[1].flags = 0; - pipe.stageCount = 2; - pipe.pStages = ss; - pipe.basePipelineIndex = 0; + + VkPipelineInputAssemblyStateCreateInfo inputAssembly; + inputAssembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + inputAssembly.pNext = nullptr; + inputAssembly.topology = key.topology; + inputAssembly.primitiveRestartEnable = false; int vertexStride = 0; @@ -208,6 +209,35 @@ static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pip vis.vertexAttributeDescriptionCount = attributeCount; vis.pVertexAttributeDescriptions = attrs; + VkPipelineViewportStateCreateInfo vs; + vs.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + vs.pNext = nullptr; + vs.viewportCount = 1; + vs.scissorCount = 1; + vs.pViewports = nullptr; // dynamic + vs.pScissors = nullptr; // dynamic + + VkGraphicsPipelineCreateInfo pipe; + pipe.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipe.pNext = nullptr; + pipe.stageCount = 2; + pipe.pStages = ss; + pipe.basePipelineIndex = 0; + + pipe.pColorBlendState = &cbs; + if (key.depthTestEnable || key.stencilTestEnable) { + pipe.pDepthStencilState = &dss; + } else { + pipe.pDepthStencilState = nullptr; + } + pipe.pRasterizationState = &rs; + + // We will use dynamic viewport state. + pipe.pViewportState = &vs; + pipe.pDynamicState = &ds; + pipe.pInputAssemblyState = &inputAssembly; + pipe.pMultisampleState = &ms; + VkPipeline pipeline; VkResult result = vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipe, nullptr, &pipeline); if (result != VK_SUCCESS) { diff --git a/GPU/Vulkan/PipelineManagerVulkan.h b/GPU/Vulkan/PipelineManagerVulkan.h index d28514267e..b31a097539 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.h +++ b/GPU/Vulkan/PipelineManagerVulkan.h @@ -17,22 +17,13 @@ #pragma once +#include + #include "GPU/Common/VertexDecoderCommon.h" #include "GPU/Common/ShaderId.h" #include "GPU/Vulkan/VulkanUtil.h" -// The Descriptor Set used for the majority of PSP rendering looks like this: -// -// * binding 0: Vertex Data (up to 7 locations as defined in PspAttributeLocation) -// * binding 1: Texture Sampler (the PSP texture) -// * binding 2: Secondary texture sampler for shader blending or depal palettes -// * binding 3: Vertex Uniform Buffer -// * binding 4: Fragment Uniform Buffer -// -// All shaders conform to this layout, so they are all compatible with the same descriptor set. -// The format of the various uniform buffers may vary though - vertex shaders that don't skin -// won't get any bone data, etc. - +// PSP vertex format. enum class PspAttributeLocation { POSITION = 0, TEXCOORD = 1, @@ -71,10 +62,16 @@ struct VulkanPipelineRasterStateKey { VkStencilOp stencilPassOp : 4; VkStencilOp stencilFailOp : 4; VkStencilOp stencilDepthFailOp : 4; - // We'll use dynamic state for writemask, reference and comparemask to start with. + // We'll use dynamic state for writemask, reference and comparemask to start with, + // and viewport/scissor. // Rasterizer VkCullModeFlagBits cullMode : 2; + VkPrimitiveTopology topology : 4; + + bool operator < (const VulkanPipelineRasterStateKey &other) const { + return memcmp(this, &other, sizeof(*this)) < 0; + } }; // All the information needed. All PSP rendering (except full screen clears?) will make use of a single @@ -86,6 +83,16 @@ struct VulkanPipelineKey { uint32_t vertType; ShaderID vShaderId; ShaderID fShaderId; + + bool operator < (const VulkanPipelineKey &other) const { + if (raster < other.raster) return true; else if (other.raster < raster) return false; + if (prim < other.prim) return true; else if (other.prim < prim) return false; + if (pretransformed < other.pretransformed) return true; else if (other.pretransformed < pretransformed) return false; + if (vertType < other.vertType) return true; else if (other.vertType < vertType) return false; + if (vShaderId < other.vShaderId) return true; else if (other.vShaderId < vShaderId) return false; + if (fShaderId < other.fShaderId) return true; else if (other.fShaderId < fShaderId) return false; + return false; + } }; enum { @@ -102,5 +109,11 @@ struct VulkanPipeline { class PipelineManagerVulkan { public: + PipelineManagerVulkan(VulkanContext *ctx); + ~PipelineManagerVulkan(); +private: + std::map pipelines_; + VkPipelineCache pipelineCache_; + VulkanContext *vulkan_; }; diff --git a/GPU/Vulkan/ShaderManagerVulkan.h b/GPU/Vulkan/ShaderManagerVulkan.h index 00b6103bd7..ef7bdda917 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.h +++ b/GPU/Vulkan/ShaderManagerVulkan.h @@ -84,7 +84,7 @@ struct UB_VS_TransformCommon { float matAmbient[4]; }; -static const char *ub_vs_transformCommonStr = +static const char *ub_baseStr = R"(matrix4x4 proj; matrix4x4 view; matrix4x4 world; @@ -93,6 +93,23 @@ R"(matrix4x4 proj; vec4 depthRange; vec2 fogCoef; vec4 matAmbient; + // Blend function replacement + vec3 u_blendFixA; + vec3 u_blendFixB; + + // Texture clamp emulation + vec4 u_texclamp; + vec2 u_texclampoff; + + // Alpha/Color test emulation + vec4 u_alphacolorref; + ivec4 u_alphacolormask; + + // Stencil replacement + float u_stencilReplaceValue; + vec3 u_texenv; + + vec3 u_fogcolor; )"; struct UB_VS_Lights { diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index 5d6c5197b8..d76ce47ed1 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -178,7 +178,7 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer) { // We will memcpy the parts into place in a big buffer so we can be quite dynamic about what parts // are present and what parts aren't, but we will not be ultra detailed about it. - WRITE(p, "layout (binding=3) uniform base {\n%s\n}\n", ub_vs_transformCommonStr); + WRITE(p, "layout (binding=3) uniform base {\n%s\n}\n", ub_baseStr); if (enableLighting) WRITE(p, "layout (binding=4) uniform light {\n%s\n}\n", ub_vs_lightsStr); if (enableBones) diff --git a/ext/native/thin3d/VulkanContext.cpp b/ext/native/thin3d/VulkanContext.cpp index 809a0f9abd..0af8e5ddd8 100644 --- a/ext/native/thin3d/VulkanContext.cpp +++ b/ext/native/thin3d/VulkanContext.cpp @@ -1390,6 +1390,19 @@ void VulkanContext::DestroyDevice() { device_ = NULL; } +VkPipelineCache VulkanContext::CreatePipelineCache() { + VkPipelineCache cache; + VkPipelineCacheCreateInfo pc; + pc.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; + pc.pNext = nullptr; + pc.pInitialData = nullptr; + pc.initialDataSize = 0; + pc.flags = 0; + VkResult res = vkCreatePipelineCache(device_, &pc, nullptr, &cache); + assert(VK_SUCCESS == res); + return cache; +} + void TransitionImageLayout( VkCommandBuffer cmd, VkImage image, diff --git a/ext/native/thin3d/VulkanContext.h b/ext/native/thin3d/VulkanContext.h index f43ba208d6..876d20c213 100644 --- a/ext/native/thin3d/VulkanContext.h +++ b/ext/native/thin3d/VulkanContext.h @@ -26,6 +26,7 @@ #ifndef UTIL_INIT #define UTIL_INIT +#include #include #include @@ -130,6 +131,9 @@ public: globalDeleteList_.QueueDelete(mem); } + + VkPipelineCache CreatePipelineCache(); + void InitSurfaceAndQueue(HINSTANCE conn, HWND wnd); void InitSwapchain(VkCommandBuffer cmd); void InitSurfaceRenderPass(bool include_depth, bool clear); @@ -306,6 +310,7 @@ private: // Wrapper around what you need to use a texture. // Not very optimal - if you have many small textures you should use other strategies. +// Only supports simple 2D textures for now. Mipmap support will be added later. class VulkanTexture { public: VkImage image; @@ -332,6 +337,101 @@ private: bool needStaging; }; +// Use these to push vertex, index and uniform data. +// TODO: Make it possible to suballocate pushbuffers from a large DeviceMemory block. +// TODO: Make this dynamically grow by chaining new buffers in the future. +// Until then, we cap at a maximum size. +// We'll have two of these that we alternate between on each frame. +// These will only be used for the "Thin3D" system - the PSP emulation etc will have +// their own similar buffer solutions. +class VulkanPushBuffer { +public: + VulkanPushBuffer(VulkanContext *vulkan, size_t size) : offset_(0), size_(size), writePtr_(nullptr), deviceMemory_(nullptr) { + VkDevice device = vulkan->GetDevice(); + + VkBufferCreateInfo b; + b.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + b.pNext = nullptr; + b.size = size; + b.flags = 0; + b.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + b.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + b.queueFamilyIndexCount = 0; + b.pQueueFamilyIndices = nullptr; + VkResult res = vkCreateBuffer(device, &b, nullptr, &buffer_); + assert(VK_SUCCESS == res); + + // Okay, that's the buffer. Now let's allocate some memory for it. + VkMemoryAllocateInfo alloc; + alloc.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + alloc.pNext = nullptr; + vulkan->MemoryTypeFromProperties(0xFFFFFFFF, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &alloc.memoryTypeIndex); + alloc.allocationSize = size; + + res = vkAllocateMemory(device, &alloc, nullptr, &deviceMemory_); + assert(VK_SUCCESS == res); + res = vkBindBufferMemory(device, buffer_, deviceMemory_, 0); + assert(VK_SUCCESS == res); + } + + void Destroy(VulkanContext *vulkan) { + vulkan->QueueDelete(buffer_); + vulkan->QueueDelete(deviceMemory_); + } + + void Reset() { offset_ = 0; } + + void Begin(VkDevice device) { + offset_ = 0; + VkResult res = vkMapMemory(device, deviceMemory_, 0, size_, 0, (void **)(&writePtr_)); + assert(VK_SUCCESS == res); + } + + void End(VkDevice device) { + vkUnmapMemory(device, deviceMemory_); + writePtr_ = nullptr; + } + + + size_t Allocate(size_t numBytes) { + size_t out = offset_; + offset_ += (numBytes + 3) & ~3; // Round up to 4 bytes. + return out; + } + + // TODO: Add alignment support? + // Returns the offset that should be used when binding this buffer to get this data. + size_t Push(const void *data, size_t size) { + size_t off = Allocate(size); + memcpy(writePtr_ + off, data, size); + return off; + } + + size_t PushAligned(const void *data, size_t size, int align) { + offset_ = (offset_ + align - 1) & ~(align - 1); + size_t off = Allocate(size); + memcpy(writePtr_ + off, data, size); + return off; + } + + // "Zero-copy" variant - you can write the data directly as you compute it. + void *Push(size_t size, size_t *bindOffset) { + size_t off = Allocate(size); + *bindOffset = off; + return writePtr_ + off; + } + + VkBuffer GetVkBuffer() const { return buffer_; } + +private: + VkDeviceMemory deviceMemory_; + VkBuffer buffer_; + size_t offset_; + size_t size_; + uint8_t *writePtr_; +}; + + VkBool32 CheckLayers(const std::vector &layer_props, const std::vector &layer_names); void VulkanBeginCommandBuffer(VkCommandBuffer cmd); diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index 80dc0f4485..d2aa8faab8 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -108,98 +108,6 @@ static inline void Uint8x4ToFloat4(uint32_t u, float f[4]) { } -// Use these to push vertex, index and uniform data. -// TODO: Make this dynamically grow by chaining new buffers in the future. -// Until then, we cap at a maximum size. -// We'll have two of these that we alternate between on each frame. -// These will only be used for the "Thin3D" system - the PSP emulation etc will have -// their own similar buffer solutions. -class VulkanPushBuffer { -public: - VulkanPushBuffer(VkDevice device, VulkanContext *vulkan, size_t size) : offset_(0), size_(size), writePtr_(nullptr), deviceMemory_(nullptr) { - VkBufferCreateInfo b; - b.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - b.pNext = nullptr; - b.size = size; - b.flags = 0; - b.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; - b.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - b.queueFamilyIndexCount = 0; - b.pQueueFamilyIndices = nullptr; - VkResult res = vkCreateBuffer(device, &b, nullptr, &buffer_); - assert(VK_SUCCESS == res); - - // Okay, that's the buffer. Now let's allocate some memory for it. - VkMemoryAllocateInfo alloc; - alloc.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - alloc.pNext = nullptr; - vulkan->MemoryTypeFromProperties(0xFFFFFFFF, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, &alloc.memoryTypeIndex); - alloc.allocationSize = size; - - res = vkAllocateMemory(device, &alloc, nullptr, &deviceMemory_); - assert(VK_SUCCESS == res); - res = vkBindBufferMemory(device, buffer_, deviceMemory_, 0); - assert(VK_SUCCESS == res); - } - - void Destroy(VulkanContext *vulkan) { - vulkan->QueueDelete(buffer_); - vulkan->QueueDelete(deviceMemory_); - } - - void Reset() { offset_ = 0; } - - void Begin(VkDevice device) { - offset_ = 0; - VkResult res = vkMapMemory(device, deviceMemory_, 0, size_, 0, (void **)(&writePtr_)); - assert(VK_SUCCESS == res); - } - - void End(VkDevice device) { - vkUnmapMemory(device, deviceMemory_); - writePtr_ = nullptr; - } - - - size_t Allocate(size_t numBytes) { - size_t out = offset_; - offset_ += (numBytes + 3) & ~3; // Round up to 4 bytes. - return out; - } - - // TODO: Add alignment support? - // Returns the offset that should be used when binding this buffer to get this data. - size_t Push(const void *data, size_t size) { - size_t off = Allocate(size); - memcpy(writePtr_ + off, data, size); - return off; - } - - size_t PushAligned(const void *data, size_t size, int align) { - offset_ = (offset_ + align - 1) & ~(align - 1); - size_t off = Allocate(size); - memcpy(writePtr_ + off, data, size); - return off; - } - - // "Zero-copy" variant - you can write the data directly as you compute it. - void *Push(size_t size, size_t *bindOffset) { - size_t off = Allocate(size); - *bindOffset = off; - return writePtr_ + off; - } - - VkBuffer GetVkBuffer() const { return buffer_; } - -private: - // TODO: Make it possible to suballocate pushbuffers in a large DeviceMemory block. - VkDeviceMemory deviceMemory_; - VkBuffer buffer_; - size_t offset_; - size_t size_; - uint8_t *writePtr_; -}; - class Thin3DVKBlendState : public Thin3DBlendState { public: bool blendEnabled; @@ -714,12 +622,11 @@ Thin3DVKContext::Thin3DVKContext(VulkanContext *vulkan) res = vkCreateDescriptorPool(device_, &dp, nullptr, &frame_[1].descriptorPool); assert(VK_SUCCESS == res); - frame_[0].pushBuffer = new VulkanPushBuffer(device_, vulkan_, 1024 * 1024); - frame_[1].pushBuffer = new VulkanPushBuffer(device_, vulkan_, 1024 * 1024); + frame_[0].pushBuffer = new VulkanPushBuffer(vulkan_, 1024 * 1024); + frame_[1].pushBuffer = new VulkanPushBuffer(vulkan_, 1024 * 1024); // binding 0 - uniform data - // binding 1 - sampler - // binding 2 - image + // binding 1 - combined sampler/image VkDescriptorSetLayoutBinding bindings[2]; bindings[0].descriptorCount = 1; bindings[0].pImmutableSamplers = nullptr; @@ -750,14 +657,7 @@ Thin3DVKContext::Thin3DVKContext(VulkanContext *vulkan) res = vkCreatePipelineLayout(device_, &pl, nullptr, &pipelineLayout_); assert(VK_SUCCESS == res); - VkPipelineCacheCreateInfo pc; - pc.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; - pc.pNext = nullptr; - pc.pInitialData = nullptr; - pc.initialDataSize = 0; - pc.flags = 0; - res = vkCreatePipelineCache(device_, &pc, nullptr, &pipelineCache_); - assert(VK_SUCCESS == res); + pipelineCache_ = vulkan_->CreatePipelineCache(); } Thin3DVKContext::~Thin3DVKContext() {