diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index 2cf904cbea..ddbdd02aee 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -88,7 +88,7 @@ protected: // Cached vertex decoders u32 lastVType_ = -1; std::unordered_map decoderMap_; - VertexDecoder *dec_; + VertexDecoder *dec_ = nullptr; VertexDecoderJitCache *decJitCache_; VertexDecoderOptions decOptions_; diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index 46fbe4c473..e6c3cd419a 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -83,7 +83,7 @@ DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device, textureCache_(0), framebufferManager_(0), numDrawCalls(0), - vertexCountInDrawCalls(0), + vertexCountInDrawCalls_(0), decodeCounter_(0), dcid_(0) { device1_ = (ID3D11Device1 *)draw->GetNativeObject(Draw::NativeObject::DEVICE_EX); @@ -284,7 +284,7 @@ inline void DrawEngineD3D11::SetupVertexDecoderInternal(u32 vertType) { } void DrawEngineD3D11::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) { - if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls + vertexCount > VERTEX_BUFFER_MAX) + if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) Flush(); // TODO: Is this the right thing to do? 
@@ -309,17 +309,19 @@ void DrawEngineD3D11::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, dc.prim = prim; dc.vertexCount = vertexCount; - u32 dhash = dcid_; - dhash ^= (u32)(uintptr_t)verts; - dhash = __rotl(dhash, 13); - dhash ^= (u32)(uintptr_t)inds; - dhash = __rotl(dhash, 13); - dhash ^= (u32)vertType; - dhash = __rotl(dhash, 13); - dhash ^= (u32)vertexCount; - dhash = __rotl(dhash, 13); - dhash ^= (u32)prim; - dcid_ = dhash; + if (g_Config.bVertexCache) { + u32 dhash = dcid_; + dhash ^= (u32)(uintptr_t)verts; + dhash = __rotl(dhash, 13); + dhash ^= (u32)(uintptr_t)inds; + dhash = __rotl(dhash, 13); + dhash ^= (u32)vertType; + dhash = __rotl(dhash, 13); + dhash ^= (u32)vertexCount; + dhash = __rotl(dhash, 13); + dhash ^= (u32)prim; + dcid_ = dhash; + } if (inds) { GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound); @@ -331,7 +333,7 @@ void DrawEngineD3D11::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, uvScale[numDrawCalls] = gstate_c.uv; numDrawCalls++; - vertexCountInDrawCalls += vertexCount; + vertexCountInDrawCalls_ += vertexCount; if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) { DecodeVertsStep(); @@ -941,12 +943,12 @@ rotateVBO: } gpuStats.numDrawCalls += numDrawCalls; - gpuStats.numVertsSubmitted += vertexCountInDrawCalls; + gpuStats.numVertsSubmitted += vertexCountInDrawCalls_; indexGen.Reset(); decodedVerts_ = 0; numDrawCalls = 0; - vertexCountInDrawCalls = 0; + vertexCountInDrawCalls_ = 0; decodeCounter_ = 0; dcid_ = 0; prevPrim_ = GE_PRIM_INVALID; @@ -959,7 +961,10 @@ rotateVBO: gstate_c.vertBounds.maxU = 0; gstate_c.vertBounds.maxV = 0; +#if PPSSPP_PLATFORM(WINDOWS) && !PPSSPP_PLATFORM(UWP) + // We only support GPU debugging on Windows, and that's the only use case for this. 
host->GPUNotifyDraw(); +#endif } bool DrawEngineD3D11::IsCodePtrVertexDecoder(const u8 *ptr) const { diff --git a/GPU/D3D11/DrawEngineD3D11.h b/GPU/D3D11/DrawEngineD3D11.h index d8077a711c..64867b69a2 100644 --- a/GPU/D3D11/DrawEngineD3D11.h +++ b/GPU/D3D11/DrawEngineD3D11.h @@ -229,7 +229,7 @@ private: DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS]; int numDrawCalls; - int vertexCountInDrawCalls; + int vertexCountInDrawCalls_; int decimationCounter_; int decodeCounter_; diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 4368f72e8c..84d0bd5fc7 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -69,16 +69,12 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *dra draw_(draw), prevPrim_(GE_PRIM_INVALID), numDrawCalls(0), - vertexCountInDrawCalls(0), curFrame_(0), - nullTexture_(nullptr), stats_{} { - decOptions_.expandAllWeightsToFloat = false; decOptions_.expand8BitNormalsToFloat = false; - // Allocate nicely aligned memory. Maybe graphics drivers will - // appreciate it. + // Allocate nicely aligned memory. Maybe graphics drivers will appreciate it. // All this is a LOT of memory, need to see if we can cut down somehow. 
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE); decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE); @@ -336,7 +332,7 @@ inline void DrawEngineVulkan::SetupVertexDecoderInternal(u32 vertType) { } void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) { - if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls + vertexCount > VERTEX_BUFFER_MAX) + if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) Flush(); // TODO: Is this the right thing to do? @@ -360,6 +356,20 @@ void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, dc.prim = prim; dc.vertexCount = vertexCount; + if (g_Config.bVertexCache) { + u32 dhash = dcid_; + dhash ^= (u32)(uintptr_t)verts; + dhash = __rotl(dhash, 13); + dhash ^= (u32)(uintptr_t)inds; + dhash = __rotl(dhash, 13); + dhash ^= (u32)vertType; + dhash = __rotl(dhash, 13); + dhash ^= (u32)vertexCount; + dhash = __rotl(dhash, 13); + dhash ^= (u32)prim; + dcid_ = dhash; + } + if (inds) { GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound); } else { @@ -370,7 +380,12 @@ void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, uvScale[numDrawCalls] = gstate_c.uv; numDrawCalls++; - vertexCountInDrawCalls += vertexCount; + vertexCountInDrawCalls_ += vertexCount; + + if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) { + DecodeVertsStep(decoded, decodeCounter_, decodedVerts_); + decodeCounter_++; + } if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) { // Rendertarget == texture? 
@@ -452,8 +467,6 @@ void DrawEngineVulkan::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) { } void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf) { - int decodedVerts = 0; - u8 *dest = decoded; // Figure out how much pushbuffer space we need to allocate. @@ -488,9 +501,9 @@ void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset, } const UVScale origUV = gstate_c.uv; - for (int i = 0; i < numDrawCalls; i++) { - gstate_c.uv = uvScale[i]; - DecodeVertsStep(dest, i, decodedVerts); // Note that this can modify i + for (; decodeCounter_ < numDrawCalls; decodeCounter_++) { + gstate_c.uv = uvScale[decodeCounter_]; + DecodeVertsStep(dest, decodeCounter_, decodedVerts_); // NOTE! DecodeVertsStep can modify decodeCounter_ (it is passed by reference as i)! } gstate_c.uv = origUV; @@ -685,9 +698,26 @@ void DrawEngineVulkan::DoFlush() { int vertexCount = 0; bool useElements = true; - // Decode directly into the pushbuffer + // Cannot cache vertex data with morph enabled. + bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK); + // Also avoid caching when software skinning. VkBuffer vbuf; - DecodeVerts(frame->pushVertex, &vbOffset, &vbuf); + if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) { + // If software skinning, we've already predecoded into "decoded". So push that content. + VkDeviceSize size = decodedVerts_ * dec_->GetDecVtxFmt().stride; + u8 *dest = (u8 *)frame->pushVertex->Push(size, &vbOffset, &vbuf); + memcpy(dest, decoded, size); + } else { + // Decode directly into the pushbuffer + DecodeVerts(frame->pushVertex, &vbOffset, &vbuf); + } + + useCache = false; + if (useCache) { + u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. 
See #9263 + // TODO: Actually support vertex caching + } + gpuStats.numUncachedVertsDrawn += indexGen.VertexCount(); useElements = !indexGen.SeenOnlyPurePrims(); vertexCount = indexGen.VertexCount(); @@ -752,7 +782,8 @@ void DrawEngineVulkan::DoFlush() { if (useElements) { VkBuffer ibuf; ibOffset = (uint32_t)frame->pushIndex->Push(decIndex, 2 * indexGen.VertexCount(), &ibuf); - // TODO: Avoid rebinding vertex/index buffers if the vertex size stays the same by using the offset arguments + // TODO (maybe): Avoid rebinding vertex/index buffers if the vertex size stays the same by using the offset arguments. + // Not sure if actually worth it, binding buffers should be fast. vkCmdBindVertexBuffers(cmd, 0, 1, &vbuf, offsets); vkCmdBindIndexBuffer(cmd, ibuf, ibOffset, VK_INDEX_TYPE_UINT16); int numInstances = (gstate_c.bezier || gstate_c.spline) ? numPatches : 1; @@ -888,11 +919,14 @@ void DrawEngineVulkan::DoFlush() { } gpuStats.numDrawCalls += numDrawCalls; - gpuStats.numVertsSubmitted += vertexCountInDrawCalls; + gpuStats.numVertsSubmitted += vertexCountInDrawCalls_; indexGen.Reset(); + decodedVerts_ = 0; numDrawCalls = 0; - vertexCountInDrawCalls = 0; + vertexCountInDrawCalls_ = 0; + decodeCounter_ = 0; + dcid_ = 0; prevPrim_ = GE_PRIM_INVALID; gstate_c.vertexFullAlpha = true; framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason); diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h index a2d809d47a..8081ae3387 100644 --- a/GPU/Vulkan/DrawEngineVulkan.h +++ b/GPU/Vulkan/DrawEngineVulkan.h @@ -195,6 +195,7 @@ private: // Vertex collector state IndexGenerator indexGen; + int decodedVerts_ = 0; GEPrimitiveType prevPrim_; TransformedVertex *transformed = nullptr; @@ -220,14 +221,18 @@ private: VkSampler sampler; // Null texture - VulkanTexture *nullTexture_; - VkSampler nullSampler_; + VulkanTexture *nullTexture_ = nullptr; + VkSampler nullSampler_ = VK_NULL_HANDLE; DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS]; - int numDrawCalls; - 
int vertexCountInDrawCalls; + int numDrawCalls = 0; + int vertexCountInDrawCalls_ = 0; UVScale uvScale[MAX_DEFERRED_DRAW_CALLS]; + int decimationCounter_ = 0; + int decodeCounter_ = 0; + u32 dcid_ = 0; // Running hash of the deferred draw calls; DoFlush() resets it to 0. + DrawEngineVulkanStats stats_; VulkanPipelineRasterStateKey pipelineKey_{}; diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 7fdbcbc694..bada57a1e8 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -198,12 +198,12 @@ void GPU_Vulkan::CheckGPUFeatures() { void GPU_Vulkan::BeginHostFrame() { drawEngine_.BeginFrame(); + UpdateCmdInfo(); if (resized_) { CheckGPUFeatures(); // In case the GPU changed. BuildReportingInfo(); - UpdateCmdInfo(); framebufferManager_->Resized(); drawEngine_.Resized(); textureCacheVulkan_->NotifyConfigChanged(); @@ -342,14 +342,13 @@ void GPU_Vulkan::UpdateVsyncInterval(bool force) { } void GPU_Vulkan::UpdateCmdInfo() { - /* if (g_Config.bSoftwareSkinning) { cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE; cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPU_Vulkan::Execute_VertexTypeSkinning; - } else {*/ + } else { cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE; cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPU_Vulkan::Execute_VertexType; - // } + } } void GPU_Vulkan::BeginFrameInternal() { @@ -539,6 +538,24 @@ void GPU_Vulkan::Execute_VertexType(u32 op, u32 diff) { } } +void GPU_Vulkan::Execute_VertexTypeSkinning(u32 op, u32 diff) { + // Don't flush when weight count changes, unless morph is enabled. + if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (op & GE_VTYPE_MORPHCOUNT_MASK) != 0) { + // Restore and flush + gstate.vertType ^= diff; + Flush(); + gstate.vertType ^= diff; + if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) + gstate_c.Dirty(DIRTY_UVSCALEOFFSET); + // In this case, we may be doing weights and morphs. + // Update any bone matrix uniforms so it uses them correctly. 
+ if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) { + gstate_c.Dirty(gstate_c.deferredVertTypeDirty); + gstate_c.deferredVertTypeDirty = 0; + } + } +} + void GPU_Vulkan::Execute_Bezier(u32 op, u32 diff) { // We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier. gstate_c.Dirty(DIRTY_UVSCALEOFFSET); diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h index 7cdb665054..b056cb02e2 100644 --- a/GPU/Vulkan/GPU_Vulkan.h +++ b/GPU/Vulkan/GPU_Vulkan.h @@ -77,6 +77,7 @@ public: void Execute_Bezier(u32 op, u32 diff); void Execute_Spline(u32 op, u32 diff); void Execute_VertexType(u32 op, u32 diff); + void Execute_VertexTypeSkinning(u32 op, u32 diff); void Execute_TexSize0(u32 op, u32 diff); void Execute_LoadClut(u32 op, u32 diff); void Execute_BoneMtxNum(u32 op, u32 diff);