From a0ac2dffc7496fd3ffe668c83546ef3d482496b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 1 Aug 2022 23:55:58 +0200 Subject: [PATCH 1/7] Defer depth copies until depth buffer is actually used. Fixes --- GPU/Common/FramebufferManagerCommon.cpp | 54 ++++++++++++++----------- GPU/Common/FramebufferManagerCommon.h | 2 + GPU/GPUCommon.cpp | 10 +++++ GPU/GPUState.h | 3 ++ 4 files changed, 45 insertions(+), 24 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index e25c0b9a35..eeae895d53 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -377,6 +377,11 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame // // We use a special fragment shader flag to convert color to depth. vfb = GetLatestDepthBufferAt(params.fb_address /* !!! */, params.fb_stride); + if (vfb) { + vfb->depthBindSeq = GetBindSeqCount(); + } + // Avoid causing another depth copy on top. + gstate_c.usingDepth = true; } gstate_c.SetFramebufferRenderMode(mode); @@ -416,6 +421,8 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame // None found? Create one. if (!vfb) { + gstate_c.usingDepth = false; // reset depth buffer tracking + vfb = new VirtualFramebuffer{}; vfb->fbo = nullptr; vfb->fb_address = params.fb_address; @@ -442,17 +449,6 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame ResizeFramebufFBO(vfb, drawing_width, drawing_height, true); NotifyRenderFramebufferCreated(vfb); - // Looks up by z_address, so if one is found here and not have last pointers equal to this one, - // there is another one. - VirtualFramebuffer *prevDepth = GetLatestDepthBufferAt(vfb->z_address, vfb->z_stride); - - // We might already want to copy depth, in case this is a temp buffer. See #7810. - if (prevDepth != vfb) { - if (!params.isClearingDepth && prevDepth) { - BlitFramebufferDepth(prevDepth, vfb); - } - } - SetColorUpdated(vfb, skipDrawReason); INFO_LOG(FRAMEBUF, "Creating FBO for %08x (z: %08x) : %d x %d x %s", vfb->fb_address, vfb->z_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->format)); @@ -510,18 +506,19 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame VirtualFramebuffer *prev = currentRenderVfb_; currentRenderVfb_ = vfb; NotifyRenderFramebufferSwitched(prev, vfb, params.isClearingDepth); + gstate_c.usingDepth = false; // reset depth buffer tracking } else { + // Something changed, but we still got the same framebuffer we were already rendering to. + // Might not be a lot to do here, we check in NotifyRenderFramebufferUpdated vfb->last_frame_render = gpuStats.numFlips; frameLastFramebufUsed_ = gpuStats.numFlips; vfb->dirtyAfterDisplay = true; if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0) vfb->reallyDirtyAfterDisplay = true; - NotifyRenderFramebufferUpdated(vfb, vfbFormatChanged); } vfb->colorBindSeq = GetBindSeqCount(); - vfb->depthBindSeq = GetBindSeqCount(); gstate_c.curRTWidth = vfb->width; gstate_c.curRTHeight = vfb->height; @@ -530,6 +527,26 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame return vfb; } +// Called on the first use of depth in a render pass. +void FramebufferManagerCommon::SetDepthFrameBuffer() { + if (!currentRenderVfb_) { + return; + } + + // Looks up by z_address, so if one is found here and not have last pointers equal to this one, + // there is another one. + VirtualFramebuffer *prevDepth = GetLatestDepthBufferAt(currentRenderVfb_->z_address, currentRenderVfb_->z_stride); + + if (prevDepth != currentRenderVfb_) { + if (!gstate_c.clearingDepth && prevDepth) { + BlitFramebufferDepth(prevDepth, currentRenderVfb_); + } + prevDepth = currentRenderVfb_; + } + + currentRenderVfb_->depthBindSeq = GetBindSeqCount(); +} + void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) { // Notify the texture cache of both the color and depth buffers. textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED); @@ -669,17 +686,6 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe textureCache_->ForgetLastTexture(); shaderManager_->DirtyLastShader(); - // Copy depth between the framebuffers, if the z_address is the same (checked inside.) - VirtualFramebuffer * prevDepth = GetLatestDepthBufferAt(vfb->z_address, vfb->z_stride); - - // We might already want to copy depth, in case this is a temp buffer. See #7810. - if (prevDepth != vfb) { - if (!isClearingDepth && prevDepth) { - BlitFramebufferDepth(prevDepth, vfb); - } - prevDepth = vfb; - } - if (vfb->drawnFormat != vfb->format) { ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->format); } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index e592ed7b87..87a4e577cd 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -266,6 +266,8 @@ public: return vfb; } } + void SetDepthFrameBuffer(); + void RebindFramebuffer(const char *tag); std::vector GetFramebufferList() const; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index a0deb403aa..d751291a9b 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1685,6 +1685,16 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { return; } + if (!gstate_c.usingDepth) { + bool isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask();; + + if ((gstate.isDepthTestEnabled() || isClearingDepth)) { + gstate_c.usingDepth = true; + gstate_c.clearingDepth = isClearingDepth; + framebufferManager_->SetDepthFrameBuffer(); + } + } + const void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr); const void *inds = nullptr; u32 vertexType = gstate.vertType; diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 8d6d04039b..3f30be89f7 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -572,6 +572,9 @@ struct GPUStateCache { uint64_t dirty; + bool usingDepth; // For deferred depth copies. + bool clearingDepth; + bool textureFullAlpha; bool vertexFullAlpha; From 886679c7ec71a7adb89e7ce9c2d21218b15e2df1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 17 Aug 2022 11:31:02 +0200 Subject: [PATCH 2/7] Remove the color-to-depth mode --- GPU/Common/FragmentShaderGenerator.cpp | 29 ++-------------------- GPU/Common/FramebufferManagerCommon.cpp | 33 ------------------------- GPU/Common/GPUStateUtils.cpp | 10 -------- GPU/Common/ShaderId.cpp | 5 ---- GPU/Common/ShaderId.h | 1 - GPU/Common/TextureCacheCommon.cpp | 9 ------- GPU/D3D11/StateMappingD3D11.cpp | 9 +------ GPU/Directx9/StateMappingDX9.cpp | 9 +------ GPU/GLES/StateMappingGLES.cpp | 6 +---- GPU/GPU.h | 5 ---- GPU/GPUState.h | 11 --------- GPU/Vulkan/StateMappingVulkan.cpp | 9 +------ 12 files changed, 6 insertions(+), 130 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index ad3f22af8c..f956217b38 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -90,7 +90,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL) && !texture3D; // combination with texture3D not supported. Enforced elsewhere too. bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE); bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps; - bool colorToDepth = id.Bit(FS_BIT_COLOR_TO_DEPTH); GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3); GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2); @@ -123,7 +122,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH); bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); - bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT) || colorToDepth; + bool writeDepth = gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); if (shaderDepal && !doTexture) { *errorString = "depal requires a texture"; @@ -136,11 +135,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } if (compat.shaderLanguage == ShaderLanguage::GLSL_VULKAN) { - if (colorToDepth) { - WRITE(p, "precision highp int;\n"); - WRITE(p, "precision highp float;\n"); - } - if (useDiscardStencilBugWorkaround && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) { WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n"); } @@ -293,7 +287,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, "};\n"); } } else if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) { - if ((shaderDepal || colorWriteMask || colorToDepth) && gl_extensions.IsGLES) { + if ((shaderDepal || colorWriteMask) && gl_extensions.IsGLES) { WRITE(p, "precision highp int;\n"); } @@ -461,9 +455,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, "PS_OUT main( PS_IN In ) {\n"); WRITE(p, " PS_OUT outfragment;\n"); WRITE(p, " vec4 target;\n"); - if (colorToDepth) { - WRITE(p, " float gl_FragDepth;\n"); - } } else { WRITE(p, "void main() {\n"); } @@ -1070,22 +1061,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, " %s = vec4(0.0, 0.0, 0.0, %s.z); // blue to alpha\n", compat.fragColor0, compat.fragColor0); } - if (colorToDepth) { - DepthScaleFactors factors = GetDepthScaleFactors(); - - if (compat.bitwiseOps) { - WRITE(p, " highp float depthValue = float(int(%s.x * 31.99) | (int(%s.y * 63.99) << 5) | (int(%s.z * 31.99) << 11)) / 65535.0;\n", "v", "v", "v"); // compat.fragColor0, compat.fragColor0, compat.fragColor0); - } else { - // D3D9-compatible alternative - WRITE(p, " highp float depthValue = (floor(%s.x * 31.99) + floor(%s.y * 63.99) * 32.0 + floor(%s.z * 31.99) * 2048.0) / 65535.0;\n", "v", "v", "v"); // compat.fragColor0, compat.fragColor0, compat.fragColor0); - } - if (factors.scale != 1.0 || factors.offset != 0.0) { - WRITE(p, " gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale / 65535.0f, factors.offset); - } else { - WRITE(p, " gl_FragDepth = depthValue;\n"); - } - } - if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) { const double scale = DepthSliceFactor() * 65535.0; diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index eeae895d53..e0b05f76c7 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -280,8 +280,6 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame WARN_LOG_ONCE(color_equal_z, G3D, "Framebuffer bound with color addr == z addr, likely will not use Z in this pass: %08x", params.fb_address); } - RasterMode mode = RASTER_MODE_NORMAL; - // Find a matching framebuffer VirtualFramebuffer *vfb = nullptr; for (size_t i = 0; i < vfbs_.size(); ++i) { @@ -318,19 +316,6 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame vfb->height = drawing_height; } break; - } else if (params.fb_address == v->z_address && params.fmt != GE_FORMAT_8888 && params.fb_stride == v->z_stride && !params.isBlending) { - // Looks like the game might be intending to use color to write directly to a Z buffer. - // This is seen in Kuroyou 2. - - // Ignore this in this loop, BUT, we do a lookup in the depth tracking afterwards to - // make sure we get the latest one. - WARN_LOG_ONCE(color_matches_z, G3D, "Color framebuffer bound at %08x with likely intent to write explicit Z values using color. fmt = %s", params.fb_address, GeBufferFormatToString(params.fmt)); - // Seems impractical to use the other 16-bit formats for this due to the limited control over alpha, - // so we'll simply only support 565. - if (params.fmt == GE_FORMAT_565) { - mode = RASTER_MODE_COLOR_TO_DEPTH; - break; - } } else if (v->fb_stride == params.fb_stride && v->format == params.fmt) { u32 v_fb_first_line_end_ptr = v->fb_address + v->fb_stride * 4; // This should be * bpp, but leaving like this until after 1.13 to be safe. The God of War games use this for shadows. u32 v_fb_end_ptr = v->fb_address + v->fb_stride * v->height * bpp; @@ -368,24 +353,6 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame } } - if (mode == RASTER_MODE_COLOR_TO_DEPTH) { - // Lookup in the depth tracking to find which VFB has the latest version of this Z buffer. - // Then bind it in color-to-depth mode. - // - // We do this by having a special render mode where we take color and move to - // depth in the fragment shader, and set color writes to off. - // - // We use a special fragment shader flag to convert color to depth. - vfb = GetLatestDepthBufferAt(params.fb_address /* !!! */, params.fb_stride); - if (vfb) { - vfb->depthBindSeq = GetBindSeqCount(); - } - // Avoid causing another depth copy on top. - gstate_c.usingDepth = true; - } - - gstate_c.SetFramebufferRenderMode(mode); - if (vfb) { if ((drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight)) { // Even if it's not newly wrong, if this is larger we need to resize up. diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index e86c056c5a..51f0e3f8eb 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -1018,16 +1018,6 @@ void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead) { return; } - if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) { - // Suppress color writes entirely in this mode. - maskState.applyFramebufferRead = false; - maskState.rgba[0] = false; - maskState.rgba[1] = false; - maskState.rgba[2] = false; - maskState.rgba[3] = false; - return; - } - // Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw. uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24)); diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 1176200799..510e90e929 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -240,8 +240,6 @@ std::string FragmentShaderDesc(const FShaderID &id) { if (id.Bit(FS_BIT_COLOR_AGAINST_ZERO)) desc << "ColorTest0 " << alphaTestFuncs[id.Bits(FS_BIT_COLOR_TEST_FUNC, 2)] << " "; // first 4 match; else if (id.Bit(FS_BIT_COLOR_TEST)) desc << "ColorTest " << alphaTestFuncs[id.Bits(FS_BIT_COLOR_TEST_FUNC, 2)] << " "; // first 4 match - if (id.Bit(FS_BIT_COLOR_TO_DEPTH)) desc << "ColorToDepth "; - return desc.str(); } @@ -264,7 +262,6 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT; bool useShaderDepal = gstate_c.useShaderDepal; bool colorWriteMask = IsColorWriteMaskComplex(gstate_c.allowFramebufferRead); - bool colorToDepth = gstate_c.renderMode == RasterMode::RASTER_MODE_COLOR_TO_DEPTH; // Note how we here recompute some of the work already done in state mapping. // Not ideal! At least we share the code. @@ -296,8 +293,6 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) { id.SetBit(FS_BIT_3D_TEXTURE, gstate_c.curTextureIs3D); } - id.SetBit(FS_BIT_COLOR_TO_DEPTH, colorToDepth); - id.SetBit(FS_BIT_LMODE, lmode); if (enableAlphaTest) { // 5 bits total. diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index bcdf4f723c..8dcee32c1e 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -94,7 +94,6 @@ enum FShaderBit : uint8_t { FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49, FS_BIT_COLOR_WRITEMASK = 50, FS_BIT_3D_TEXTURE = 51, - FS_BIT_COLOR_TO_DEPTH = 52, }; static inline FShaderBit operator +(FShaderBit bit, int i) { diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 7f1c86feb3..488aa86d30 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -265,10 +265,6 @@ SamplerCacheKey TextureCacheCommon::GetSamplingParams(int maxLevel, const TexCac } } - if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) { - forceFiltering = TEX_FILTER_FORCE_NEAREST; - } - switch (forceFiltering) { case TEX_FILTER_AUTO: break; @@ -2247,11 +2243,6 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt plan.scaleFactor = 1; } - // Don't upscale textures in color-to-depth mode. - if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) { - plan.scaleFactor = 1; - } - if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) != 0 && plan.scaleFactor != 1 && plan.slowScaler) { // Remember for later that we /wanted/ to scale this texture. entry->status |= TexCacheEntry::STATUS_TO_SCALE; diff --git a/GPU/D3D11/StateMappingD3D11.cpp b/GPU/D3D11/StateMappingD3D11.cpp index 79a169a8ab..da9ec068e6 100644 --- a/GPU/D3D11/StateMappingD3D11.cpp +++ b/GPU/D3D11/StateMappingD3D11.cpp @@ -293,14 +293,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) { GenericStencilFuncState stencilState; ConvertStencilFuncState(stencilState); - if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) { - // Enforce plain depth writing. - keys_.depthStencil.value = 0; - keys_.depthStencil.depthTestEnable = true; - keys_.depthStencil.depthWriteEnable = true; - keys_.depthStencil.stencilTestEnable = false; - keys_.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS; - } else if (gstate.isModeClear()) { + if (gstate.isModeClear()) { keys_.depthStencil.value = 0; keys_.depthStencil.depthTestEnable = true; keys_.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS; diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index 0de9c1774d..3d06b380e0 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ b/GPU/Directx9/StateMappingDX9.cpp @@ -211,14 +211,7 @@ void DrawEngineDX9::ApplyDrawState(int prim) { ConvertStencilFuncState(stencilState); // Set Stencil/Depth - - if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) { - // Enforce plain depth writing. - dxstate.depthTest.enable(); - dxstate.depthFunc.set(D3DCMP_ALWAYS); - dxstate.depthWrite.set(true); - dxstate.stencilTest.disable(); - } else if (gstate.isModeClear()) { + if (gstate.isModeClear()) { // Depth Test dxstate.depthTest.enable(); dxstate.depthFunc.set(D3DCMP_ALWAYS); diff --git a/GPU/GLES/StateMappingGLES.cpp b/GPU/GLES/StateMappingGLES.cpp index 54c94ccbd3..6e498875f1 100644 --- a/GPU/GLES/StateMappingGLES.cpp +++ b/GPU/GLES/StateMappingGLES.cpp @@ -251,11 +251,7 @@ void DrawEngineGLES::ApplyDrawState(int prim) { GenericStencilFuncState stencilState; ConvertStencilFuncState(stencilState); - if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) { - // Enforce plain depth writing. - renderManager->SetStencilDisabled(); - renderManager->SetDepth(true, true, GL_ALWAYS); - } else if (gstate.isModeClear()) { + if (gstate.isModeClear()) { // Depth Test if (gstate.isClearModeDepthMask()) { framebufferManager_->SetDepthUpdated(); diff --git a/GPU/GPU.h b/GPU/GPU.h index ecb9ae33c4..18af975ae8 100644 --- a/GPU/GPU.h +++ b/GPU/GPU.h @@ -24,11 +24,6 @@ class GPUInterface; class GPUDebugInterface; class GraphicsContext; -enum RasterMode { - RASTER_MODE_NORMAL = 0, - RASTER_MODE_COLOR_TO_DEPTH = 1, -}; - // PSP rasterization has two outputs, color and depth. Stencil is packed // into the alpha channel of color (if exists), so possibly RASTER_COLOR // should be named RASTER_COLOR_STENCIL but it gets kinda hard to read. diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 3f30be89f7..6aaa79a8fd 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -555,14 +555,6 @@ struct GPUStateCache { Dirty(DIRTY_FRAGMENTSHADER_STATE | (is3D ? DIRTY_MIPBIAS : 0)); } } - void SetFramebufferRenderMode(RasterMode mode) { - if (mode != renderMode) { - // This mode modifies the fragment shader to write depth, the depth state to write without testing, and the blend state to write nothing to color. - // So we need to re-evaluate those states. - Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_TEXTURE_PARAMS); - renderMode = mode; - } - } u32 featureFlags; @@ -616,9 +608,6 @@ struct GPUStateCache { // We detect this case and go into a special drawing mode. bool blueToAlpha; - // Some games try to write to the Z buffer using color. Catch that and actually do the writes to the Z buffer instead. - RasterMode renderMode; - // TODO: These should be accessed from the current VFB object directly. u32 curRTWidth; u32 curRTHeight; diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index 6aa176c864..166f3dc153 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -250,14 +250,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag GenericStencilFuncState stencilState; ConvertStencilFuncState(stencilState); - if (gstate_c.renderMode == RASTER_MODE_COLOR_TO_DEPTH) { - // Enforce plain depth writing. - key.depthTestEnable = true; - key.depthWriteEnable = true; - key.stencilTestEnable = false; - key.depthCompareOp = VK_COMPARE_OP_ALWAYS; - key.depthClampEnable = false; - } else if (gstate.isModeClear()) { + if (gstate.isModeClear()) { key.depthTestEnable = true; key.depthCompareOp = VK_COMPARE_OP_ALWAYS; key.depthWriteEnable = gstate.isClearModeDepthMask(); From 94ade8c75142fea41eda38089318457cdbf47ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 17 Aug 2022 12:11:00 +0200 Subject: [PATCH 3/7] Extract depth copies to CopyToDepthFromOverlappingFramebuffers --- GPU/Common/FramebufferManagerCommon.cpp | 63 +++++++++++++++++++++++-- GPU/Common/FramebufferManagerCommon.h | 2 + 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index e0b05f76c7..0452b529a1 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -416,6 +416,10 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame ResizeFramebufFBO(vfb, drawing_width, drawing_height, true); NotifyRenderFramebufferCreated(vfb); + if (!params.isClearingDepth) { + CopyToDepthFromOverlappingFramebuffers(vfb); + } + SetColorUpdated(vfb, skipDrawReason); INFO_LOG(FRAMEBUF, "Creating FBO for %08x (z: %08x) : %d x %d x %s", vfb->fb_address, vfb->z_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->format)); @@ -459,7 +463,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame } } - // We already have it! + // We already have it! } else if (vfb != currentRenderVfb_) { // Use it as a render target. DEBUG_LOG(FRAMEBUF, "Switching render target to FBO for %08x: %d x %d x %d ", vfb->fb_address, vfb->width, vfb->height, vfb->format); @@ -514,6 +518,54 @@ void FramebufferManagerCommon::SetDepthFrameBuffer() { currentRenderVfb_->depthBindSeq = GetBindSeqCount(); } +void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFramebuffer *dest) { + struct CopySource { + VirtualFramebuffer *vfb; + RasterChannel channel; + + int seq() const { + return channel == RASTER_DEPTH ? vfb->depthBindSeq : vfb->colorBindSeq; + } + + bool operator < (const CopySource &other) const { + return seq() < other.seq(); + } + }; + + std::vector sources; + for (auto src: vfbs_) { + if (src == dest) + continue; + + if (src->fb_address == dest->z_address && src->fb_stride == dest->z_stride && src->format == GE_FORMAT_565) { + if (src->colorBindSeq > dest->depthBindSeq) { + // Source has older data than the current buffer, ignore. + continue; + } + sources.push_back(CopySource{ src, RASTER_COLOR }); + } else if (src->z_address == dest->z_address && src->z_stride == dest->z_stride && src->depthBindSeq > dest->depthBindSeq) { + sources.push_back(CopySource{ src, RASTER_DEPTH }); + } else { + // TODO: Do more detailed overlap checks here. + } + } + + // TODO: A full depth copy will overwrite anything else. So we can eliminate + // anything that comes before such a copy. + + for (auto &source : sources) { + if (source.channel == RASTER_DEPTH) { + // Good old depth->depth copy. + BlitFramebufferDepth(source.vfb, dest); + + gpuStats.numDepthCopies++; + dest->last_frame_depth_updated = gpuStats.numFlips; + } + } + + gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE); +} + void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) { // Notify the texture cache of both the color and depth buffers. textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED); @@ -588,9 +640,6 @@ void FramebufferManagerCommon::BlitFramebufferDepth(VirtualFramebuffer *src, Vir } draw_->InvalidateCachedState(); - - gpuStats.numDepthCopies++; - dst->last_frame_depth_updated = gpuStats.numFlips; } VirtualFramebuffer *FramebufferManagerCommon::GetLatestDepthBufferAt(u32 z_address, u16 z_stride) { @@ -653,6 +702,12 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe textureCache_->ForgetLastTexture(); shaderManager_->DirtyLastShader(); + if (!isClearingDepth) { + // TODO: We should do this as part of the bind below. Then we can optimize the RPAction properly, + // and also do the copies using raster. + CopyToDepthFromOverlappingFramebuffers(vfb); + } + if (vfb->drawnFormat != vfb->format) { ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->format); } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 87a4e577cd..901e821590 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -386,6 +386,8 @@ protected: void Ensure2DResources(); Draw::Pipeline *Create2DPipeline(RasterChannel (*generate)(ShaderWriter &)); + void CopyToDepthFromOverlappingFramebuffers(VirtualFramebuffer *dest); + bool UpdateSize(); void FlushBeforeCopy(); From 5d8fe4c2a86faaf10062c8e470483b8a5ba1d99c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 17 Aug 2022 14:28:34 +0200 Subject: [PATCH 4/7] Implement copying color to depth on load. However, weird stuff is happening.. --- GPU/Common/Draw2D.cpp | 38 +++++++++++-- GPU/Common/Draw2D.h | 19 +++++++ GPU/Common/FramebufferManagerCommon.cpp | 76 ++++++++++--------------- GPU/Common/FramebufferManagerCommon.h | 7 +-- 4 files changed, 85 insertions(+), 55 deletions(-) diff --git a/GPU/Common/Draw2D.cpp b/GPU/Common/Draw2D.cpp index 7170aeb5f8..d9d0c5bc93 100644 --- a/GPU/Common/Draw2D.cpp +++ b/GPU/Common/Draw2D.cpp @@ -25,6 +25,7 @@ #include "GPU/Common/DrawEngineCommon.h" #include "GPU/Common/FramebufferManagerCommon.h" #include "GPU/Common/TextureCacheCommon.h" +#include "GPU/Common/GPUStateUtils.h" static const InputDef inputs[2] = { { "vec2", "a_position", Draw::SEM_POSITION }, @@ -58,6 +59,24 @@ RasterChannel GenerateDraw2DDepthFs(ShaderWriter &writer) { return RASTER_DEPTH; } +RasterChannel GenerateDraw2D565ToDepthFs(ShaderWriter &writer) { + writer.DeclareSamplers(samplers); + writer.BeginFSMain(Slice::empty(), varyings, FSFLAG_WRITEDEPTH); + writer.C(" vec4 outColor = vec4(0.0, 0.0, 0.0, 0.0);\n"); + // Unlike when just copying a depth buffer, here we're generating new depth values so we'll + // have to apply the scaling. + DepthScaleFactors factors = GetDepthScaleFactors(); + writer.C(" vec3 rgb = ").SampleTexture2D("tex", "v_texcoord.xy").C(".xyz;\n"); + writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0) / 65535.0; \n"); + if (factors.scale != 1.0 || factors.offset != 0.0) { + writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale / 65535.0f, factors.offset); + } else { + writer.C(" gl_FragDepth = depthValue;\n"); + } + writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH); + return RASTER_DEPTH; +} + void GenerateDraw2DVS(ShaderWriter &writer) { writer.BeginVSMain(inputs, Slice::empty(), varyings); @@ -159,22 +178,22 @@ Draw::Pipeline *FramebufferManagerCommon::Create2DPipeline(RasterChannel (*gener return pipeline; } -void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, RasterChannel channel) { +void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader shader) { using namespace Draw; Ensure2DResources(); const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc(); - switch (channel) { - case RASTER_COLOR: + switch (shader) { + case DRAW2D_COPY_COLOR: if (!draw2DPipelineColor_) { draw2DPipelineColor_ = Create2DPipeline(&GenerateDraw2DFs); } draw_->BindPipeline(draw2DPipelineColor_); break; - case RASTER_DEPTH: + case DRAW2D_COPY_DEPTH: if (!draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) { // Can't do it return; @@ -184,6 +203,17 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver } draw_->BindPipeline(draw2DPipelineDepth_); break; + + case DRAW2D_565_TO_DEPTH: + if (!draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) { + // Can't do it + return; + } + if (!draw2DPipeline565ToDepth_) { + draw2DPipeline565ToDepth_ = Create2DPipeline(&GenerateDraw2D565ToDepthFs); + } + draw_->BindPipeline(draw2DPipeline565ToDepth_); + break; } if (tex) { diff --git a/GPU/Common/Draw2D.h b/GPU/Common/Draw2D.h index 34c171e7a0..fee2d4cef3 100644 --- a/GPU/Common/Draw2D.h +++ b/GPU/Common/Draw2D.h @@ -1,5 +1,7 @@ #pragma once +#include "GPU/GPU.h" + // For framebuffer copies and similar things that just require passthrough. struct Draw2DVertex { float x; @@ -7,3 +9,20 @@ struct Draw2DVertex { float u; float v; }; + +enum Draw2DShader { + DRAW2D_COPY_COLOR, + DRAW2D_COPY_DEPTH, + DRAW2D_565_TO_DEPTH, +}; + +inline RasterChannel Draw2DSourceChannel(Draw2DShader shader) { + switch (shader) { + case DRAW2D_COPY_DEPTH: + return RASTER_DEPTH; + case DRAW2D_COPY_COLOR: + case DRAW2D_565_TO_DEPTH: + default: + return RASTER_COLOR; + } +} diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 0452b529a1..b07bb171ba 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -416,9 +416,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame ResizeFramebufFBO(vfb, drawing_width, drawing_height, true); NotifyRenderFramebufferCreated(vfb); - if (!params.isClearingDepth) { - CopyToDepthFromOverlappingFramebuffers(vfb); - } + // Note that we do not even think about depth right now. SetColorUpdated(vfb, skipDrawReason); @@ -504,16 +502,8 @@ void FramebufferManagerCommon::SetDepthFrameBuffer() { return; } - // Looks up by z_address, so if one is found here and not have last pointers equal to this one, - // there is another one. - VirtualFramebuffer *prevDepth = GetLatestDepthBufferAt(currentRenderVfb_->z_address, currentRenderVfb_->z_stride); - - if (prevDepth != currentRenderVfb_) { - if (!gstate_c.clearingDepth && prevDepth) { - BlitFramebufferDepth(prevDepth, currentRenderVfb_); - } - prevDepth = currentRenderVfb_; - } + // "Resolve" the depth buffer, by copying from any overlapping buffers with fresher content. + CopyToDepthFromOverlappingFramebuffers(currentRenderVfb_); currentRenderVfb_->depthBindSeq = GetBindSeqCount(); } @@ -538,11 +528,10 @@ void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFra continue; if (src->fb_address == dest->z_address && src->fb_stride == dest->z_stride && src->format == GE_FORMAT_565) { - if (src->colorBindSeq > dest->depthBindSeq) { - // Source has older data than the current buffer, ignore. - continue; + if (src->colorBindSeq < dest->depthBindSeq) { + // Source has newer data than the current buffer, use it. + sources.push_back(CopySource{ src, RASTER_COLOR }); } - sources.push_back(CopySource{ src, RASTER_COLOR }); } else if (src->z_address == dest->z_address && src->z_stride == dest->z_stride && src->depthBindSeq > dest->depthBindSeq) { sources.push_back(CopySource{ src, RASTER_DEPTH }); } else { @@ -550,16 +539,25 @@ void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFra } } - // TODO: A full depth copy will overwrite anything else. So we can eliminate + std::sort(sources.begin(), sources.end()); + + // TODO: A full copy will overwrite anything else. So we can eliminate // anything that comes before such a copy. - for (auto &source : sources) { + // For now, let's just do the last thing, if there are multiple. + + // for (auto &source : sources) { + if (sources.size()) { + auto &source = sources.back(); if (source.channel == RASTER_DEPTH) { // Good old depth->depth copy. BlitFramebufferDepth(source.vfb, dest); - gpuStats.numDepthCopies++; dest->last_frame_depth_updated = gpuStats.numFlips; + } else if (source.channel == RASTER_COLOR) { + VirtualFramebuffer *src = source.vfb; + // Copying color to depth. + BlitUsingRaster(src->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight, dest->fbo, 0.0f, 0.0f, dest->renderWidth, dest->renderHeight, false, DRAW2D_565_TO_DEPTH, "565_to_depth"); } } @@ -636,24 +634,12 @@ void FramebufferManagerCommon::BlitFramebufferDepth(VirtualFramebuffer *src, Vir draw_->BlitFramebuffer(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, Draw::FB_DEPTH_BIT, Draw::FB_BLIT_NEAREST, "BlitFramebufferDepth"); RebindFramebuffer("After BlitFramebufferDepth"); } else if (useRaster) { - BlitUsingRaster(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, false, RasterChannel::RASTER_DEPTH); + BlitUsingRaster(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, false, Draw2DShader::DRAW2D_COPY_DEPTH, "BlitDepthRaster"); } draw_->InvalidateCachedState(); } -VirtualFramebuffer *FramebufferManagerCommon::GetLatestDepthBufferAt(u32 z_address, u16 z_stride) { - int maxSeq = -1; - VirtualFramebuffer *latestDepth = nullptr; - for (auto vfb : vfbs_) { - if (vfb->z_address == z_address && vfb->z_stride == z_stride && vfb->depthBindSeq > maxSeq) { - maxSeq = vfb->depthBindSeq; - latestDepth = vfb; - } - } - return latestDepth; -} - void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) { if (!useBufferedRendering_) { // Let's ignore rendering to targets that have not (yet) been displayed. @@ -702,12 +688,6 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe textureCache_->ForgetLastTexture(); shaderManager_->DirtyLastShader(); - if (!isClearingDepth) { - // TODO: We should do this as part of the bind below. Then we can optimize the RPAction properly, - // and also do the copies using raster. - CopyToDepthFromOverlappingFramebuffers(vfb); - } - if (vfb->drawnFormat != vfb->format) { ReinterpretFramebuffer(vfb, vfb->drawnFormat, vfb->format); } @@ -2355,6 +2335,7 @@ void FramebufferManagerCommon::DeviceLost() { DoRelease(draw2DVs_); DoRelease(draw2DPipelineColor_); DoRelease(draw2DPipelineDepth_); + DoRelease(draw2DPipeline565ToDepth_); draw_ = nullptr; } @@ -2412,7 +2393,7 @@ void FramebufferManagerCommon::DrawActiveTexture(float x, float y, float w, floa // Rearrange to strip form. std::swap(coord[2], coord[3]); - DrawStrip2D(nullptr, coord, 4, (flags & DRAWTEX_LINEAR) != 0, RASTER_COLOR); + DrawStrip2D(nullptr, coord, 4, (flags & DRAWTEX_LINEAR) != 0, DRAW2D_COPY_COLOR); gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); } @@ -2508,13 +2489,14 @@ void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX draw_->BlitFramebuffer(src->fbo, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, Draw::FB_BLIT_NEAREST, tag); } else { + Draw2DShader shader = channel == RASTER_COLOR ? DRAW2D_COPY_COLOR : DRAW2D_COPY_DEPTH; Draw::Framebuffer *srcFBO = src->fbo; if (src == dst) { Draw::Framebuffer *tempFBO = GetTempFBO(TempFBO::BLIT, src->renderWidth, src->renderHeight); - BlitUsingRaster(src->fbo, srcX1, srcY1, srcX2, srcY2, tempFBO, dstX1, dstY1, dstX2, dstY2, false, channel); + BlitUsingRaster(src->fbo, srcX1, srcY1, srcX2, srcY2, tempFBO, dstX1, dstY1, dstX2, dstY2, false, shader, tag); srcFBO = tempFBO; } - BlitUsingRaster(srcFBO, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2, false, channel); + BlitUsingRaster(srcFBO, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2, false, shader, tag); } draw_->InvalidateCachedState(); @@ -2526,9 +2508,9 @@ void FramebufferManagerCommon::BlitUsingRaster( Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2, Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2, bool linearFilter, - RasterChannel channel) { + Draw2DShader shader, const char *tag) { - if (channel == RASTER_DEPTH) { + if (shader == DRAW2D_COPY_DEPTH || shader == DRAW2D_565_TO_DEPTH) { _dbg_assert_(draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported); } @@ -2550,13 +2532,13 @@ void FramebufferManagerCommon::BlitUsingRaster( // Unbind the texture first to avoid the D3D11 hazard check (can't set render target to things bound as textures and vice versa, not even temporarily). draw_->BindTexture(0, nullptr); // This will get optimized away in case it's already bound (in VK and GL at least..) - draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "BlitUsingRaster"); - draw_->BindFramebufferAsTexture(src, 0, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, 0); + draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag ? tag : "BlitUsingRaster"); + draw_->BindFramebufferAsTexture(src, 0, Draw2DSourceChannel(shader) == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, 0); Draw::Viewport vp{ 0.0f, 0.0f, (float)dest->Width(), (float)dest->Height(), 0.0f, 1.0f }; draw_->SetViewports(1, &vp); draw_->SetScissorRect(0, 0, (int)dest->Width(), (int)dest->Height()); - DrawStrip2D(nullptr, vtx, 4, linearFilter, channel); + DrawStrip2D(nullptr, vtx, 4, linearFilter, shader); gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 901e821590..7fca956632 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -290,8 +290,6 @@ public: void DownloadFramebufferForClut(u32 fb_address, u32 loadBytes); void DrawFramebufferToOutput(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride); - VirtualFramebuffer *GetLatestDepthBufferAt(u32 z_address, u16 z_stride); - void DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); size_t NumVFBs() const { return vfbs_.size(); } @@ -382,7 +380,7 @@ protected: Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height); void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags); - void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, RasterChannel channel); + void DrawStrip2D(Draw::Texture *tex, Draw2DVertex *verts, int vertexCount, bool linearFilter, Draw2DShader channel); void Ensure2DResources(); Draw::Pipeline *Create2DPipeline(RasterChannel (*generate)(ShaderWriter &)); @@ -398,7 +396,7 @@ protected: void BlitUsingRaster( Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2, - Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2, bool linearFilter, RasterChannel channel); + Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2, bool linearFilter, Draw2DShader shader, const char *tag); void CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags); @@ -517,6 +515,7 @@ protected: // Draw2D pipelines Draw::Pipeline *draw2DPipelineColor_ = nullptr; Draw::Pipeline *draw2DPipelineDepth_ = nullptr; + Draw::Pipeline *draw2DPipeline565ToDepth_ = nullptr; Draw::SamplerState *draw2DSamplerLinear_ = nullptr; Draw::SamplerState *draw2DSamplerNearest_ = nullptr; Draw::ShaderModule *draw2DVs_ = nullptr; From 97dbba0ec301fd5f003844e13e1252126c5172e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 17 Aug 2022 19:55:19 +0200 Subject: [PATCH 5/7] Minor fixes. Everything seems fine now. --- GPU/Common/Draw2D.cpp | 10 ++++------ GPU/Common/FramebufferManagerCommon.cpp | 18 ++++++++++-------- GPU/Common/FramebufferManagerCommon.h | 3 ++- GPU/Common/TextureCacheCommon.cpp | 8 +------- GPU/Common/TextureCacheCommon.h | 12 ++++++------ 5 files changed, 23 insertions(+), 28 deletions(-) diff --git a/GPU/Common/Draw2D.cpp b/GPU/Common/Draw2D.cpp index d9d0c5bc93..a6b4abb68c 100644 --- a/GPU/Common/Draw2D.cpp +++ b/GPU/Common/Draw2D.cpp @@ -67,12 +67,8 @@ RasterChannel GenerateDraw2D565ToDepthFs(ShaderWriter &writer) { // have to apply the scaling. DepthScaleFactors factors = GetDepthScaleFactors(); writer.C(" vec3 rgb = ").SampleTexture2D("tex", "v_texcoord.xy").C(".xyz;\n"); - writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0) / 65535.0; \n"); - if (factors.scale != 1.0 || factors.offset != 0.0) { - writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale / 65535.0f, factors.offset); - } else { - writer.C(" gl_FragDepth = depthValue;\n"); - } + writer.F(" highp float depthValue = (floor(rgb.x * 31.99) + floor(rgb.y * 63.99) * 32.0 + floor(rgb.z * 31.99) * 2048.0); \n"); + writer.F(" gl_FragDepth = (depthValue / %f) + %f;\n", factors.scale, factors.offset); writer.EndFSMain("outColor", FSFLAG_WRITEDEPTH); return RASTER_DEPTH; } @@ -200,6 +196,7 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver } if (!draw2DPipelineDepth_) { draw2DPipelineDepth_ = Create2DPipeline(&GenerateDraw2DDepthFs); + linearFilter = false; } draw_->BindPipeline(draw2DPipelineDepth_); break; @@ -211,6 +208,7 @@ void FramebufferManagerCommon::DrawStrip2D(Draw::Texture *tex, Draw2DVertex *ver } if (!draw2DPipeline565ToDepth_) { draw2DPipeline565ToDepth_ = Create2DPipeline(&GenerateDraw2D565ToDepthFs); + linearFilter = false; } draw_->BindPipeline(draw2DPipeline565ToDepth_); break; diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index b07bb171ba..df2e09514c 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -405,7 +405,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame vfb->lastFrameNewSize = gpuStats.numFlips; vfb->format = params.fmt; vfb->drawnFormat = params.fmt; - vfb->usageFlags = FB_USAGE_RENDERTARGET; + vfb->usageFlags = FB_USAGE_RENDER_COLOR; u32 byteSize = ColorBufferByteSize(vfb); if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + byteSize > framebufRangeEnd_) { @@ -454,7 +454,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame } else if (vfbs_[i]->z_stride != 0 && params.z_address == vfbs_[i]->z_address && params.fb_address != vfbs_[i]->fb_address && !sharingReported) { // This happens a lot, but virtually always it's cleared. // It's possible the other might not clear, but when every game is reported it's not useful. - if (params.isWritingDepth) { + if (params.isWritingDepth && (vfbs_[i]->usageFlags & FB_USAGE_RENDER_DEPTH)) { WARN_LOG(SCEGE, "FBO reusing depthbuffer, c=%08x/d=%08x and c=%08x/d=%08x", params.fb_address, params.z_address, vfbs_[i]->fb_address, vfbs_[i]->z_address); sharingReported = true; } @@ -465,7 +465,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame } else if (vfb != currentRenderVfb_) { // Use it as a render target. DEBUG_LOG(FRAMEBUF, "Switching render target to FBO for %08x: %d x %d x %d ", vfb->fb_address, vfb->width, vfb->height, vfb->format); - vfb->usageFlags |= FB_USAGE_RENDERTARGET; + vfb->usageFlags |= FB_USAGE_RENDER_COLOR; vfb->last_frame_render = gpuStats.numFlips; frameLastFramebufUsed_ = gpuStats.numFlips; vfb->dirtyAfterDisplay = true; @@ -505,6 +505,7 @@ void FramebufferManagerCommon::SetDepthFrameBuffer() { // "Resolve" the depth buffer, by copying from any overlapping buffers with fresher content. CopyToDepthFromOverlappingFramebuffers(currentRenderVfb_); + currentRenderVfb_->usageFlags |= FB_USAGE_RENDER_DEPTH; currentRenderVfb_->depthBindSeq = GetBindSeqCount(); } @@ -523,16 +524,17 @@ void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFra }; std::vector sources; - for (auto src: vfbs_) { + for (auto src : vfbs_) { if (src == dest) continue; if (src->fb_address == dest->z_address && src->fb_stride == dest->z_stride && src->format == GE_FORMAT_565) { - if (src->colorBindSeq < dest->depthBindSeq) { + if (src->colorBindSeq > dest->depthBindSeq) { // Source has newer data than the current buffer, use it. sources.push_back(CopySource{ src, RASTER_COLOR }); } } else if (src->z_address == dest->z_address && src->z_stride == dest->z_stride && src->depthBindSeq > dest->depthBindSeq) { + // Don't bother if the buffer was from another frame. This heuristic is old. sources.push_back(CopySource{ src, RASTER_DEPTH }); } else { // TODO: Do more detailed overlap checks here. @@ -1572,7 +1574,7 @@ VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAd vfb->bufferHeight = vfb->height; vfb->format = format; vfb->drawnFormat = GE_FORMAT_8888; - vfb->usageFlags = FB_USAGE_RENDERTARGET; + vfb->usageFlags = FB_USAGE_RENDER_COLOR; SetColorUpdated(vfb, 0); char name[64]; snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address); @@ -1640,7 +1642,7 @@ VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFram UpdateDownloadTempBuffer(nvfb); } - nvfb->usageFlags |= FB_USAGE_RENDERTARGET; + nvfb->usageFlags |= FB_USAGE_RENDER_COLOR; nvfb->last_frame_render = gpuStats.numFlips; nvfb->dirtyAfterDisplay = true; @@ -1957,7 +1959,7 @@ void FramebufferManagerCommon::UpdateFramebufUsage(VirtualFramebuffer *vfb) { checkFlag(FB_USAGE_DISPLAYED_FRAMEBUFFER, vfb->last_frame_displayed); checkFlag(FB_USAGE_TEXTURE, vfb->last_frame_used); - checkFlag(FB_USAGE_RENDERTARGET, vfb->last_frame_render); + checkFlag(FB_USAGE_RENDER_COLOR, vfb->last_frame_render); checkFlag(FB_USAGE_CLUT, vfb->last_frame_clut); } diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 7fca956632..13a1fd57d8 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -37,13 +37,14 @@ enum { FB_USAGE_DISPLAYED_FRAMEBUFFER = 1, - FB_USAGE_RENDERTARGET = 2, + FB_USAGE_RENDER_COLOR = 2, FB_USAGE_TEXTURE = 4, FB_USAGE_CLUT = 8, FB_USAGE_DOWNLOAD = 16, FB_USAGE_DOWNLOAD_CLEAR = 32, FB_USAGE_BLUE_TO_ALPHA = 64, FB_USAGE_FIRST_FRAME_SAVED = 128, + FB_USAGE_RENDER_DEPTH = 256, }; enum { diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 488aa86d30..014b8feb3a 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -106,13 +106,7 @@ inline int dimHeight(u16 dim) { // Vulkan color formats: // TODO TextureCacheCommon::TextureCacheCommon(Draw::DrawContext *draw) - : draw_(draw), - clutLastFormat_(0xFFFFFFFF), - clutTotalBytes_(0), - clutMaxBytes_(0), - clutRenderAddress_(0xFFFFFFFF), - clutAlphaLinear_(false), - isBgraBackend_(false) { + : draw_(draw) { decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL; // TODO: Clamp down to 256/1KB? Need to check mipmapShareClut and clamp loadclut. diff --git a/GPU/Common/TextureCacheCommon.h b/GPU/Common/TextureCacheCommon.h index a8b48878e2..12f0d64213 100644 --- a/GPU/Common/TextureCacheCommon.h +++ b/GPU/Common/TextureCacheCommon.h @@ -444,13 +444,13 @@ protected: u32 *clutBufConverted_; // This is the active one. u32 *clutBuf_; - u32 clutLastFormat_; - u32 clutTotalBytes_; - u32 clutMaxBytes_; - u32 clutRenderAddress_; + u32 clutLastFormat_ = 0xFFFFFFFF; + u32 clutTotalBytes_ = 0; + u32 clutMaxBytes_ = 0; + u32 clutRenderAddress_ = 0xFFFFFFFF; u32 clutRenderOffset_; // True if the clut is just alpha values in the same order (RGBA4444-bit only.) - bool clutAlphaLinear_; + bool clutAlphaLinear_ = false; u16 clutAlphaLinearColor_; int standardScaleFactor_; @@ -461,7 +461,7 @@ protected: bool nextNeedsChange_; bool nextNeedsRebuild_; - bool isBgraBackend_; + bool isBgraBackend_ = false; u32 expandClut_[256]; }; From 12db0e52d4d3d955c930bbdb942048a1d4cbb21d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 18 Aug 2022 09:38:17 +0200 Subject: [PATCH 6/7] Fix deferred-depth for bezier/spline. Move updating of last_frame_depth_render to GPUCommon. --- GPU/Common/FramebufferManagerCommon.cpp | 2 +- GPU/Common/FramebufferManagerCommon.h | 6 ----- GPU/D3D11/DrawEngineD3D11.cpp | 3 --- GPU/D3D11/StateMappingD3D11.cpp | 6 ----- GPU/Directx9/DrawEngineDX9.cpp | 3 --- GPU/Directx9/StateMappingDX9.cpp | 6 ----- GPU/GLES/DrawEngineGLES.cpp | 3 --- GPU/GLES/StateMappingGLES.cpp | 6 ----- GPU/GPUCommon.cpp | 33 ++++++++++++++++--------- GPU/GPUCommon.h | 17 +++++++------ GPU/Vulkan/FramebufferManagerVulkan.cpp | 3 --- GPU/Vulkan/StateMappingVulkan.cpp | 6 ----- 12 files changed, 33 insertions(+), 61 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index df2e09514c..7c2159146c 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -317,7 +317,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame } break; } else if (v->fb_stride == params.fb_stride && v->format == params.fmt) { - u32 v_fb_first_line_end_ptr = v->fb_address + v->fb_stride * 4; // This should be * bpp, but leaving like this until after 1.13 to be safe. The God of War games use this for shadows. + u32 v_fb_first_line_end_ptr = v->fb_address + v->fb_stride * bpp; u32 v_fb_end_ptr = v->fb_address + v->fb_stride * v->height * bpp; if (params.fb_address > v->fb_address && params.fb_address < v_fb_first_line_end_ptr) { diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 13a1fd57d8..3acff211dd 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -343,12 +343,6 @@ public: int GetTargetStride() const { return currentRenderVfb_ ? currentRenderVfb_->fb_stride : 512; } GEBufferFormat GetTargetFormat() const { return currentRenderVfb_ ? currentRenderVfb_->format : displayFormat_; } - void SetDepthUpdated() { - if (currentRenderVfb_) { - currentRenderVfb_->last_frame_depth_render = gpuStats.numFlips; - currentRenderVfb_->last_frame_depth_updated = gpuStats.numFlips; - } - } void SetColorUpdated(int skipDrawReason) { if (currentRenderVfb_) { SetColorUpdated(currentRenderVfb_, skipDrawReason); diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index 6bc808ef6a..91a4dda0d3 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -693,9 +693,6 @@ rotateVBO: if (gstate.isClearModeAlphaMask()) clearFlag |= Draw::FBChannel::FB_STENCIL_BIT; if (gstate.isClearModeDepthMask()) clearFlag |= Draw::FBChannel::FB_DEPTH_BIT; - if (clearFlag & Draw::FBChannel::FB_DEPTH_BIT) { - framebufferManager_->SetDepthUpdated(); - } if (clearFlag & Draw::FBChannel::FB_COLOR_BIT) { framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason); } diff --git a/GPU/D3D11/StateMappingD3D11.cpp b/GPU/D3D11/StateMappingD3D11.cpp index da9ec068e6..0cbc600ebc 100644 --- a/GPU/D3D11/StateMappingD3D11.cpp +++ b/GPU/D3D11/StateMappingD3D11.cpp @@ -298,9 +298,6 @@ void DrawEngineD3D11::ApplyDrawState(int prim) { keys_.depthStencil.depthTestEnable = true; keys_.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS; keys_.depthStencil.depthWriteEnable = gstate.isClearModeDepthMask(); - if (gstate.isClearModeDepthMask()) { - framebufferManager_->SetDepthUpdated(); - } // Stencil Test bool alphaMask = gstate.isClearModeAlphaMask(); @@ -329,9 +326,6 @@ void DrawEngineD3D11::ApplyDrawState(int prim) { keys_.depthStencil.depthTestEnable = true; keys_.depthStencil.depthCompareOp = compareOps[gstate.getDepthTestFunction()]; keys_.depthStencil.depthWriteEnable = gstate.isDepthWriteEnabled(); - if (gstate.isDepthWriteEnabled()) { - framebufferManager_->SetDepthUpdated(); - } } else { keys_.depthStencil.depthTestEnable = false; keys_.depthStencil.depthWriteEnable = false; diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index ffad2bd469..889e08387c 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -640,9 +640,6 @@ rotateVBO: if (gstate.isClearModeAlphaMask()) mask |= D3DCLEAR_STENCIL; if (gstate.isClearModeDepthMask()) mask |= D3DCLEAR_ZBUFFER; - if (mask & D3DCLEAR_ZBUFFER) { - framebufferManager_->SetDepthUpdated(); - } if (mask & D3DCLEAR_TARGET) { framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason); } diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index 3d06b380e0..828f9d4b4d 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ b/GPU/Directx9/StateMappingDX9.cpp @@ -216,9 +216,6 @@ void DrawEngineDX9::ApplyDrawState(int prim) { dxstate.depthTest.enable(); dxstate.depthFunc.set(D3DCMP_ALWAYS); dxstate.depthWrite.set(gstate.isClearModeDepthMask()); - if (gstate.isClearModeDepthMask()) { - framebufferManager_->SetDepthUpdated(); - } // Stencil Test bool alphaMask = gstate.isClearModeAlphaMask(); @@ -239,9 +236,6 @@ void DrawEngineDX9::ApplyDrawState(int prim) { dxstate.depthTest.enable(); dxstate.depthFunc.set(ztests[gstate.getDepthTestFunction()]); dxstate.depthWrite.set(gstate.isDepthWriteEnabled()); - if (gstate.isDepthWriteEnabled()) { - framebufferManager_->SetDepthUpdated(); - } } else { dxstate.depthTest.disable(); } diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index ab0960abd5..6610fcde6a 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -423,9 +423,6 @@ void DrawEngineGLES::DoFlush() { bool colorMask = gstate.isClearModeColorMask(); bool alphaMask = gstate.isClearModeAlphaMask(); bool depthMask = gstate.isClearModeDepthMask(); - if (depthMask) { - framebufferManager_->SetDepthUpdated(); - } GLbitfield target = 0; // Without this, we will clear RGB when clearing stencil, which breaks games. diff --git a/GPU/GLES/StateMappingGLES.cpp b/GPU/GLES/StateMappingGLES.cpp index 6e498875f1..35f9329629 100644 --- a/GPU/GLES/StateMappingGLES.cpp +++ b/GPU/GLES/StateMappingGLES.cpp @@ -253,18 +253,12 @@ void DrawEngineGLES::ApplyDrawState(int prim) { if (gstate.isModeClear()) { // Depth Test - if (gstate.isClearModeDepthMask()) { - framebufferManager_->SetDepthUpdated(); - } renderManager->SetStencilFunc(gstate.isClearModeAlphaMask(), GL_ALWAYS, 0xFF, 0xFF); renderManager->SetStencilOp(stencilState.writeMask, GL_REPLACE, GL_REPLACE, GL_REPLACE); renderManager->SetDepth(true, gstate.isClearModeDepthMask() ? true : false, GL_ALWAYS); } else { // Depth Test renderManager->SetDepth(gstate.isDepthTestEnabled(), gstate.isDepthWriteEnabled(), compareOps[gstate.getDepthTestFunction()]); - if (gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled()) { - framebufferManager_->SetDepthUpdated(); - } // Stencil Test if (stencilState.enabled) { diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index d751291a9b..14c4cf1f5e 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1624,6 +1624,21 @@ void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) { gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_CULLRANGE); } +void GPUCommon::CheckDepthUsage(VirtualFramebuffer *vfb) { + if (!gstate_c.usingDepth) { + bool isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask(); + + if ((gstate.isDepthTestEnabled() || isClearingDepth)) { + gstate_c.usingDepth = true; + gstate_c.clearingDepth = isClearingDepth; + vfb->last_frame_depth_render = gpuStats.numFlips; + if (isClearingDepth || gstate.isDepthWriteEnabled()) { + vfb->last_frame_depth_updated = gpuStats.numFlips; + } + framebufferManager_->SetDepthFrameBuffer(); + } + } +} void GPUCommon::Execute_Prim(u32 op, u32 diff) { // This drives all drawing. All other state we just buffer up, then we apply it only @@ -1685,15 +1700,7 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) { return; } - if (!gstate_c.usingDepth) { - bool isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask();; - - if ((gstate.isDepthTestEnabled() || isClearingDepth)) { - gstate_c.usingDepth = true; - gstate_c.clearingDepth = isClearingDepth; - framebufferManager_->SetDepthFrameBuffer(); - } - } + CheckDepthUsage(vfb); const void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr); const void *inds = nullptr; @@ -1893,12 +1900,14 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) { gstate_c.framebufFormat = gstate.FrameBufFormat(); // This also make skipping drawing very effective. - framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { // TODO: Should this eat some cycles? Probably yes. Not sure if important. return; } + CheckDepthUsage(vfb); + if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); return; @@ -1963,12 +1972,14 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) { gstate_c.framebufFormat = gstate.FrameBufFormat(); // This also make skipping drawing very effective. - framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); + VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { // TODO: Should this eat some cycles? Probably yes. Not sure if important. return; } + CheckDepthUsage(vfb); + if (!Memory::IsValidAddress(gstate_c.vertexAddr)) { ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr); return; diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index 59298d8e92..e61d5dada9 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -15,6 +15,8 @@ class FramebufferManagerCommon; class TextureCacheCommon; class DrawEngineCommon; class GraphicsContext; +struct VirtualFramebuffer; + namespace Draw { class DrawContext; } @@ -282,17 +284,11 @@ protected: void SlowRunLoop(DisplayList &list); void UpdatePC(u32 currentPC, u32 newPC); void UpdateState(GPURunState state); - void PopDLQueue(); - void CheckDrawSync(); - int GetNextListIndex(); - virtual void FastLoadBoneMatrix(u32 target); + void FastLoadBoneMatrix(u32 target); // TODO: Unify this. virtual void FinishDeferred() {} - void DoBlockTransfer(u32 skipDrawReason); - void DoExecuteCall(u32 target); - void AdvanceVerts(u32 vertType, int count, int bytesRead) { if ((vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) { int indexShift = ((vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT) - 1; @@ -362,6 +358,13 @@ protected: private: void FlushImm(); + void CheckDepthUsage(VirtualFramebuffer *vfb); + void DoBlockTransfer(u32 skipDrawReason); + void DoExecuteCall(u32 target); + void PopDLQueue(); + void CheckDrawSync(); + int GetNextListIndex(); + // Debug stats. double timeSteppingStarted_; double timeSpentStepping_; diff --git a/GPU/Vulkan/FramebufferManagerVulkan.cpp b/GPU/Vulkan/FramebufferManagerVulkan.cpp index 7748f24371..7061f68ae7 100644 --- a/GPU/Vulkan/FramebufferManagerVulkan.cpp +++ b/GPU/Vulkan/FramebufferManagerVulkan.cpp @@ -75,7 +75,4 @@ void FramebufferManagerVulkan::NotifyClear(bool clearColor, bool clearAlpha, boo if (clearColor || clearAlpha) { SetColorUpdated(gstate_c.skipDrawReason); } - if (clearDepth) { - SetDepthUpdated(); - } } diff --git a/GPU/Vulkan/StateMappingVulkan.cpp b/GPU/Vulkan/StateMappingVulkan.cpp index 166f3dc153..cb1f45aaca 100644 --- a/GPU/Vulkan/StateMappingVulkan.cpp +++ b/GPU/Vulkan/StateMappingVulkan.cpp @@ -254,9 +254,6 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag key.depthTestEnable = true; key.depthCompareOp = VK_COMPARE_OP_ALWAYS; key.depthWriteEnable = gstate.isClearModeDepthMask(); - if (gstate.isClearModeDepthMask()) { - fbManager.SetDepthUpdated(); - } // Stencil Test bool alphaMask = gstate.isClearModeAlphaMask(); @@ -287,9 +284,6 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag key.depthTestEnable = true; key.depthCompareOp = compareOps[gstate.getDepthTestFunction()]; key.depthWriteEnable = gstate.isDepthWriteEnabled(); - if (gstate.isDepthWriteEnabled()) { - fbManager.SetDepthUpdated(); - } } else { key.depthTestEnable = false; key.depthWriteEnable = false; From 29ea3ffe0c85a7d2bde037a304e0e4a836b240da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 20 Aug 2022 09:46:15 +0200 Subject: [PATCH 7/7] Restore the clearing optimization, avoiding unnecessary depth copies --- GPU/Common/FramebufferManagerCommon.cpp | 9 ++++++--- GPU/Common/FramebufferManagerCommon.h | 2 +- GPU/GPUCommon.cpp | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 7c2159146c..308f1b7fd5 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -497,13 +497,16 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame } // Called on the first use of depth in a render pass. -void FramebufferManagerCommon::SetDepthFrameBuffer() { +void FramebufferManagerCommon::SetDepthFrameBuffer(bool isClearingDepth) { if (!currentRenderVfb_) { return; } - // "Resolve" the depth buffer, by copying from any overlapping buffers with fresher content. - CopyToDepthFromOverlappingFramebuffers(currentRenderVfb_); + // If this first draw call is anything other than a clear, "resolve" the depth buffer, + // by copying from any overlapping buffers with fresher content. + if (!isClearingDepth) { + CopyToDepthFromOverlappingFramebuffers(currentRenderVfb_); + } currentRenderVfb_->usageFlags |= FB_USAGE_RENDER_DEPTH; currentRenderVfb_->depthBindSeq = GetBindSeqCount(); diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 3acff211dd..447554f376 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -267,7 +267,7 @@ public: return vfb; } } - void SetDepthFrameBuffer(); + void SetDepthFrameBuffer(bool isClearingDepth); void RebindFramebuffer(const char *tag); std::vector GetFramebufferList() const; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 14c4cf1f5e..7db2fd4b42 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1635,7 +1635,7 @@ void GPUCommon::CheckDepthUsage(VirtualFramebuffer *vfb) { if (isClearingDepth || gstate.isDepthWriteEnabled()) { vfb->last_frame_depth_updated = gpuStats.numFlips; } - framebufferManager_->SetDepthFrameBuffer(); + framebufferManager_->SetDepthFrameBuffer(isClearingDepth); } } }