From 4ef4325fdb339900e93531c01abd417cf9cfb4a8 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 18 May 2020 21:30:56 -0700 Subject: [PATCH] GPU: Avoid unnecessary clear on stencil upload. In this common case, we've typically just bound the buffer to upload a texture to it. No need to start a new render pass. This dodges #12927 but doesn't really fix the underlying issue. --- GPU/Common/FramebufferCommon.cpp | 2 +- GPU/Common/FramebufferCommon.h | 7 ++++++- GPU/D3D11/FramebufferManagerD3D11.h | 2 +- GPU/D3D11/StencilBufferD3D11.cpp | 8 +++++--- GPU/Directx9/FramebufferDX9.h | 2 +- GPU/Directx9/StencilBufferDX9.cpp | 8 +++++--- GPU/GLES/FramebufferManagerGLES.h | 2 +- GPU/GLES/StencilBufferGLES.cpp | 4 ++-- GPU/Vulkan/FramebufferVulkan.h | 2 +- GPU/Vulkan/StencilBufferVulkan.cpp | 6 ++++-- 10 files changed, 27 insertions(+), 16 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 298f54d8ba..850b694701 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -379,7 +379,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame if (useBufferedRendering_ && !g_Config.bDisableSlowFramebufEffects) { gpu->PerformMemoryUpload(params.fb_address, byteSize); - NotifyStencilUpload(params.fb_address, byteSize, true); + NotifyStencilUpload(params.fb_address, byteSize, StencilUpload::STENCIL_IS_ZERO); // TODO: Is it worth trying to upload the depth buffer? } diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index bf92ae189d..93d2418417 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -146,6 +146,11 @@ inline DrawTextureFlags operator | (const DrawTextureFlags &lhs, const DrawTextu return DrawTextureFlags((u32)lhs | (u32)rhs); } +enum class StencilUpload { + NEEDS_CLEAR, + STENCIL_IS_ZERO, +}; + enum class TempFBO { DEPAL, BLIT, @@ -219,7 +224,7 @@ public: void NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt); void UpdateFromMemory(u32 addr, int size, bool safe); void ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor); - virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) = 0; + virtual bool NotifyStencilUpload(u32 addr, int size, StencilUpload flags = StencilUpload::NEEDS_CLEAR) = 0; // Returns true if it's sure this is a direct FBO->FBO transfer and it has already handle it. // In that case we hardly need to actually copy the bytes in VRAM, they will be wrong anyway (unless // read framebuffers is on, in which case this should always return false). diff --git a/GPU/D3D11/FramebufferManagerD3D11.h b/GPU/D3D11/FramebufferManagerD3D11.h index 73b917f326..1f91af61ae 100644 --- a/GPU/D3D11/FramebufferManagerD3D11.h +++ b/GPU/D3D11/FramebufferManagerD3D11.h @@ -49,7 +49,7 @@ public: void BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags); - virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) override; + virtual bool NotifyStencilUpload(u32 addr, int size, StencilUpload flags = StencilUpload::NEEDS_CLEAR) override; // TODO: Remove ID3D11Buffer *GetDynamicQuadBuffer() { diff --git a/GPU/D3D11/StencilBufferD3D11.cpp b/GPU/D3D11/StencilBufferD3D11.cpp index ad9faa7f0a..d3b08d69ff 100644 --- a/GPU/D3D11/StencilBufferD3D11.cpp +++ b/GPU/D3D11/StencilBufferD3D11.cpp @@ -70,7 +70,7 @@ VS_OUT main(VS_IN In) { )"; // TODO : If SV_StencilRef is available (D3D11.3) then this can be done in a single pass. -bool FramebufferManagerD3D11::NotifyStencilUpload(u32 addr, int size, bool skipZero) { +bool FramebufferManagerD3D11::NotifyStencilUpload(u32 addr, int size, StencilUpload flags) { addr &= 0x3FFFFFFF; if (!MayIntersectFramebuffer(addr)) { return false; @@ -117,7 +117,7 @@ bool FramebufferManagerD3D11::NotifyStencilUpload(u32 addr, int size, bool skipZ } if (usedBits == 0) { - if (skipZero) { + if (flags == StencilUpload::STENCIL_IS_ZERO) { // Common when creating buffers, it's already 0. We're done. return false; } @@ -164,7 +164,9 @@ bool FramebufferManagerD3D11::NotifyStencilUpload(u32 addr, int size, bool skipZ if (!tex) return false; if (dstBuffer->fbo) { - draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }); + // Typically, STENCIL_IS_ZERO means it's already bound. + Draw::RPAction stencilAction = flags == StencilUpload::STENCIL_IS_ZERO ? Draw::RPAction::KEEP : Draw::RPAction::CLEAR; + draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, stencilAction }); } else { // something is wrong... } diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index bab50d4dfc..d311c8aa78 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -54,7 +54,7 @@ public: void BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags); - virtual bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) override; + virtual bool NotifyStencilUpload(u32 addr, int size, StencilUpload flags = StencilUpload::NEEDS_CLEAR) override; bool GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxRes); bool GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) override; diff --git a/GPU/Directx9/StencilBufferDX9.cpp b/GPU/Directx9/StencilBufferDX9.cpp index 5302c9e4f6..78981a8969 100644 --- a/GPU/Directx9/StencilBufferDX9.cpp +++ b/GPU/Directx9/StencilBufferDX9.cpp @@ -66,7 +66,7 @@ VS_OUT main(VS_IN In) { } )"; -bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZero) { +bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, StencilUpload flags) { addr &= 0x3FFFFFFF; if (!MayIntersectFramebuffer(addr)) { return false; @@ -113,7 +113,7 @@ bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZer } if (usedBits == 0) { - if (skipZero) { + if (flags == StencilUpload::STENCIL_IS_ZERO) { // Common when creating buffers, it's already 0. We're done. return false; } @@ -193,7 +193,9 @@ bool FramebufferManagerDX9::NotifyStencilUpload(u32 addr, int size, bool skipZer u16 h = dstBuffer->renderHeight; if (dstBuffer->fbo) { - draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }); + // Typically, STENCIL_IS_ZERO means it's already bound. + Draw::RPAction stencilAction = flags == StencilUpload::STENCIL_IS_ZERO ? Draw::RPAction::KEEP : Draw::RPAction::CLEAR; + draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, stencilAction }); } D3DVIEWPORT9 vp{ 0, 0, w, h, 0.0f, 1.0f }; device_->SetViewport(&vp); diff --git a/GPU/GLES/FramebufferManagerGLES.h b/GPU/GLES/FramebufferManagerGLES.h index 25f541fd81..f86fb1218f 100644 --- a/GPU/GLES/FramebufferManagerGLES.h +++ b/GPU/GLES/FramebufferManagerGLES.h @@ -54,7 +54,7 @@ public: // For use when texturing from a framebuffer. May create a duplicate if target. void BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags); - bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) override; + bool NotifyStencilUpload(u32 addr, int size, StencilUpload flags = StencilUpload::NEEDS_CLEAR) override; bool GetOutputFramebuffer(GPUDebugBuffer &buffer) override; diff --git a/GPU/GLES/StencilBufferGLES.cpp b/GPU/GLES/StencilBufferGLES.cpp index 387001db26..5f67d145ef 100644 --- a/GPU/GLES/StencilBufferGLES.cpp +++ b/GPU/GLES/StencilBufferGLES.cpp @@ -68,7 +68,7 @@ void main() { } )"; -bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, bool skipZero) { +bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, StencilUpload flags) { addr &= 0x3FFFFFFF; if (!MayIntersectFramebuffer(addr)) { return false; @@ -114,7 +114,7 @@ bool FramebufferManagerGLES::NotifyStencilUpload(u32 addr, int size, bool skipZe } if (usedBits == 0) { - if (skipZero) { + if (flags == StencilUpload::STENCIL_IS_ZERO) { // Common when creating buffers, it's already 0. We're done. return false; } diff --git a/GPU/Vulkan/FramebufferVulkan.h b/GPU/Vulkan/FramebufferVulkan.h index bbd799e1d0..be6c89775c 100644 --- a/GPU/Vulkan/FramebufferVulkan.h +++ b/GPU/Vulkan/FramebufferVulkan.h @@ -58,7 +58,7 @@ public: void BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst) override; - bool NotifyStencilUpload(u32 addr, int size, bool skipZero = false) override; + bool NotifyStencilUpload(u32 addr, int size, StencilUpload flags = StencilUpload::NEEDS_CLEAR) override; VkImageView BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags); diff --git a/GPU/Vulkan/StencilBufferVulkan.cpp b/GPU/Vulkan/StencilBufferVulkan.cpp index a99480d497..3e9cc7e0aa 100644 --- a/GPU/Vulkan/StencilBufferVulkan.cpp +++ b/GPU/Vulkan/StencilBufferVulkan.cpp @@ -96,7 +96,7 @@ void main() { // In Vulkan we should be able to simply copy the stencil data directly to a stencil buffer without // messing about with bitplane textures and the like. Or actually, maybe not... Let's start with // the traditional approach. -bool FramebufferManagerVulkan::NotifyStencilUpload(u32 addr, int size, bool skipZero) { +bool FramebufferManagerVulkan::NotifyStencilUpload(u32 addr, int size, StencilUpload flags) { addr &= 0x3FFFFFFF; if (!MayIntersectFramebuffer(addr)) { return false; @@ -168,7 +168,9 @@ bool FramebufferManagerVulkan::NotifyStencilUpload(u32 addr, int size, bool skip return false; if (dstBuffer->fbo) { - draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }); + // Typically, STENCIL_IS_ZERO means it's already bound. + Draw::RPAction stencilAction = flags == StencilUpload::STENCIL_IS_ZERO ? Draw::RPAction::KEEP : Draw::RPAction::CLEAR; + draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, stencilAction }); } else { // something is wrong... }