From 370678c4989a7f4960ecc9afe179c500809ee98c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 24 May 2020 20:57:59 +0200 Subject: [PATCH] Do a similar thing for D3D (let the backend handle the dirtying). --- GPU/Common/FramebufferCommon.cpp | 14 +++----------- GPU/D3D11/DrawEngineD3D11.cpp | 10 ++++++++++ GPU/D3D11/DrawEngineD3D11.h | 2 ++ GPU/Directx9/DrawEngineDX9.cpp | 17 +++++++++++++++-- GPU/Directx9/DrawEngineDX9.h | 7 ++++++- GPU/Directx9/GPU_DX9.cpp | 2 +- GPU/GLES/DrawEngineGLES.cpp | 2 ++ GPU/GLES/DrawEngineGLES.h | 2 +- GPU/GLES/GPU_GLES.cpp | 1 - ext/native/thin3d/thin3d.h | 2 ++ ext/native/thin3d/thin3d_d3d11.cpp | 11 +++++++++++ ext/native/thin3d/thin3d_d3d9.cpp | 14 ++++++++++---- ext/native/thin3d/thin3d_gl.cpp | 4 ++++ ext/native/thin3d/thin3d_vulkan.cpp | 4 ++++ 14 files changed, 71 insertions(+), 21 deletions(-) diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 47a9c2c89a..0d85958f94 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -547,8 +547,6 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe // This should only happen very briefly when toggling useBufferedRendering_. ResizeFramebufFBO(vfb, vfb->width, vfb->height, true); } - // Since we switched target, we need to redo viewportscissorstate. - gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE); } else { if (vfb->fbo) { // This should only happen very briefly when toggling useBufferedRendering_. @@ -566,7 +564,7 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe } textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED); - // ugly... + // ugly... is all this needed? if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE); } @@ -1064,7 +1062,6 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, INFO_LOG(FRAMEBUF, "Resizing FBO for %08x : %d x %d x %d", vfb->fb_address, w, h, vfb->format); if (vfb->fbo) { draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO"); - gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE); if (!skipCopy) { BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min((u16)oldWidth, std::min(vfb->bufferWidth, vfb->width)), std::min((u16)oldHeight, std::min(vfb->height, vfb->bufferHeight)), 0); } @@ -1075,7 +1072,6 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, } } else { draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO"); - gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE); } if (!vfb->fbo) { @@ -1845,7 +1841,7 @@ bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEB bool retval = draw_->CopyFramebufferToMemorySync(bound, Draw::FB_COLOR_BIT, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), w, "GetFramebuffer"); gpuStats.numReadbacks++; // After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe. - gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS); + gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS); // We may have blitted to a temp FBO. RebindFramebuffer(); return retval; @@ -1880,7 +1876,7 @@ bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32 // No need to free on failure, that's the caller's job (it likely will reuse a buffer.) bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, 0, 0, w, h, Draw::DataFormat::D32F, buffer.GetData(), w, "GetDepthBuffer"); // After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe. - gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS); + gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS); // That may have unbound the framebuffer, rebind to avoid crashes when debugging. RebindFramebuffer(); return retval; @@ -1971,9 +1967,6 @@ void FramebufferManagerCommon::PackFramebufferSync_(VirtualFramebuffer *vfb, int ERROR_LOG(G3D, "PackFramebufferSync_: Tried to readback to bad address %08x (stride = %d)", fb_address + dstByteOffset, vfb->fb_stride); } - // A new command buffer will begin after CopyFrameBufferToMemorySync, so we need to trigger - // updates of any dynamic command buffer state by dirtying some stuff. - gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE); gpuStats.numReadbacks++; } @@ -2056,7 +2049,6 @@ void FramebufferManagerCommon::RebindFramebuffer() { // Should this even happen? It could while debugging, but maybe we can just skip binding at all. draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "RebindFramebuffer_Bad"); } - gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE); } std::vector FramebufferManagerCommon::GetFramebufferList() { diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index fde2dd0102..371910cac7 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -307,6 +307,8 @@ void DrawEngineD3D11::BeginFrame() { NOTICE_LOG(G3D, buffer); } #endif + + lastRenderStepId_ = -1; } VertexArrayInfoD3D11::~VertexArrayInfoD3D11() { @@ -325,6 +327,14 @@ void DrawEngineD3D11::DoFlush() { gpuStats.numFlushes++; gpuStats.numTrackedVertexArrays = (int)vai_.size(); + // In D3D, we're synchronous and state carries over so all we reset here on a new step is the viewport/scissor. + int curRenderStepId = draw_->GetCurrentStepId(); + if (lastRenderStepId_ != curRenderStepId) { + // Dirty everything that has dynamic state that will need re-recording. + gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE); + lastRenderStepId_ = curRenderStepId; + } + // This is not done on every drawcall, we collect vertex data // until critical state changes. That's when we draw (flush). diff --git a/GPU/D3D11/DrawEngineD3D11.h b/GPU/D3D11/DrawEngineD3D11.h index 11ed3e41e9..3511f760f5 100644 --- a/GPU/D3D11/DrawEngineD3D11.h +++ b/GPU/D3D11/DrawEngineD3D11.h @@ -216,4 +216,6 @@ private: // Hardware tessellation TessellationDataTransferD3D11 *tessDataTransferD3D11; + + int lastRenderStepId_ = -1; }; diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index a39d48f986..436ea508fa 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -83,7 +83,7 @@ static const D3DVERTEXELEMENT9 TransformedVertexElements[] = { D3DDECL_END() }; -DrawEngineDX9::DrawEngineDX9(Draw::DrawContext *draw) : vai_(256), vertexDeclMap_(64) { +DrawEngineDX9::DrawEngineDX9(Draw::DrawContext *draw) : draw_(draw), vai_(256), vertexDeclMap_(64) { device_ = (LPDIRECT3DDEVICE9)draw->GetNativeObject(Draw::NativeObject::DEVICE); decOptions_.expandAllWeightsToFloat = true; decOptions_.expand8BitNormalsToFloat = true; @@ -299,14 +299,27 @@ static uint32_t SwapRB(uint32_t c) { return (c & 0xFF00FF00) | ((c >> 16) & 0xFF) | ((c << 16) & 0xFF0000); } +void DrawEngineDX9::BeginFrame() { + DecimateTrackedVertexArrays(); + + lastRenderStepId_ = -1; +} + // The inline wrapper in the header checks for numDrawCalls == 0 void DrawEngineDX9::DoFlush() { gpuStats.numFlushes++; gpuStats.numTrackedVertexArrays = (int)vai_.size(); + // In D3D, we're synchronous and state carries over so all we reset here on a new step is the viewport/scissor. + int curRenderStepId = draw_->GetCurrentStepId(); + if (lastRenderStepId_ != curRenderStepId) { + // Dirty everything that has dynamic state that will need re-recording. + gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE); + lastRenderStepId_ = curRenderStepId; + } + // This is not done on every drawcall, we should collect vertex data // until critical state changes. That's when we draw (flush). - GEPrimitiveType prim = prevPrim_; ApplyDrawState(prim); diff --git a/GPU/Directx9/DrawEngineDX9.h b/GPU/Directx9/DrawEngineDX9.h index 41c828908a..2d01617b40 100644 --- a/GPU/Directx9/DrawEngineDX9.h +++ b/GPU/Directx9/DrawEngineDX9.h @@ -123,7 +123,8 @@ public: void DestroyDeviceObjects(); void ClearTrackedVertexArrays() override; - void DecimateTrackedVertexArrays(); + + void BeginFrame(); // So that this can be inlined void Flush() { @@ -143,6 +144,7 @@ public: protected: // Not currently supported. bool UpdateUseHWTessellation(bool enable) override { return false; } + void DecimateTrackedVertexArrays(); private: void DoFlush(); @@ -156,6 +158,7 @@ private: void MarkUnreliable(VertexArrayInfoDX9 *vai); LPDIRECT3DDEVICE9 device_ = nullptr; + Draw::DrawContext *draw_; PrehashMap vai_; DenseHashMap vertexDeclMap_; @@ -170,6 +173,8 @@ private: // Hardware tessellation TessellationDataTransferDX9 *tessDataTransferDX9; + + int lastRenderStepId_ = -1; }; } // namespace diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 1b4aaf857b..d3de79ec2b 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -282,7 +282,7 @@ void GPU_DX9::ReapplyGfxState() { void GPU_DX9::BeginFrame() { textureCacheDX9_->StartFrame(); - drawEngine_.DecimateTrackedVertexArrays(); + drawEngine_.BeginFrame(); depalShaderCache_.Decimate(); // fragmentTestCache_.Decimate(); diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 3d6282771f..7d80f20fd4 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -156,6 +156,8 @@ void DrawEngineGLES::ClearInputLayoutMap() { } void DrawEngineGLES::BeginFrame() { + DecimateTrackedVertexArrays(); + FrameData &frameData = frameData_[render_->GetCurFrame()]; render_->BeginPushBuffer(frameData.pushIndex); render_->BeginPushBuffer(frameData.pushVertex); diff --git a/GPU/GLES/DrawEngineGLES.h b/GPU/GLES/DrawEngineGLES.h index 51abeb6d24..165329a3d7 100644 --- a/GPU/GLES/DrawEngineGLES.h +++ b/GPU/GLES/DrawEngineGLES.h @@ -150,7 +150,6 @@ public: void DeviceRestore(Draw::DrawContext *draw); void ClearTrackedVertexArrays() override; - void DecimateTrackedVertexArrays(); void BeginFrame(); void EndFrame(); @@ -186,6 +185,7 @@ public: protected: bool UpdateUseHWTessellation(bool enable) override; + void DecimateTrackedVertexArrays(); private: void InitDeviceObjects(); diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index 81a53c3dc8..ceefecbcb3 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -373,7 +373,6 @@ void GPU_GLES::ReapplyGfxState() { void GPU_GLES::BeginFrame() { textureCacheGL_->StartFrame(); - drawEngine_.DecimateTrackedVertexArrays(); depalShaderCache_.Decimate(); fragmentTestCache_.Decimate(); diff --git a/ext/native/thin3d/thin3d.h b/ext/native/thin3d/thin3d.h index 22af453d83..4e61672b7f 100644 --- a/ext/native/thin3d/thin3d.h +++ b/ext/native/thin3d/thin3d.h @@ -673,6 +673,8 @@ public: // Flush state like scissors etc so the caller can do its own custom drawing. virtual void FlushState() {} + virtual int GetCurrentStepId() const = 0; + protected: ShaderModule *vsPresets_[VS_MAX_PRESET]; ShaderModule *fsPresets_[FS_MAX_PRESET]; diff --git a/ext/native/thin3d/thin3d_d3d11.cpp b/ext/native/thin3d/thin3d_d3d11.cpp index 719c306b44..96b9e1ad63 100644 --- a/ext/native/thin3d/thin3d_d3d11.cpp +++ b/ext/native/thin3d/thin3d_d3d11.cpp @@ -158,6 +158,10 @@ public: void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override; + int GetCurrentStepId() const override { + return stepId_; + } + private: void ApplyCurrentState(); @@ -166,6 +170,7 @@ private: ID3D11DeviceContext *context_; ID3D11Device1 *device1_; ID3D11DeviceContext1 *context1_; + int stepId_ = -1; ID3D11Texture2D *bbRenderTargetTex_ = nullptr; // NOT OWNED ID3D11RenderTargetView *bbRenderTargetView_ = nullptr; @@ -353,6 +358,7 @@ void D3D11DrawContext::HandleEvent(Event ev, int width, int height, void *param1 // Make sure that we don't eliminate the next time the render target is set. curRenderTargetView_ = nullptr; curDepthStencilView_ = nullptr; + stepId_ = 0; break; } } @@ -1394,11 +1400,13 @@ void D3D11DrawContext::CopyFramebufferImage(Framebuffer *srcfb, int level, int x D3D11_BOX srcBox{ (UINT)x, (UINT)y, (UINT)z, (UINT)(x + width), (UINT)(y + height), (UINT)(z + depth) }; context_->CopySubresourceRegion(dstTex, dstLevel, dstX, dstY, dstZ, srcTex, level, &srcBox); } + stepId_++; } bool D3D11DrawContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dstfb, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) { // Unfortunately D3D11 has no equivalent to this, gotta render a quad. Well, in some cases we can issue a copy instead. Crash(); + stepId_++; return false; } @@ -1519,6 +1527,7 @@ bool D3D11DrawContext::CopyFramebufferToMemorySync(Framebuffer *src, int channel if (!useGlobalPacktex) { packTex->Release(); } + stepId_++; return true; } @@ -1562,6 +1571,8 @@ void D3D11DrawContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const Ren if (mask && curDepthStencilView_) { context_->ClearDepthStencilView(curDepthStencilView_, mask, rp.clearDepth, rp.clearStencil); } + + stepId_++; } // color must be 0, for now. diff --git a/ext/native/thin3d/thin3d_d3d9.cpp b/ext/native/thin3d/thin3d_d3d9.cpp index 42fc0d29a2..a787bbeef5 100644 --- a/ext/native/thin3d/thin3d_d3d9.cpp +++ b/ext/native/thin3d/thin3d_d3d9.cpp @@ -599,11 +599,16 @@ public: void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override; + int GetCurrentStepId() const override { + return stepId_; + } + private: LPDIRECT3D9 d3d_; LPDIRECT3D9EX d3dEx_; LPDIRECT3DDEVICE9 device_; LPDIRECT3DDEVICE9EX deviceEx_; + int stepId_ = -1; int adapterId_ = -1; D3DADAPTER_IDENTIFIER9 identifier_{}; D3DCAPS9 d3dCaps_; @@ -1160,6 +1165,7 @@ void D3D9Context::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPa dxstate.scissorRect.restore(); dxstate.viewport.restore(); + stepId_++; } uintptr_t D3D9Context::GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) { @@ -1187,10 +1193,6 @@ uintptr_t D3D9Context::GetFramebufferAPITexture(Framebuffer *fbo, int channelBit } } -LPDIRECT3DSURFACE9 fbo_get_color_for_read(D3D9Framebuffer *fbo) { - return fbo->surf; -} - void D3D9Context::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int color) { D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo; switch (channelBit) { @@ -1228,6 +1230,7 @@ bool D3D9Context::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int RECT dstRect{ (LONG)dstX1, (LONG)dstY1, (LONG)dstX2, (LONG)dstY2 }; LPDIRECT3DSURFACE9 srcSurf = src ? src->surf : deviceRTsurf; LPDIRECT3DSURFACE9 dstSurf = dst ? dst->surf : deviceRTsurf; + stepId_++; return SUCCEEDED(device_->StretchRect(srcSurf, &srcRect, dstSurf, &dstRect, filter == FB_BLIT_LINEAR ? D3DTEXF_LINEAR : D3DTEXF_POINT)); } @@ -1245,6 +1248,9 @@ void D3D9Context::HandleEvent(Event ev, int width, int height, void *param1, voi device_->GetRenderTarget(0, &deviceRTsurf); device_->GetDepthStencilSurface(&deviceDSsurf); break; + case Event::PRESENTED: + stepId_ = 0; + break; } } diff --git a/ext/native/thin3d/thin3d_gl.cpp b/ext/native/thin3d/thin3d_gl.cpp index 2a45ae2256..0bbe2466d5 100644 --- a/ext/native/thin3d/thin3d_gl.cpp +++ b/ext/native/thin3d/thin3d_gl.cpp @@ -478,6 +478,10 @@ public: void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override {} + int GetCurrentStepId() const { + return renderManager_.GetCurrentStepId(); + } + private: void ApplySamplers(); diff --git a/ext/native/thin3d/thin3d_vulkan.cpp b/ext/native/thin3d/thin3d_vulkan.cpp index e3160a944d..43fe931f6c 100644 --- a/ext/native/thin3d/thin3d_vulkan.cpp +++ b/ext/native/thin3d/thin3d_vulkan.cpp @@ -482,6 +482,10 @@ public: void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override; + int GetCurrentStepId() const { + return renderManager_.GetCurrentStepId(); + } + private: VulkanTexture *GetNullTexture(); VulkanContext *vulkan_ = nullptr;