From e43b5e208165d68540e726970a0cdde0481e698f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 24 Jul 2022 16:31:02 +0200 Subject: [PATCH] thin3d: Make writeMask and compareMask dynamic in all backends. D3D11 needs emulation. --- Common/GPU/D3D11/thin3d_d3d11.cpp | 125 +++++++++++++++++++--------- Common/GPU/D3D9/thin3d_d3d9.cpp | 6 +- Common/GPU/OpenGL/thin3d_gl.cpp | 26 +++--- Common/GPU/Vulkan/thin3d_vulkan.cpp | 14 ++-- Common/GPU/thin3d.h | 4 +- UI/GPUDriverTestScreen.cpp | 12 ++- 6 files changed, 117 insertions(+), 70 deletions(-) diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp index 98ec454c71..6495b5cf1a 100644 --- a/Common/GPU/D3D11/thin3d_d3d11.cpp +++ b/Common/GPU/D3D11/thin3d_d3d11.cpp @@ -13,6 +13,8 @@ #include "Common/Data/Encoding/Utf8.h" #include "Common/Log.h" +#include + #include #include #include @@ -39,6 +41,24 @@ class D3D11SamplerState; class D3D11RasterState; class D3D11Framebuffer; +// This must stay POD for the memcmp to work reliably. 
+struct D3D11DepthStencilKey { + DepthStencilStateDesc desc; + u8 writeMask; + u8 compareMask; + + bool operator < (const D3D11DepthStencilKey &other) const { + return memcmp(this, &other, sizeof(D3D11DepthStencilKey)) < 0; + } +}; + + +class D3D11DepthStencilState : public DepthStencilState { +public: + ~D3D11DepthStencilState() {} + DepthStencilStateDesc desc; +}; + class D3D11DrawContext : public DrawContext { public: D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *deviceContext, ID3D11Device1 *device1, ID3D11DeviceContext1 *deviceContext1, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector deviceList); @@ -102,9 +122,11 @@ public: blendFactorDirty_ = true; } } - void SetStencilRef(uint8_t ref) override { - stencilRef_ = ref; - stencilRefDirty_ = true; + void SetStencilParams(uint8_t refValue, uint8_t writeMask, uint8_t compareMask) override { + stencilRef_ = refValue; + stencilWriteMask_ = writeMask; + stencilCompareMask_ = compareMask; + stencilDirty_ = true; } void EndFrame() override; @@ -174,6 +196,8 @@ public: private: void ApplyCurrentState(); + ID3D11DepthStencilState *GetCachedDepthStencilState(D3D11DepthStencilState *state, uint8_t stencilWriteMask, uint8_t stencilCompareMask); + HWND hWnd_; ID3D11Device *device_; ID3D11DeviceContext *context_; @@ -200,8 +224,11 @@ private: DeviceCaps caps_{}; AutoRef curBlend_; - AutoRef curDepth_; + AutoRef curDepthStencil_; AutoRef curRaster_; + + std::map depthStencilCache_; + ID3D11InputLayout *curInputLayout_ = nullptr; ID3D11VertexShader *curVS_ = nullptr; ID3D11PixelShader *curPS_ = nullptr; @@ -219,7 +246,9 @@ private: float blendFactor_[4]{}; bool blendFactorDirty_ = false; uint8_t stencilRef_ = 0; - bool stencilRefDirty_ = true; + uint8_t stencilWriteMask_ = 0xFF; + uint8_t stencilCompareMask_ = 0xFF; + bool stencilDirty_ = true; // Temporaries ID3D11Texture2D *packTexture_ = nullptr; @@ -415,14 +444,6 @@ void D3D11DrawContext::SetScissorRect(int left, int top, int width, int height) 
context_->RSSetScissorRects(1, &rc); } -class D3D11DepthStencilState : public DepthStencilState { -public: - ~D3D11DepthStencilState() { - dss->Release(); - } - ID3D11DepthStencilState *dss; -}; - static const D3D11_COMPARISON_FUNC compareToD3D11[] = { D3D11_COMPARISON_NEVER, D3D11_COMPARISON_LESS, @@ -494,21 +515,6 @@ inline void CopyStencilSide(D3D11_DEPTH_STENCILOP_DESC &side, const StencilSetup side.StencilPassOp = stencilOpToD3D11[(int)input.passOp]; } -DepthStencilState *D3D11DrawContext::CreateDepthStencilState(const DepthStencilStateDesc &desc) { - D3D11DepthStencilState *ds = new D3D11DepthStencilState(); - D3D11_DEPTH_STENCIL_DESC d3ddesc{}; - d3ddesc.DepthEnable = desc.depthTestEnabled; - d3ddesc.DepthWriteMask = desc.depthWriteEnabled ? D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO; - d3ddesc.DepthFunc = compareToD3D11[(int)desc.depthCompare]; - d3ddesc.StencilEnable = desc.stencilEnabled; - CopyStencilSide(d3ddesc.FrontFace, desc.stencil); - CopyStencilSide(d3ddesc.BackFace, desc.stencil); - if (SUCCEEDED(device_->CreateDepthStencilState(&d3ddesc, &ds->dss))) - return ds; - delete ds; - return nullptr; -} - static const D3D11_BLEND_OP blendOpToD3D11[] = { D3D11_BLEND_OP_ADD, D3D11_BLEND_OP_SUBTRACT, @@ -547,6 +553,47 @@ public: float blendFactor[4]; }; +ID3D11DepthStencilState *D3D11DrawContext::GetCachedDepthStencilState(D3D11DepthStencilState *state, uint8_t stencilWriteMask, uint8_t stencilCompareMask) { + D3D11DepthStencilKey key; + key.desc = state->desc; + key.writeMask = stencilWriteMask; + key.compareMask = stencilCompareMask; + + auto findResult = depthStencilCache_.find(key); + + if (findResult != depthStencilCache_.end()) { + return findResult->second; + } + + // OK, create and insert. + D3D11_DEPTH_STENCIL_DESC d3ddesc{}; + d3ddesc.DepthEnable = state->desc.depthTestEnabled; + d3ddesc.DepthWriteMask = state->desc.depthWriteEnabled ? 
D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO; + d3ddesc.DepthFunc = compareToD3D11[(int)state->desc.depthCompare]; + d3ddesc.StencilEnable = state->desc.stencilEnabled; + // The masks are part of the cache key, so they must actually be applied to the state object created for that key. + d3ddesc.StencilReadMask = stencilCompareMask; + d3ddesc.StencilWriteMask = stencilWriteMask; + if (d3ddesc.StencilEnable) { + CopyStencilSide(d3ddesc.FrontFace, state->desc.stencil); + CopyStencilSide(d3ddesc.BackFace, state->desc.stencil); + } + + ID3D11DepthStencilState *dss = nullptr; + if (SUCCEEDED(device_->CreateDepthStencilState(&d3ddesc, &dss))) { + depthStencilCache_[key] = dss; + return dss; + } else { + return nullptr; + } +} + +DepthStencilState *D3D11DrawContext::CreateDepthStencilState(const DepthStencilStateDesc &desc) { + D3D11DepthStencilState *dss = new D3D11DepthStencilState(); + dss->desc = desc; + return dynamic_cast(dss); +} + BlendState *D3D11DrawContext::CreateBlendState(const BlendStateDesc &desc) { D3D11BlendState *bs = new D3D11BlendState(); D3D11_BLEND_DESC d3ddesc{}; @@ -677,8 +721,7 @@ InputLayout *D3D11DrawContext::CreateInputLayout(const InputLayoutDesc &desc) { class D3D11ShaderModule : public ShaderModule { public: - D3D11ShaderModule(const std::string &tag) : tag_(tag) { - } + D3D11ShaderModule(const std::string &tag) : tag_(tag) { } ~D3D11ShaderModule() { if (vs) vs->Release(); @@ -716,8 +759,11 @@ public: AutoRef input; ID3D11InputLayout *il = nullptr; AutoRef blend; - AutoRef depth; AutoRef raster; + + // Combined with dynamic state to key into cached D3D11DepthStencilState, to emulate dynamic parameters. 
+ AutoRef depthStencil; + ID3D11VertexShader *vs = nullptr; ID3D11PixelShader *ps = nullptr; ID3D11GeometryShader *gs = nullptr; @@ -969,7 +1015,7 @@ ShaderModule *D3D11DrawContext::CreateShaderModule(ShaderStage stage, ShaderLang Pipeline *D3D11DrawContext::CreateGraphicsPipeline(const PipelineDesc &desc) { D3D11Pipeline *dPipeline = new D3D11Pipeline(); dPipeline->blend = (D3D11BlendState *)desc.blend; - dPipeline->depth = (D3D11DepthStencilState *)desc.depthStencil; + dPipeline->depthStencil = (D3D11DepthStencilState *)desc.depthStencil; dPipeline->input = (D3D11InputLayout *)desc.inputLayout; dPipeline->raster = (D3D11RasterState *)desc.raster; dPipeline->topology = primToD3D11[(int)desc.prim]; @@ -1042,7 +1088,7 @@ void D3D11DrawContext::UpdateDynamicUniformBuffer(const void *ub, size_t size) { void D3D11DrawContext::InvalidateCachedState() { // This is a signal to forget all our state caching. curBlend_ = nullptr; - curDepth_ = nullptr; + curDepthStencil_ = nullptr; curRaster_ = nullptr; curPS_ = nullptr; curVS_ = nullptr; @@ -1065,10 +1111,11 @@ void D3D11DrawContext::ApplyCurrentState() { curBlend_ = curPipeline_->blend; blendFactorDirty_ = false; } - if (curDepth_ != curPipeline_->depth || stencilRefDirty_) { - context_->OMSetDepthStencilState(curPipeline_->depth->dss, stencilRef_); - curDepth_ = curPipeline_->depth; - stencilRefDirty_ = false; + if (curDepthStencil_ != curPipeline_->depthStencil || stencilDirty_) { + ID3D11DepthStencilState *dss = GetCachedDepthStencilState(curPipeline_->depthStencil, stencilWriteMask_, stencilCompareMask_); + context_->OMSetDepthStencilState(dss, stencilRef_); + curDepthStencil_ = curPipeline_->depthStencil; + stencilDirty_ = false; } if (curRaster_ != curPipeline_->raster) { context_->RSSetState(curPipeline_->raster->rs); @@ -1375,8 +1422,8 @@ void D3D11DrawContext::BeginFrame() { if (curBlend_ != nullptr) { context_->OMSetBlendState(curBlend_->bs, blendFactor_, 0xFFFFFFFF); } - if (curDepth_ != nullptr) { - 
context_->OMSetDepthStencilState(curDepth_->dss, stencilRef_); + if (curDepthStencil_ != nullptr) { + context_->OMSetDepthStencilState(GetCachedDepthStencilState(curDepthStencil_, stencilWriteMask_, stencilCompareMask_), stencilRef_); } if (curRaster_ != nullptr) { context_->RSSetState(curRaster_->rs); diff --git a/Common/GPU/D3D9/thin3d_d3d9.cpp b/Common/GPU/D3D9/thin3d_d3d9.cpp index 65286ae805..62ad16c63e 100644 --- a/Common/GPU/D3D9/thin3d_d3d9.cpp +++ b/Common/GPU/D3D9/thin3d_d3d9.cpp @@ -556,9 +556,7 @@ public: void SetScissorRect(int left, int top, int width, int height) override; void SetViewports(int count, Viewport *viewports) override; void SetBlendFactor(float color[4]) override; - void SetStencilRef(uint8_t ref) override { - stencilRef_ = ref; - } + void SetStencilParams(uint8_t refValue, uint8_t writeMask, uint8_t compareMask) override; void Draw(int vertexCount, int offset) override; void DrawIndexed(int vertexCount, int offset) override; @@ -734,8 +732,6 @@ DepthStencilState *D3D9Context::CreateDepthStencilState(const DepthStencilStateD ds->stencilPass = stencilOpToD3D9[(int)desc.stencil.passOp]; ds->stencilFail = stencilOpToD3D9[(int)desc.stencil.failOp]; ds->stencilZFail = stencilOpToD3D9[(int)desc.stencil.depthFailOp]; - ds->stencilWriteMask = desc.stencil.writeMask; - ds->stencilCompareMask = desc.stencil.compareMask; return ds; } diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 8302696922..5fa71a3445 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -175,10 +175,8 @@ public: GLuint stencilZFail; GLuint stencilPass; GLuint stencilCompareOp; - uint8_t stencilCompareMask; - uint8_t stencilWriteMask; - void Apply(GLRenderManager *render, uint8_t stencilRef) { + void Apply(GLRenderManager *render, uint8_t stencilRef, uint8_t stencilWriteMask, uint8_t stencilCompareMask) { render->SetDepth(depthTestEnabled, depthWriteEnabled, depthComp); render->SetStencilFunc(stencilEnabled, 
stencilCompareOp, stencilRef, stencilCompareMask); render->SetStencilOp(stencilWriteMask, stencilFail, stencilZFail, stencilPass); @@ -388,13 +386,21 @@ public: renderManager_.SetBlendFactor(color); } - void SetStencilRef(uint8_t ref) override { - stencilRef_ = ref; + void SetStencilParams(uint8_t refValue, uint8_t writeMask, uint8_t compareMask) override { + stencilRef_ = refValue; + stencilWriteMask_ = writeMask; + stencilCompareMask_ = compareMask; + // Do we need to update on the fly here? renderManager_.SetStencilFunc( curPipeline_->depthStencil->stencilEnabled, curPipeline_->depthStencil->stencilCompareOp, - ref, - curPipeline_->depthStencil->stencilCompareMask); + refValue, + compareMask); + renderManager_.SetStencilOp( + writeMask, + curPipeline_->depthStencil->stencilFail, + curPipeline_->depthStencil->stencilZFail, + curPipeline_->depthStencil->stencilPass); } void BindTextures(int start, int count, Texture **textures) override; @@ -491,6 +497,8 @@ private: AutoRef curRenderTarget_; uint8_t stencilRef_ = 0; + uint8_t stencilWriteMask_ = 0xFF; + uint8_t stencilCompareMask_ = 0xFF; // Frames in flight is not such a strict concept as with Vulkan until we start using glBufferStorage and fences. // But might as well have the structure ready, and can't hurt to rotate buffers. 
@@ -940,8 +948,6 @@ DepthStencilState *OpenGLContext::CreateDepthStencilState(const DepthStencilStat ds->stencilPass = stencilOpToGL[(int)desc.stencil.passOp]; ds->stencilFail = stencilOpToGL[(int)desc.stencil.failOp]; ds->stencilZFail = stencilOpToGL[(int)desc.stencil.depthFailOp]; - ds->stencilWriteMask = desc.stencil.writeMask; - ds->stencilCompareMask = desc.stencil.compareMask; return ds; } @@ -1185,7 +1191,7 @@ void OpenGLContext::BindPipeline(Pipeline *pipeline) { return; } curPipeline_->blend->Apply(&renderManager_); - curPipeline_->depthStencil->Apply(&renderManager_, stencilRef_); + curPipeline_->depthStencil->Apply(&renderManager_, stencilRef_, stencilWriteMask_, stencilCompareMask_); curPipeline_->raster->Apply(&renderManager_); renderManager_.BindProgram(curPipeline_->program_); } diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 8cb2e4e0d3..a53f394b9d 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -407,7 +407,7 @@ public: void SetScissorRect(int left, int top, int width, int height) override; void SetViewports(int count, Viewport *viewports) override; void SetBlendFactor(float color[4]) override; - void SetStencilRef(uint8_t stencilRef) override; + void SetStencilParams(uint8_t refValue, uint8_t writeMask, uint8_t compareMask) override; void BindSamplerStates(int start, int count, SamplerState **state) override; void BindTextures(int start, int count, Texture **textures) override; @@ -554,6 +554,8 @@ private: DeviceCaps caps_{}; uint8_t stencilRef_ = 0; + uint8_t stencilWriteMask_ = 0xFF; + uint8_t stencilCompareMask_ = 0xFF; }; static int GetBpp(VkFormat format) { @@ -1160,10 +1162,12 @@ void VKContext::SetBlendFactor(float color[4]) { renderManager_.SetBlendFactor(col); } -void VKContext::SetStencilRef(uint8_t stencilRef) { +void VKContext::SetStencilParams(uint8_t refValue, uint8_t writeMask, uint8_t compareMask) { if (curPipeline_->usesStencil) - 
renderManager_.SetStencilParams(curPipeline_->stencilWriteMask, curPipeline_->stencilTestMask, stencilRef); - stencilRef_ = stencilRef; + renderManager_.SetStencilParams(writeMask, compareMask, refValue); + stencilRef_ = refValue; + stencilWriteMask_ = writeMask; + stencilCompareMask_ = compareMask; } InputLayout *VKContext::CreateInputLayout(const InputLayoutDesc &desc) { @@ -1208,8 +1212,6 @@ Texture *VKContext::CreateTexture(const TextureDesc &desc) { } static inline void CopySide(VkStencilOpState &dest, const StencilSetup &src) { - dest.compareMask = src.compareMask; - dest.writeMask = src.writeMask; dest.compareOp = compToVK[(int)src.compareOp]; dest.failOp = stencilOpToVK[(int)src.failOp]; dest.passOp = stencilOpToVK[(int)src.passOp]; diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index 004d6dc3e2..6c547ed6a9 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -458,8 +458,6 @@ struct StencilSetup { StencilOp passOp; StencilOp depthFailOp; Comparison compareOp; - uint8_t compareMask; - uint8_t writeMask; }; struct DepthStencilStateDesc { @@ -651,7 +649,7 @@ public: virtual void SetScissorRect(int left, int top, int width, int height) = 0; virtual void SetViewports(int count, Viewport *viewports) = 0; virtual void SetBlendFactor(float color[4]) = 0; - virtual void SetStencilRef(uint8_t ref) = 0; + virtual void SetStencilParams(uint8_t refValue, uint8_t writeMask, uint8_t compareMask) = 0; virtual void BindSamplerStates(int start, int count, SamplerState **state) = 0; virtual void BindTextures(int start, int count, Texture **textures) = 0; diff --git a/UI/GPUDriverTestScreen.cpp b/UI/GPUDriverTestScreen.cpp index 811e7c02a4..0320a5a858 100644 --- a/UI/GPUDriverTestScreen.cpp +++ b/UI/GPUDriverTestScreen.cpp @@ -329,12 +329,10 @@ void GPUDriverTestScreen::DiscardTest() { dsDesc.depthWriteEnabled = true; dsDesc.depthCompare = Comparison::ALWAYS; dsDesc.stencilEnabled = true; - dsDesc.stencil.compareMask = 0xFF; dsDesc.stencil.compareOp = 
Comparison::ALWAYS; dsDesc.stencil.passOp = StencilOp::REPLACE; dsDesc.stencil.failOp = StencilOp::REPLACE; // These two shouldn't matter, because the test that fails is discard, not stencil. dsDesc.stencil.depthFailOp = StencilOp::REPLACE; - dsDesc.stencil.writeMask = 0xFF; DepthStencilState *depthStencilWrite = draw->CreateDepthStencilState(dsDesc); // Write only depth. @@ -355,7 +353,6 @@ void GPUDriverTestScreen::DiscardTest() { dsDesc.stencil.compareOp = Comparison::EQUAL; dsDesc.stencil.failOp = StencilOp::KEEP; dsDesc.stencil.depthFailOp = StencilOp::KEEP; - dsDesc.stencil.writeMask = 0x0; DepthStencilState *stencilEqualDepthAlways = draw->CreateDepthStencilState(dsDesc); dsDesc.depthTestEnabled = false; @@ -500,27 +497,28 @@ void GPUDriverTestScreen::DiscardTest() { dc.BeginPipeline(writePipelines[j], samplerNearest_); // Draw the rectangle with stencil value 0, depth 0.1f and the text with stencil 0xFF, depth 0.9. Then set 0xFF as the stencil value and draw the rectangles at depth 0.5. - draw->SetStencilRef(0x0); + + draw->SetStencilParams(0, 0xFF, 0xFF); dc.SetCurZ(0.1f); dc.FillRect(UI::Drawable(bgColorBAD), bounds); // test bounds dc.Flush(); - draw->SetStencilRef(0xff); + draw->SetStencilParams(0xff, 0xFF, 0xFF); dc.SetCurZ(0.9f); dc.DrawTextRect("TEST OK", bounds, textColorBAD, ALIGN_HCENTER | ALIGN_VCENTER | FLAG_DYNAMIC_ASCII); dc.Flush(); // Draw rectangle that should result in the text dc.BeginPipeline(testPipeline1[i], samplerNearest_); - draw->SetStencilRef(0xff); + draw->SetStencilParams(0xff, 0, 0xFF); dc.SetCurZ(0.5f); dc.FillRect(UI::Drawable(textColorOK), bounds); dc.Flush(); // Draw rectangle that should result in the bg dc.BeginPipeline(testPipeline2[i], samplerNearest_); - draw->SetStencilRef(0xff); + draw->SetStencilParams(0xff, 0, 0xFF); dc.SetCurZ(0.5f); dc.FillRect(UI::Drawable(bgColorOK), bounds); dc.Flush();