From 01a1438dcecd302def73b6181313027dc8782df2 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 8 Sep 2014 23:10:23 +0200 Subject: [PATCH] Improve depth buffering in D3D by computing a depth-flipped proj matrix but only when needed. --- GPU/Directx9/ShaderManagerDX9.cpp | 24 +++++++++++------------ GPU/Directx9/StateMappingDX9.cpp | 10 +++++++--- GPU/Directx9/VertexShaderGeneratorDX9.cpp | 2 +- GPU/GPUState.cpp | 7 ++++++- GPU/GPUState.h | 1 + 5 files changed, 27 insertions(+), 17 deletions(-) diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index ea087efbc1..23ec98e073 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -255,19 +255,13 @@ void LinkedShaderDX9::SetMatrix(D3DXHANDLE uniform, const float* pMatrix) { m_vs->constant->SetMatrix(pD3Ddevice, uniform, pDxMat); } -// Depth in ogl is between -1;1 we need between 0;1 -// Pretty sure this is wrong, our Z buffer is screwed up anyhow.. -void ConvertProjMatrixToD3D(Matrix4x4 & in) { - /* - in.zz *= 0.5f; - in.wz += 1.f; - */ +// Depth in ogl is between -1;1 we need between 0;1 and optionally reverse it +void ConvertProjMatrixToD3D(Matrix4x4 & in, bool invert) { Matrix4x4 s; Matrix4x4 t; - s.setScaling(Vec3(1, 1, 0.5f)); + s.setScaling(Vec3(1, 1, invert ? -0.5 : 0.5f)); t.setTranslation(Vec3(0, 0, 0.5f)); - in = in * s; - in = in * t; + in = in * s * t; } void LinkedShaderDX9::use() { @@ -297,8 +291,14 @@ void LinkedShaderDX9::updateUniforms() { flippedMatrix[0] = -flippedMatrix[0]; flippedMatrix[12] = -flippedMatrix[12]; } + bool invert = false; + if (gstate_c.vpDepth < 0) { + invert = true; + //flippedMatrix[9] = -flippedMatrix[9]; + //flippedMatrix[14] = -flippedMatrix[14]; + } // Convert matrices ! - ConvertProjMatrixToD3D(flippedMatrix); + ConvertProjMatrixToD3D(flippedMatrix, invert); SetMatrix(u_proj, flippedMatrix.getReadPtr()); } @@ -308,7 +308,7 @@ void LinkedShaderDX9::updateUniforms() { proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1); // Convert matrices ! - ConvertProjMatrixToD3D(proj_through); + ConvertProjMatrixToD3D(proj_through, false); SetMatrix(u_proj_through, proj_through.getReadPtr()); } diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index aae6261624..4f72e746ce 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ b/GPU/Directx9/StateMappingDX9.cpp @@ -381,7 +381,6 @@ void TransformDrawEngineDX9::ApplyDrawState(int prim) { float vpY0 = vpYb - offsetY + vpYa; // Need to account for sign of Y gstate_c.vpWidth = vpXa * 2.0f; gstate_c.vpHeight = -vpYa * 2.0f; - float vpWidth = fabsf(gstate_c.vpWidth); float vpHeight = fabsf(gstate_c.vpHeight); @@ -399,8 +398,13 @@ void TransformDrawEngineDX9::ApplyDrawState(int prim) { float zScale = getFloat24(gstate.viewportz1) / 65535.0f; float zOff = getFloat24(gstate.viewportz2) / 65535.0f; - float depthRangeMin = zOff - zScale; - float depthRangeMax = zOff + zScale; + + float depthRangeMin = zOff - fabsf(zScale); + float depthRangeMax = zOff + fabsf(zScale); + + gstate_c.vpDepth = zScale * 2; + + // D3D does not like viewports outside the screen. Let's clamp for now. dxstate.viewport.set(vpX0 + renderX, vpY0 + renderY, vpWidth, vpHeight, depthRangeMin, depthRangeMax); } diff --git a/GPU/Directx9/VertexShaderGeneratorDX9.cpp b/GPU/Directx9/VertexShaderGeneratorDX9.cpp index dca22cd963..cc4d189854 100644 --- a/GPU/Directx9/VertexShaderGeneratorDX9.cpp +++ b/GPU/Directx9/VertexShaderGeneratorDX9.cpp @@ -593,7 +593,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) { } - WRITE(p, "Out.gl_Position.z = (Out.gl_Position.z + Out.gl_Position.w) * 0.5f;"); + // WRITE(p, "Out.gl_Position.z = (Out.gl_Position.z + Out.gl_Position.w) * 0.5f;"); WRITE(p, " return Out; "); WRITE(p, "}\n"); } diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index 9ed6c211cb..8a6422bea7 100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -266,7 +266,7 @@ struct GPUStateCache_v0 }; void GPUStateCache::DoState(PointerWrap &p) { - auto s = p.Section("GPUStateCache", 0, 3); + auto s = p.Section("GPUStateCache", 0, 4); if (!s) { // Old state, this was not versioned. GPUStateCache_v0 old; @@ -328,6 +328,11 @@ void GPUStateCache::DoState(PointerWrap &p) { p.Do(vpWidth); p.Do(vpHeight); + if (s >= 4) { + p.Do(vpDepth); + } else { + vpDepth = 1.0f; // any positive value should be fine + } p.Do(curRTWidth); p.Do(curRTHeight); diff --git a/GPU/GPUState.h b/GPU/GPUState.h index cc7b0e5424..178fe11d07 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -475,6 +475,7 @@ struct GPUStateCache float vpWidth; float vpHeight; + float vpDepth; u32 curRTWidth; u32 curRTHeight;