Merge pull request #8461 from unknownbrackets/gpu-depth

Swap depth range to use minz/maxz
2025-04-02 11:01:50 -04:00 · 2016-01-20 10:19:51 +01:00 · 2016-01-20 10:19:51 +01:00 · 4c8384aa09
commit 4c8384aa09
parent 6ad9af413e e00c9940e8
9 changed files with 79 additions and 73 deletions
--- a/GPU/Common/GPUStateUtils.cpp
+++ b/GPU/Common/GPUStateUtils.cpp
@ -498,6 +498,26 @@ LogicOpReplaceType ReplaceLogicOpType() {
 	return LOGICOPTYPE_NORMAL;
 }

+static const float depthSliceFactor = 4.0f;  // 0-65535 depth is mapped into 1/depthSliceFactor of the [0, 1] range.
+
+// Maps a depth on the 0-65535 integer scale (the float may be fractional) into a centered slice of [0, 1] that is 1/depthSliceFactor wide.
+static float ToScaledDepthFromInteger(float z) {
+	const float offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);  // Half of the unused range sits below the slice, centering it.
+	return z * (1.0f / depthSliceFactor) * (1.0f / 65535.0f) + offset;
+}
+
+float ToScaledDepth(u16 z) {  // 16-bit depth value -> scaled depth, via ToScaledDepthFromInteger().
+	return ToScaledDepthFromInteger((float)(int)z);
+}
+
+float FromScaledDepth(float z) {  // Inverse of ToScaledDepthFromInteger(): scaled depth -> 0-65535 scale (not rounded or clamped).
+	const float offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);  // Same centering offset as the forward mapping.
+	return (z - offset) * depthSliceFactor * 65535.0f;
+}
+
+float DepthSliceFactor() {  // Accessor for the file-local depthSliceFactor, for callers outside this file.
+	return depthSliceFactor;
+}

 void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {
 	bool throughmode = gstate.isModeThrough();
@ -597,8 +617,6 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
 		float xOffset = 0.0f;
 		float hScale = 1.0f;
 		float yOffset = 0.0f;
-		float zScale = 1.0f;
-		float zOffset = 0.0f;

 		// If we're within the bounds, we want clipping the viewport way.  So leave it be.
 		if (left < 0.0f || right > renderWidth) {
@ -632,29 +650,35 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
 		out.viewportW = right - left;
 		out.viewportH = bottom - top;

+		// The depth viewport parameters are the same, but we handle it a bit differently.
+		// When clipping is enabled, depth is clamped to [0, 65535].  And minz/maxz discard.
+		// So, we apply the depth range as minz/maxz, and transform for the viewport.
 		float vpZScale = gstate.getViewportZScale();
 		float vpZCenter = gstate.getViewportZCenter();
-		// Near/far can be inverted.  We deal with that in the projection/scale.
-		float near = vpZCenter - fabsf(vpZScale);
-		float far = vpZCenter + fabsf(vpZScale);
+		float minz = gstate.getDepthRangeMin();
+		float maxz = gstate.getDepthRangeMax();

-		if (near < 0.0f || far > 65535.0f) {
-			float overageNear = std::max(-near, 0.0f);
-			float overageFar = std::max(far - 65535.0f, 0.0f);
-			float drift = overageFar - overageNear;
-
-			near += overageNear;
-			far -= overageFar;
-
-			zScale = (vpZScale * 2.0f) / (far - near);
-			zOffset = drift / (far - near);
-		} else if (vpZScale < 0.0f) {
-			// This flips to match our near/far.
-			zScale = -zScale;
+		if (gstate.isClippingEnabled() && (minz == 0 || maxz == 65535)) {
+			// Here, we should "clamp."  But clamping per fragment would be slow.
+			// So, instead, we just increase the available range and hope.
+			// If depthSliceFactor is 4, it means (75% / 2) of the depth lies in each direction.
+			float fullDepthRange = 65535.0f * (depthSliceFactor - 1.0f) * (1.0f / 2.0f);
+			if (minz == 0) {
+				minz -= fullDepthRange;
+			}
+			if (maxz == 65535) {
+				maxz += fullDepthRange;
+			}
 		}

-		out.depthRangeMin = near * (1.0f / 65535.0f);
-		out.depthRangeMax = far * (1.0f / 65535.0f);
+		// Okay.  So, in our shader, -1 will map to minz, and +1 will map to maxz.
+		float halfActualZRange = (maxz - minz) * (1.0f / 2.0f);
+		float zScale = vpZScale / halfActualZRange;
+		// This adjusts the center from (minz + halfActualZRange) to vpZCenter.  NOTE(review): divides by zero if minz == maxz here.
+		float zOffset = (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;
+
+		out.depthRangeMin = ToScaledDepthFromInteger(minz);
+		out.depthRangeMax = ToScaledDepthFromInteger(maxz);

 		bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale;
 		bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset;
@ -669,25 +693,9 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
 			out.dirtyProj = true;
 			out.dirtyDepth = depthChanged;
 		}
-
-#ifndef MOBILE_DEVICE
-		float minz = gstate.getDepthRangeMin();
-		float maxz = gstate.getDepthRangeMax();
-		if (minz > near || maxz < far) {
-			if ((gstate.clipEnable & 1) == 0) {
-				WARN_LOG_REPORT_ONCE(minmaxznoclip, G3D, "Unsupported depth range test without clipping - clip: %f-%f, test: %f-%f", near, far, minz, maxz);
-			} else {
-				WARN_LOG_REPORT_ONCE(minmaxz, G3D, "Unsupported depth range test - clip: %f-%f, test: %f-%f", near, far, minz, maxz);
-			}
-		}
-#endif
 	}
 }

-float ToScaledDepth(u16 z) {
-	return z * (1.0f / 65535.0f);
-}
-
 static const BlendFactor genericALookup[11] = {
 	BlendFactor::DST_COLOR,
 	BlendFactor::ONE_MINUS_DST_COLOR,
--- a/GPU/Common/GPUStateUtils.h
+++ b/GPU/Common/GPUStateUtils.h
@ -67,6 +67,8 @@ struct ViewportAndScissor {
 };
 void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
 float ToScaledDepth(u16 z);
+float FromScaledDepth(float z);
+float DepthSliceFactor();

 // These are common to all modern APIs and can be easily converted with a lookup table.
 enum class BlendFactor : uint8_t {
--- a/GPU/Directx9/GPU_DX9.cpp
+++ b/GPU/Directx9/GPU_DX9.cpp
@ -205,6 +205,7 @@ static const CommandTableEntry commandTable[] = {
 	{GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_ViewportType},
 	{GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_ViewportType},
 	{GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_ViewportType},
+	{GE_CMD_CLIPENABLE, FLAG_FLUSHBEFOREONCHANGE},

 	// Region
 	{GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_Region},
@ -288,7 +289,6 @@ static const CommandTableEntry commandTable[] = {
 	{GE_CMD_LSC3,	FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_Light3Param},

 	// Ignored commands
-	{GE_CMD_CLIPENABLE, 0},
 	{GE_CMD_TEXFLUSH, 0},
 	{GE_CMD_TEXLODSLOPE, 0},
 	{GE_CMD_TEXSYNC, 0},
--- a/GPU/Directx9/ShaderManagerDX9.cpp
+++ b/GPU/Directx9/ShaderManagerDX9.cpp
@ -480,23 +480,16 @@ void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) {
 	}

 	if (dirtyUniforms & DIRTY_DEPTHRANGE)	{
-		float viewZScale = gstate.getViewportZScale();
-		float viewZCenter = gstate.getViewportZCenter();
-		float viewZInvScale;
+		// Depth is [0, 1] mapping to [minz, maxz], not too hard.
+		float vpZScale = gstate.getViewportZScale();
+		float vpZCenter = gstate.getViewportZCenter();

-		// We had to scale and translate Z to account for our clamped Z range.
-		// Therefore, we also need to reverse this to round properly.
-		//
-		// Example: scale = 65535.0, center = 0.0
-		// Resulting range = -65535 to 65535, clamped to [0, 65535]
-		// gstate_c.vpDepthScale = 2.0f
-		// gstate_c.vpZOffset = -1.0f
-		//
-		// The projection already accounts for those, so we need to reverse them.
-		//
-		// Additionally, D3D9 uses a range from [0, 1].  We double and move the center.
-		viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
-		viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;
+		// These are just the reverse of the formulas in GPUStateUtils.
+		float halfActualZRange = vpZScale / gstate_c.vpDepthScale;
+		float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
+		float viewZScale = halfActualZRange * 2.0f;
+		float viewZCenter = minz;
+		float viewZInvScale;

 		if (viewZScale != 0.0) {
 			viewZInvScale = 1.0f / viewZScale;
--- a/GPU/GLES/FragmentShaderGenerator.cpp
+++ b/GPU/GLES/FragmentShaderGenerator.cpp
@ -676,8 +676,16 @@ bool GenerateFragmentShader(const ShaderID &id, char *buffer) {
 #endif

 	if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
+		const double scale = DepthSliceFactor() * 65535.0;
+
 		WRITE(p, "  highp float z = gl_FragCoord.z;\n");
-		WRITE(p, "  z = (1.0/65535.0) * floor(z * 65535.0);\n");
+		// We center the depth with an offset, but only its fraction matters.
+		// When (DepthSliceFactor() - 1) is odd, that fraction is 0.5, otherwise it is 0.
+		if (((int)(DepthSliceFactor() - 1.0f) & 1) == 1) {
+			WRITE(p, "  z = (floor((z * %f) - (1.0 / 2.0)) + (1.0 / 2.0)) * (1.0 / %f);\n", scale, scale);
+		} else {
+			WRITE(p, "  z = floor(z * %f) * (1.0 / %f);\n", scale, scale);
+		}
 		WRITE(p, "  gl_FragDepth = z;\n");
 	}

--- a/GPU/GLES/GLES_GPU.cpp
+++ b/GPU/GLES/GLES_GPU.cpp
@ -211,6 +211,7 @@ static const CommandTableEntry commandTable[] = {
 	{GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GLES_GPU::Execute_ViewportType},
 	{GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, DIRTY_DEPTHRANGE, &GLES_GPU::Execute_ViewportZType},
 	{GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, DIRTY_DEPTHRANGE, &GLES_GPU::Execute_ViewportZType},
+	{GE_CMD_CLIPENABLE, FLAG_FLUSHBEFOREONCHANGE},

 	// Region
 	{GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GLES_GPU::Execute_Region},
@ -294,7 +295,6 @@ static const CommandTableEntry commandTable[] = {
 	{GE_CMD_LSC3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, DIRTY_LIGHT3, &GLES_GPU::Execute_Light3Param},

 	// Ignored commands
-	{GE_CMD_CLIPENABLE, 0},
 	{GE_CMD_TEXFLUSH, 0},
 	{GE_CMD_TEXLODSLOPE, 0},
 	{GE_CMD_TEXSYNC, 0},
--- a/GPU/GLES/ShaderManager.cpp
+++ b/GPU/GLES/ShaderManager.cpp
@ -589,23 +589,16 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
 		SetMatrix4x3(u_texmtx, gstate.tgenMatrix);
 	}
 	if ((dirty & DIRTY_DEPTHRANGE) && u_depthRange != -1) {
-		float viewZScale = gstate.getViewportZScale();
-		float viewZCenter = gstate.getViewportZCenter();
-		float viewZInvScale;
+		// Since depth is [-1, 1] mapping to [minz, maxz], this is easyish.
+		float vpZScale = gstate.getViewportZScale();
+		float vpZCenter = gstate.getViewportZCenter();

-		// We had to scale and translate Z to account for our clamped Z range.
-		// Therefore, we also need to reverse this to round properly.
-		//
-		// Example: scale = 65535.0, center = 0.0
-		// Resulting range = -65535 to 65535, clamped to [0, 65535]
-		// gstate_c.vpDepthScale = 2.0f
-		// gstate_c.vpZOffset = -1.0f
-		//
-		// The projection already accounts for those, so we need to reverse them.
-		//
-		// Additionally, OpenGL uses a range from [-1, 1].  So we multiply by scale and add the center.
-		viewZScale *= 1.0f / gstate_c.vpDepthScale;
-		viewZCenter -= 65535.0f * gstate_c.vpZOffset;
+		// These are just the reverse of the formulas in GPUStateUtils.
+		float halfActualZRange = vpZScale / gstate_c.vpDepthScale;
+		float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
+		float viewZScale = halfActualZRange;
+		float viewZCenter = minz + halfActualZRange;
+		float viewZInvScale;

 		if (viewZScale != 0.0) {
 			viewZInvScale = 1.0f / viewZScale;
--- a/GPU/GPUState.h
+++ b/GPU/GPUState.h
@ -375,6 +375,7 @@ struct GPUgstate {
 	int getRegionY2() const { return (region2 >> 10) & 0x3FF; }

 	// Note that the X1/Y1/Z1 here does not mean the upper-left corner, but half the dimensions. X2/Y2/Z2 are the center.
+	bool isClippingEnabled() const { return clipEnable & 1; }  // Bit 0 of clipEnable; the X1/Y1/Z1 note above is about the viewport getters below.
 	float getViewportXScale() const { return getFloat24(viewportxscale); }
 	float getViewportYScale() const { return getFloat24(viewportyscale); }
 	float getViewportZScale() const { return getFloat24(viewportzscale); }
--- a/Windows/GEDebugger/GEDebugger.cpp
+++ b/Windows/GEDebugger/GEDebugger.cpp
@ -33,6 +33,7 @@
 #include "Windows/main.h"
 #include "GPU/GPUInterface.h"
 #include "GPU/Common/GPUDebugInterface.h"
+#include "GPU/Common/GPUStateUtils.h"
 #include "GPU/GPUState.h"
 #include "GPU/Debugger/Breakpoints.h"
 #include "GPU/Debugger/Stepping.h"
@ -398,7 +399,7 @@ void CGEDebugger::DescribePixel(u32 pix, GPUDebugBufferFormat fmt, int x, int y,

 	case GPU_DBG_FORMAT_24BIT_8X:
 		// These are only ever going to be depth values, so let's also show scaled to 16 bit.
-		_snwprintf(desc, 256, L"%d,%d: %d / %f / %f", x, y, pix & 0x00FFFFFF, (pix & 0x00FFFFFF) * (1.0f / 16777215.0f), (pix & 0x00FFFFFF) * (65535.0f / 16777215.0f));
+		_snwprintf(desc, 256, L"%d,%d: %d / %f / %f", x, y, pix & 0x00FFFFFF, (pix & 0x00FFFFFF) * (1.0f / 16777215.0f), FromScaledDepth((pix & 0x00FFFFFF) * (1.0f / 16777215.0f)));
 		break;

 	case GPU_DBG_FORMAT_24X_8BIT:
@ -406,7 +407,7 @@ void CGEDebugger::DescribePixel(u32 pix, GPUDebugBufferFormat fmt, int x, int y,
 		break;

 	case GPU_DBG_FORMAT_FLOAT:
-		_snwprintf(desc, 256, L"%d,%d: %f / %f", x, y, *(float *)&pix, *(float *)&pix * 65535.0f);
+		_snwprintf(desc, 256, L"%d,%d: %f / %f", x, y, *(float *)&pix, FromScaledDepth(*(float *)&pix));
 		break;

 	default: