Merge pull request #8461 from unknownbrackets/gpu-depth

Swap depth range to use minz/maxz
This commit is contained in:
Henrik Rydgård 2016-01-20 10:19:51 +01:00
commit 4c8384aa09
9 changed files with 79 additions and 73 deletions

View file

@ -498,6 +498,26 @@ LogicOpReplaceType ReplaceLogicOpType() {
return LOGICOPTYPE_NORMAL;
}
// Divisor applied to 0..65535 depth values when converting to scaled depth.
// With a factor of 4, inputs 0 and 65535 land at 0.375 and 0.625, leaving
// (75% / 2) of the scaled range free on each side.
static const float depthSliceFactor = 4.0f;

// Converts a depth value on the 0..65535 scale to scaled depth.
// The input is a float because it may be fractional; it is not clamped, so
// values outside 0..65535 simply map outside the central slice.
static float ToScaledDepthFromInteger(float z) {
	const float invSliceFactor = 1.0f / depthSliceFactor;
	// Half of the range left over after slicing; centers the result around 0.5.
	const float centerOffset = 0.5f * (depthSliceFactor - 1.0f) * invSliceFactor;
	const float invMaxDepth = 1.0f / 65535.0f;
	return z * invSliceFactor * invMaxDepth + centerOffset;
}
// Converts a 16-bit depth value to scaled depth by widening it to int, then
// float, and passing it through ToScaledDepthFromInteger.
float ToScaledDepth(u16 z) {
	const int depthAsInt = static_cast<int>(z);
	return ToScaledDepthFromInteger(static_cast<float>(depthAsInt));
}
float FromScaledDepth(float z) {
const float offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);
return (z - offset) * depthSliceFactor * 65535.0f;
}
// Returns the file-local depthSliceFactor constant so that code outside this
// file can use the same factor as the depth conversion helpers.
float DepthSliceFactor() {
return depthSliceFactor;
}
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {
bool throughmode = gstate.isModeThrough();
@ -597,8 +617,6 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
float xOffset = 0.0f;
float hScale = 1.0f;
float yOffset = 0.0f;
float zScale = 1.0f;
float zOffset = 0.0f;
// If we're within the bounds, we want clipping the viewport way. So leave it be.
if (left < 0.0f || right > renderWidth) {
@ -632,29 +650,35 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
out.viewportW = right - left;
out.viewportH = bottom - top;
// The depth viewport parameters are the same, but we handle it a bit differently.
// When clipping is enabled, depth is clamped to [0, 65535]. And minz/maxz discard.
// So, we apply the depth range as minz/maxz, and transform for the viewport.
float vpZScale = gstate.getViewportZScale();
float vpZCenter = gstate.getViewportZCenter();
// Near/far can be inverted. We deal with that in the projection/scale.
float near = vpZCenter - fabsf(vpZScale);
float far = vpZCenter + fabsf(vpZScale);
float minz = gstate.getDepthRangeMin();
float maxz = gstate.getDepthRangeMax();
if (near < 0.0f || far > 65535.0f) {
float overageNear = std::max(-near, 0.0f);
float overageFar = std::max(far - 65535.0f, 0.0f);
float drift = overageFar - overageNear;
near += overageNear;
far -= overageFar;
zScale = (vpZScale * 2.0f) / (far - near);
zOffset = drift / (far - near);
} else if (vpZScale < 0.0f) {
// This flips to match our near/far.
zScale = -zScale;
if (gstate.isClippingEnabled() && (minz == 0 || maxz == 65535)) {
// Here, we should "clamp." But clamping per fragment would be slow.
// So, instead, we just increase the available range and hope.
// If depthSliceFactor is 4, it means (75% / 2) of the depth lies in each direction.
float fullDepthRange = 65535.0f * (depthSliceFactor - 1.0f) * (1.0f / 2.0f);
if (minz == 0) {
minz -= fullDepthRange;
}
if (maxz == 65535) {
maxz += fullDepthRange;
}
}
out.depthRangeMin = near * (1.0f / 65535.0f);
out.depthRangeMax = far * (1.0f / 65535.0f);
// Okay. So, in our shader, -1 will map to minz, and +1 will map to maxz.
float halfActualZRange = (maxz - minz) * (1.0f / 2.0f);
float zScale = vpZScale / halfActualZRange;
// This adjusts the center from halfActualZRange to vpZCenter.
float zOffset = (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;
out.depthRangeMin = ToScaledDepthFromInteger(minz);
out.depthRangeMax = ToScaledDepthFromInteger(maxz);
bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale;
bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset;
@ -669,25 +693,9 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
out.dirtyProj = true;
out.dirtyDepth = depthChanged;
}
#ifndef MOBILE_DEVICE
float minz = gstate.getDepthRangeMin();
float maxz = gstate.getDepthRangeMax();
if (minz > near || maxz < far) {
if ((gstate.clipEnable & 1) == 0) {
WARN_LOG_REPORT_ONCE(minmaxznoclip, G3D, "Unsupported depth range test without clipping - clip: %f-%f, test: %f-%f", near, far, minz, maxz);
} else {
WARN_LOG_REPORT_ONCE(minmaxz, G3D, "Unsupported depth range test - clip: %f-%f, test: %f-%f", near, far, minz, maxz);
}
}
#endif
}
}
// Scales a 16-bit depth value by 1/65535, so 0 maps to 0 and 65535 to about 1.
// NOTE(review): another ToScaledDepth definition appears earlier in this listing
// and goes through ToScaledDepthFromInteger. This one looks like the pre-change
// version; confirm which definition is meant to remain.
float ToScaledDepth(u16 z) {
	const float invMaxDepth = 1.0f / 65535.0f;
	return z * invMaxDepth;
}
static const BlendFactor genericALookup[11] = {
BlendFactor::DST_COLOR,
BlendFactor::ONE_MINUS_DST_COLOR,

View file

@ -67,6 +67,8 @@ struct ViewportAndScissor {
};
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
float ToScaledDepth(u16 z);
float FromScaledDepth(float z);
float DepthSliceFactor();
// These are common to all modern APIs and can be easily converted with a lookup table.
enum class BlendFactor : uint8_t {

View file

@ -205,6 +205,7 @@ static const CommandTableEntry commandTable[] = {
{GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_ViewportType},
{GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_ViewportType},
{GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_ViewportType},
{GE_CMD_CLIPENABLE, FLAG_FLUSHBEFOREONCHANGE},
// Region
{GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_Region},
@ -288,7 +289,6 @@ static const CommandTableEntry commandTable[] = {
{GE_CMD_LSC3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_Light3Param},
// Ignored commands
{GE_CMD_CLIPENABLE, 0},
{GE_CMD_TEXFLUSH, 0},
{GE_CMD_TEXLODSLOPE, 0},
{GE_CMD_TEXSYNC, 0},

View file

@ -480,23 +480,16 @@ void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) {
}
if (dirtyUniforms & DIRTY_DEPTHRANGE) {
float viewZScale = gstate.getViewportZScale();
float viewZCenter = gstate.getViewportZCenter();
float viewZInvScale;
// Depth is [0, 1] mapping to [minz, maxz], not too hard.
float vpZScale = gstate.getViewportZScale();
float vpZCenter = gstate.getViewportZCenter();
// We had to scale and translate Z to account for our clamped Z range.
// Therefore, we also need to reverse this to round properly.
//
// Example: scale = 65535.0, center = 0.0
// Resulting range = -65535 to 65535, clamped to [0, 65535]
// gstate_c.vpDepthScale = 2.0f
// gstate_c.vpZOffset = -1.0f
//
// The projection already accounts for those, so we need to reverse them.
//
// Additionally, D3D9 uses a range from [0, 1]. We double and move the center.
viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;
// These are just the reverse of the formulas in GPUStateUtils.
float halfActualZRange = vpZScale / gstate_c.vpDepthScale;
float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
float viewZScale = halfActualZRange * 2.0f;
float viewZCenter = minz;
float viewZInvScale;
if (viewZScale != 0.0) {
viewZInvScale = 1.0f / viewZScale;

View file

@ -676,8 +676,16 @@ bool GenerateFragmentShader(const ShaderID &id, char *buffer) {
#endif
if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
const double scale = DepthSliceFactor() * 65535.0;
WRITE(p, " highp float z = gl_FragCoord.z;\n");
WRITE(p, " z = (1.0/65535.0) * floor(z * 65535.0);\n");
// We center the depth with an offset, but only its fraction matters.
// When (DepthSliceFactor() - 1) is odd, it will be 0.5, otherwise 0.
if (((int)(DepthSliceFactor() - 1.0f) & 1) == 1) {
WRITE(p, " z = (floor((z * %f) - (1.0 / 2.0)) + (1.0 / 2.0)) * (1.0 / %f);\n", scale, scale);
} else {
WRITE(p, " z = floor(z * %f) * (1.0 / %f);\n", scale, scale);
}
WRITE(p, " gl_FragDepth = z;\n");
}

View file

@ -211,6 +211,7 @@ static const CommandTableEntry commandTable[] = {
{GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GLES_GPU::Execute_ViewportType},
{GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, DIRTY_DEPTHRANGE, &GLES_GPU::Execute_ViewportZType},
{GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, DIRTY_DEPTHRANGE, &GLES_GPU::Execute_ViewportZType},
{GE_CMD_CLIPENABLE, FLAG_FLUSHBEFOREONCHANGE},
// Region
{GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GLES_GPU::Execute_Region},
@ -294,7 +295,6 @@ static const CommandTableEntry commandTable[] = {
{GE_CMD_LSC3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, DIRTY_LIGHT3, &GLES_GPU::Execute_Light3Param},
// Ignored commands
{GE_CMD_CLIPENABLE, 0},
{GE_CMD_TEXFLUSH, 0},
{GE_CMD_TEXLODSLOPE, 0},
{GE_CMD_TEXSYNC, 0},

View file

@ -589,23 +589,16 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
SetMatrix4x3(u_texmtx, gstate.tgenMatrix);
}
if ((dirty & DIRTY_DEPTHRANGE) && u_depthRange != -1) {
float viewZScale = gstate.getViewportZScale();
float viewZCenter = gstate.getViewportZCenter();
float viewZInvScale;
// Since depth is [-1, 1] mapping to [minz, maxz], this is easyish.
float vpZScale = gstate.getViewportZScale();
float vpZCenter = gstate.getViewportZCenter();
// We had to scale and translate Z to account for our clamped Z range.
// Therefore, we also need to reverse this to round properly.
//
// Example: scale = 65535.0, center = 0.0
// Resulting range = -65535 to 65535, clamped to [0, 65535]
// gstate_c.vpDepthScale = 2.0f
// gstate_c.vpZOffset = -1.0f
//
// The projection already accounts for those, so we need to reverse them.
//
// Additionally, OpenGL uses a range from [-1, 1]. So we multiply by scale and add the center.
viewZScale *= 1.0f / gstate_c.vpDepthScale;
viewZCenter -= 65535.0f * gstate_c.vpZOffset;
// These are just the reverse of the formulas in GPUStateUtils.
float halfActualZRange = vpZScale / gstate_c.vpDepthScale;
float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
float viewZScale = halfActualZRange;
float viewZCenter = minz + halfActualZRange;
float viewZInvScale;
if (viewZScale != 0.0) {
viewZInvScale = 1.0f / viewZScale;

View file

@ -375,6 +375,7 @@ struct GPUgstate {
int getRegionY2() const { return (region2 >> 10) & 0x3FF; }
// Note that the X1/Y1/Z1 here does not mean the upper-left corner, but half the dimensions. X2/Y2/Z2 are the center.
// True when bit 0 of clipEnable is set.
bool isClippingEnabled() const { return (clipEnable & 1) != 0; }
float getViewportXScale() const { return getFloat24(viewportxscale); }
float getViewportYScale() const { return getFloat24(viewportyscale); }
float getViewportZScale() const { return getFloat24(viewportzscale); }

View file

@ -33,6 +33,7 @@
#include "Windows/main.h"
#include "GPU/GPUInterface.h"
#include "GPU/Common/GPUDebugInterface.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/GPUState.h"
#include "GPU/Debugger/Breakpoints.h"
#include "GPU/Debugger/Stepping.h"
@ -398,7 +399,7 @@ void CGEDebugger::DescribePixel(u32 pix, GPUDebugBufferFormat fmt, int x, int y,
case GPU_DBG_FORMAT_24BIT_8X:
// These are only ever going to be depth values, so let's also show scaled to 16 bit.
_snwprintf(desc, 256, L"%d,%d: %d / %f / %f", x, y, pix & 0x00FFFFFF, (pix & 0x00FFFFFF) * (1.0f / 16777215.0f), (pix & 0x00FFFFFF) * (65535.0f / 16777215.0f));
_snwprintf(desc, 256, L"%d,%d: %d / %f / %f", x, y, pix & 0x00FFFFFF, (pix & 0x00FFFFFF) * (1.0f / 16777215.0f), FromScaledDepth((pix & 0x00FFFFFF) * (1.0f / 16777215.0f)));
break;
case GPU_DBG_FORMAT_24X_8BIT:
@ -406,7 +407,7 @@ void CGEDebugger::DescribePixel(u32 pix, GPUDebugBufferFormat fmt, int x, int y,
break;
case GPU_DBG_FORMAT_FLOAT:
_snwprintf(desc, 256, L"%d,%d: %f / %f", x, y, *(float *)&pix, *(float *)&pix * 65535.0f);
_snwprintf(desc, 256, L"%d,%d: %f / %f", x, y, *(float *)&pix, FromScaledDepth(*(float *)&pix));
break;
default: