Mirror of https://github.com/hrydgard/ppsspp.git
Merge pull request #8461 from unknownbrackets/gpu-depth
Swap depth range to use minz/maxz
Commit 4c8384aa09
9 changed files with 79 additions and 73 deletions
@@ -498,6 +498,26 @@ LogicOpReplaceType ReplaceLogicOpType() {
	return LOGICOPTYPE_NORMAL;
}

static const float depthSliceFactor = 4.0f;

// This is used for float values which might not be integers, but are in the integer scale of 65535.
static float ToScaledDepthFromInteger(float z) {
	const float offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);
	return z * (1.0f / depthSliceFactor) * (1.0f / 65535.0f) + offset;
}

float ToScaledDepth(u16 z) {
	return ToScaledDepthFromInteger((float)(int)z);
}

float FromScaledDepth(float z) {
	const float offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);
	return (z - offset) * depthSliceFactor * 65535.0f;
}

float DepthSliceFactor() {
	return depthSliceFactor;
}

void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {
	bool throughmode = gstate.isModeThrough();
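A quick standalone sketch of the new helpers above (not part of the diff; it only assumes the depthSliceFactor = 4.0f constant introduced here): the full 16-bit range [0, 65535] maps into the middle quarter [0.375, 0.625] of the output depth, leaving headroom on both sides, and FromScaledDepth undoes the mapping.

// Standalone round-trip check, assuming depthSliceFactor = 4.0f as in this commit.
#include <cstdio>

static const float depthSliceFactor = 4.0f;

static float ToScaledDepthFromInteger(float z) {
	const float offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);  // 0.375
	return z * (1.0f / depthSliceFactor) * (1.0f / 65535.0f) + offset;
}

static float FromScaledDepth(float z) {
	const float offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);
	return (z - offset) * depthSliceFactor * 65535.0f;
}

int main() {
	printf("%f %f\n", ToScaledDepthFromInteger(0.0f), ToScaledDepthFromInteger(65535.0f));  // 0.375 0.625
	printf("%f\n", FromScaledDepth(ToScaledDepthFromInteger(40000.0f)));                    // ~40000
	return 0;
}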
@@ -597,8 +617,6 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
	float xOffset = 0.0f;
	float hScale = 1.0f;
	float yOffset = 0.0f;
	float zScale = 1.0f;
	float zOffset = 0.0f;

	// If we're within the bounds, we want clipping the viewport way. So leave it be.
	if (left < 0.0f || right > renderWidth) {
@ -632,29 +650,35 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
|
|||
out.viewportW = right - left;
|
||||
out.viewportH = bottom - top;
|
||||
|
||||
// The depth viewport parameters are the same, but we handle it a bit differently.
|
||||
// When clipping is enabled, depth is clamped to [0, 65535]. And minz/maxz discard.
|
||||
// So, we apply the depth range as minz/maxz, and transform for the viewport.
|
||||
float vpZScale = gstate.getViewportZScale();
|
||||
float vpZCenter = gstate.getViewportZCenter();
|
||||
// Near/far can be inverted. We deal with that in the projection/scale.
|
||||
float near = vpZCenter - fabsf(vpZScale);
|
||||
float far = vpZCenter + fabsf(vpZScale);
|
||||
float minz = gstate.getDepthRangeMin();
|
||||
float maxz = gstate.getDepthRangeMax();
|
||||
|
||||
if (near < 0.0f || far > 65535.0f) {
|
||||
float overageNear = std::max(-near, 0.0f);
|
||||
float overageFar = std::max(far - 65535.0f, 0.0f);
|
||||
float drift = overageFar - overageNear;
|
||||
|
||||
near += overageNear;
|
||||
far -= overageFar;
|
||||
|
||||
zScale = (vpZScale * 2.0f) / (far - near);
|
||||
zOffset = drift / (far - near);
|
||||
} else if (vpZScale < 0.0f) {
|
||||
// This flips to match our near/far.
|
||||
zScale = -zScale;
|
||||
if (gstate.isClippingEnabled() && (minz == 0 || maxz == 65535)) {
|
||||
// Here, we should "clamp." But clamping per fragment would be slow.
|
||||
// So, instead, we just increase the available range and hope.
|
||||
// If depthSliceFactor is 4, it means (75% / 2) of the depth lies in each direction.
|
||||
float fullDepthRange = 65535.0f * (depthSliceFactor - 1.0f) * (1.0f / 2.0f);
|
||||
if (minz == 0) {
|
||||
minz -= fullDepthRange;
|
||||
}
|
||||
if (maxz == 65535) {
|
||||
maxz += fullDepthRange;
|
||||
}
|
||||
}
|
||||
|
||||
out.depthRangeMin = near * (1.0f / 65535.0f);
|
||||
out.depthRangeMax = far * (1.0f / 65535.0f);
|
||||
// Okay. So, in our shader, -1 will map to minz, and +1 will map to maxz.
|
||||
float halfActualZRange = (maxz - minz) * (1.0f / 2.0f);
|
||||
float zScale = vpZScale / halfActualZRange;
|
||||
// This adjusts the center from halfActualZRange to vpZCenter.
|
||||
float zOffset = (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;
|
||||
|
||||
out.depthRangeMin = ToScaledDepthFromInteger(minz);
|
||||
out.depthRangeMax = ToScaledDepthFromInteger(maxz);
|
||||
|
||||
bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale;
|
||||
bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset;
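To make the new mapping concrete, here is a small sketch with made-up values (not from the commit): a full-range viewport (scale 32767.5, center 32767.5) and a game-set depth range of [0, 40000]. The zScale/zOffset computed above adjust the projection so that minz lands on -1 and maxz on +1, matching out.depthRangeMin/Max.

// Sketch of the new depth transform with hypothetical values.
#include <cstdio>
int main() {
	const float vpZScale = 32767.5f, vpZCenter = 32767.5f, minz = 0.0f, maxz = 40000.0f;
	const float halfActualZRange = (maxz - minz) * 0.5f;                               // 20000
	const float zScale = vpZScale / halfActualZRange;                                  // ~1.638
	const float zOffset = (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;  // ~0.638
	// A vertex the game places at PSP depth maxz should come out at +1 (== out.depthRangeMax):
	const float zIn = (maxz - vpZCenter) / vpZScale;  // what the unmodified projection would give
	printf("z_out = %f\n", zIn * zScale + zOffset);   // ~1.0
	return 0;
}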
@@ -669,25 +693,9 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
		out.dirtyProj = true;
		out.dirtyDepth = depthChanged;
	}

#ifndef MOBILE_DEVICE
	float minz = gstate.getDepthRangeMin();
	float maxz = gstate.getDepthRangeMax();
	if (minz > near || maxz < far) {
		if ((gstate.clipEnable & 1) == 0) {
			WARN_LOG_REPORT_ONCE(minmaxznoclip, G3D, "Unsupported depth range test without clipping - clip: %f-%f, test: %f-%f", near, far, minz, maxz);
		} else {
			WARN_LOG_REPORT_ONCE(minmaxz, G3D, "Unsupported depth range test - clip: %f-%f, test: %f-%f", near, far, minz, maxz);
		}
	}
#endif
}
}

float ToScaledDepth(u16 z) {
	return z * (1.0f / 65535.0f);
}

static const BlendFactor genericALookup[11] = {
	BlendFactor::DST_COLOR,
	BlendFactor::ONE_MINUS_DST_COLOR,
@@ -67,6 +67,8 @@ struct ViewportAndScissor {
};
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
float ToScaledDepth(u16 z);
float FromScaledDepth(float z);
float DepthSliceFactor();

// These are common to all modern APIs and can be easily converted with a lookup table.
enum class BlendFactor : uint8_t {
@@ -205,6 +205,7 @@ static const CommandTableEntry commandTable[] = {
	{GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_ViewportType},
	{GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_ViewportType},
	{GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_ViewportType},
	{GE_CMD_CLIPENABLE, FLAG_FLUSHBEFOREONCHANGE},

	// Region
	{GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_Region},

@@ -288,7 +289,6 @@ static const CommandTableEntry commandTable[] = {
	{GE_CMD_LSC3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &DIRECTX9_GPU::Execute_Light3Param},

	// Ignored commands
	{GE_CMD_CLIPENABLE, 0},
	{GE_CMD_TEXFLUSH, 0},
	{GE_CMD_TEXLODSLOPE, 0},
	{GE_CMD_TEXSYNC, 0},
@@ -480,23 +480,16 @@ void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) {
	}

	if (dirtyUniforms & DIRTY_DEPTHRANGE) {
		float viewZScale = gstate.getViewportZScale();
		float viewZCenter = gstate.getViewportZCenter();
		float viewZInvScale;
		// Depth is [0, 1] mapping to [minz, maxz], not too hard.
		float vpZScale = gstate.getViewportZScale();
		float vpZCenter = gstate.getViewportZCenter();

		// We had to scale and translate Z to account for our clamped Z range.
		// Therefore, we also need to reverse this to round properly.
		//
		// Example: scale = 65535.0, center = 0.0
		// Resulting range = -65535 to 65535, clamped to [0, 65535]
		// gstate_c.vpDepthScale = 2.0f
		// gstate_c.vpZOffset = -1.0f
		//
		// The projection already accounts for those, so we need to reverse them.
		//
		// Additionally, D3D9 uses a range from [0, 1]. We double and move the center.
		viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
		viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;
		// These are just the reverse of the formulas in GPUStateUtils.
		float halfActualZRange = vpZScale / gstate_c.vpDepthScale;
		float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
		float viewZScale = halfActualZRange * 2.0f;
		float viewZCenter = minz;
		float viewZInvScale;

		if (viewZScale != 0.0) {
			viewZInvScale = 1.0f / viewZScale;
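As a sanity check on the reversal (a sketch, not from the commit; it reuses the hypothetical numbers from the earlier example, and assumes gstate_c.vpDepthScale / gstate_c.vpZOffset hold the zScale / zOffset computed in ConvertViewportAndScissor): inverting those formulas recovers minz and the actual range, which is what the D3D9 [0, 1] depth uniform needs.

// Reverse round-trip sketch with hypothetical values.
#include <cstdio>
int main() {
	const float vpZScale = 32767.5f, vpZCenter = 32767.5f;
	const float vpDepthScale = 1.638375f;  // the zScale ConvertViewportAndScissor produced
	const float vpZOffset = 0.638375f;     // the zOffset it produced
	const float halfActualZRange = vpZScale / vpDepthScale;                               // 20000
	const float minz = -((vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;  // 0
	// D3D9 path: full range and minz.
	printf("viewZScale=%f viewZCenter=%f\n", halfActualZRange * 2.0f, minz);  // 40000, 0
	return 0;
}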
@@ -676,8 +676,16 @@ bool GenerateFragmentShader(const ShaderID &id, char *buffer) {
#endif

	if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
		const double scale = DepthSliceFactor() * 65535.0;

		WRITE(p, " highp float z = gl_FragCoord.z;\n");
		WRITE(p, " z = (1.0/65535.0) * floor(z * 65535.0);\n");
		// We center the depth with an offset, but only its fraction matters.
		// When (DepthSliceFactor() - 1) is odd, it will be 0.5, otherwise 0.
		if (((int)(DepthSliceFactor() - 1.0f) & 1) == 1) {
			WRITE(p, " z = (floor((z * %f) - (1.0 / 2.0)) + (1.0 / 2.0)) * (1.0 / %f);\n", scale, scale);
		} else {
			WRITE(p, " z = floor(z * %f) * (1.0 / %f);\n", scale, scale);
		}
		WRITE(p, " gl_FragDepth = z;\n");
	}
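With the default DepthSliceFactor() of 4, (int)(4 - 1) & 1 is 1, so the odd branch is taken and the emitted GLSL snaps depth to steps of 1 / (4 * 65535) = 1/262140 with a half-step bias. A CPU-side sketch of that same rounding (a hypothetical helper, not part of the commit) is:

// Rough CPU equivalent of the odd-factor branch above, assuming depthSliceFactor = 4.
#include <cmath>
#include <cstdio>

static float SnapScaledDepth(float z) {
	const float scale = 4.0f * 65535.0f;  // 262140
	// Floor to the nearest 1/262140 step, biased so values land on half-steps.
	return (floorf((z * scale) - 0.5f) + 0.5f) * (1.0f / scale);
}

int main() {
	// ToScaledDepth(0) == 0.375 with factor 4; it already sits on a half-step (98302.5 / 262140),
	// so the snap leaves it unchanged, while interpolated in-between values get floored to steps.
	printf("%.7f\n", SnapScaledDepth(0.375f));  // 0.3750000
	return 0;
}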
@@ -211,6 +211,7 @@ static const CommandTableEntry commandTable[] = {
	{GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GLES_GPU::Execute_ViewportType},
	{GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, DIRTY_DEPTHRANGE, &GLES_GPU::Execute_ViewportZType},
	{GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, DIRTY_DEPTHRANGE, &GLES_GPU::Execute_ViewportZType},
	{GE_CMD_CLIPENABLE, FLAG_FLUSHBEFOREONCHANGE},

	// Region
	{GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GLES_GPU::Execute_Region},

@@ -294,7 +295,6 @@ static const CommandTableEntry commandTable[] = {
	{GE_CMD_LSC3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, DIRTY_LIGHT3, &GLES_GPU::Execute_Light3Param},

	// Ignored commands
	{GE_CMD_CLIPENABLE, 0},
	{GE_CMD_TEXFLUSH, 0},
	{GE_CMD_TEXLODSLOPE, 0},
	{GE_CMD_TEXSYNC, 0},
@@ -589,23 +589,16 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
		SetMatrix4x3(u_texmtx, gstate.tgenMatrix);
	}
	if ((dirty & DIRTY_DEPTHRANGE) && u_depthRange != -1) {
		float viewZScale = gstate.getViewportZScale();
		float viewZCenter = gstate.getViewportZCenter();
		float viewZInvScale;
		// Since depth is [-1, 1] mapping to [minz, maxz], this is easyish.
		float vpZScale = gstate.getViewportZScale();
		float vpZCenter = gstate.getViewportZCenter();

		// We had to scale and translate Z to account for our clamped Z range.
		// Therefore, we also need to reverse this to round properly.
		//
		// Example: scale = 65535.0, center = 0.0
		// Resulting range = -65535 to 65535, clamped to [0, 65535]
		// gstate_c.vpDepthScale = 2.0f
		// gstate_c.vpZOffset = -1.0f
		//
		// The projection already accounts for those, so we need to reverse them.
		//
		// Additionally, OpenGL uses a range from [-1, 1]. So we multiply by scale and add the center.
		viewZScale *= 1.0f / gstate_c.vpDepthScale;
		viewZCenter -= 65535.0f * gstate_c.vpZOffset;
		// These are just the reverse of the formulas in GPUStateUtils.
		float halfActualZRange = vpZScale / gstate_c.vpDepthScale;
		float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
		float viewZScale = halfActualZRange;
		float viewZCenter = minz + halfActualZRange;
		float viewZInvScale;

		if (viewZScale != 0.0) {
			viewZInvScale = 1.0f / viewZScale;
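This is the same reversal as the D3D9 path, but because GL's NDC depth spans [-1, 1] the uniform wants the half-range and the midpoint rather than the full range and minz. Continuing the same hypothetical numbers:

// GL flavor of the reverse mapping (hypothetical values, not from the commit).
#include <cstdio>
int main() {
	const float halfActualZRange = 20000.0f;  // vpZScale / gstate_c.vpDepthScale, as above
	const float minz = 0.0f;                  // recovered exactly as in the D3D9 path
	const float viewZScale = halfActualZRange;           // 20000: one NDC unit covers the half-range
	const float viewZCenter = minz + halfActualZRange;   // 20000: midpoint of [minz, maxz]
	printf("u_depthRange scale=%f center=%f\n", viewZScale, viewZCenter);
	return 0;
}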
@@ -375,6 +375,7 @@ struct GPUgstate {
	int getRegionY2() const { return (region2 >> 10) & 0x3FF; }

	// Note that the X1/Y1/Z1 here does not mean the upper-left corner, but half the dimensions. X2/Y2/Z2 are the center.
	bool isClippingEnabled() const { return clipEnable & 1; }
	float getViewportXScale() const { return getFloat24(viewportxscale); }
	float getViewportYScale() const { return getFloat24(viewportyscale); }
	float getViewportZScale() const { return getFloat24(viewportzscale); }
@@ -33,6 +33,7 @@
#include "Windows/main.h"
#include "GPU/GPUInterface.h"
#include "GPU/Common/GPUDebugInterface.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/GPUState.h"
#include "GPU/Debugger/Breakpoints.h"
#include "GPU/Debugger/Stepping.h"

@@ -398,7 +399,7 @@ void CGEDebugger::DescribePixel(u32 pix, GPUDebugBufferFormat fmt, int x, int y,

	case GPU_DBG_FORMAT_24BIT_8X:
		// These are only ever going to be depth values, so let's also show scaled to 16 bit.
		_snwprintf(desc, 256, L"%d,%d: %d / %f / %f", x, y, pix & 0x00FFFFFF, (pix & 0x00FFFFFF) * (1.0f / 16777215.0f), (pix & 0x00FFFFFF) * (65535.0f / 16777215.0f));
		_snwprintf(desc, 256, L"%d,%d: %d / %f / %f", x, y, pix & 0x00FFFFFF, (pix & 0x00FFFFFF) * (1.0f / 16777215.0f), FromScaledDepth((pix & 0x00FFFFFF) * (1.0f / 16777215.0f)));
		break;

	case GPU_DBG_FORMAT_24X_8BIT:

@@ -406,7 +407,7 @@ void CGEDebugger::DescribePixel(u32 pix, GPUDebugBufferFormat fmt, int x, int y,
		break;

	case GPU_DBG_FORMAT_FLOAT:
		_snwprintf(desc, 256, L"%d,%d: %f / %f", x, y, *(float *)&pix, *(float *)&pix * 65535.0f);
		_snwprintf(desc, 256, L"%d,%d: %f / %f", x, y, *(float *)&pix, FromScaledDepth(*(float *)&pix));
		break;

	default:
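The debugger now runs raw depth-buffer values back through FromScaledDepth() instead of multiplying by 65535, so the displayed 16-bit value matches what the game wrote even though the buffer only occupies the middle slice of the output range. A small sketch of why that matters (hypothetical value, factor 4 assumed):

// What the GPU_DBG_FORMAT_FLOAT line would show for a buffer value of 0.6, assuming depthSliceFactor = 4.
#include <cstdio>
static float FromScaledDepth(float z) {
	return (z - 0.375f) * 4.0f * 65535.0f;  // inverse of ToScaledDepth with depthSliceFactor = 4
}
int main() {
	const float pix = 0.6f;
	// New display: 58981.5; the old raw pix * 65535 would have misread it as 39321.
	printf("%f / %f\n", pix, FromScaledDepth(pix));
	return 0;
}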