mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Vulkan: Implement vertex range culling.
Also D3D11, since they are very similar.
This commit is contained in:
parent
639a3f406d
commit
44ba31fbc6
4 changed files with 65 additions and 7 deletions
|
@ -192,6 +192,44 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
|
|||
ub->depthRange[3] = viewZInvScale;
|
||||
}
|
||||
|
||||
if (dirtyUniforms & DIRTY_CULLRANGE) {
|
||||
// Account for the projection viewport adjustment when viewport is too large.
|
||||
auto reverseViewportX = [](float x) {
|
||||
float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale());
|
||||
return pspViewport * (1.0f / gstate_c.vpWidthScale);
|
||||
};
|
||||
auto reverseViewportY = [flipViewport](float y) {
|
||||
float heightScale = gstate_c.vpHeightScale;
|
||||
if (flipViewport) {
|
||||
// For D3D11.
|
||||
heightScale = -heightScale;
|
||||
}
|
||||
float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale());
|
||||
return pspViewport * (1.0f / gstate_c.vpHeightScale);
|
||||
};
|
||||
auto reverseViewportZ = [](float z) {
|
||||
float pspViewport = (z - gstate.getViewportZCenter()) * (1.0f / gstate.getViewportZScale());
|
||||
// Differs from GLES: depth is 0 to 1, not -1 to 1.
|
||||
return (pspViewport - gstate_c.vpZOffset) * (1.0f / gstate_c.vpDepthScale) * 0.5f + 0.5f;
|
||||
};
|
||||
auto sortPair = [](float a, float b) {
|
||||
return a > b ? std::make_pair(b, a) : std::make_pair(a, b);
|
||||
};
|
||||
|
||||
// The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z.
|
||||
// Any vertex outside this range (unless depth clamp enabled) is discarded.
|
||||
auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f));
|
||||
auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f));
|
||||
auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f));
|
||||
// Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard".
|
||||
float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f;
|
||||
|
||||
float minValues[4]{ x.first, y.first, z.first, clampEnable };
|
||||
memcpy(ub->cullRangeMin, minValues, sizeof(ub->cullRangeMin));
|
||||
float maxValues[4]{ x.second, y.second, z.second, NAN };
|
||||
memcpy(ub->cullRangeMax, maxValues, sizeof(ub->cullRangeMax));
|
||||
}
|
||||
|
||||
if (dirtyUniforms & DIRTY_BEZIERSPLINE) {
|
||||
ub->spline_counts = BytesToUint32(gstate_c.spline_count_u, gstate_c.spline_count_v, gstate_c.spline_type_u, gstate_c.spline_type_v);
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@ enum : uint64_t {
|
|||
};
|
||||
|
||||
// TODO: Split into two structs, one for software transform and one for hardware transform, to save space.
|
||||
// 512 bytes. Probably can't get to 256 (nVidia's UBO alignment).
|
||||
// Currently 512 bytes. Probably can't get to 256 (nVidia's UBO alignment).
|
||||
// Every line here is a 4-float.
|
||||
struct UB_VS_FS_Base {
|
||||
float proj[16];
|
||||
|
@ -32,6 +32,8 @@ struct UB_VS_FS_Base {
|
|||
float matAmbient[4];
|
||||
uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one.
|
||||
int pad2; int pad3;
|
||||
float cullRangeMin[4];
|
||||
float cullRangeMax[4];
|
||||
// Fragment data
|
||||
float fogColor[4];
|
||||
float texEnvColor[4];
|
||||
|
@ -58,6 +60,8 @@ R"( mat4 proj_mtx;
|
|||
uint depal_mask_shift_off_fmt;
|
||||
int pad2;
|
||||
int pad3;
|
||||
vec4 cullRangeMin;
|
||||
vec4 cullRangeMax;
|
||||
vec3 fogcolor;
|
||||
vec3 texenv;
|
||||
ivec4 alphacolorref;
|
||||
|
@ -84,6 +88,8 @@ R"( float4x4 u_proj;
|
|||
uint u_depal_mask_shift_off_fmt;
|
||||
int pad2;
|
||||
int pad3;
|
||||
float4 u_cullRangeMin;
|
||||
float4 u_cullRangeMax;
|
||||
float3 u_fogcolor;
|
||||
float3 u_texenv;
|
||||
uint4 u_alphacolorref;
|
||||
|
|
|
@ -815,7 +815,7 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
|
|||
}
|
||||
}
|
||||
|
||||
if (lang == HLSL_DX9 && !isModeThrough) {
|
||||
if (!isModeThrough) {
|
||||
WRITE(p, " float3 projPos = outPos.xyz / outPos.w;\n");
|
||||
// Vertex range culling doesn't happen when depth is clamped, so only do this if in range.
|
||||
WRITE(p, " if (u_cullRangeMin.w <= 0.0f || (projPos.z >= u_cullRangeMin.z && projPos.z <= u_cullRangeMax.z)) {\n");
|
||||
|
|
|
@ -317,13 +317,13 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
|
|||
WRITE(p, " v_fogdepth = position.w;\n");
|
||||
}
|
||||
if (isModeThrough) {
|
||||
WRITE(p, " gl_Position = base.proj_through_mtx * vec4(position.xyz, 1.0);\n");
|
||||
WRITE(p, " vec4 outPos = base.proj_through_mtx * vec4(position.xyz, 1.0);\n");
|
||||
} else {
|
||||
// The viewport is used in this case, so need to compensate for that.
|
||||
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
|
||||
WRITE(p, " gl_Position = depthRoundZVP(base.proj_mtx * vec4(position.xyz, 1.0));\n");
|
||||
WRITE(p, " vec4 outPos = depthRoundZVP(base.proj_mtx * vec4(position.xyz, 1.0));\n");
|
||||
} else {
|
||||
WRITE(p, " gl_Position = base.proj_mtx * vec4(position.xyz, 1.0);\n");
|
||||
WRITE(p, " vec4 outPos = base.proj_mtx * vec4(position.xyz, 1.0);\n");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -472,9 +472,9 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
|
|||
|
||||
// Final view and projection transforms.
|
||||
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
|
||||
WRITE(p, " gl_Position = depthRoundZVP(base.proj_mtx * viewPos);\n");
|
||||
WRITE(p, " vec4 outPos = depthRoundZVP(base.proj_mtx * viewPos);\n");
|
||||
} else {
|
||||
WRITE(p, " gl_Position = base.proj_mtx * viewPos;\n");
|
||||
WRITE(p, " vec4 outPos = base.proj_mtx * viewPos;\n");
|
||||
}
|
||||
|
||||
// TODO: Declare variables for dots for shade mapping if needed.
|
||||
|
@ -694,6 +694,20 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
|
|||
if (enableFog)
|
||||
WRITE(p, " v_fogdepth = (viewPos.z + base.fogcoef.x) * base.fogcoef.y;\n");
|
||||
}
|
||||
|
||||
if (!isModeThrough) {
|
||||
WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n");
|
||||
// Vertex range culling doesn't happen when depth is clamped, so only do this if in range.
|
||||
WRITE(p, " if (base.cullRangeMin.w <= 0.0f || (projPos.z >= base.cullRangeMin.z && projPos.z <= base.cullRangeMax.z)) {\n");
|
||||
const char *outMin = "projPos.x < base.cullRangeMin.x || projPos.y < base.cullRangeMin.y || projPos.z < base.cullRangeMin.z";
|
||||
const char *outMax = "projPos.x > base.cullRangeMax.x || projPos.y > base.cullRangeMax.y || projPos.z > base.cullRangeMax.z";
|
||||
WRITE(p, " if (%s || %s) {\n", outMin, outMax);
|
||||
WRITE(p, " outPos.w = base.cullRangeMax.w;\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " }\n");
|
||||
}
|
||||
WRITE(p, " gl_Position = outPos;\n");
|
||||
|
||||
WRITE(p, "}\n");
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue