From 3e5c09d432a9135711031bf264b4d95c1ffd1bde Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Wed, 5 Oct 2022 19:41:59 -0700 Subject: [PATCH] Vulkan: Clip clamped depth in geometry shader. This corrects deformed geometry on Mali devices which don't support user-space clipping but do support depth clamp. --- GPU/Common/GeometryShaderGenerator.cpp | 211 ++++++++++++++++--------- GPU/Common/ShaderId.cpp | 13 +- GPU/Common/ShaderId.h | 1 + 3 files changed, 149 insertions(+), 76 deletions(-) diff --git a/GPU/Common/GeometryShaderGenerator.cpp b/GPU/Common/GeometryShaderGenerator.cpp index 6d1b7bdcd9..525dc1f0f5 100644 --- a/GPU/Common/GeometryShaderGenerator.cpp +++ b/GPU/Common/GeometryShaderGenerator.cpp @@ -48,7 +48,7 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu ShaderWriter p(buffer, compat, ShaderStage::Geometry, gl_exts.data(), gl_exts.size()); p.C("layout(triangles) in;\n"); - p.C("layout(triangle_strip, max_vertices = 6) out;\n"); + p.C("layout(triangle_strip, max_vertices = 12) out;\n"); if (compat.shaderLanguage == GLSL_VULKAN) { WRITE(p, "\n"); @@ -58,6 +58,8 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu } std::vector varyings, outVaryings; + bool vertexRangeCulling = !id.Bit(GS_BIT_CURVE); + bool clipClampedDepth = gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && !gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE); if (id.Bit(GS_BIT_DO_TEXTURE)) { varyings.push_back(VaryingDef{ "vec3", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" }); @@ -74,53 +76,87 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu p.BeginGSMain(varyings, outVaryings); - // Apply culling - p.C(" bool anyInside = false;\n"); + // Apply culling. + if (vertexRangeCulling) { + p.C(" bool anyInside = false;\n"); + } // And apply manual clipping if necessary. 
if (!gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) { p.C(" float clip0[3];\n"); + if (clipClampedDepth) { + p.C(" float clip1[3];\n"); + } } p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster? p.C(" vec4 outPos = gl_in[i].gl_Position;\n"); p.C(" vec3 projPos = outPos.xyz / outPos.w;\n"); - p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n"); - // Vertex range culling doesn't happen when Z clips, note sign of w is important. - p.C(" if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n"); - const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y"; - const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y"; - p.F(" if ((%s) || (%s)) {\n", outMin, outMax); - p.C(" return;\n"); // Cull! - p.C(" }\n"); - p.C(" }\n"); - p.C(" if (u_cullRangeMin.w <= 0.0) {\n"); - p.C(" if (projPos.z < u_cullRangeMin.z || projPos.z > u_cullRangeMax.z) {\n"); - // When not clamping depth, cull the triangle of Z is outside the valid range (not based on clip Z.) - p.C(" return;\n"); - p.C(" }\n"); - p.C(" } else {\n"); - p.C(" if (projPos.z >= u_cullRangeMin.z) { anyInside = true; }\n"); - p.C(" if (projPos.z <= u_cullRangeMax.z) { anyInside = true; }\n"); - p.C(" }\n"); + + if (vertexRangeCulling) { + p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n"); + // Vertex range culling doesn't happen when Z clips, note sign of w is important. + p.C(" if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n"); + const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y"; + const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y"; + p.F(" if ((%s) || (%s)) {\n", outMin, outMax); + p.C(" return;\n"); // Cull! 
+ p.C(" }\n"); + p.C(" }\n"); + p.C(" if (u_cullRangeMin.w <= 0.0) {\n"); + p.C(" if (projPos.z < u_cullRangeMin.z || projPos.z > u_cullRangeMax.z) {\n"); + // When not clamping depth, cull the triangle of Z is outside the valid range (not based on clip Z.) + p.C(" return;\n"); + p.C(" }\n"); + p.C(" } else {\n"); + p.C(" if (projPos.z >= u_cullRangeMin.z) { anyInside = true; }\n"); + p.C(" if (projPos.z <= u_cullRangeMax.z) { anyInside = true; }\n"); + p.C(" }\n"); + } if (!gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) { // This is basically the same value as gl_ClipDistance would take, z + w. - // TODO: Ignore triangles from GE_PRIM_RECTANGLES in transform mode, which should not clip to neg z. - p.F(" clip0[i] = projZ * outPos.w + outPos.w;\n"); + if (vertexRangeCulling) { + p.C(" clip0[i] = projZ * outPos.w + outPos.w;\n"); + } else { + // Let's not complicate the code overly for this case. We'll clipClampedDepth. + p.C(" clip0[i] = 0.0;\n"); + } + + // This one does happen for rectangles. + if (clipClampedDepth) { + if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) { + // On OpenGL/GLES, these values account for the -1 -> 1 range. + p.C(" if (u_depthRange.y - u_depthRange.x >= 1.0) {\n"); + p.C(" clip1[i] = outPos.w + outPos.z;\n"); + } else { + // Everywhere else, it's 0 -> 1, simpler. + p.C(" if (u_depthRange.y >= 1.0) {\n"); + p.C(" clip1[i] = outPos.z;\n"); + } + // This is similar, but for maxz when it's below 65535.0. -1/0 don't matter here. + p.C(" } else if (u_depthRange.x + u_depthRange.y <= 65534.0) {\n"); + p.C(" clip1[i] = outPos.w - outPos.z;\n"); + p.C(" } else {\n"); + p.C(" clip1[i] = 0.0;\n"); + p.C(" }\n"); + } } p.C(" } // for\n"); // Cull any triangle fully outside in the same direction when depth clamp enabled. // Basically simulate cull distances. 
- p.C(" if (u_cullRangeMin.w > 0.0 && !anyInside) {\n"); - p.C(" return;\n"); - p.C(" }\n"); + if (vertexRangeCulling) { + p.C(" if (u_cullRangeMin.w > 0.0 && !anyInside) {\n"); + p.C(" return;\n"); + p.C(" }\n"); + } if (!gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) { // Clipping against one half-space cuts a triangle (17/27), culls (7/27), or creates two triangles (3/27). - p.C(" int indices[4];\n"); - p.C(" float factors[4];\n"); + // We clip against two, so we can generate up to 4 triangles, a polygon with 6 points. + p.C(" int indices[6];\n"); + p.C(" float factors[6];\n"); p.C(" int ind = 0;\n"); // Pass 1 - clip against first half-space. @@ -143,22 +179,80 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu p.C(" }\n"); + // Pass 2 - further clip against clamped Z. + if (clipClampedDepth) { + p.C(" int count0 = ind;\n"); + p.C(" int indices1[6];\n"); + p.C(" float factors1[6];\n"); + p.C(" ind = 0;\n"); + + // Let's start by interpolating the clip values. + p.C(" float clip1after[4];\n"); + p.C(" for (int i = 0; i < count0; i++) {\n"); + p.C(" int idx = indices[i];\n"); + p.C(" float factor = factors[i];\n"); + p.C(" int next = idx == 2 ? 0 : idx + 1;\n"); + p.C(" clip1after[i] = mix(clip1[idx], clip1[next], factor);\n"); + p.C(" }\n"); + + // Alright, now time to clip, again. + p.C(" for (int i = 0; i < count0; i++) {\n"); + // First, use this vertex if it doesn't need clipping. + p.C(" if (clip1after[i] >= 0.0) {\n"); + p.C(" indices1[ind] = i;\n"); + p.C(" factors1[ind] = 0.0;\n"); + p.C(" ind++;\n"); + p.C(" }\n"); + + // Next, we generate an interpolated vertex if signs differ. + p.C(" int inext = i == count0 - 1 ? 0 : i + 1;\n"); + p.C(" if (clip1after[i] * clip1after[inext] < 0.0) {\n"); + p.C(" float t = clip1after[i] < 0.0 ? 
clip1after[i] / (clip1after[i] - clip1after[inext]) : 1.0 - (clip1after[inext] / (clip1after[inext] - clip1after[i]));\n"); + p.C(" indices1[ind] = i;\n"); + p.C(" factors1[ind] = t;\n"); + p.C(" ind++;\n"); + p.C(" }\n"); + + p.C(" }\n"); + } + p.C(" if (ind < 3) {\n"); p.C(" return;\n"); p.C(" }\n"); + p.C(" int idx;\n"); + p.C(" int next;\n"); + p.C(" float factor;\n"); + + auto emitIndex = [&](const char *which) { + if (clipClampedDepth) { + // We have to interpolate between four vertices. + p.F(" idx = indices1[%s];\n", which); + p.F(" factor = factors1[%s];\n", which); + p.C(" next = idx == count0 - 1 ? 0 : idx + 1;\n"); + p.C(" gl_Position = mix(mix(gl_in[indices[idx]].gl_Position, gl_in[(indices[idx] + 1) % 3].gl_Position, factors[idx]), mix(gl_in[indices[next]].gl_Position, gl_in[(indices[next] + 1) % 3].gl_Position, factors[next]), factor);\n"); + for (size_t i = 0; i < varyings.size(); i++) { + const VaryingDef &in = varyings[i]; + const VaryingDef &out = outVaryings[i]; + p.F(" %s = mix(mix(%s[indices[idx]], %s[(indices[idx] + 1) % 3], factors[idx]), mix(%s[indices[next]], %s[(indices[next] + 1) % 3], factors[next]), factor);\n", out.name, in.name, in.name, in.name, in.name); + } + } else { + p.F(" idx = indices[%s];\n", which); + p.F(" factor = factors[%s];\n", which); + p.C(" next = idx == 2 ? 0 : idx + 1;\n"); + p.C(" gl_Position = mix(gl_in[idx].gl_Position, gl_in[next].gl_Position, factor);\n"); + for (size_t i = 0; i < varyings.size(); i++) { + const VaryingDef &in = varyings[i]; + const VaryingDef &out = outVaryings[i]; + p.F(" %s = mix(%s[idx], %s[next], factor);\n", out.name, in.name, in.name); + } + } + p.C(" EmitVertex();\n"); + }; + // Alright, time to actually emit the first triangle. p.C(" for (int i = 0; i < 3; i++) {\n"); - p.C(" int idx = indices[i];\n"); - p.C(" float factor = factors[i];\n"); - p.C(" int next = idx == 2 ? 
0 : idx + 1;\n"); - p.C(" gl_Position = mix(gl_in[idx].gl_Position, gl_in[next].gl_Position, factor);\n"); - for (size_t i = 0; i < varyings.size(); i++) { - VaryingDef &in = varyings[i]; - VaryingDef &out = outVaryings[i]; - p.F(" %s = mix(%s[idx], %s[next], factor);\n", outVaryings[i].name, varyings[i].name, varyings[i].name); - } - p.C(" EmitVertex();\n"); + emitIndex("i"); p.C(" }\n"); // Did we end up with additional triangles? We'll do three points each for the rest. @@ -166,40 +260,13 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu p.C(" EndPrimitive();\n"); // Point one, always index zero. - p.C(" int idx = indices[0];\n"); - p.C(" float factor = factors[0];\n"); - p.C(" int next = idx == 2 ? 0 : idx + 1;\n"); - p.C(" gl_Position = mix(gl_in[idx].gl_Position, gl_in[next].gl_Position, factor);\n"); - for (size_t i = 0; i < varyings.size(); i++) { - VaryingDef &in = varyings[i]; - VaryingDef &out = outVaryings[i]; - p.F(" %s = mix(%s[idx], %s[next], factor);\n", outVaryings[i].name, varyings[i].name, varyings[i].name); - } - p.C(" EmitVertex();\n"); + emitIndex("0"); // After that, one less than i (basically a triangle fan.) - p.C(" idx = indices[i - 1];\n"); - p.C(" factor = factors[i - 1];\n"); - p.C(" next = idx == 2 ? 0 : idx + 1;\n"); - p.C(" gl_Position = mix(gl_in[idx].gl_Position, gl_in[next].gl_Position, factor);\n"); - for (size_t i = 0; i < varyings.size(); i++) { - VaryingDef &in = varyings[i]; - VaryingDef &out = outVaryings[i]; - p.F(" %s = mix(%s[idx], %s[next], factor);\n", outVaryings[i].name, varyings[i].name, varyings[i].name); - } - p.C(" EmitVertex();\n"); + emitIndex("(i - 1)"); // And the new vertex itself. - p.C(" idx = indices[i];\n"); - p.C(" factor = factors[i];\n"); - p.C(" next = idx == 2 ? 
0 : idx + 1;\n"); - p.C(" gl_Position = mix(gl_in[idx].gl_Position, gl_in[next].gl_Position, factor);\n"); - for (size_t i = 0; i < varyings.size(); i++) { - VaryingDef &in = varyings[i]; - VaryingDef &out = outVaryings[i]; - p.F(" %s = mix(%s[idx], %s[next], factor);\n", outVaryings[i].name, varyings[i].name, varyings[i].name); - } - p.C(" EmitVertex();\n"); + emitIndex("i"); p.C(" }\n"); } else { @@ -209,16 +276,16 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu p.C(" vec4 outPos = gl_in[i].gl_Position;\n"); p.C(" vec3 projPos = outPos.xyz / outPos.w;\n"); p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n"); - // TODO: Ignore triangles from GE_PRIM_RECTANGLES in transform mode, which should not clip to neg z. + // We shouldn't need to worry about rectangles-as-triangles here, since we don't use geometry shaders for that. p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clipSuffix0); p.C(" gl_Position = outPos;\n"); if (gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) { } for (size_t i = 0; i < varyings.size(); i++) { - VaryingDef &in = varyings[i]; - VaryingDef &out = outVaryings[i]; - p.F(" %s = %s[i];\n", outVaryings[i].name, varyings[i].name); + const VaryingDef &in = varyings[i]; + const VaryingDef &out = outVaryings[i]; + p.F(" %s = %s[i];\n", out.name, in.name); } // Debug - null the red channel //p.C(" if (i == 0) v_color0Out.x = 0.0;\n"); diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 9c8cb3905b..94ade2fd26 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -379,22 +379,27 @@ std::string GeometryShaderDesc(const GShaderID &id) { void ComputeGeometryShaderID(GShaderID *id_out, const Draw::Bugs &bugs, int prim) { GShaderID id; - bool vertexRangeCulling = - !gstate.isModeThrough() && gstate_c.submitType == SubmitType::DRAW; // neither hw nor sw spline/bezier. 
See #11692 + bool isModeThrough = gstate.isModeThrough(); + bool isCurve = gstate_c.submitType != SubmitType::DRAW; + bool isTriangle = prim == GE_PRIM_TRIANGLES || prim == GE_PRIM_TRIANGLE_FAN || prim == GE_PRIM_TRIANGLE_STRIP; + + bool vertexRangeCulling = !isCurve; + bool clipClampedDepth = gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && !gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE); // If we're not using GS culling, return a zero ID. // Also, only use this for triangle primitives. - if (!vertexRangeCulling || !gstate_c.Supports(GPU_SUPPORTS_GS_CULLING) || (prim != GE_PRIM_TRIANGLES && prim != GE_PRIM_TRIANGLE_FAN && prim != GE_PRIM_TRIANGLE_STRIP)) { + if ((!vertexRangeCulling && !clipClampedDepth) || isModeThrough || !isTriangle || !gstate_c.Supports(GPU_SUPPORTS_GS_CULLING)) { *id_out = id; return; } id.SetBit(GS_BIT_ENABLED, true); + // Vertex range culling doesn't seem to happen for spline/bezier, see #11692. + id.SetBit(GS_BIT_CURVE, isCurve); if (gstate.isModeClear()) { // No attribute bits. } else { - bool isModeThrough = gstate.isModeThrough(); bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled() && !isModeThrough; id.SetBit(GS_BIT_LMODE, lmode); diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index ec25abb4d3..560b8a873a 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -109,6 +109,7 @@ enum GShaderBit : uint8_t { GS_BIT_ENABLED = 0, // If not set, we don't use a geo shader. GS_BIT_DO_TEXTURE = 1, // presence of texcoords GS_BIT_LMODE = 2, // presence of specular color (regular color always present) + GS_BIT_CURVE = 3, // curve, which means don't do range culling. }; static inline GShaderBit operator +(GShaderBit bit, int i) {