diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index 4f2b4dd3bd..9da2049d61 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -113,6 +113,24 @@ bool IsAlphaTestTriviallyTrue() { } } +bool NeedsTestDiscard() { + // We assume this is called only when enabled and not trivially true (may also be for color testing.) + if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF) + return true; + if (gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled()) + return true; + if (!gstate.isAlphaBlendEnabled()) + return true; + if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY) + return true; + if (gstate.getBlendFuncA() != GE_SRCBLEND_SRCALPHA && gstate.getBlendFuncA() != GE_DSTBLEND_DOUBLESRCALPHA) + return true; + if (!safeDestFactors[(int)gstate.getBlendFuncB()]) + return true; + + return false; +} + bool IsAlphaTestAgainstZero() { return gstate.getAlphaTestRef() == 0 && gstate.getAlphaTestMask() == 0xFF; } diff --git a/GPU/Common/GPUStateUtils.h b/GPU/Common/GPUStateUtils.h index e6b231d8e1..f6a4e11c21 100644 --- a/GPU/Common/GPUStateUtils.h +++ b/GPU/Common/GPUStateUtils.h @@ -44,6 +44,7 @@ bool IsAlphaTestTriviallyTrue(); bool IsColorTestAgainstZero(); bool IsColorTestTriviallyTrue(); bool IsAlphaTestAgainstZero(); +bool NeedsTestDiscard(); StencilValueType ReplaceAlphaWithStencilType(); ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend); diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 4a33f7c216..9a6d9afb82 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -268,12 +268,15 @@ void ComputeFragmentShaderID(ShaderID *id_out) { id.SetBit(FS_BIT_ALPHA_TEST); id.SetBits(FS_BIT_ALPHA_TEST_FUNC, 3, gstate.getAlphaTestFunction()); id.SetBit(FS_BIT_ALPHA_AGAINST_ZERO, IsAlphaTestAgainstZero()); + id.SetBit(FS_BIT_TEST_DISCARD_TO_ZERO, !NeedsTestDiscard()); } if (enableColorTest) { // 4 bits total. 
id.SetBit(FS_BIT_COLOR_TEST); id.SetBits(FS_BIT_COLOR_TEST_FUNC, 2, gstate.getColorTestFunction()); id.SetBit(FS_BIT_COLOR_AGAINST_ZERO, IsColorTestAgainstZero()); + // This is also set in enableAlphaTest - color test is uncommon, but we can skip discard the same way. + id.SetBit(FS_BIT_TEST_DISCARD_TO_ZERO, !NeedsTestDiscard()); } id.SetBit(FS_BIT_ENABLE_FOG, enableFog); diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index 7d2e1b1cdd..740e321fe8 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -86,7 +86,8 @@ enum { FS_BIT_BLENDFUNC_B = 42, // 4 bits FS_BIT_FLATSHADE = 46, FS_BIT_BGRA_TEXTURE = 47, - // 48+ are free. + FS_BIT_TEST_DISCARD_TO_ZERO = 48, + // 49+ are free. }; struct ShaderID { diff --git a/GPU/GLES/FragmentShaderGeneratorGLES.cpp b/GPU/GLES/FragmentShaderGeneratorGLES.cpp index 20cfebbfe5..0c11d82344 100644 --- a/GPU/GLES/FragmentShaderGeneratorGLES.cpp +++ b/GPU/GLES/FragmentShaderGeneratorGLES.cpp @@ -151,6 +151,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform bool enableAlphaTest = id.Bit(FS_BIT_ALPHA_TEST); bool alphaTestAgainstZero = id.Bit(FS_BIT_ALPHA_AGAINST_ZERO); + bool testForceToZero = id.Bit(FS_BIT_TEST_DISCARD_TO_ZERO); bool enableColorTest = id.Bit(FS_BIT_COLOR_TEST); bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO); bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE); @@ -510,39 +511,40 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform } } + const char *discardStatement = testForceToZero ? "v.a = 0.0;" : "discard;"; if (enableAlphaTest) { if (alphaTestAgainstZero) { // When testing against 0 (extremely common), we can avoid some math. // 0.002 is approximately half of 1.0 / 255.0. 
if (alphaTestFunc == GE_COMP_NOTEQUAL || alphaTestFunc == GE_COMP_GREATER) { - WRITE(p, " if (v.a < 0.002) discard;\n"); + WRITE(p, " if (v.a < 0.002) %s\n", discardStatement); } else if (alphaTestFunc != GE_COMP_NEVER) { // Anything else is a test for == 0. Happens sometimes, actually... - WRITE(p, " if (v.a > 0.002) discard;\n"); + WRITE(p, " if (v.a > 0.002) %s\n", discardStatement); } else { // NEVER has been logged as used by games, although it makes little sense - statically failing. // Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here. - WRITE(p, " discard;\n"); + WRITE(p, " %s\n", discardStatement); } } else if (g_Config.bFragmentTestCache) { WRITE(p, " float aResult = %s(testtex, vec2(%s, 0)).a;\n", texture, alphaTestXCoord.c_str()); - WRITE(p, " if (aResult < 0.5) discard;\n"); + WRITE(p, " if (aResult < 0.5) %s\n", discardStatement); } else { const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " }; if (alphaTestFuncs[alphaTestFunc][0] != '#') { if (bitwiseOps) { - WRITE(p, " if ((roundAndScaleTo255i(v.a) & u_alphacolormask.a) %s int(u_alphacolorref.a)) discard;\n", alphaTestFuncs[alphaTestFunc]); + WRITE(p, " if ((roundAndScaleTo255i(v.a) & u_alphacolormask.a) %s int(u_alphacolorref.a)) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement); } else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) { // Work around bad PVR driver problem where equality check + discard just doesn't work. 
if (alphaTestFunc != GE_COMP_NOTEQUAL) { - WRITE(p, " if (roundTo255thf(v.a) %s u_alphacolorref.a) discard;\n", alphaTestFuncs[alphaTestFunc]); + WRITE(p, " if (roundTo255thf(v.a) %s u_alphacolorref.a) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement); } } else { - WRITE(p, " if (roundAndScaleTo255f(v.a) %s u_alphacolorref.a) discard;\n", alphaTestFuncs[alphaTestFunc]); + WRITE(p, " if (roundAndScaleTo255f(v.a) %s u_alphacolorref.a) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement); } } else { // This means NEVER. See above. - WRITE(p, " discard;\n"); + WRITE(p, " %s\n", discardStatement); } } } @@ -552,14 +554,14 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform // When testing against 0 (common), we can avoid some math. // 0.002 is approximately half of 1.0 / 255.0. if (colorTestFunc == GE_COMP_NOTEQUAL) { - WRITE(p, " if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) discard;\n"); + WRITE(p, " if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) %s\n", discardStatement); } else if (colorTestFunc != GE_COMP_NEVER) { // Anything else is a test for == 0. - WRITE(p, " if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) discard;\n"); + WRITE(p, " if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) %s\n", discardStatement); } else { // NEVER has been logged as used by games, although it makes little sense - statically failing. // Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here. - WRITE(p, " discard;\n"); + WRITE(p, " %s\n", discardStatement); } } else if (g_Config.bFragmentTestCache) { WRITE(p, " float rResult = %s(testtex, vec2(vScale256.r, 0)).r;\n", texture); @@ -567,10 +569,10 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform WRITE(p, " float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", texture); if (colorTestFunc == GE_COMP_EQUAL) { // Equal means all parts must be equal. 
- WRITE(p, " if (rResult < 0.5 || gResult < 0.5 || bResult < 0.5) discard;\n"); + WRITE(p, " if (rResult < 0.5 || gResult < 0.5 || bResult < 0.5) %s\n", discardStatement); } else { // Not equal means any part must be not equal. - WRITE(p, " if (rResult < 0.5 && gResult < 0.5 && bResult < 0.5) discard;\n"); + WRITE(p, " if (rResult < 0.5 && gResult < 0.5 && bResult < 0.5) %s\n", discardStatement); } } else { const char *colorTestFuncs[] = { "#", "#", " != ", " == " }; @@ -580,14 +582,14 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n"); const char *maskedFragColor = "ivec3(v_scaled.r & u_alphacolormask.r, v_scaled.g & u_alphacolormask.g, v_scaled.b & u_alphacolormask.b)"; const char *maskedColorRef = "ivec3(int(u_alphacolorref.r) & u_alphacolormask.r, int(u_alphacolorref.g) & u_alphacolormask.g, int(u_alphacolorref.b) & u_alphacolormask.b)"; - WRITE(p, " if (%s %s %s) discard;\n", maskedFragColor, colorTestFuncs[colorTestFunc], maskedColorRef); + WRITE(p, " if (%s %s %s) %s\n", maskedFragColor, colorTestFuncs[colorTestFunc], maskedColorRef, discardStatement); } else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) { - WRITE(p, " if (roundTo255thv(v.rgb) %s u_alphacolorref.rgb) discard;\n", colorTestFuncs[colorTestFunc]); + WRITE(p, " if (roundTo255thv(v.rgb) %s u_alphacolorref.rgb) %s\n", colorTestFuncs[colorTestFunc], discardStatement); } else { - WRITE(p, " if (roundAndScaleTo255v(v.rgb) %s u_alphacolorref.rgb) discard;\n", colorTestFuncs[colorTestFunc]); + WRITE(p, " if (roundAndScaleTo255v(v.rgb) %s u_alphacolorref.rgb) %s\n", colorTestFuncs[colorTestFunc], discardStatement); } } else { - WRITE(p, " discard;\n"); + WRITE(p, " %s\n", discardStatement); } } } diff --git a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp index a8d3100098..d35ade99e5 100644 --- a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp +++ 
b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp @@ -56,6 +56,7 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) { bool enableAlphaTest = id.Bit(FS_BIT_ALPHA_TEST); bool alphaTestAgainstZero = id.Bit(FS_BIT_ALPHA_AGAINST_ZERO); + bool testForceToZero = id.Bit(FS_BIT_TEST_DISCARD_TO_ZERO); bool enableColorTest = id.Bit(FS_BIT_COLOR_TEST); bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO); bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE); @@ -82,7 +83,7 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) { bool isModeClear = id.Bit(FS_BIT_CLEARMODE); const char *shading = doFlatShading ? "flat" : ""; - bool earlyFragmentTests = !enableAlphaTest && !enableColorTest && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); + bool earlyFragmentTests = ((!enableAlphaTest && !enableColorTest) || testForceToZero) && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); if (earlyFragmentTests) { WRITE(p, "layout (early_fragment_tests) in;\n"); @@ -347,27 +348,28 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) { // So we have to scale to account for the difference. std::string alphaTestXCoord = "0"; + const char *discardStatement = testForceToZero ? "v.a = 0.0;" : "discard;"; if (enableAlphaTest) { if (alphaTestAgainstZero) { // When testing against 0 (extremely common), we can avoid some math. // 0.002 is approximately half of 1.0 / 255.0. if (alphaTestFunc == GE_COMP_NOTEQUAL || alphaTestFunc == GE_COMP_GREATER) { - WRITE(p, " if (v.a < 0.002) discard;\n"); + WRITE(p, " if (v.a < 0.002) %s\n", discardStatement); } else if (alphaTestFunc != GE_COMP_NEVER) { // Anything else is a test for == 0. Happens sometimes, actually... - WRITE(p, " if (v.a > 0.002) discard;\n"); + WRITE(p, " if (v.a > 0.002) %s\n", discardStatement); } else { // NEVER has been logged as used by games, although it makes little sense - statically failing. 
// Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here. - WRITE(p, " discard;\n"); + WRITE(p, " %s\n", discardStatement); } } else { const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " }; if (alphaTestFuncs[alphaTestFunc][0] != '#') { - WRITE(p, " if ((roundAndScaleTo255i(v.a) & base.alphacolormask.a) %s base.alphacolorref.a) discard;\n", alphaTestFuncs[alphaTestFunc]); + WRITE(p, " if ((roundAndScaleTo255i(v.a) & base.alphacolormask.a) %s base.alphacolorref.a) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement); } else { // This means NEVER. See above. - WRITE(p, " discard;\n"); + WRITE(p, " %s\n", discardStatement); } } } @@ -378,22 +380,22 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) { // Have my doubts that this special case is actually worth it, but whatever. // 0.002 is approximately half of 1.0 / 255.0. if (colorTestFunc == GE_COMP_NOTEQUAL) { - WRITE(p, " if (v.r + v.g + v.b < 0.002) discard;\n"); + WRITE(p, " if (v.r + v.g + v.b < 0.002) %s\n", discardStatement); } else if (colorTestFunc != GE_COMP_NEVER) { // Anything else is a test for == 0. - WRITE(p, " if (v.r + v.g + v.b > 0.002) discard;\n"); + WRITE(p, " if (v.r + v.g + v.b > 0.002) %s\n", discardStatement); } else { // NEVER has been logged as used by games, although it makes little sense - statically failing. // Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here. 
- WRITE(p, " discard;\n"); + WRITE(p, " %s\n", discardStatement); } } else { const char *colorTestFuncs[] = { "#", "#", " != ", " == " }; if (colorTestFuncs[colorTestFunc][0] != '#') { WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n"); - WRITE(p, " if ((v_scaled & base.alphacolormask.rgb) %s (base.alphacolorref.rgb & base.alphacolormask.rgb)) discard;\n", colorTestFuncs[colorTestFunc]); + WRITE(p, " if ((v_scaled & base.alphacolormask.rgb) %s (base.alphacolorref.rgb & base.alphacolormask.rgb)) %s\n", colorTestFuncs[colorTestFunc], discardStatement); } else { - WRITE(p, " discard;\n"); + WRITE(p, " %s\n", discardStatement); } } }