From f0676b0c85d589fb1e1d3e145d201c16767cd504 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 8 Mar 2015 19:08:21 -0700 Subject: [PATCH] Optimize color tests against zero. They seem to be somewhat common. Speeds up Tales of Phantasia X a bit. --- GPU/Directx9/PixelShaderGeneratorDX9.cpp | 55 +++++++++++++++++------- GPU/Directx9/PixelShaderGeneratorDX9.h | 1 + GPU/GLES/FragmentShaderGenerator.cpp | 41 +++++++++++++----- GPU/GLES/FragmentShaderGenerator.h | 1 + GPU/GLES/FragmentTestCache.cpp | 11 +++-- 5 files changed, 79 insertions(+), 30 deletions(-) diff --git a/GPU/Directx9/PixelShaderGeneratorDX9.cpp b/GPU/Directx9/PixelShaderGeneratorDX9.cpp index 7c54f12862..371caf96a7 100644 --- a/GPU/Directx9/PixelShaderGeneratorDX9.cpp +++ b/GPU/Directx9/PixelShaderGeneratorDX9.cpp @@ -107,6 +107,10 @@ bool IsAlphaTestAgainstZero() { return gstate.getAlphaTestRef() == 0 && gstate.getAlphaTestMask() == 0xFF; } +bool IsColorTestAgainstZero() { + return gstate.getColorTestRef() == 0 && gstate.getColorTestMask() == 0xFFFFFF; +} + const bool nonAlphaSrcFactors[16] = { true, // GE_SRCBLEND_DSTCOLOR, true, // GE_SRCBLEND_INVDSTCOLOR, @@ -453,19 +457,20 @@ void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id) { } #endif if (enableColorTest) { - // 3 bits total. + // 4 bits total. id0 |= 1 << 17; id0 |= gstate.getColorTestFunction() << 18; + id0 |= (IsColorTestAgainstZero() & 1) << 20; } - id0 |= (enableFog & 1) << 20; - id0 |= (doTextureProjection & 1) << 21; - id0 |= (enableColorDoubling & 1) << 22; + id0 |= (enableFog & 1) << 21; + id0 |= (doTextureProjection & 1) << 22; + id0 |= (enableColorDoubling & 1) << 23; // 2 bits - id0 |= (stencilToAlpha) << 23; + id0 |= (stencilToAlpha) << 24; if (stencilToAlpha != REPLACE_ALPHA_NO) { // 4 bits - id0 |= ReplaceAlphaWithStencilType() << 25; + id0 |= ReplaceAlphaWithStencilType() << 26; } if (enableAlphaTest) @@ -473,7 +478,6 @@ void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id) { else gpuStats.numNonAlphaTestedDraws++; - id0 |= (gstate_c.bgraTexture & 1) << 29; // 2 bits. id0 |= ReplaceLogicOpType() << 30; @@ -485,6 +489,10 @@ void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id) { id1 |= gstate.getBlendFuncA() << 6; id1 |= gstate.getBlendFuncB() << 10; } + + // TODO: Flat shading? + + id1 |= (gstate_c.bgraTexture & 1) << 15; } id->d[0] = id0; @@ -502,6 +510,7 @@ void GenerateFragmentShaderDX9(char *buffer) { bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue() && !gstate.isModeClear() && !g_Config.bDisableAlphaTest; bool alphaTestAgainstZero = IsAlphaTestAgainstZero(); bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue() && !gstate.isModeClear(); + bool colorTestAgainstZero = IsColorTestAgainstZero(); bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled(); bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX; bool doTextureAlpha = gstate.isTextureAlphaUsed(); @@ -712,15 +721,31 @@ void GenerateFragmentShaderDX9(char *buffer) { } #endif if (enableColorTest) { - GEComparison colorTestFunc = gstate.getColorTestFunction(); - const char *colorTestFuncs[] = { "#", "#", " != ", " == " }; // never/always don't make sense - u32 colorTestMask = gstate.getColorTestMask(); - if (colorTestFuncs[colorTestFunc][0] != '#') { - const char * test = colorTestFuncs[colorTestFunc]; - WRITE(p, " float3 colortest = roundAndScaleTo255v(v.rgb);\n"); - WRITE(p, " if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b )) clip(-1);\n", test, test, test); + if (colorTestAgainstZero) { + GEComparison colorTestFunc = gstate.getColorTestFunction(); + // When testing against 0 (common), we can avoid some math. + // 0.002 is approximately half of 1.0 / 255.0. + if (colorTestFunc == GE_COMP_NOTEQUAL) { + WRITE(p, " if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) clip(-1);\n"); + } else if (colorTestFunc != GE_COMP_NEVER) { + // Anything else is a test for == 0. + WRITE(p, " if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) clip(-1);\n"); + } else { + // NEVER has been logged as used by games, although it makes little sense - statically failing. + // Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here. + WRITE(p, " clip(-1);\n"); + } } else { - WRITE(p, " clip(-1);\n"); + GEComparison colorTestFunc = gstate.getColorTestFunction(); + const char *colorTestFuncs[] = { "#", "#", " != ", " == " }; // never/always don't make sense + u32 colorTestMask = gstate.getColorTestMask(); + if (colorTestFuncs[colorTestFunc][0] != '#') { + const char * test = colorTestFuncs[colorTestFunc]; + WRITE(p, " float3 colortest = roundAndScaleTo255v(v.rgb);\n"); + WRITE(p, " if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b )) clip(-1);\n", test, test, test); + } else { + WRITE(p, " clip(-1);\n"); + } } } diff --git a/GPU/Directx9/PixelShaderGeneratorDX9.h b/GPU/Directx9/PixelShaderGeneratorDX9.h index 5ad26dc7db..1387cc5705 100644 --- a/GPU/Directx9/PixelShaderGeneratorDX9.h +++ b/GPU/Directx9/PixelShaderGeneratorDX9.h @@ -79,6 +79,7 @@ enum ReplaceBlendType { bool IsAlphaTestAgainstZero(); bool IsAlphaTestTriviallyTrue(); +bool IsColorTestAgainstZero(); bool IsColorTestTriviallyTrue(); StencilValueType ReplaceAlphaWithStencilType(); ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend); diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index 2bb1896b3a..3f2a26c761 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -115,6 +115,10 @@ bool IsAlphaTestAgainstZero() { return gstate.getAlphaTestRef() == 0 && gstate.getAlphaTestMask() == 0xFF; } +bool IsColorTestAgainstZero() { + return gstate.getColorTestRef() == 0 && gstate.getColorTestMask() == 0xFFFFFF; +} + const bool nonAlphaSrcFactors[16] = { true, // GE_SRCBLEND_DSTCOLOR, true, // GE_SRCBLEND_INVDSTCOLOR, @@ -436,19 +440,20 @@ void ComputeFragmentShaderID(ShaderID *id) { } #endif if (enableColorTest) { - // 3 bits total. + // 4 bits total. id0 |= 1 << 17; id0 |= gstate.getColorTestFunction() << 18; + id0 |= (IsColorTestAgainstZero() & 1) << 20; } - id0 |= (enableFog & 1) << 20; - id0 |= (doTextureProjection & 1) << 21; - id0 |= (enableColorDoubling & 1) << 22; + id0 |= (enableFog & 1) << 21; + id0 |= (doTextureProjection & 1) << 22; + id0 |= (enableColorDoubling & 1) << 23; // 2 bits - id0 |= (stencilToAlpha) << 23; + id0 |= (stencilToAlpha) << 24; if (stencilToAlpha != REPLACE_ALPHA_NO) { // 4 bits - id0 |= ReplaceAlphaWithStencilType() << 25; + id0 |= ReplaceAlphaWithStencilType() << 26; } if (enableAlphaTest) @@ -456,7 +461,6 @@ void ComputeFragmentShaderID(ShaderID *id) { else gpuStats.numNonAlphaTestedDraws++; - // 29 is free. // 2 bits. id0 |= ReplaceLogicOpType() << 30; @@ -573,6 +577,7 @@ void GenerateFragmentShader(char *buffer) { bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue() && !gstate.isModeClear() && !g_Config.bDisableAlphaTest; bool alphaTestAgainstZero = IsAlphaTestAgainstZero(); bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue() && !gstate.isModeClear(); + bool colorTestAgainstZero = IsColorTestAgainstZero(); bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled(); bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX; bool doTextureAlpha = gstate.isTextureAlphaUsed(); @@ -617,7 +622,7 @@ void GenerateFragmentShader(char *buffer) { WRITE(p, "uniform sampler2D testtex;\n"); } else { WRITE(p, "uniform vec4 u_alphacolorref;\n"); - if (bitwiseOps && (enableColorTest || !alphaTestAgainstZero)) { + if (bitwiseOps && ((enableColorTest && !colorTestAgainstZero) || (enableAlphaTest && !alphaTestAgainstZero))) { WRITE(p, "uniform ivec4 u_alphacolormask;\n"); } } @@ -652,7 +657,7 @@ void GenerateFragmentShader(char *buffer) { WRITE(p, "float roundAndScaleTo255f(in float x) { return floor(x * 255.0 + 0.5); }\n"); } } - if (enableColorTest) { + if (enableColorTest && !colorTestAgainstZero) { if (bitwiseOps) { WRITE(p, "ivec3 roundAndScaleTo255iv(in vec3 x) { return ivec3(floor(x * 255.0 + 0.5)); }\n"); } else if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR) { @@ -810,7 +815,7 @@ void GenerateFragmentShader(char *buffer) { // So we have to scale to account for the difference. std::string alphaTestXCoord = "0"; if (g_Config.bFragmentTestCache) { - if (enableColorTest) { + if (enableColorTest && !colorTestAgainstZero) { WRITE(p, " vec4 vScale256 = v * %f + %f;\n", 255.0 / 256.0, 0.5 / 256.0); alphaTestXCoord = "vScale256.a"; } else if (enableAlphaTest && !alphaTestAgainstZero) { @@ -860,7 +865,21 @@ void GenerateFragmentShader(char *buffer) { } if (enableColorTest) { - if (g_Config.bFragmentTestCache) { + if (colorTestAgainstZero) { + GEComparison colorTestFunc = gstate.getColorTestFunction(); + // When testing against 0 (common), we can avoid some math. + // 0.002 is approximately half of 1.0 / 255.0. + if (colorTestFunc == GE_COMP_NOTEQUAL) { + WRITE(p, " if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) discard;\n"); + } else if (colorTestFunc != GE_COMP_NEVER) { + // Anything else is a test for == 0. + WRITE(p, " if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) discard;\n"); + } else { + // NEVER has been logged as used by games, although it makes little sense - statically failing. + // Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here. + WRITE(p, " discard;\n"); + } + } else if (g_Config.bFragmentTestCache) { WRITE(p, " float rResult = %s(testtex, vec2(vScale256.r, 0)).r;\n", texture); WRITE(p, " float gResult = %s(testtex, vec2(vScale256.g, 0)).g;\n", texture); WRITE(p, " float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", texture); diff --git a/GPU/GLES/FragmentShaderGenerator.h b/GPU/GLES/FragmentShaderGenerator.h index 11339ada13..74c0456e06 100644 --- a/GPU/GLES/FragmentShaderGenerator.h +++ b/GPU/GLES/FragmentShaderGenerator.h @@ -54,6 +54,7 @@ enum ReplaceBlendType { bool IsAlphaTestAgainstZero(); bool IsAlphaTestTriviallyTrue(); +bool IsColorTestAgainstZero(); bool IsColorTestTriviallyTrue(); StencilValueType ReplaceAlphaWithStencilType(); ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend); diff --git a/GPU/GLES/FragmentTestCache.cpp b/GPU/GLES/FragmentTestCache.cpp index cbf36834a7..1cd5496241 100644 --- a/GPU/GLES/FragmentTestCache.cpp +++ b/GPU/GLES/FragmentTestCache.cpp @@ -38,6 +38,13 @@ void FragmentTestCache::BindTestTexture(GLenum unit) { return; } + bool alphaNeedsTexture = gstate.isAlphaTestEnabled() && !IsAlphaTestAgainstZero() && !IsAlphaTestTriviallyTrue(); + bool colorNeedsTexture = gstate.isColorTestEnabled() && !IsColorTestAgainstZero() && !IsColorTestTriviallyTrue(); + if (!alphaNeedsTexture && !colorNeedsTexture) { + // Common case: testing against zero. Just skip it, faster not to bind anything. + return; + } + const FragmentTestID id = GenerateTestID(); const auto cached = cache_.find(id); if (cached != cache_.end()) { @@ -47,10 +54,6 @@ void FragmentTestCache::BindTestTexture(GLenum unit) { // Already bound, hurray. return; } - if (!gstate.isColorTestEnabled() && (IsAlphaTestAgainstZero() || IsAlphaTestTriviallyTrue())) { - // Common case: testing against zero. Just skip it. - return; - } glActiveTexture(unit); glBindTexture(GL_TEXTURE_2D, tex); // Always return to the default.