mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Optimize color tests against zero.
Color tests against zero seem to be somewhat common. This speeds up Tales of Phantasia X a bit.
This commit is contained in:
parent
534b06d3ff
commit
f0676b0c85
5 changed files with 79 additions and 30 deletions
|
@ -107,6 +107,10 @@ bool IsAlphaTestAgainstZero() {
|
|||
return gstate.getAlphaTestRef() == 0 && gstate.getAlphaTestMask() == 0xFF;
|
||||
}
|
||||
|
||||
// Returns true when the current color test reference value is 0 and the color
// test mask is 0xFFFFFF, i.e. the test compares against zero with that exact mask.
bool IsColorTestAgainstZero() {
|
||||
return gstate.getColorTestRef() == 0 && gstate.getColorTestMask() == 0xFFFFFF;
|
||||
}
|
||||
|
||||
const bool nonAlphaSrcFactors[16] = {
|
||||
true, // GE_SRCBLEND_DSTCOLOR,
|
||||
true, // GE_SRCBLEND_INVDSTCOLOR,
|
||||
|
@ -453,19 +457,20 @@ void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id) {
|
|||
}
|
||||
#endif
|
||||
if (enableColorTest) {
|
||||
// 3 bits total.
|
||||
// 4 bits total.
|
||||
id0 |= 1 << 17;
|
||||
id0 |= gstate.getColorTestFunction() << 18;
|
||||
id0 |= (IsColorTestAgainstZero() & 1) << 20;
|
||||
}
|
||||
id0 |= (enableFog & 1) << 20;
|
||||
id0 |= (doTextureProjection & 1) << 21;
|
||||
id0 |= (enableColorDoubling & 1) << 22;
|
||||
id0 |= (enableFog & 1) << 21;
|
||||
id0 |= (doTextureProjection & 1) << 22;
|
||||
id0 |= (enableColorDoubling & 1) << 23;
|
||||
// 2 bits
|
||||
id0 |= (stencilToAlpha) << 23;
|
||||
id0 |= (stencilToAlpha) << 24;
|
||||
|
||||
if (stencilToAlpha != REPLACE_ALPHA_NO) {
|
||||
// 4 bits
|
||||
id0 |= ReplaceAlphaWithStencilType() << 25;
|
||||
id0 |= ReplaceAlphaWithStencilType() << 26;
|
||||
}
|
||||
|
||||
if (enableAlphaTest)
|
||||
|
@ -473,7 +478,6 @@ void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id) {
|
|||
else
|
||||
gpuStats.numNonAlphaTestedDraws++;
|
||||
|
||||
id0 |= (gstate_c.bgraTexture & 1) << 29;
|
||||
// 2 bits.
|
||||
id0 |= ReplaceLogicOpType() << 30;
|
||||
|
||||
|
@ -485,6 +489,10 @@ void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id) {
|
|||
id1 |= gstate.getBlendFuncA() << 6;
|
||||
id1 |= gstate.getBlendFuncB() << 10;
|
||||
}
|
||||
|
||||
// TODO: Flat shading?
|
||||
|
||||
id1 |= (gstate_c.bgraTexture & 1) << 15;
|
||||
}
|
||||
|
||||
id->d[0] = id0;
|
||||
|
@ -502,6 +510,7 @@ void GenerateFragmentShaderDX9(char *buffer) {
|
|||
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue() && !gstate.isModeClear() && !g_Config.bDisableAlphaTest;
|
||||
bool alphaTestAgainstZero = IsAlphaTestAgainstZero();
|
||||
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue() && !gstate.isModeClear();
|
||||
bool colorTestAgainstZero = IsColorTestAgainstZero();
|
||||
bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled();
|
||||
bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX;
|
||||
bool doTextureAlpha = gstate.isTextureAlphaUsed();
|
||||
|
@ -712,15 +721,31 @@ void GenerateFragmentShaderDX9(char *buffer) {
|
|||
}
|
||||
#endif
|
||||
if (enableColorTest) {
|
||||
GEComparison colorTestFunc = gstate.getColorTestFunction();
|
||||
const char *colorTestFuncs[] = { "#", "#", " != ", " == " }; // never/always don't make sense
|
||||
u32 colorTestMask = gstate.getColorTestMask();
|
||||
if (colorTestFuncs[colorTestFunc][0] != '#') {
|
||||
const char * test = colorTestFuncs[colorTestFunc];
|
||||
WRITE(p, " float3 colortest = roundAndScaleTo255v(v.rgb);\n");
|
||||
WRITE(p, " if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b )) clip(-1);\n", test, test, test);
|
||||
if (colorTestAgainstZero) {
|
||||
GEComparison colorTestFunc = gstate.getColorTestFunction();
|
||||
// When testing against 0 (common), we can avoid some math.
|
||||
// 0.002 is approximately half of 1.0 / 255.0.
|
||||
if (colorTestFunc == GE_COMP_NOTEQUAL) {
|
||||
WRITE(p, " if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) clip(-1);\n");
|
||||
} else if (colorTestFunc != GE_COMP_NEVER) {
|
||||
// Anything else is a test for == 0.
|
||||
WRITE(p, " if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) clip(-1);\n");
|
||||
} else {
|
||||
// NEVER has been logged as used by games, although it makes little sense - statically failing.
|
||||
// Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here.
|
||||
WRITE(p, " clip(-1);\n");
|
||||
}
|
||||
} else {
|
||||
WRITE(p, " clip(-1);\n");
|
||||
GEComparison colorTestFunc = gstate.getColorTestFunction();
|
||||
const char *colorTestFuncs[] = { "#", "#", " != ", " == " }; // never/always don't make sense
|
||||
u32 colorTestMask = gstate.getColorTestMask();
|
||||
if (colorTestFuncs[colorTestFunc][0] != '#') {
|
||||
const char * test = colorTestFuncs[colorTestFunc];
|
||||
WRITE(p, " float3 colortest = roundAndScaleTo255v(v.rgb);\n");
|
||||
WRITE(p, " if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b )) clip(-1);\n", test, test, test);
|
||||
} else {
|
||||
WRITE(p, " clip(-1);\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -79,6 +79,7 @@ enum ReplaceBlendType {
|
|||
|
||||
bool IsAlphaTestAgainstZero();
|
||||
bool IsAlphaTestTriviallyTrue();
|
||||
bool IsColorTestAgainstZero();
|
||||
bool IsColorTestTriviallyTrue();
|
||||
StencilValueType ReplaceAlphaWithStencilType();
|
||||
ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend);
|
||||
|
|
|
@ -115,6 +115,10 @@ bool IsAlphaTestAgainstZero() {
|
|||
return gstate.getAlphaTestRef() == 0 && gstate.getAlphaTestMask() == 0xFF;
|
||||
}
|
||||
|
||||
// Returns true when the current color test reference value is 0 and the color
// test mask is 0xFFFFFF, i.e. the test compares against zero with that exact mask.
bool IsColorTestAgainstZero() {
|
||||
return gstate.getColorTestRef() == 0 && gstate.getColorTestMask() == 0xFFFFFF;
|
||||
}
|
||||
|
||||
const bool nonAlphaSrcFactors[16] = {
|
||||
true, // GE_SRCBLEND_DSTCOLOR,
|
||||
true, // GE_SRCBLEND_INVDSTCOLOR,
|
||||
|
@ -436,19 +440,20 @@ void ComputeFragmentShaderID(ShaderID *id) {
|
|||
}
|
||||
#endif
|
||||
if (enableColorTest) {
|
||||
// 3 bits total.
|
||||
// 4 bits total.
|
||||
id0 |= 1 << 17;
|
||||
id0 |= gstate.getColorTestFunction() << 18;
|
||||
id0 |= (IsColorTestAgainstZero() & 1) << 20;
|
||||
}
|
||||
id0 |= (enableFog & 1) << 20;
|
||||
id0 |= (doTextureProjection & 1) << 21;
|
||||
id0 |= (enableColorDoubling & 1) << 22;
|
||||
id0 |= (enableFog & 1) << 21;
|
||||
id0 |= (doTextureProjection & 1) << 22;
|
||||
id0 |= (enableColorDoubling & 1) << 23;
|
||||
// 2 bits
|
||||
id0 |= (stencilToAlpha) << 23;
|
||||
id0 |= (stencilToAlpha) << 24;
|
||||
|
||||
if (stencilToAlpha != REPLACE_ALPHA_NO) {
|
||||
// 4 bits
|
||||
id0 |= ReplaceAlphaWithStencilType() << 25;
|
||||
id0 |= ReplaceAlphaWithStencilType() << 26;
|
||||
}
|
||||
|
||||
if (enableAlphaTest)
|
||||
|
@ -456,7 +461,6 @@ void ComputeFragmentShaderID(ShaderID *id) {
|
|||
else
|
||||
gpuStats.numNonAlphaTestedDraws++;
|
||||
|
||||
// 29 is free.
|
||||
// 2 bits.
|
||||
id0 |= ReplaceLogicOpType() << 30;
|
||||
|
||||
|
@ -573,6 +577,7 @@ void GenerateFragmentShader(char *buffer) {
|
|||
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue() && !gstate.isModeClear() && !g_Config.bDisableAlphaTest;
|
||||
bool alphaTestAgainstZero = IsAlphaTestAgainstZero();
|
||||
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue() && !gstate.isModeClear();
|
||||
bool colorTestAgainstZero = IsColorTestAgainstZero();
|
||||
bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled();
|
||||
bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX;
|
||||
bool doTextureAlpha = gstate.isTextureAlphaUsed();
|
||||
|
@ -617,7 +622,7 @@ void GenerateFragmentShader(char *buffer) {
|
|||
WRITE(p, "uniform sampler2D testtex;\n");
|
||||
} else {
|
||||
WRITE(p, "uniform vec4 u_alphacolorref;\n");
|
||||
if (bitwiseOps && (enableColorTest || !alphaTestAgainstZero)) {
|
||||
if (bitwiseOps && ((enableColorTest && !colorTestAgainstZero) || (enableAlphaTest && !alphaTestAgainstZero))) {
|
||||
WRITE(p, "uniform ivec4 u_alphacolormask;\n");
|
||||
}
|
||||
}
|
||||
|
@ -652,7 +657,7 @@ void GenerateFragmentShader(char *buffer) {
|
|||
WRITE(p, "float roundAndScaleTo255f(in float x) { return floor(x * 255.0 + 0.5); }\n");
|
||||
}
|
||||
}
|
||||
if (enableColorTest) {
|
||||
if (enableColorTest && !colorTestAgainstZero) {
|
||||
if (bitwiseOps) {
|
||||
WRITE(p, "ivec3 roundAndScaleTo255iv(in vec3 x) { return ivec3(floor(x * 255.0 + 0.5)); }\n");
|
||||
} else if (gl_extensions.gpuVendor == GPU_VENDOR_POWERVR) {
|
||||
|
@ -810,7 +815,7 @@ void GenerateFragmentShader(char *buffer) {
|
|||
// So we have to scale to account for the difference.
|
||||
std::string alphaTestXCoord = "0";
|
||||
if (g_Config.bFragmentTestCache) {
|
||||
if (enableColorTest) {
|
||||
if (enableColorTest && !colorTestAgainstZero) {
|
||||
WRITE(p, " vec4 vScale256 = v * %f + %f;\n", 255.0 / 256.0, 0.5 / 256.0);
|
||||
alphaTestXCoord = "vScale256.a";
|
||||
} else if (enableAlphaTest && !alphaTestAgainstZero) {
|
||||
|
@ -860,7 +865,21 @@ void GenerateFragmentShader(char *buffer) {
|
|||
}
|
||||
|
||||
if (enableColorTest) {
|
||||
if (g_Config.bFragmentTestCache) {
|
||||
if (colorTestAgainstZero) {
|
||||
GEComparison colorTestFunc = gstate.getColorTestFunction();
|
||||
// When testing against 0 (common), we can avoid some math.
|
||||
// 0.002 is approximately half of 1.0 / 255.0.
|
||||
if (colorTestFunc == GE_COMP_NOTEQUAL) {
|
||||
WRITE(p, " if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) discard;\n");
|
||||
} else if (colorTestFunc != GE_COMP_NEVER) {
|
||||
// Anything else is a test for == 0.
|
||||
WRITE(p, " if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) discard;\n");
|
||||
} else {
|
||||
// NEVER has been logged as used by games, although it makes little sense - statically failing.
|
||||
// Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here.
|
||||
WRITE(p, " discard;\n");
|
||||
}
|
||||
} else if (g_Config.bFragmentTestCache) {
|
||||
WRITE(p, " float rResult = %s(testtex, vec2(vScale256.r, 0)).r;\n", texture);
|
||||
WRITE(p, " float gResult = %s(testtex, vec2(vScale256.g, 0)).g;\n", texture);
|
||||
WRITE(p, " float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", texture);
|
||||
|
|
|
@ -54,6 +54,7 @@ enum ReplaceBlendType {
|
|||
|
||||
bool IsAlphaTestAgainstZero();
|
||||
bool IsAlphaTestTriviallyTrue();
|
||||
bool IsColorTestAgainstZero();
|
||||
bool IsColorTestTriviallyTrue();
|
||||
StencilValueType ReplaceAlphaWithStencilType();
|
||||
ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend);
|
||||
|
|
|
@ -38,6 +38,13 @@ void FragmentTestCache::BindTestTexture(GLenum unit) {
|
|||
return;
|
||||
}
|
||||
|
||||
bool alphaNeedsTexture = gstate.isAlphaTestEnabled() && !IsAlphaTestAgainstZero() && !IsAlphaTestTriviallyTrue();
|
||||
bool colorNeedsTexture = gstate.isColorTestEnabled() && !IsColorTestAgainstZero() && !IsColorTestTriviallyTrue();
|
||||
if (!alphaNeedsTexture && !colorNeedsTexture) {
|
||||
// Common case: testing against zero. Just skip it, faster not to bind anything.
|
||||
return;
|
||||
}
|
||||
|
||||
const FragmentTestID id = GenerateTestID();
|
||||
const auto cached = cache_.find(id);
|
||||
if (cached != cache_.end()) {
|
||||
|
@ -47,10 +54,6 @@ void FragmentTestCache::BindTestTexture(GLenum unit) {
|
|||
// Already bound, hurray.
|
||||
return;
|
||||
}
|
||||
if (!gstate.isColorTestEnabled() && (IsAlphaTestAgainstZero() || IsAlphaTestTriviallyTrue())) {
|
||||
// Common case: testing against zero. Just skip it.
|
||||
return;
|
||||
}
|
||||
glActiveTexture(unit);
|
||||
glBindTexture(GL_TEXTURE_2D, tex);
|
||||
// Always return to the default.
|
||||
|
|
Loading…
Add table
Reference in a new issue