From 4b978ed6f9c082cbe119c1f0efc0ac90e448c46f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 11 May 2014 09:51:35 -0700 Subject: [PATCH 01/11] Apply unsupported blending modes in the shader. This handles unsupported fixed color combinations, alpha doubling, etc. Where possible, tries to avoid it - it means using a blit (without the framebuffer fetch extension), which can slow things down a lot with tons of drawcalls. --- GPU/GLES/FragmentShaderGenerator.cpp | 165 +++++++++++++++++++++++++-- GPU/GLES/FragmentShaderGenerator.h | 8 +- GPU/GLES/Framebuffer.cpp | 4 + GPU/GLES/GLES_GPU.cpp | 18 ++- GPU/GLES/GLES_GPU.h | 2 + GPU/GLES/ShaderManager.cpp | 11 ++ GPU/GLES/ShaderManager.h | 7 +- GPU/GLES/StateMapping.cpp | 16 ++- 8 files changed, 209 insertions(+), 22 deletions(-) diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index 90078f7272..ec483b46d7 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -145,6 +145,8 @@ ReplaceAlphaType ReplaceAlphaWithStencil() { if (gstate.isAlphaBlendEnabled()) { if (nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()]) { return REPLACE_ALPHA_YES; + } else if (ShouldUseShaderBlending()) { + return REPLACE_ALPHA_YES; } else { if (gl_extensions.ARB_blend_func_extended) { return REPLACE_ALPHA_DUALSOURCE; @@ -277,10 +279,69 @@ static bool CanDoubleSrcBlendMode() { } } +// TODO: Setting to disable? +bool ShouldUseShaderBlending() { + if (!gstate.isAlphaBlendEnabled()) { + return false; + } + // We can't blit on GLES2, so we don't support it. We also want texelFetch (OpenGL 3.0+ / GLES3+.) + if (!gl_extensions.VersionGEThan(3, 0, 0) && !gl_extensions.GLES3) { + return false; + } + + GEBlendSrcFactor funcA = gstate.getBlendFuncA(); + GEBlendDstFactor funcB = gstate.getBlendFuncB(); + GEBlendMode eq = gstate.getBlendEq(); + + if (eq == GE_BLENDMODE_ABSDIFF) { + return true; + } + + // This normally involves a blit, so try to skip it. + if (AlphaToColorDoubling() || CanDoubleSrcBlendMode()) { + return false; + } + + switch (funcA) { + case GE_SRCBLEND_DOUBLESRCALPHA: + case GE_SRCBLEND_DOUBLEINVSRCALPHA: + case GE_SRCBLEND_DOUBLEDSTALPHA: + case GE_SRCBLEND_DOUBLEINVDSTALPHA: + return true; + + case GE_SRCBLEND_FIXA: + if (funcB == GE_DSTBLEND_FIXB) { + u32 fixA = gstate.getFixA(); + u32 fixB = gstate.getFixB(); + // OpenGL only supports one constant color, so check if we could be more exact. + if (fixA != fixB && fixA != 0xFFFFFF - fixB) { + return true; + } + } + + default: + break; + } + + switch (funcB) { + case GE_DSTBLEND_DOUBLESRCALPHA: + case GE_DSTBLEND_DOUBLEINVSRCALPHA: + case GE_DSTBLEND_DOUBLEDSTALPHA: + case GE_DSTBLEND_DOUBLEINVDSTALPHA: + return true; + + default: + break; + } + + return false; +} + // Here we must take all the bits of the gstate that determine what the fragment shader will // look like, and concatenate them together into an ID. void ComputeFragmentShaderID(FragmentShaderID *id) { int id0 = 0; + int id1 = 0; if (gstate.isModeClear()) { // We only need one clear shader, so let's ignore the rest of the bits. id0 = 1; @@ -296,7 +357,6 @@ void ComputeFragmentShaderID(FragmentShaderID *id) { bool enableAlphaDoubling = !alphaToColorDoubling && CanDoubleSrcBlendMode(); bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX; bool doTextureAlpha = gstate.isTextureAlphaUsed(); - bool computeAbsdiff = gstate.getBlendEq() == GE_BLENDMODE_ABSDIFF; ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil(); // All texfuncs except replace are the same for RGB as for RGBA with full alpha. @@ -338,12 +398,16 @@ void ComputeFragmentShaderID(FragmentShaderID *id) { else gpuStats.numNonAlphaTestedDraws++; - if (computeAbsdiff) { - id0 |= (computeAbsdiff & 1) << 25; + if (ShouldUseShaderBlending()) { + // 11 bits total. + id1 |= (gstate.getBlendEq() << 0); + id1 |= (gstate.getBlendFuncA() << 3); + id1 |= (gstate.getBlendFuncB() << 7); } } id->d[0] = id0; + id->d[1] = id1; } // Missing: Z depth range @@ -423,7 +487,6 @@ void GenerateFragmentShader(char *buffer) { bool enableAlphaDoubling = !alphaToColorDoubling && CanDoubleSrcBlendMode(); bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX; bool doTextureAlpha = gstate.isTextureAlphaUsed(); - bool computeAbsdiff = gstate.getBlendEq() == GE_BLENDMODE_ABSDIFF; ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil(); if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE) @@ -431,6 +494,17 @@ void GenerateFragmentShader(char *buffer) { if (doTexture) WRITE(p, "uniform sampler2D tex;\n"); + if (ShouldUseShaderBlending() && !gstate.isModeClear()) { + if (!gl_extensions.NV_shader_framebuffer_fetch) { + WRITE(p, "uniform sampler2D fbotex;\n"); + } + if (gstate.getBlendFuncA() == GE_SRCBLEND_FIXA) { + WRITE(p, "uniform vec3 u_blendFixA;\n"); + } + if (gstate.getBlendFuncB() == GE_DSTBLEND_FIXB) { + WRITE(p, "uniform vec3 u_blendFixB;\n"); + } + } if (enableAlphaTest || enableColorTest) { WRITE(p, "uniform vec4 u_alphacolorref;\n"); @@ -607,12 +681,85 @@ void GenerateFragmentShader(char *buffer) { WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n"); // WRITE(p, " v.x = v_depth;\n"); } - } - // Handle ABSDIFF blending mode using NV_shader_framebuffer_fetch - if (computeAbsdiff && gl_extensions.NV_shader_framebuffer_fetch) { - WRITE(p, " lowp vec4 destColor = gl_LastFragData[0];\n"); - WRITE(p, " gl_FragColor = abs(destColor - v);\n"); + if (ShouldUseShaderBlending()) { + // If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit. + // We can just read the prev value more directly. + // TODO: EXT_shader_framebuffer_fetch on iOS 6, possibly others. + if (gl_extensions.NV_shader_framebuffer_fetch) { + WRITE(p, " lowp vec4 destColor = gl_LastFragData[0];\n"); + } else { + WRITE(p, " lowp vec4 destColor = texelFetch(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n"); + } + + GEBlendSrcFactor funcA = gstate.getBlendFuncA(); + GEBlendDstFactor funcB = gstate.getBlendFuncB(); + GEBlendMode eq = gstate.getBlendEq(); + + const char *srcFactor = "vec3(1.0)"; + const char *dstFactor = "vec3(0.0)"; + + switch (funcA) + { + case GE_SRCBLEND_DSTCOLOR: srcFactor = "destColor.rgb"; break; + case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(vec3(1.0) - destColor.rgb)"; break; + case GE_SRCBLEND_SRCALPHA: srcFactor = "vec3(v.a)"; break; + case GE_SRCBLEND_INVSRCALPHA: srcFactor = "vec3(1.0 - v.a)"; break; + case GE_SRCBLEND_DSTALPHA: srcFactor = "vec3(destColor.a)"; break; + case GE_SRCBLEND_INVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a)"; break; + case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a + v.a)"; break; + // TODO: Double inverse, or inverse double? Following softgpu for now... + case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a - v.a)"; break; + case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "vec3(destColor.a + destColor.a)"; break; + case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a - destColor.a)"; break; + case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break; + } + switch (funcB) + { + case GE_DSTBLEND_SRCCOLOR: dstFactor = "v.rgb"; break; + case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(vec3(1.0) - v.rgb)"; break; + case GE_DSTBLEND_SRCALPHA: dstFactor = "vec3(v.a)"; break; + case GE_DSTBLEND_INVSRCALPHA: dstFactor = "vec3(1.0 - v.a)"; break; + case GE_DSTBLEND_DSTALPHA: dstFactor = "vec3(destColor.a)"; break; + case GE_DSTBLEND_INVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a)"; break; + case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "vec3(v.a + v.a)"; break; + case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "vec3(1.0 - v.a - v.a)"; break; + case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "vec3(destColor.a + destColor.a)"; break; + case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a - destColor.a)"; break; + case GE_DSTBLEND_FIXB: dstFactor = "u_blendFixB"; break; + } + + switch (eq) + { + case GE_BLENDMODE_MUL_AND_ADD: + WRITE(p, " v.rgb = v.rgb * %s + destColor.rgb * %s;\n", srcFactor, dstFactor); + break; + case GE_BLENDMODE_MUL_AND_SUBTRACT: + WRITE(p, " v.rgb = v.rgb * %s - destColor.rgb * %s;\n", srcFactor, dstFactor); + break; + case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE: + WRITE(p, " v.rgb = destColor.rgb * %s - v.rgb * %s;\n", srcFactor, dstFactor); + break; + case GE_BLENDMODE_MIN: + if (funcA != GE_SRCBLEND_DSTCOLOR || funcB != GE_DSTBLEND_SRCCOLOR) { + WARN_LOG_REPORT(G3D, "Using MIN blend equation with odd factors: %d, %d", funcA, funcB); + } + WRITE(p, " v.rgb = min(v.rgb, destColor.rgb);\n", srcFactor, dstFactor); + break; + case GE_BLENDMODE_MAX: + if (funcA != GE_SRCBLEND_DSTCOLOR || funcB != GE_DSTBLEND_SRCCOLOR) { + WARN_LOG_REPORT(G3D, "Using MAX blend equation with odd factors: %d, %d", funcA, funcB); + } + WRITE(p, " v.rgb = max(v.rgb, destColor.rgb);\n", srcFactor, dstFactor); + break; + case GE_BLENDMODE_ABSDIFF: + if (funcA != GE_SRCBLEND_DSTCOLOR || funcB != GE_DSTBLEND_SRCCOLOR) { + WARN_LOG_REPORT(G3D, "Using ABSDIFF blend equation with odd factors: %d, %d", funcA, funcB); + } + WRITE(p, " v.rgb = abs(v.rgb - destColor.rgb);\n", srcFactor, dstFactor); + break; + } + } } switch (stencilToAlpha) { diff --git a/GPU/GLES/FragmentShaderGenerator.h b/GPU/GLES/FragmentShaderGenerator.h index 73c605e310..2357739d37 100644 --- a/GPU/GLES/FragmentShaderGenerator.h +++ b/GPU/GLES/FragmentShaderGenerator.h @@ -20,9 +20,9 @@ #include "Globals.h" struct FragmentShaderID { - FragmentShaderID() {d[0] = 0xFFFFFFFF;} - void clear() {d[0] = 0xFFFFFFFF;} - u32 d[1]; + FragmentShaderID() {clear();} + void clear() {d[0] = 0xFFFFFFFF; d[1] = 0xFFFFFFFF;} + u32 d[2]; bool operator < (const FragmentShaderID &other) const { for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) { if (d[i] < other.d[i]) @@ -62,4 +62,4 @@ bool IsAlphaTestTriviallyTrue(); bool IsColorTestTriviallyTrue(); StencilValueType ReplaceAlphaWithStencilType(); ReplaceAlphaType ReplaceAlphaWithStencil(); - +bool ShouldUseShaderBlending(); diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index e1fc7265ef..87b9318b3d 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -1007,6 +1007,10 @@ void FramebufferManager::BindFramebufferDepth(VirtualFramebuffer *sourceframebuf } void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) { + if (framebuffer == NULL) { + framebuffer = currentRenderVfb_; + } + if (!framebuffer->fbo || !useBufferedRendering_) { glBindTexture(GL_TEXTURE_2D, 0); gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE; diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index dbda6f590b..4fd48aa1fc 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -162,8 +162,8 @@ static const CommandTableEntry commandTable[] = { {GE_CMD_STENCILTESTENABLE, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_ALPHABLENDENABLE, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_BLENDMODE, FLAG_FLUSHBEFOREONCHANGE}, - {GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE}, - {GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE}, + {GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_BlendFixA}, + {GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_BlendFixB}, {GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE}, @@ -1080,6 +1080,14 @@ void GLES_GPU::Execute_ColorRef(u32 op, u32 diff) { shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF); } +void GLES_GPU::Execute_BlendFixA(u32 op, u32 diff) { + shaderManager_->DirtyUniform(DIRTY_BLENDFIX); +} + +void GLES_GPU::Execute_BlendFixB(u32 op, u32 diff) { + shaderManager_->DirtyUniform(DIRTY_BLENDFIX); +} + void GLES_GPU::Execute_WorldMtxNum(u32 op, u32 diff) { // This is almost always followed by GE_CMD_WORLDMATRIXDATA. const u32_le *src = (const u32_le *)Memory::GetPointer(currentList->pc + 4); @@ -1607,8 +1615,14 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { ////////////////////////////////////////////////////////////////// case GE_CMD_ALPHABLENDENABLE: case GE_CMD_BLENDMODE: + break; + case GE_CMD_BLENDFIXEDA: + Execute_BlendFixA(op, diff); + break; + case GE_CMD_BLENDFIXEDB: + Execute_BlendFixB(op, diff); break; case GE_CMD_ALPHATESTENABLE: diff --git a/GPU/GLES/GLES_GPU.h b/GPU/GLES/GLES_GPU.h index 872811c913..a3fa4f1055 100644 --- a/GPU/GLES/GLES_GPU.h +++ b/GPU/GLES/GLES_GPU.h @@ -123,6 +123,8 @@ public: void Execute_AlphaTest(u32 op, u32 diff); void Execute_StencilTest(u32 op, u32 diff); void Execute_ColorRef(u32 op, u32 diff); + void Execute_BlendFixA(u32 op, u32 diff); + void Execute_BlendFixB(u32 op, u32 diff); void Execute_WorldMtxNum(u32 op, u32 diff); void Execute_WorldMtxData(u32 op, u32 diff); void Execute_ViewMtxNum(u32 op, u32 diff); diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 08c4d41dee..8872960182 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -155,6 +155,10 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans u_colormask = glGetUniformLocation(program, "u_colormask"); u_stencilReplaceValue = glGetUniformLocation(program, "u_stencilReplaceValue"); + u_fbotex = glGetUniformLocation(program, "fbotex"); + u_blendFixA = glGetUniformLocation(program, "u_blendFixA"); + u_blendFixB = glGetUniformLocation(program, "u_blendFixB"); + // Transform u_view = glGetUniformLocation(program, "u_view"); u_world = glGetUniformLocation(program, "u_world"); @@ -225,6 +229,7 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans if (u_view != -1) availableUniforms |= DIRTY_VIEWMATRIX; if (u_texmtx != -1) availableUniforms |= DIRTY_TEXMATRIX; if (u_stencilReplaceValue != -1) availableUniforms |= DIRTY_STENCILREPLACEVALUE; + if (u_blendFixA != -1 || u_blendFixB != -1) availableUniforms |= DIRTY_BLENDFIX; // Looping up to numBones lets us avoid checking u_bone[i] for (int i = 0; i < numBones; i++) { @@ -247,6 +252,7 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans // Default uniform values glUniform1i(u_tex, 0); + glUniform1i(u_fbotex, 1); // The rest, use the "dirty" mechanism. dirtyUniforms = DIRTY_ALL; use(vertType, previous); @@ -520,6 +526,11 @@ void LinkedShader::UpdateUniforms(u32 vertType) { } #endif + if (dirty & DIRTY_BLENDFIX) { + SetColorUniform3(u_blendFixA, gstate.getFixA()); + SetColorUniform3(u_blendFixB, gstate.getFixB()); + } + // Lighting if (dirty & DIRTY_AMBIENT) { SetColorUniform3Alpha(u_ambient, gstate.ambientcolor, gstate.getAmbientA()); diff --git a/GPU/GLES/ShaderManager.h b/GPU/GLES/ShaderManager.h index b589b66f90..fe7056fa46 100644 --- a/GPU/GLES/ShaderManager.h +++ b/GPU/GLES/ShaderManager.h @@ -73,6 +73,11 @@ public: #endif int numBones; + // Shader blending. + int u_fbotex; + int u_blendFixA; + int u_blendFixB; + // Fragment processing inputs int u_alphacolorref; int u_colormask; @@ -123,7 +128,7 @@ enum DIRTY_AMBIENT = (1 << 15), DIRTY_MATAMBIENTALPHA = (1 << 16), - // 1 << 17 is free! + DIRTY_BLENDFIX = (1 << 17), // (either one.) DIRTY_UVSCALEOFFSET = (1 << 18), // this will be dirtied ALL THE TIME... maybe we'll need to do "last value with this shader compares" diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index 70bdf98cf0..516d7196ab 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -176,6 +176,15 @@ void TransformDrawEngine::ApplyDrawState(int prim) { // Set blend bool wantBlend = !gstate.isModeClear() && gstate.isAlphaBlendEnabled(); + if (wantBlend && ShouldUseShaderBlending()) { + if (!gl_extensions.NV_shader_framebuffer_fetch) { + glActiveTexture(GL_TEXTURE1); + framebufferManager_->BindFramebufferColor(NULL); + glActiveTexture(GL_TEXTURE0); + } + // None of the below logic is interesting, we're gonna do it entirely in the shader. + wantBlend = false; + } glstate.blend.set(wantBlend); if (wantBlend) { // This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop. @@ -323,12 +332,7 @@ void TransformDrawEngine::ApplyDrawState(int prim) { } if (((blendFuncEq >= GE_BLENDMODE_MIN) && gl_extensions.EXT_blend_minmax) || gl_extensions.GLES3) { - if (blendFuncEq == GE_BLENDMODE_ABSDIFF && gl_extensions.NV_shader_framebuffer_fetch) { - // Handle GE_BLENDMODE_ABSDIFF in fragment shader and turn off regular alpha blending here. - glstate.blend.set(false); - } else { - glstate.blendEquation.set(eqLookup[blendFuncEq]); - } + glstate.blendEquation.set(eqLookup[blendFuncEq]); } else { glstate.blendEquation.set(eqLookupNoMinMax[blendFuncEq]); } From a67327759fd7ab999b2c4005b0782e5491907bed Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 11 May 2014 10:07:07 -0700 Subject: [PATCH 02/11] Double using multiply not add, should be fine. --- GPU/GLES/FragmentShaderGenerator.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index ec483b46d7..390846bdc6 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -707,11 +707,11 @@ void GenerateFragmentShader(char *buffer) { case GE_SRCBLEND_INVSRCALPHA: srcFactor = "vec3(1.0 - v.a)"; break; case GE_SRCBLEND_DSTALPHA: srcFactor = "vec3(destColor.a)"; break; case GE_SRCBLEND_INVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a)"; break; - case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a + v.a)"; break; + case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a * 2.0)"; break; // TODO: Double inverse, or inverse double? Following softgpu for now... - case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a - v.a)"; break; - case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "vec3(destColor.a + destColor.a)"; break; - case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a - destColor.a)"; break; + case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break; + case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "vec3(destColor.a * 2.0)"; break; + case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a * 2.0)"; break; case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break; } switch (funcB) @@ -722,10 +722,10 @@ void GenerateFragmentShader(char *buffer) { case GE_DSTBLEND_INVSRCALPHA: dstFactor = "vec3(1.0 - v.a)"; break; case GE_DSTBLEND_DSTALPHA: dstFactor = "vec3(destColor.a)"; break; case GE_DSTBLEND_INVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a)"; break; - case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "vec3(v.a + v.a)"; break; - case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "vec3(1.0 - v.a - v.a)"; break; - case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "vec3(destColor.a + destColor.a)"; break; - case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a - destColor.a)"; break; + case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "vec3(v.a * 2.0)"; break; + case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "vec3(1.0 - v.a * 2.0)"; break; + case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "vec3(destColor.a * 2.0)"; break; + case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a * 2.0)"; break; case GE_DSTBLEND_FIXB: dstFactor = "u_blendFixB"; break; } From 41ca303beeff30cf7f2ee175e760595f2e975848 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 11 May 2014 11:59:31 -0700 Subject: [PATCH 03/11] Skip a few more fixa/fixb combos. --- GPU/GLES/FragmentShaderGenerator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index 390846bdc6..1313baa555 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -314,7 +314,7 @@ bool ShouldUseShaderBlending() { u32 fixA = gstate.getFixA(); u32 fixB = gstate.getFixB(); // OpenGL only supports one constant color, so check if we could be more exact. - if (fixA != fixB && fixA != 0xFFFFFF - fixB) { + if (fixA != fixB && fixA != 0xFFFFFF - fixB && fixA != 0 && fixB != 0 && fixA != 0xFFFFFF && fixB != 0xFFFFFF) { return true; } } From b02fe1db3d5f3aadd20c3b6fb721d251ac419b37 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 11 May 2014 14:42:29 -0700 Subject: [PATCH 04/11] Log when blitting a ton per frame, it can get slow. --- GPU/GLES/StateMapping.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index 516d7196ab..176b3fff05 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -178,6 +178,19 @@ void TransformDrawEngine::ApplyDrawState(int prim) { bool wantBlend = !gstate.isModeClear() && gstate.isAlphaBlendEnabled(); if (wantBlend && ShouldUseShaderBlending()) { if (!gl_extensions.NV_shader_framebuffer_fetch) { + static const int MAX_REASONABLE_BLITS_PER_FRAME = 24; + + static int lastFrameBlit = -1; + static int blitsThisFrame = 0; + if (lastFrameBlit != gpuStats.numFlips) { + if (blitsThisFrame > MAX_REASONABLE_BLITS_PER_FRAME) { + WARN_LOG_REPORT_ONCE(blendingBlit, G3D, "Lots of blits needed for obscure blending: %d per frame, blend %d/%d/%d", blitsThisFrame, gstate.getBlendFuncA(), gstate.getBlendFuncB(), gstate.getBlendEq()); + } + blitsThisFrame = 0; + lastFrameBlit = gpuStats.numFlips; + } + ++blitsThisFrame; + glActiveTexture(GL_TEXTURE1); framebufferManager_->BindFramebufferColor(NULL); glActiveTexture(GL_TEXTURE0); From f80dee4164e5aa2d7b16787d3492bf5071beec06 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 11 May 2014 14:48:40 -0700 Subject: [PATCH 05/11] Remove outdated comment. Was all lies. --- GPU/GLES/StateMapping.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index 176b3fff05..833c14dce2 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -171,9 +171,6 @@ void TransformDrawEngine::ApplyDrawState(int prim) { gstate_c.textureChanged = TEXCHANGE_UNCHANGED; } - // TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a - // single fullscreen pass that converts alpha to stencil (or 2 passes, to set both the 0 and 1 values) very easily. - // Set blend bool wantBlend = !gstate.isModeClear() && gstate.isAlphaBlendEnabled(); if (wantBlend && ShouldUseShaderBlending()) { From b8a2410934175038929dce228b01c8b538263851 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 11 May 2014 17:55:02 -0700 Subject: [PATCH 06/11] Oops, typo. --- GPU/GLES/FragmentShaderGenerator.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index 1313baa555..272b0e1002 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -744,19 +744,19 @@ void GenerateFragmentShader(char *buffer) { if (funcA != GE_SRCBLEND_DSTCOLOR || funcB != GE_DSTBLEND_SRCCOLOR) { WARN_LOG_REPORT(G3D, "Using MIN blend equation with odd factors: %d, %d", funcA, funcB); } - WRITE(p, " v.rgb = min(v.rgb, destColor.rgb);\n", srcFactor, dstFactor); + WRITE(p, " v.rgb = min(v.rgb, destColor.rgb);\n"); break; case GE_BLENDMODE_MAX: if (funcA != GE_SRCBLEND_DSTCOLOR || funcB != GE_DSTBLEND_SRCCOLOR) { WARN_LOG_REPORT(G3D, "Using MAX blend equation with odd factors: %d, %d", funcA, funcB); } - WRITE(p, " v.rgb = max(v.rgb, destColor.rgb);\n", srcFactor, dstFactor); + WRITE(p, " v.rgb = max(v.rgb, destColor.rgb);\n"); break; case GE_BLENDMODE_ABSDIFF: if (funcA != GE_SRCBLEND_DSTCOLOR || funcB != GE_DSTBLEND_SRCCOLOR) { WARN_LOG_REPORT(G3D, "Using ABSDIFF blend equation with odd factors: %d, %d", funcA, funcB); } - WRITE(p, " v.rgb = abs(v.rgb - destColor.rgb);\n", srcFactor, dstFactor); + WRITE(p, " v.rgb = abs(v.rgb - destColor.rgb);\n"); break; } } From 0eae30dddb58bb1dc840d4b94d99998f972d697f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 12 May 2014 08:39:58 -0700 Subject: [PATCH 07/11] Set a bit to account for fixa/fixb/etc. logic. --- GPU/GLES/FragmentShaderGenerator.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index 272b0e1002..164f60df7c 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -399,10 +399,11 @@ void ComputeFragmentShaderID(FragmentShaderID *id) { gpuStats.numNonAlphaTestedDraws++; if (ShouldUseShaderBlending()) { - // 11 bits total. - id1 |= (gstate.getBlendEq() << 0); - id1 |= (gstate.getBlendFuncA() << 3); - id1 |= (gstate.getBlendFuncB() << 7); + // 12 bits total. + id1 |= 1; + id1 |= (gstate.getBlendEq() << 1); + id1 |= (gstate.getBlendFuncA() << 4); + id1 |= (gstate.getBlendFuncB() << 8); } } From 8f9a2b988288ca61e0adbcffe8cdbf7e72b7aaec Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 12 May 2014 21:47:55 -0700 Subject: [PATCH 08/11] Unbind the fbo texture when it's not used. Just in case there are problems with decimating it or etc. --- GPU/GLES/StateMapping.cpp | 8 +++++++- GPU/GLES/TransformPipeline.cpp | 3 ++- GPU/GLES/TransformPipeline.h | 2 ++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index 833c14dce2..c89173fadf 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -171,7 +171,7 @@ void TransformDrawEngine::ApplyDrawState(int prim) { gstate_c.textureChanged = TEXCHANGE_UNCHANGED; } - // Set blend + // Set blend - unless we need to do it in the shader. bool wantBlend = !gstate.isModeClear() && gstate.isAlphaBlendEnabled(); if (wantBlend && ShouldUseShaderBlending()) { if (!gl_extensions.NV_shader_framebuffer_fetch) { @@ -191,10 +191,16 @@ void TransformDrawEngine::ApplyDrawState(int prim) { glActiveTexture(GL_TEXTURE1); framebufferManager_->BindFramebufferColor(NULL); glActiveTexture(GL_TEXTURE0); + fboTexBound_ = true; } // None of the below logic is interesting, we're gonna do it entirely in the shader. wantBlend = false; + } else if (fboTexBound_) { + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); } + glstate.blend.set(wantBlend); if (wantBlend) { // This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop. diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index b9ac5c2800..0c96223e2d 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -126,7 +126,8 @@ TransformDrawEngine::TransformDrawEngine() numDrawCalls(0), vertexCountInDrawCalls(0), decodeCounter_(0), - uvScale(0) { + uvScale(0), + fboTexBound_(false) { decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; // Allocate nicely aligned memory. Maybe graphics drivers will // appreciate it. diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index 8e23c78c52..4795224e83 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -243,4 +243,6 @@ private: u32 dcid_; UVScale *uvScale; + + bool fboTexBound_; }; From fce7d8c6972f6c1b46d05de90b8c846c2b32354e Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 27 May 2014 00:46:18 -0700 Subject: [PATCH 09/11] Ignore the funcs for min/max/absdiff. Seems from reports that they are indeed ignored, just wanted to verify. --- GPU/GLES/FragmentShaderGenerator.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index 164f60df7c..7b089be3a5 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -742,21 +742,12 @@ void GenerateFragmentShader(char *buffer) { WRITE(p, " v.rgb = destColor.rgb * %s - v.rgb * %s;\n", srcFactor, dstFactor); break; case GE_BLENDMODE_MIN: - if (funcA != GE_SRCBLEND_DSTCOLOR || funcB != GE_DSTBLEND_SRCCOLOR) { - WARN_LOG_REPORT(G3D, "Using MIN blend equation with odd factors: %d, %d", funcA, funcB); - } WRITE(p, " v.rgb = min(v.rgb, destColor.rgb);\n"); break; case GE_BLENDMODE_MAX: - if (funcA != GE_SRCBLEND_DSTCOLOR || funcB != GE_DSTBLEND_SRCCOLOR) { - WARN_LOG_REPORT(G3D, "Using MAX blend equation with odd factors: %d, %d", funcA, funcB); - } WRITE(p, " v.rgb = max(v.rgb, destColor.rgb);\n"); break; case GE_BLENDMODE_ABSDIFF: - if (funcA != GE_SRCBLEND_DSTCOLOR || funcB != GE_DSTBLEND_SRCCOLOR) { - WARN_LOG_REPORT(G3D, "Using ABSDIFF blend equation with odd factors: %d, %d", funcA, funcB); - } WRITE(p, " v.rgb = abs(v.rgb - destColor.rgb);\n"); break; } From 3e24786eab1eb2caafbb179d240bc7dc13ae83a1 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 27 May 2014 02:00:28 -0700 Subject: [PATCH 10/11] Disable the scissor test around blits. Because it applies. --- GPU/GLES/Framebuffer.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 87b9318b3d..2ce3b826a5 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -992,6 +992,7 @@ void FramebufferManager::BindFramebufferDepth(VirtualFramebuffer *sourceframebuf // Let's only do this if not clearing. if (!gstate.isModeClear() || !gstate.isClearModeDepthMask()) { fbo_bind_for_read(sourceframebuffer->fbo); + glDisable(GL_SCISSOR_TEST); #if defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY)) // We only support this extension on Android, it's not even available on PC. if (useNV) { @@ -1000,6 +1001,8 @@ void FramebufferManager::BindFramebufferDepth(VirtualFramebuffer *sourceframebuf #endif // defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY)) glBlitFramebuffer(0, 0, sourceframebuffer->renderWidth, sourceframebuffer->renderHeight, 0, 0, targetframebuffer->renderWidth, targetframebuffer->renderHeight, GL_DEPTH_BUFFER_BIT, GL_NEAREST); // If we set targetframebuffer->depthUpdated here, our optimization above would be pointless. + + glstate.scissorTest.restore(); } #endif } @@ -1045,6 +1048,7 @@ void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) { fbo_bind_as_render_target(renderCopy); glViewport(0, 0, framebuffer->renderWidth, framebuffer->renderHeight); + glDisable(GL_SCISSOR_TEST); fbo_bind_for_read(framebuffer->fbo); #if defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY)) // We only support this extension on Android, it's not even available on PC. @@ -1056,6 +1060,8 @@ void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) { fbo_bind_as_render_target(currentRenderVfb_->fbo); fbo_bind_color_as_texture(renderCopy, 0); + glstate.viewport.restore(); + glstate.scissorTest.restore(); #endif } else { fbo_bind_color_as_texture(framebuffer->fbo, 0); @@ -1321,7 +1327,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int } fbo_bind_as_render_target(dst->fbo); - + glDisable(GL_SCISSOR_TEST); #ifndef USING_GLES2 if (gl_extensions.FBO_ARB) { @@ -1377,7 +1383,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int // Make sure our 2D drawing program is ready. Compiles only if not already compiled. CompileDraw2DProgram(); - glstate.viewport.set(0, 0, dst->width, dst->height); + glViewport(0, 0, dst->width, dst->height); DisableState(); // The first four coordinates are relative to the 6th and 7th arguments of DrawActiveTexture. @@ -1388,6 +1394,8 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int glBindTexture(GL_TEXTURE_2D, 0); } + glstate.scissorTest.restore(); + glstate.viewport.restore(); fbo_unbind(); } From ba2cf4f1d90d08d34a1f46503d6dd68a0db648a7 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 27 May 2014 02:00:49 -0700 Subject: [PATCH 11/11] Make sure to rebind a texture after changing it. --- GPU/GLES/Framebuffer.cpp | 11 ++++++----- GPU/GLES/TextureCache.h | 5 +++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 2ce3b826a5..f34e8a9dc8 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -784,7 +784,7 @@ void FramebufferManager::DoSetRenderFrameBuffer() { // None found? Create one. if (!vfb) { - gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; + textureCache_->ForgetLastTexture(); vfb = new VirtualFramebuffer(); vfb->fbo = 0; vfb->fb_address = fb_address; @@ -891,7 +891,7 @@ void FramebufferManager::DoSetRenderFrameBuffer() { // Use it as a render target. DEBUG_LOG(SCEGE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format); vfb->usageFlags |= FB_USAGE_RENDERTARGET; - gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; + textureCache_->ForgetLastTexture(); vfb->last_frame_render = gpuStats.numFlips; frameLastFramebufUsed = gpuStats.numFlips; vfb->dirtyAfterDisplay = true; @@ -1248,7 +1248,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s glEnable(GL_DITHER); } else { nvfb->usageFlags |= FB_USAGE_RENDERTARGET; - gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; + textureCache_->ForgetLastTexture(); nvfb->last_frame_render = gpuStats.numFlips; nvfb->dirtyAfterDisplay = true; @@ -1392,6 +1392,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int float srcH = src->height; DrawActiveTexture(0, dstX, dstY, w, h, dst->width, dst->height, false, srcX / srcW, srcY / srcH, (srcX + w) / srcW, (srcY + h) / srcH, draw2dprogram_); glBindTexture(GL_TEXTURE_2D, 0); + textureCache_->ForgetLastTexture(); } glstate.scissorTest.restore(); @@ -1923,7 +1924,7 @@ bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size) { fbo_unbind(); } glstate.viewport.restore(); - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + textureCache_->ForgetLastTexture(); // This is a memcpy, let's still copy just in case. return false; } @@ -2057,7 +2058,7 @@ void FramebufferManager::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, fbo_unbind(); } glstate.viewport.restore(); - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + textureCache_->ForgetLastTexture(); } } } diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 53552ca137..10ccc857dc 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -65,6 +65,11 @@ public: return cache.size(); } + void ForgetLastTexture() { + lastBoundTexture = -1; + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; + } + // Only used by Qt UI? bool DecodeTexture(u8 *output, GPUgstate state);