diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index 90078f7272..7b089be3a5 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -145,6 +145,8 @@ ReplaceAlphaType ReplaceAlphaWithStencil() { if (gstate.isAlphaBlendEnabled()) { if (nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()]) { return REPLACE_ALPHA_YES; + } else if (ShouldUseShaderBlending()) { + return REPLACE_ALPHA_YES; } else { if (gl_extensions.ARB_blend_func_extended) { return REPLACE_ALPHA_DUALSOURCE; @@ -277,10 +279,69 @@ static bool CanDoubleSrcBlendMode() { } } +// TODO: Setting to disable? +bool ShouldUseShaderBlending() { + if (!gstate.isAlphaBlendEnabled()) { + return false; + } + // We can't blit on GLES2, so we don't support it. We also want texelFetch (OpenGL 3.0+ / GLES3+.) + if (!gl_extensions.VersionGEThan(3, 0, 0) && !gl_extensions.GLES3) { + return false; + } + + GEBlendSrcFactor funcA = gstate.getBlendFuncA(); + GEBlendDstFactor funcB = gstate.getBlendFuncB(); + GEBlendMode eq = gstate.getBlendEq(); + + if (eq == GE_BLENDMODE_ABSDIFF) { + return true; + } + + // This normally involves a blit, so try to skip it. + if (AlphaToColorDoubling() || CanDoubleSrcBlendMode()) { + return false; + } + + switch (funcA) { + case GE_SRCBLEND_DOUBLESRCALPHA: + case GE_SRCBLEND_DOUBLEINVSRCALPHA: + case GE_SRCBLEND_DOUBLEDSTALPHA: + case GE_SRCBLEND_DOUBLEINVDSTALPHA: + return true; + + case GE_SRCBLEND_FIXA: + if (funcB == GE_DSTBLEND_FIXB) { + u32 fixA = gstate.getFixA(); + u32 fixB = gstate.getFixB(); + // OpenGL only supports one constant color, so check if we could be more exact. 
+ if (fixA != fixB && fixA != 0xFFFFFF - fixB && fixA != 0 && fixB != 0 && fixA != 0xFFFFFF && fixB != 0xFFFFFF) { + return true; + } + } + + default: + break; + } + + switch (funcB) { + case GE_DSTBLEND_DOUBLESRCALPHA: + case GE_DSTBLEND_DOUBLEINVSRCALPHA: + case GE_DSTBLEND_DOUBLEDSTALPHA: + case GE_DSTBLEND_DOUBLEINVDSTALPHA: + return true; + + default: + break; + } + + return false; +} + // Here we must take all the bits of the gstate that determine what the fragment shader will // look like, and concatenate them together into an ID. void ComputeFragmentShaderID(FragmentShaderID *id) { int id0 = 0; + int id1 = 0; if (gstate.isModeClear()) { // We only need one clear shader, so let's ignore the rest of the bits. id0 = 1; @@ -296,7 +357,6 @@ void ComputeFragmentShaderID(FragmentShaderID *id) { bool enableAlphaDoubling = !alphaToColorDoubling && CanDoubleSrcBlendMode(); bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX; bool doTextureAlpha = gstate.isTextureAlphaUsed(); - bool computeAbsdiff = gstate.getBlendEq() == GE_BLENDMODE_ABSDIFF; ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil(); // All texfuncs except replace are the same for RGB as for RGBA with full alpha. @@ -338,12 +398,17 @@ void ComputeFragmentShaderID(FragmentShaderID *id) { else gpuStats.numNonAlphaTestedDraws++; - if (computeAbsdiff) { - id0 |= (computeAbsdiff & 1) << 25; + if (ShouldUseShaderBlending()) { + // 12 bits total. 
+ id1 |= 1; + id1 |= (gstate.getBlendEq() << 1); + id1 |= (gstate.getBlendFuncA() << 4); + id1 |= (gstate.getBlendFuncB() << 8); } } id->d[0] = id0; + id->d[1] = id1; } // Missing: Z depth range @@ -423,7 +488,6 @@ void GenerateFragmentShader(char *buffer) { bool enableAlphaDoubling = !alphaToColorDoubling && CanDoubleSrcBlendMode(); bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX; bool doTextureAlpha = gstate.isTextureAlphaUsed(); - bool computeAbsdiff = gstate.getBlendEq() == GE_BLENDMODE_ABSDIFF; ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil(); if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE) @@ -431,6 +495,17 @@ void GenerateFragmentShader(char *buffer) { if (doTexture) WRITE(p, "uniform sampler2D tex;\n"); + if (ShouldUseShaderBlending() && !gstate.isModeClear()) { + if (!gl_extensions.NV_shader_framebuffer_fetch) { + WRITE(p, "uniform sampler2D fbotex;\n"); + } + if (gstate.getBlendFuncA() == GE_SRCBLEND_FIXA) { + WRITE(p, "uniform vec3 u_blendFixA;\n"); + } + if (gstate.getBlendFuncB() == GE_DSTBLEND_FIXB) { + WRITE(p, "uniform vec3 u_blendFixB;\n"); + } + } if (enableAlphaTest || enableColorTest) { WRITE(p, "uniform vec4 u_alphacolorref;\n"); @@ -607,12 +682,76 @@ void GenerateFragmentShader(char *buffer) { WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n"); // WRITE(p, " v.x = v_depth;\n"); } - } - // Handle ABSDIFF blending mode using NV_shader_framebuffer_fetch - if (computeAbsdiff && gl_extensions.NV_shader_framebuffer_fetch) { - WRITE(p, " lowp vec4 destColor = gl_LastFragData[0];\n"); - WRITE(p, " gl_FragColor = abs(destColor - v);\n"); + if (ShouldUseShaderBlending()) { + // If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit. + // We can just read the prev value more directly. + // TODO: EXT_shader_framebuffer_fetch on iOS 6, possibly others. 
+ if (gl_extensions.NV_shader_framebuffer_fetch) { + WRITE(p, " lowp vec4 destColor = gl_LastFragData[0];\n"); + } else { + WRITE(p, " lowp vec4 destColor = texelFetch(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n"); + } + + GEBlendSrcFactor funcA = gstate.getBlendFuncA(); + GEBlendDstFactor funcB = gstate.getBlendFuncB(); + GEBlendMode eq = gstate.getBlendEq(); + + const char *srcFactor = "vec3(1.0)"; + const char *dstFactor = "vec3(0.0)"; + + switch (funcA) + { + case GE_SRCBLEND_DSTCOLOR: srcFactor = "destColor.rgb"; break; + case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(vec3(1.0) - destColor.rgb)"; break; + case GE_SRCBLEND_SRCALPHA: srcFactor = "vec3(v.a)"; break; + case GE_SRCBLEND_INVSRCALPHA: srcFactor = "vec3(1.0 - v.a)"; break; + case GE_SRCBLEND_DSTALPHA: srcFactor = "vec3(destColor.a)"; break; + case GE_SRCBLEND_INVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a)"; break; + case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a * 2.0)"; break; + // TODO: Double inverse, or inverse double? Following softgpu for now... 
+ case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break; + case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "vec3(destColor.a * 2.0)"; break; + case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a * 2.0)"; break; + case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break; + } + switch (funcB) + { + case GE_DSTBLEND_SRCCOLOR: dstFactor = "v.rgb"; break; + case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(vec3(1.0) - v.rgb)"; break; + case GE_DSTBLEND_SRCALPHA: dstFactor = "vec3(v.a)"; break; + case GE_DSTBLEND_INVSRCALPHA: dstFactor = "vec3(1.0 - v.a)"; break; + case GE_DSTBLEND_DSTALPHA: dstFactor = "vec3(destColor.a)"; break; + case GE_DSTBLEND_INVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a)"; break; + case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "vec3(v.a * 2.0)"; break; + case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "vec3(1.0 - v.a * 2.0)"; break; + case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "vec3(destColor.a * 2.0)"; break; + case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a * 2.0)"; break; + case GE_DSTBLEND_FIXB: dstFactor = "u_blendFixB"; break; + } + + switch (eq) + { + case GE_BLENDMODE_MUL_AND_ADD: + WRITE(p, " v.rgb = v.rgb * %s + destColor.rgb * %s;\n", srcFactor, dstFactor); + break; + case GE_BLENDMODE_MUL_AND_SUBTRACT: + WRITE(p, " v.rgb = v.rgb * %s - destColor.rgb * %s;\n", srcFactor, dstFactor); + break; + case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE: + WRITE(p, " v.rgb = destColor.rgb * %s - v.rgb * %s;\n", dstFactor, srcFactor); // reverse subtract: dst * dstFactor - src * srcFactor + break; + case GE_BLENDMODE_MIN: + WRITE(p, " v.rgb = min(v.rgb, destColor.rgb);\n"); + break; + case GE_BLENDMODE_MAX: + WRITE(p, " v.rgb = max(v.rgb, destColor.rgb);\n"); + break; + case GE_BLENDMODE_ABSDIFF: + WRITE(p, " v.rgb = abs(v.rgb - destColor.rgb);\n"); + break; + } + } } switch (stencilToAlpha) { diff --git a/GPU/GLES/FragmentShaderGenerator.h b/GPU/GLES/FragmentShaderGenerator.h index 73c605e310..2357739d37 100644 --- 
a/GPU/GLES/FragmentShaderGenerator.h +++ b/GPU/GLES/FragmentShaderGenerator.h @@ -20,9 +20,9 @@ #include "Globals.h" struct FragmentShaderID { - FragmentShaderID() {d[0] = 0xFFFFFFFF;} - void clear() {d[0] = 0xFFFFFFFF;} - u32 d[1]; + FragmentShaderID() {clear();} + void clear() {d[0] = 0xFFFFFFFF; d[1] = 0xFFFFFFFF;} + u32 d[2]; bool operator < (const FragmentShaderID &other) const { for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) { if (d[i] < other.d[i]) @@ -62,4 +62,4 @@ bool IsAlphaTestTriviallyTrue(); bool IsColorTestTriviallyTrue(); StencilValueType ReplaceAlphaWithStencilType(); ReplaceAlphaType ReplaceAlphaWithStencil(); - +bool ShouldUseShaderBlending(); diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index b2df8d2f50..367fdfd703 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -784,7 +784,7 @@ void FramebufferManager::DoSetRenderFrameBuffer() { // None found? Create one. if (!vfb) { - gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; + textureCache_->ForgetLastTexture(); vfb = new VirtualFramebuffer(); vfb->fbo = 0; vfb->fb_address = fb_address; @@ -891,7 +891,7 @@ void FramebufferManager::DoSetRenderFrameBuffer() { // Use it as a render target. DEBUG_LOG(SCEGE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format); vfb->usageFlags |= FB_USAGE_RENDERTARGET; - gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; + textureCache_->ForgetLastTexture(); vfb->last_frame_render = gpuStats.numFlips; frameLastFramebufUsed = gpuStats.numFlips; vfb->dirtyAfterDisplay = true; @@ -992,6 +992,7 @@ void FramebufferManager::BindFramebufferDepth(VirtualFramebuffer *sourceframebuf // Let's only do this if not clearing. 
if (!gstate.isModeClear() || !gstate.isClearModeDepthMask()) { fbo_bind_for_read(sourceframebuffer->fbo); + glDisable(GL_SCISSOR_TEST); #if defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY)) // We only support this extension on Android, it's not even available on PC. if (useNV) { @@ -1000,6 +1001,8 @@ void FramebufferManager::BindFramebufferDepth(VirtualFramebuffer *sourceframebuf #endif // defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY)) glBlitFramebuffer(0, 0, sourceframebuffer->renderWidth, sourceframebuffer->renderHeight, 0, 0, targetframebuffer->renderWidth, targetframebuffer->renderHeight, GL_DEPTH_BUFFER_BIT, GL_NEAREST); // If we set targetframebuffer->depthUpdated here, our optimization above would be pointless. + + glstate.scissorTest.restore(); } #endif } @@ -1007,6 +1010,10 @@ void FramebufferManager::BindFramebufferDepth(VirtualFramebuffer *sourceframebuf } void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) { + if (framebuffer == NULL) { + framebuffer = currentRenderVfb_; + } + if (!framebuffer->fbo || !useBufferedRendering_) { glBindTexture(GL_TEXTURE_2D, 0); gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE; @@ -1041,6 +1048,7 @@ void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) { fbo_bind_as_render_target(renderCopy); glViewport(0, 0, framebuffer->renderWidth, framebuffer->renderHeight); + glDisable(GL_SCISSOR_TEST); fbo_bind_for_read(framebuffer->fbo); #if defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY)) // We only support this extension on Android, it's not even available on PC. 
@@ -1052,6 +1060,8 @@ void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) { fbo_bind_as_render_target(currentRenderVfb_->fbo); fbo_bind_color_as_texture(renderCopy, 0); + glstate.viewport.restore(); + glstate.scissorTest.restore(); #endif } else { fbo_bind_color_as_texture(framebuffer->fbo, 0); @@ -1238,7 +1248,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s glEnable(GL_DITHER); } else { nvfb->usageFlags |= FB_USAGE_RENDERTARGET; - gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; + textureCache_->ForgetLastTexture(); nvfb->last_frame_render = gpuStats.numFlips; nvfb->dirtyAfterDisplay = true; @@ -1317,7 +1327,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int } fbo_bind_as_render_target(dst->fbo); - + glDisable(GL_SCISSOR_TEST); #ifndef USING_GLES2 if (gl_extensions.FBO_ARB) { @@ -1373,7 +1383,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int // Make sure our 2D drawing program is ready. Compiles only if not already compiled. CompileDraw2DProgram(); - glstate.viewport.set(0, 0, dst->width, dst->height); + glViewport(0, 0, dst->width, dst->height); DisableState(); // The first four coordinates are relative to the 6th and 7th arguments of DrawActiveTexture. 
@@ -1382,8 +1392,11 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int float srcH = src->height; DrawActiveTexture(0, dstX, dstY, w, h, dst->width, dst->height, false, srcX / srcW, srcY / srcH, (srcX + w) / srcW, (srcY + h) / srcH, draw2dprogram_); glBindTexture(GL_TEXTURE_2D, 0); + textureCache_->ForgetLastTexture(); } + glstate.scissorTest.restore(); + glstate.viewport.restore(); fbo_unbind(); } @@ -1911,7 +1924,7 @@ bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool fbo_unbind(); } glstate.viewport.restore(); - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + textureCache_->ForgetLastTexture(); // This is a memcpy, let's still copy just in case. return false; } @@ -2045,7 +2058,7 @@ void FramebufferManager::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, fbo_unbind(); } glstate.viewport.restore(); - gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; + textureCache_->ForgetLastTexture(); } } } diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index af620f5bd8..160257bba1 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -162,8 +162,8 @@ static const CommandTableEntry commandTable[] = { {GE_CMD_STENCILTESTENABLE, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_ALPHABLENDENABLE, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_BLENDMODE, FLAG_FLUSHBEFOREONCHANGE}, - {GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE}, - {GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE}, + {GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_BlendFixA}, + {GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_BlendFixB}, {GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE}, @@ -1084,6 +1084,14 @@ void GLES_GPU::Execute_ColorRef(u32 op, u32 diff) { shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF); } +void GLES_GPU::Execute_BlendFixA(u32 op, u32 diff) { + 
shaderManager_->DirtyUniform(DIRTY_BLENDFIX); +} + +void GLES_GPU::Execute_BlendFixB(u32 op, u32 diff) { + shaderManager_->DirtyUniform(DIRTY_BLENDFIX); +} + void GLES_GPU::Execute_WorldMtxNum(u32 op, u32 diff) { // This is almost always followed by GE_CMD_WORLDMATRIXDATA. const u32_le *src = (const u32_le *)Memory::GetPointer(currentList->pc + 4); @@ -1611,8 +1619,14 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) { ////////////////////////////////////////////////////////////////// case GE_CMD_ALPHABLENDENABLE: case GE_CMD_BLENDMODE: + break; + case GE_CMD_BLENDFIXEDA: + Execute_BlendFixA(op, diff); + break; + case GE_CMD_BLENDFIXEDB: + Execute_BlendFixB(op, diff); break; case GE_CMD_ALPHATESTENABLE: diff --git a/GPU/GLES/GLES_GPU.h b/GPU/GLES/GLES_GPU.h index c2625a1c78..5a57540186 100644 --- a/GPU/GLES/GLES_GPU.h +++ b/GPU/GLES/GLES_GPU.h @@ -124,6 +124,8 @@ public: void Execute_AlphaTest(u32 op, u32 diff); void Execute_StencilTest(u32 op, u32 diff); void Execute_ColorRef(u32 op, u32 diff); + void Execute_BlendFixA(u32 op, u32 diff); + void Execute_BlendFixB(u32 op, u32 diff); void Execute_WorldMtxNum(u32 op, u32 diff); void Execute_WorldMtxData(u32 op, u32 diff); void Execute_ViewMtxNum(u32 op, u32 diff); diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 08c4d41dee..8872960182 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -155,6 +155,10 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans u_colormask = glGetUniformLocation(program, "u_colormask"); u_stencilReplaceValue = glGetUniformLocation(program, "u_stencilReplaceValue"); + u_fbotex = glGetUniformLocation(program, "fbotex"); + u_blendFixA = glGetUniformLocation(program, "u_blendFixA"); + u_blendFixB = glGetUniformLocation(program, "u_blendFixB"); + // Transform u_view = glGetUniformLocation(program, "u_view"); u_world = glGetUniformLocation(program, "u_world"); @@ -225,6 +229,7 @@ LinkedShader::LinkedShader(Shader 
*vs, Shader *fs, u32 vertType, bool useHWTrans if (u_view != -1) availableUniforms |= DIRTY_VIEWMATRIX; if (u_texmtx != -1) availableUniforms |= DIRTY_TEXMATRIX; if (u_stencilReplaceValue != -1) availableUniforms |= DIRTY_STENCILREPLACEVALUE; + if (u_blendFixA != -1 || u_blendFixB != -1) availableUniforms |= DIRTY_BLENDFIX; // Looping up to numBones lets us avoid checking u_bone[i] for (int i = 0; i < numBones; i++) { @@ -247,6 +252,7 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans // Default uniform values glUniform1i(u_tex, 0); + glUniform1i(u_fbotex, 1); // The rest, use the "dirty" mechanism. dirtyUniforms = DIRTY_ALL; use(vertType, previous); @@ -520,6 +526,11 @@ void LinkedShader::UpdateUniforms(u32 vertType) { } #endif + if (dirty & DIRTY_BLENDFIX) { + SetColorUniform3(u_blendFixA, gstate.getFixA()); + SetColorUniform3(u_blendFixB, gstate.getFixB()); + } + // Lighting if (dirty & DIRTY_AMBIENT) { SetColorUniform3Alpha(u_ambient, gstate.ambientcolor, gstate.getAmbientA()); diff --git a/GPU/GLES/ShaderManager.h b/GPU/GLES/ShaderManager.h index b589b66f90..fe7056fa46 100644 --- a/GPU/GLES/ShaderManager.h +++ b/GPU/GLES/ShaderManager.h @@ -73,6 +73,11 @@ public: #endif int numBones; + // Shader blending. + int u_fbotex; + int u_blendFixA; + int u_blendFixB; + // Fragment processing inputs int u_alphacolorref; int u_colormask; @@ -123,7 +128,7 @@ enum DIRTY_AMBIENT = (1 << 15), DIRTY_MATAMBIENTALPHA = (1 << 16), - // 1 << 17 is free! + DIRTY_BLENDFIX = (1 << 17), // (either one.) DIRTY_UVSCALEOFFSET = (1 << 18), // this will be dirtied ALL THE TIME... 
maybe we'll need to do "last value with this shader compares" diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index 70bdf98cf0..c89173fadf 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -171,11 +171,37 @@ void TransformDrawEngine::ApplyDrawState(int prim) { gstate_c.textureChanged = TEXCHANGE_UNCHANGED; } - // TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a - // single fullscreen pass that converts alpha to stencil (or 2 passes, to set both the 0 and 1 values) very easily. - - // Set blend + // Set blend - unless we need to do it in the shader. bool wantBlend = !gstate.isModeClear() && gstate.isAlphaBlendEnabled(); + if (wantBlend && ShouldUseShaderBlending()) { + if (!gl_extensions.NV_shader_framebuffer_fetch) { + static const int MAX_REASONABLE_BLITS_PER_FRAME = 24; + + static int lastFrameBlit = -1; + static int blitsThisFrame = 0; + if (lastFrameBlit != gpuStats.numFlips) { + if (blitsThisFrame > MAX_REASONABLE_BLITS_PER_FRAME) { + WARN_LOG_REPORT_ONCE(blendingBlit, G3D, "Lots of blits needed for obscure blending: %d per frame, blend %d/%d/%d", blitsThisFrame, gstate.getBlendFuncA(), gstate.getBlendFuncB(), gstate.getBlendEq()); + } + blitsThisFrame = 0; + lastFrameBlit = gpuStats.numFlips; + } + ++blitsThisFrame; + + glActiveTexture(GL_TEXTURE1); + framebufferManager_->BindFramebufferColor(NULL); + glActiveTexture(GL_TEXTURE0); + fboTexBound_ = true; + } + // None of the below logic is interesting, we're gonna do it entirely in the shader. 
+ wantBlend = false; + } else if (fboTexBound_) { + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); + fboTexBound_ = false; // unit 1 is unbound now; don't repeat the unbind on every later draw + } + glstate.blend.set(wantBlend); if (wantBlend) { // This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop. @@ -323,12 +349,7 @@ void TransformDrawEngine::ApplyDrawState(int prim) { } if (((blendFuncEq >= GE_BLENDMODE_MIN) && gl_extensions.EXT_blend_minmax) || gl_extensions.GLES3) { - if (blendFuncEq == GE_BLENDMODE_ABSDIFF && gl_extensions.NV_shader_framebuffer_fetch) { - // Handle GE_BLENDMODE_ABSDIFF in fragment shader and turn off regular alpha blending here. - glstate.blend.set(false); - } else { - glstate.blendEquation.set(eqLookup[blendFuncEq]); - } + glstate.blendEquation.set(eqLookup[blendFuncEq]); } else { glstate.blendEquation.set(eqLookupNoMinMax[blendFuncEq]); } diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 53552ca137..10ccc857dc 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -65,6 +65,11 @@ public: return cache.size(); } + void ForgetLastTexture() { + lastBoundTexture = -1; + gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; + } + // Only used by Qt UI? bool DecodeTexture(u8 *output, GPUgstate state); diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index b9ac5c2800..0c96223e2d 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -126,7 +126,8 @@ TransformDrawEngine::TransformDrawEngine() numDrawCalls(0), vertexCountInDrawCalls(0), decodeCounter_(0), - uvScale(0) { + uvScale(0), + fboTexBound_(false) { decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; // Allocate nicely aligned memory. Maybe graphics drivers will // appreciate it. 
diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index 8e23c78c52..4795224e83 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -243,4 +243,6 @@ private: u32 dcid_; UVScale *uvScale; + + bool fboTexBound_; };