From cc841bbe4c7ba1b1e4caf1786a382fc7bafb4121 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 7 Jun 2014 12:21:52 -0700 Subject: [PATCH] Apply tex wrap/clamp in shader for render-to-tex. Fixes graphical artifacts in Wild Arms XF (which depends on how you get there, because it's based on the size of the framebuffer.) --- GPU/GLES/FragmentShaderGenerator.cpp | 42 ++++++++++++++++++++++++---- GPU/GLES/ShaderManager.cpp | 17 +++++++++++ GPU/GLES/ShaderManager.h | 2 ++ GPU/GLES/StateMapping.cpp | 8 ++++++ GPU/GLES/TextureCache.cpp | 16 ++++++++--- GPU/GPUState.cpp | 2 ++ GPU/GPUState.h | 1 + 7 files changed, 78 insertions(+), 10 deletions(-) diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index e255003f23..8442a97399 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -402,10 +402,17 @@ void ComputeFragmentShaderID(FragmentShaderID *id) { if (ShouldUseShaderBlending()) { // 12 bits total. - id1 |= 1; - id1 |= (gstate.getBlendEq() << 1); - id1 |= (gstate.getBlendFuncA() << 4); - id1 |= (gstate.getBlendFuncB() << 8); + id1 |= 1 << 0; + id1 |= gstate.getBlendEq() << 1; + id1 |= gstate.getBlendFuncA() << 4; + id1 |= gstate.getBlendFuncB() << 8; + } + + if (gstate_c.needShaderTexClamp) { + // 3 bits total. + id1 |= 1 << 12; + id1 |= gstate.isTexCoordClampedS() << 13; + id1 |= gstate.isTexCoordClampedT() << 14; } } @@ -508,6 +515,9 @@ void GenerateFragmentShader(char *buffer) { WRITE(p, "uniform vec3 u_blendFixB;\n"); } } + if (gstate_c.needShaderTexClamp) { + WRITE(p, "uniform vec2 u_texclamp;"); + } if (enableAlphaTest || enableColorTest) { WRITE(p, "uniform vec4 u_alphacolorref;\n"); @@ -568,10 +578,30 @@ void GenerateFragmentShader(char *buffer) { } if (gstate.isTextureMapEnabled()) { + const char *texcoord = "v_texcoord"; + // TODO: Not sure the right way to do this for projection. + if (gstate_c.needShaderTexClamp && !doTextureProjection) { + // We may be clamping inside a larger surface (tex = 64x64, buffer=480x272). + // We may also be wrapping in such a surface, or either one in a too-small surface. + // Obviously, clamping to a smaller surface won't work. But better to clamp to something. + const char *ucoord = "mod(v_texcoord.x, u_texclamp.x)"; + if (gstate.isTexCoordClampedS()) { + ucoord = "clamp(v_texcoord.x, 0.0, u_texclamp.x)"; + } + // The v coordinate is more tricky, since it's flipped. + const char *vcoord = "1.0 - mod(1.0 - v_texcoord.y, u_texclamp.y)"; + if (gstate.isTexCoordClampedT()) { + vcoord = "1.0 - clamp(1.0 - v_texcoord.y, 0.0, u_texclamp.y)"; + } + + WRITE(p, " vec2 fixedcoord = vec2(%s, %s);\n", ucoord, vcoord); + texcoord = "fixedcoord"; + } + if (doTextureProjection) { - WRITE(p, " vec4 t = %sProj(tex, v_texcoord);\n", texture); + WRITE(p, " vec4 t = %sProj(tex, %s);\n", texture, texcoord); } else { - WRITE(p, " vec4 t = %s(tex, v_texcoord);\n", texture); + WRITE(p, " vec4 t = %s(tex, %s);\n", texture, texcoord); } WRITE(p, " vec4 p = v_color0;\n"); diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 8872960182..c3fbe6cf3b 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -185,6 +185,7 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans u_matspecular = glGetUniformLocation(program, "u_matspecular"); u_matemissive = glGetUniformLocation(program, "u_matemissive"); u_uvscaleoffset = glGetUniformLocation(program, "u_uvscaleoffset"); + u_texclamp = glGetUniformLocation(program, "u_texclamp"); for (int i = 0; i < 4; i++) { char temp[64]; @@ -225,6 +226,7 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans if (u_fogcoef != -1) availableUniforms |= DIRTY_FOGCOEF; if (u_texenv != -1) availableUniforms |= DIRTY_TEXENV; if (u_uvscaleoffset != -1) availableUniforms |= DIRTY_UVSCALEOFFSET; + if (u_texclamp != -1) availableUniforms |= DIRTY_TEXCLAMP; if (u_world != -1) availableUniforms |= DIRTY_WORLDMATRIX; if (u_view != -1) availableUniforms |= DIRTY_VIEWMATRIX; if (u_texmtx != -1) availableUniforms |= DIRTY_TEXMATRIX; @@ -477,6 +479,21 @@ void LinkedShader::UpdateUniforms(u32 vertType) { glUniform4fv(u_uvscaleoffset, 1, uvscaleoff); } + if (dirty & DIRTY_TEXCLAMP) { + const float invW = 1.0f / (float)gstate_c.curTextureWidth; + const float invH = 1.0f / (float)gstate_c.curTextureHeight; + const int w = gstate.getTextureWidth(0); + const int h = gstate.getTextureHeight(0); + const float widthFactor = (float)w * invW; + const float heightFactor = (float)h * invH; + + const float texclamp[2] = { + widthFactor, + heightFactor, + }; + glUniform2fv(u_texclamp, 1, texclamp); + } + // Transform if (dirty & DIRTY_WORLDMATRIX) { SetMatrix4x3(u_world, gstate.worldMatrix); diff --git a/GPU/GLES/ShaderManager.h b/GPU/GLES/ShaderManager.h index fe7056fa46..0aefd24de6 100644 --- a/GPU/GLES/ShaderManager.h +++ b/GPU/GLES/ShaderManager.h @@ -86,6 +86,7 @@ public: // Texturing int u_uvscaleoffset; + int u_texclamp; // Lighting int u_ambient; @@ -131,6 +132,7 @@ enum DIRTY_BLENDFIX = (1 << 17), // (either one.) DIRTY_UVSCALEOFFSET = (1 << 18), // this will be dirtied ALL THE TIME... maybe we'll need to do "last value with this shader compares" + DIRTY_TEXCLAMP = (1 << 19), DIRTY_WORLDMATRIX = (1 << 21), DIRTY_VIEWMATRIX = (1 << 22), // Maybe we'll fold this into projmatrix eventually diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index c257feef90..8d9df3689a 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -169,6 +169,14 @@ void TransformDrawEngine::ApplyDrawState(int prim) { if (gstate_c.textureChanged != TEXCHANGE_UNCHANGED && !gstate.isModeClear() && gstate.isTextureMapEnabled()) { textureCache_->SetTexture(); gstate_c.textureChanged = TEXCHANGE_UNCHANGED; + if (gstate_c.needShaderTexClamp) { + // We will rarely need to set this, so let's do it every time on use rather than in runloop. + // Most of the time non-framebuffer textures will be used which can be clamped themselves. + shaderManager_->DirtyUniform(DIRTY_TEXCLAMP); + } + } else { + // Let's not leave this set. + gstate_c.needShaderTexClamp = false; } // Set blend - unless we need to do it in the shader. diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index b545002325..b82b565a5e 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -588,10 +588,16 @@ void TextureCache::UpdateSamplingParams(TexCacheEntry &entry, bool force) { } // Platforms without non-pow-2 extensions can't wrap non-pow-2 textures. - // Only framebuffer textures are non-pow-2 so this check works but excludes some cases - // where we could have enabled wrapping. TODO - if (!gl_extensions.OES_texture_npot && entry.framebuffer) - return; + // Only framebuffer textures can be non-pow-2. + if (!gl_extensions.OES_texture_npot && entry.framebuffer) { + // Check if it matches the size, in which case we can still enable wrapping. + int w = gstate.getTextureWidth(0); + int h = gstate.getTextureHeight(0); + if (w != entry.framebuffer->bufferWidth || h != entry.framebuffer->bufferHeight) { + // We'll do it in the shader. + return; + } + } if (force || entry.sClamp != sClamp) { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, sClamp ? GL_CLAMP_TO_EDGE : GL_REPEAT); @@ -974,6 +980,7 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffe gstate_c.curTextureWidth = framebuffer->bufferWidth; gstate_c.curTextureHeight = framebuffer->bufferHeight; gstate_c.flipTexture = true; + gstate_c.needShaderTexClamp = gstate_c.curTextureWidth != gstate.getTextureWidth(0) || gstate_c.curTextureHeight != gstate.getTextureHeight(0); UpdateSamplingParams(*entry, true); } else { if (framebuffer->fbo) @@ -1077,6 +1084,7 @@ void TextureCache::SetTexture(bool force) { TexCache::iterator iter = cache.find(cachekey); TexCacheEntry *entry = NULL; gstate_c.flipTexture = false; + gstate_c.needShaderTexClamp = false; gstate_c.skipDrawReason &= ~SKIPDRAW_BAD_FB_TEXTURE; bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE; bool replaceImages = false; diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index 3f55935767..982b4c17cb 100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -302,6 +302,8 @@ void GPUStateCache::DoState(PointerWrap &p) { p.Do(flipTexture); } + // needShaderTexClamp doesn't need to be saved. + if (s >= 3) { p.Do(textureSimpleAlpha); } else { diff --git a/GPU/GPUState.h b/GPU/GPUState.h index b8e79488e7..c8683130af 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -458,6 +458,7 @@ struct GPUStateCache UVScale uv; bool flipTexture; + bool needShaderTexClamp; float morphWeights[8];