diff --git a/GPU/GLES/FragmentShaderGeneratorGLES.cpp b/GPU/GLES/FragmentShaderGeneratorGLES.cpp index 913e6b5829..d8b117601f 100644 --- a/GPU/GLES/FragmentShaderGeneratorGLES.cpp +++ b/GPU/GLES/FragmentShaderGeneratorGLES.cpp @@ -157,6 +157,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ); bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA); bool doFlatShading = id.Bit(FS_BIT_FLATSHADE); + bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL); GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3); GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2); @@ -217,6 +218,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform } } + if (shaderDepal) { + WRITE(p, "uniform sampler2D pal;\n"); + WRITE(p, "uniform int u_depal;\n"); + *uniformMask |= DIRTY_DEPAL; + } + StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4); if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) { *uniformMask |= DIRTY_STENCILREPLACEVALUE; @@ -336,10 +343,95 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform if (doTextureProjection) { WRITE(p, " vec4 t = %sProj(tex, %s);\n", texture, texcoord); + if (shaderDepal) { + WRITE(p, " vec4 t1 = %sProjOffset(tex, %s, ivec2(1, 0));\n", texture, texcoord); + WRITE(p, " vec4 t2 = %sProjOffset(tex, %s, ivec2(0, 1));\n", texture, texcoord); + WRITE(p, " vec4 t3 = %sProjOffset(tex, %s, ivec2(1, 1));\n", texture, texcoord); + } } else { WRITE(p, " vec4 t = %s(tex, %s.xy);\n", texture, texcoord); + if (shaderDepal) { + WRITE(p, " vec4 t1 = %sOffset(tex, %s.xy, ivec2(1, 0));\n", texture, texcoord); + WRITE(p, " vec4 t2 = %sOffset(tex, %s.xy, ivec2(0, 1));\n", texture, texcoord); + WRITE(p, " vec4 t3 = %sOffset(tex, %s.xy, ivec2(1, 1));\n", texture, texcoord); + } } - WRITE(p, " vec4 p = v_color0;\n"); + + if (shaderDepal) { + WRITE(p, " int depalMask = (u_depal & 0xFF);\n"); + WRITE(p, " int depalShift = ((u_depal >> 8) & 0xFF);\n"); + WRITE(p, " int depalOffset = (((u_depal >> 16) & 0xFF) << 4);\n"); + WRITE(p, " int depalFmt = ((u_depal >> 24) & 0x3);\n"); + WRITE(p, " bool bilinear = (u_depal >> 31) != 0;\n"); + WRITE(p, " vec2 fraction = fract(%s.xy);\n", texcoord); + WRITE(p, " ivec4 col; int index0; int index1; int index2; int index3;\n"); + WRITE(p, " switch (depalFmt) {\n"); // We might want to include fmt in the shader ID if this is a performance issue. + WRITE(p, " case 0:\n"); // 565 + WRITE(p, " col = ivec4(t.rgb * vec3(31.99, 63.99, 31.99), 0);\n"); + WRITE(p, " index0 = (col.b << 11) | (col.g << 5) | (col.r);\n"); + WRITE(p, " if (bilinear) {\n"); + WRITE(p, " col = ivec4(t1.rgb * vec3(31.99, 63.99, 31.99), 0);\n"); + WRITE(p, " index1 = (col.b << 11) | (col.g << 5) | (col.r);\n"); + WRITE(p, " col = ivec4(t2.rgb * vec3(31.99, 63.99, 31.99), 0);\n"); + WRITE(p, " index2 = (col.b << 11) | (col.g << 5) | (col.r);\n"); + WRITE(p, " col = ivec4(t3.rgb * vec3(31.99, 63.99, 31.99), 0);\n"); + WRITE(p, " index3 = (col.b << 11) | (col.g << 5) | (col.r);\n"); + WRITE(p, " }\n"); + WRITE(p, " break;\n"); + WRITE(p, " case 1:\n"); // 5551 + WRITE(p, " col = ivec4(t.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n"); + WRITE(p, " index0 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n"); + WRITE(p, " if (bilinear) {\n"); + WRITE(p, " col = ivec4(t1.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n"); + WRITE(p, " index1 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n"); + WRITE(p, " col = ivec4(t2.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n"); + WRITE(p, " index2 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n"); + WRITE(p, " col = ivec4(t3.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n"); + WRITE(p, " index3 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n"); + WRITE(p, " }\n"); + WRITE(p, " break;\n"); + WRITE(p, " case 2:\n"); // 4444 + WRITE(p, " col = ivec4(t.rgba * vec4(15.99, 15.99, 15.99, 15.99));\n"); + WRITE(p, " index0 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n"); + WRITE(p, " if (bilinear) {\n"); + WRITE(p, " col = ivec4(t1.rgba * vec4(15.99, 15.99, 15.99, 15.99));\n"); + WRITE(p, " index1 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n"); + WRITE(p, " col = ivec4(t2.rgba * vec4(15.99, 15.99, 15.99, 15.99));\n"); + WRITE(p, " index2 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n"); + WRITE(p, " col = ivec4(t3.rgba * vec4(15.99, 15.99, 15.99, 15.99));\n"); + WRITE(p, " index3 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n"); + WRITE(p, " }\n"); + WRITE(p, " break;\n"); + WRITE(p, " case 3:\n"); // 8888 + WRITE(p, " col = ivec4(t.rgba * vec4(255.99, 255.99, 255.99, 255.99));\n"); + WRITE(p, " index0 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n"); + WRITE(p, " if (bilinear) {\n"); + WRITE(p, " col = ivec4(t1.rgba * vec4(255.99, 255.99, 255.99, 255.99));\n"); + WRITE(p, " index1 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n"); + WRITE(p, " col = ivec4(t2.rgba * vec4(255.99, 255.99, 255.99, 255.99));\n"); + WRITE(p, " index2 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n"); + WRITE(p, " col = ivec4(t3.rgba * vec4(255.99, 255.99, 255.99, 255.99));\n"); + WRITE(p, " index3 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n"); + WRITE(p, " }\n"); + WRITE(p, " break;\n"); + WRITE(p, " };\n"); + WRITE(p, " index0 = ((index0 >> depalShift) & depalMask) | depalOffset;\n"); + WRITE(p, " t = texelFetch(pal, ivec2(index0, 0), 0);\n"); + WRITE(p, " if (bilinear) {\n"); + WRITE(p, " index1 = ((index1 >> depalShift) & depalMask) | depalOffset;\n"); + WRITE(p, " index2 = ((index2 >> depalShift) & depalMask) | depalOffset;\n"); + WRITE(p, " index3 = ((index3 >> depalShift) & depalMask) | depalOffset;\n"); + WRITE(p, " t1 = texelFetch(pal, ivec2(index1, 0), 0);\n"); + WRITE(p, " t2 = texelFetch(pal, ivec2(index2, 0), 0);\n"); + WRITE(p, " t3 = texelFetch(pal, ivec2(index3, 0), 0);\n"); + WRITE(p, " t = mix(t, t1, fraction.x);\n"); + WRITE(p, " t2 = mix(t2, t3, fraction.x);\n"); + WRITE(p, " t = mix(t, t2, fraction.y);\n"); + WRITE(p, " }\n"); + } + + if (texFunc != GE_TEXFUNC_REPLACE || !doTextureAlpha) + WRITE(p, " vec4 p = v_color0;\n"); if (doTextureAlpha) { // texfmt == RGBA switch (texFunc) { diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 0fee3e6500..63bf981bcc 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -103,6 +103,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, queries.push_back({ &u_blendFixA, "u_blendFixA" }); queries.push_back({ &u_blendFixB, "u_blendFixB" }); queries.push_back({ &u_fbotexSize, "u_fbotexSize" }); + queries.push_back({ &u_pal, "pal" }); // Transform queries.push_back({ &u_view, "u_view" }); @@ -161,6 +162,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, queries.push_back({ &u_spline_count_v, "u_spline_count_v" }); queries.push_back({ &u_spline_type_u, "u_spline_type_u" }); queries.push_back({ &u_spline_type_v, "u_spline_type_v" }); + queries.push_back({ &u_depal, "u_depal" }); attrMask = vs->GetAttrMask(); availableUniforms = vs->GetUniformMask() | fs->GetUniformMask(); @@ -169,6 +171,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, initialize.push_back({ &u_tex, 0, 0 }); initialize.push_back({ &u_fbotex, 0, 1 }); initialize.push_back({ &u_testtex, 0, 2 }); + initialize.push_back({ &u_pal, 0, 3 }); // CLUT initialize.push_back({ &u_tess_pos_tex, 0, 4 }); // Texture unit 4 initialize.push_back({ &u_tess_tex_tex, 0, 5 }); // Texture unit 5 initialize.push_back({ &u_tess_col_tex, 0, 6 }); // Texture unit 6 @@ -283,6 +286,17 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) { if (!dirty) return; + if (dirty & DIRTY_DEPAL) { + int indexMask = gstate.getClutIndexMask(); + int indexShift = gstate.getClutIndexShift(); + int indexOffset = gstate.getClutIndexStartPos() >> 4; + int format = gstate_c.depalFramebufferFormat; + uint32_t val = BytesToUint32(indexMask, indexShift, indexOffset, format); + // Poke in a bilinear filter flag in the top bit. + val |= gstate.isMagnifyFilteringEnabled() << 31; + render_->SetUniformI1(&u_depal, val); + } + // Update any dirty uniforms before we draw if (dirty & DIRTY_PROJMATRIX) { Matrix4x4 flippedMatrix; @@ -810,7 +824,7 @@ std::string ShaderManagerGLES::DebugGetShaderString(std::string id, DebugShaderT // as sometimes these features might have an effect on the ID bits. #define CACHE_HEADER_MAGIC 0x83277592 -#define CACHE_VERSION 11 +#define CACHE_VERSION 12 struct CacheHeader { uint32_t magic; uint32_t version; diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index e04a9a8c8b..c9ca56f15a 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -85,6 +85,10 @@ public: int u_blendFixB; int u_fbotexSize; + // Shader depal + int u_pal; // the texture + int u_depal; // the params + // Fragment processing inputs int u_alphacolorref; int u_alphacolormask; diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp index 38d5aa280a..031c1bdad5 100644 --- a/GPU/GLES/TextureCacheGLES.cpp +++ b/GPU/GLES/TextureCacheGLES.cpp @@ -161,7 +161,7 @@ void TextureCacheGLES::UpdateSamplingParams(TexCacheEntry &entry, bool force) { render_->SetTextureSampler(0, sClamp ? GL_CLAMP_TO_EDGE : GL_REPEAT, tClamp ? GL_CLAMP_TO_EDGE : GL_REPEAT, MagFiltGL[magFilt], MinFiltGL[minFilt], aniso); } -void TextureCacheGLES::SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight) { +void TextureCacheGLES::SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight, bool forcePoint) { int minFilt; int magFilt; bool sClamp; @@ -171,6 +171,10 @@ void TextureCacheGLES::SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferH GetSamplingParams(minFilt, magFilt, sClamp, tClamp, lodBias, 0, 0, mode); minFilt &= 1; // framebuffers can't mipmap. + if (forcePoint) { + minFilt &= ~1; + magFilt &= ~1; + } // Often the framebuffer will not match the texture size. We'll wrap/clamp in the shader in that case. // This happens whether we have OES_texture_npot or not. @@ -324,6 +328,7 @@ void TextureCacheGLES::BindTexture(TexCacheEntry *entry) { lastBoundTexture = entry->textureName; } UpdateSamplingParams(*entry, false); + gstate_c.useShaderDepal = false; } void TextureCacheGLES::Unbind() { @@ -434,7 +439,33 @@ protected: void TextureCacheGLES::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) { DepalShader *depal = nullptr; uint32_t clutMode = gstate.clutformat & 0xFFFFFF; + +#if 0 + bool useShaderDepal = gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300); +#else + bool useShaderDepal = false; +#endif + if ((entry->status & TexCacheEntry::STATUS_DEPALETTIZE) && !g_Config.bDisableSlowFramebufEffects) { + if (useShaderDepal) { + const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); + GLRTexture *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_); + render_->BindTexture(TEX_SLOT_CLUT, clutTexture); + framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); + SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, true); + InvalidateLastTexture(); + + // Since we started/ended render passes, might need these. + gstate_c.Dirty(DIRTY_DEPAL); + gstate_c.useShaderDepal = true; + gstate_c.depalFramebufferFormat = framebuffer->drawnFormat; + const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16); + const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor; + TexCacheEntry::TexStatus alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1); + gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL); + return; + } + depal = depalShaderCache_->GetDepalettizeShader(clutMode, framebuffer->drawnFormat); } if (depal) { @@ -472,7 +503,7 @@ void TextureCacheGLES::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFram } framebufferManagerGL_->RebindFramebuffer(); - SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight); + SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, false); InvalidateLastTexture(); diff --git a/GPU/GLES/TextureCacheGLES.h b/GPU/GLES/TextureCacheGLES.h index a88eb8d7ec..be60d7eb77 100644 --- a/GPU/GLES/TextureCacheGLES.h +++ b/GPU/GLES/TextureCacheGLES.h @@ -63,7 +63,7 @@ public: } } - void SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight); + void SetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight, bool forcePoint); bool GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) override; void DeviceLost(); diff --git a/ext/native/thin3d/GLQueueRunner.cpp b/ext/native/thin3d/GLQueueRunner.cpp index ce13132cce..653125d3fe 100644 --- a/ext/native/thin3d/GLQueueRunner.cpp +++ b/ext/native/thin3d/GLQueueRunner.cpp @@ -4,6 +4,7 @@ #include "GLRenderManager.h" #include "DataFormatGL.h" #include "base/logging.h" +#include "base/stringutil.h" #include "gfx/gl_common.h" #include "gfx/gl_debug_log.h" #include "gfx_es2/gpu_features.h" @@ -156,9 +157,10 @@ void GLQueueRunner::RunInitSteps(const std::vector &steps) { #ifdef _WIN32 OutputDebugStringUTF8(buf); - OutputDebugStringUTF8(vsCode); + if (vsCode) + OutputDebugStringUTF8(LineNumberString(vsCode).c_str()); if (fsCode) - OutputDebugStringUTF8(fsCode); + OutputDebugStringUTF8(LineNumberString(fsCode).c_str()); #endif delete[] buf; } else {