From aec22491fe2b28fb836f98b5785b6839bec8296c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 10 Oct 2022 17:16:52 +0200 Subject: [PATCH] Don't expand alphaColorRef to 128 bits on backends where we don't need to. --- GPU/Common/FragmentShaderGenerator.cpp | 10 +++++++--- GPU/Common/ShaderUniforms.cpp | 4 ++-- GPU/Common/ShaderUniforms.h | 11 ++++------- GPU/GLES/ShaderManagerGLES.cpp | 16 ++++++++++------ GPU/GLES/ShaderManagerGLES.h | 2 +- 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 2f8f248dbd..cce87a5487 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -353,7 +353,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, "uniform sampler2D testtex;\n"); } else { *uniformMask |= DIRTY_ALPHACOLORREF; - WRITE(p, "uniform vec4 u_alphacolorref;\n"); + if (compat.bitwiseOps) { + WRITE(p, "uniform uint u_alphacolorref;\n"); + } else { + WRITE(p, "uniform vec4 u_alphacolorref;\n"); + } if (compat.bitwiseOps && ((enableColorTest && !colorTestAgainstZero) || (enableAlphaTest && !alphaTestAgainstZero))) { *uniformMask |= DIRTY_ALPHACOLORMASK; WRITE(p, "uniform uint u_alphacolormask;\n"); @@ -882,7 +886,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " }; if (alphaTestFuncs[alphaTestFunc][0] != '#') { if (compat.bitwiseOps) { - WRITE(p, " if ((roundAndScaleTo255i(v.a) & int(u_alphacolormask >> 24)) %s int(u_alphacolorref.a)) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement); + WRITE(p, " if ((roundAndScaleTo255i(v.a) & int(u_alphacolormask >> 24)) %s int(u_alphacolorref >> 24)) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement); } else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) { // Work around bad PVR driver 
problem where equality check + discard just doesn't work. if (alphaTestFunc != GE_COMP_NOTEQUAL) { @@ -946,7 +950,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } else if (compat.bitwiseOps) { WRITE(p, " uint v_uint = roundAndScaleTo8x4(v.rgb);\n"); WRITE(p, " uint v_masked = v_uint & u_alphacolormask;\n"); - WRITE(p, " uint colorTestRef = packFloatsTo8x4(u_alphacolorref.rgb) & u_alphacolormask;\n"); + WRITE(p, " uint colorTestRef = (u_alphacolorref & u_alphacolormask) & 0xFFFFFFu;\n"); WRITE(p, " if (v_masked %s colorTestRef) %s\n", test, discardStatement); } else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) { WRITE(p, " if (roundTo255thv(v.rgb) %s u_alphacolorref.rgb) %s\n", test, discardStatement); diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index b276f7f094..3c56d833e5 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -77,13 +77,13 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView Uint8x3ToFloat3(ub->texEnvColor, gstate.texenvcolor); } if (dirtyUniforms & DIRTY_ALPHACOLORREF) { - Uint8x3ToInt4_Alpha(ub->alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask()); + ub->alphaColorRef = gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24); } if (dirtyUniforms & DIRTY_ALPHACOLORMASK) { ub->colorTestMask = gstate.getColorTestMask() | (gstate.getAlphaTestMask() << 24); } if (dirtyUniforms & DIRTY_FOGCOLOR) { - Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor); + Uint8x3ToFloat3(ub->fogColor, gstate.fogcolor); } if (dirtyUniforms & DIRTY_SHADERBLEND) { Uint8x3ToFloat3(ub->blendFixA, gstate.getFixA()); diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index 04b6437dff..68834e2fd8 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -17,7 +17,7 @@ enum : uint64_t { DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | 
DIRTY_MATEMISSIVE | DIRTY_AMBIENT, }; -// Currently 480 bytes. Probably can't get to 256 (nVidia's UBO alignment, also common in other vendors). +// Currently 448 bytes. // Every line here is a 4-float. struct alignas(16) UB_VS_FS_Base { float proj[16]; @@ -34,9 +34,8 @@ struct alignas(16) UB_VS_FS_Base { uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one. uint32_t colorWriteMask; float mipBias; // Fragment data - float fogColor[4]; // .w is unused + float fogColor[3]; uint32_t alphaColorRef; float texEnvColor[3]; uint32_t colorTestMask; - int alphaColorRef[4]; float blendFixA[3]; float stencil; float blendFixB[3]; float rotation; float texClamp[4]; @@ -58,10 +57,8 @@ R"( mat4 u_proj; uint u_depal_mask_shift_off_fmt; uint u_colorWriteMask; float u_mipBias; - vec3 u_fogcolor; - vec3 u_texenv; - uint u_alphacolormask; - ivec4 u_alphacolorref; + vec3 u_fogcolor; uint u_alphacolorref; + vec3 u_texenv; uint u_alphacolormask; vec3 u_blendFixA; float u_stencilReplaceValue; vec3 u_blendFixB; float u_rotation; vec4 u_texclamp; diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index a1f37a12f4..bc0193e1d6 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -359,7 +359,7 @@ void LinkedShader::use(const ShaderID &VSID) { // Note that we no longer track attr masks here - we do it for the input layouts instead. } -void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBufferedRendering) { +void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBufferedRendering, const Draw::DeviceCaps &caps) { u64 dirty = dirtyUniforms & availableUniforms; dirtyUniforms = 0; @@ -432,8 +432,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu render_->SetUniformM4x4(&u_proj, flippedMatrix.m); render_->SetUniformF1(&u_rotation, useBufferedRendering ? 
0 : (float)g_display_rotation); } - if (dirty & DIRTY_PROJTHROUGHMATRIX) - { + if (dirty & DIRTY_PROJTHROUGHMATRIX) { Matrix4x4 proj_through; if (useBufferedRendering) { proj_through.setOrtho(0.0f, gstate_c.curRTWidth, 0.0f, gstate_c.curRTHeight, 0.0f, 1.0f); @@ -446,7 +445,12 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu SetColorUniform3(render_, &u_texenv, gstate.texenvcolor); } if (dirty & DIRTY_ALPHACOLORREF) { - SetColorUniform3Alpha255(render_, &u_alphacolorref, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask()); + if (caps.fragmentShaderInt32Supported) { + // Same as bitwiseOps really + render_->SetUniformUI1(&u_alphacolorref, gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24)); + } else { + SetColorUniform3Alpha255(render_, &u_alphacolorref, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask()); + } } if (dirty & DIRTY_ALPHACOLORMASK) { render_->SetUniformUI1(&u_alphacolormask, gstate.getColorTestMask() | (gstate.getAlphaTestMask() << 24)); @@ -813,7 +817,7 @@ LinkedShader *ShaderManagerGLES::ApplyFragmentShader(VShaderID VSID, Shader *vs, } if (lastVShaderSame_ && FSID == lastFSID_) { - lastShader_->UpdateUniforms(vertType, VSID, useBufferedRendering); + lastShader_->UpdateUniforms(vertType, VSID, useBufferedRendering, draw_->GetDeviceCaps()); return lastShader_; } @@ -856,7 +860,7 @@ LinkedShader *ShaderManagerGLES::ApplyFragmentShader(VShaderID VSID, Shader *vs, } else { ls->use(VSID); } - ls->UpdateUniforms(vertType, VSID, useBufferedRendering); + ls->UpdateUniforms(vertType, VSID, useBufferedRendering, draw_->GetDeviceCaps()); lastShader_ = ls; return ls; diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index 2dd715cc0b..47fd2105aa 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -35,7 +35,7 @@ public: ~LinkedShader(); void use(const ShaderID &VSID); - void 
UpdateUniforms(u32 vertType, const ShaderID &VSID, bool useBufferedRendering); + void UpdateUniforms(u32 vertType, const ShaderID &VSID, bool useBufferedRendering, const Draw::DeviceCaps &caps); GLRenderManager *render_; Shader *vs_;