From 07ca9e46565e439a47976590d078c84bf561a8b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Mon, 26 Sep 2022 11:20:17 +0200 Subject: [PATCH] Fold the "materialUpdate" flag into the light ubershader part. This reduces the number of vertex shaders and thus pipelines by quite a bit more in a few games, like Tekken and GoW, continuing the fight against shader compile stutter. The perf impact should be minimal if not positive due to less pipeline changes. GLES fixes Make the vertex input declarations match (always declare fog input). Fixes D3D11 validation Tess fix --- GPU/Common/FragmentShaderGenerator.cpp | 10 +++------- GPU/Common/ShaderId.cpp | 2 +- GPU/Common/ShaderUniforms.cpp | 5 +++++ GPU/Common/VertexShaderGenerator.cpp | 21 +++++++++++++++------ GPU/GPUCommon.cpp | 8 ++++++++ GPU/Vulkan/ShaderManagerVulkan.cpp | 2 +- 6 files changed, 33 insertions(+), 15 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index e2088731af..04d8aa93ed 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -174,9 +174,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu WRITE(p, "layout (location = 1) %s in lowp vec4 v_color0;\n", shading); if (lmode) WRITE(p, "layout (location = 2) %s in lowp vec3 v_color1;\n", shading); - if (enableFog) { - WRITE(p, "layout (location = 3) in highp float v_fogdepth;\n"); - } + WRITE(p, "layout (location = 3) in highp float v_fogdepth;\n"); if (doTexture) { WRITE(p, "layout (location = 0) in highp vec3 v_texcoord;\n"); } @@ -279,9 +277,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (lmode) { WRITE(p, " vec3 v_color1: COLOR1;\n"); } - if (enableFog) { - WRITE(p, " float v_fogdepth: TEXCOORD1;\n"); - } + WRITE(p, " float v_fogdepth: TEXCOORD1;\n"); if (needFragCoord) { if (compat.shaderLanguage == HLSL_D3D11) { WRITE(p, " vec4 pixelPos : SV_POSITION;\n"); @@ -394,8 +390,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (enableFog) { *uniformMask |= DIRTY_FOGCOLOR; WRITE(p, "uniform vec3 u_fogcolor;\n"); - WRITE(p, "%s %s float v_fogdepth;\n", compat.varying_fs, highpFog ? "highp" : "mediump"); } + WRITE(p, "%s %s float v_fogdepth;\n", compat.varying_fs, highpFog ? "highp" : "mediump"); if (doTexture) { WRITE(p, "%s %s vec3 v_texcoord;\n", compat.varying_fs, highpTexcoord ? "highp" : "mediump"); } diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index d530758759..bb8dd54237 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -123,11 +123,11 @@ void ComputeVertexShaderID(VShaderID *id_out, u32 vertType, bool useHWTransform, if (gstate.isLightingEnabled()) { // doShadeMapping is stored as UVGenMode, and light type doesn't matter for shade mapping. - id.SetBits(VS_BIT_MATERIAL_UPDATE, 3, gstate.getMaterialUpdate()); id.SetBit(VS_BIT_LIGHTING_ENABLE); if (gstate_c.Supports(GPU_USE_LIGHT_UBERSHADER)) { id.SetBit(VS_BIT_LIGHT_UBERSHADER); } else { + id.SetBits(VS_BIT_MATERIAL_UPDATE, 3, gstate.getMaterialUpdate()); // Light bits for (int i = 0; i < 4; i++) { bool chanEnabled = gstate.isLightChanEnabled(i) != 0; diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 8bf1f69356..f5cc8ca6c9 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -271,6 +271,8 @@ uint32_t PackLightControlBits() { // Bit organization // Bottom 4 bits are enable bits for each light. // Then, for each light, comes 2 bits for "comp" and 2 bits for "type". + // At the end, at bit 20, we put the three material update bits. + uint32_t lightControl = 0; for (int i = 0; i < 4; i++) { if (gstate.isLightChanEnabled(i)) { @@ -282,6 +284,9 @@ uint32_t PackLightControlBits() { lightControl |= computation << (4 + i * 4); lightControl |= type << (4 + i * 4 + 2); } + + lightControl |= gstate.getMaterialUpdate() << 20; + return lightControl; } diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 5d2da2d06a..4b0a16977f 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -552,7 +552,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag if (enableLighting) { WRITE(p, "uniform lowp vec4 u_ambient;\n"); *uniformMask |= DIRTY_AMBIENT; - if ((matUpdate & 2) == 0 || !hasColor) { + if (lightUberShader || (matUpdate & 2) == 0 || !hasColor) { WRITE(p, "uniform lowp vec3 u_matdiffuse;\n"); *uniformMask |= DIRTY_MATDIFFUSE; } @@ -952,13 +952,22 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag const char *srcCol = "color0"; if (doBezier || doSpline) { // TODO: Probably, should use hasColorTess but FF4 has a problem with drawing the background. - srcCol = "tess"; + srcCol = "tess.col"; } - p.F(" vec4 ambientColor = %s;\n", (matUpdate & 1) && hasColor ? srcCol : "u_matambientalpha"); - if (enableLighting) { - p.F(" vec3 diffuseColor = %s.rgb;\n", (matUpdate & 2) && hasColor ? srcCol : "u_matdiffuse"); - p.F(" vec3 specularColor = %s.rgb;\n", (matUpdate & 4) && hasColor ? srcCol : "u_matspecular"); + if (lightUberShader && hasColor) { + p.F(" vec4 ambientColor = ((u_lightControl & (1u << 20u)) != 0u) ? %s : u_matambientalpha;\n", srcCol); + if (enableLighting) { + p.F(" vec3 diffuseColor = ((u_lightControl & (1u << 21u)) != 0u) ? %s.rgb : u_matdiffuse;\n", srcCol); + p.F(" vec3 specularColor = ((u_lightControl & (1u << 22u)) != 0u) ? %s.rgb : u_matspecular.rgb;\n", srcCol); + } + } else { + // This path also takes care of the lightUberShader && !hasColor path, because all comparisons fail. + p.F(" vec4 ambientColor = %s;\n", (matUpdate & 1) && hasColor ? srcCol : "u_matambientalpha"); + if (enableLighting) { + p.F(" vec3 diffuseColor = %s.rgb;\n", (matUpdate & 2) && hasColor ? srcCol : "u_matdiffuse"); + p.F(" vec3 specularColor = %s.rgb;\n", (matUpdate & 4) && hasColor ? srcCol : "u_matspecular"); + } } bool diffuseIsZero = true; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index ce09871c6a..56dbbbcf59 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -465,6 +465,14 @@ void GPUCommon::UpdateCmdInfo() { cmdInfo_[GE_CMD_LIGHTTYPE0 + i].AddDirty(DIRTY_VERTEXSHADER_STATE); } } + + if (gstate_c.Supports(GPU_USE_LIGHT_UBERSHADER)) { + cmdInfo_[GE_CMD_MATERIALUPDATE].RemoveDirty(DIRTY_VERTEXSHADER_STATE); + cmdInfo_[GE_CMD_MATERIALUPDATE].AddDirty(DIRTY_LIGHT_CONTROL); + } else { + cmdInfo_[GE_CMD_MATERIALUPDATE].RemoveDirty(DIRTY_LIGHT_CONTROL); + cmdInfo_[GE_CMD_MATERIALUPDATE].AddDirty(DIRTY_VERTEXSHADER_STATE); + } } void GPUCommon::BeginHostFrame() { diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index acdeba7b4a..6c39280a82 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -376,7 +376,7 @@ VulkanFragmentShader *ShaderManagerVulkan::GetFragmentShaderFromModule(VkShaderM // instantaneous. #define CACHE_HEADER_MAGIC 0xff51f420 -#define CACHE_VERSION 27 +#define CACHE_VERSION 28 struct VulkanCacheHeader { uint32_t magic; uint32_t version;