Merge pull request #13576 from hrydgard/merge-glsl-fragment-shadergens

Merge the two GLSL fragment shader generators
2025-04-02 11:01:50 -04:00 · 2020-10-23 14:21:19 +02:00 · 2020-10-23 14:21:19 +02:00 · 3d1cf3733a
commit 3d1cf3733a
parent ee11c796a1 020fb55a65
22 changed files with 440 additions and 993 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1263,8 +1263,6 @@ set(GPU_VULKAN
 	GPU/Vulkan/DebugVisVulkan.h
 	GPU/Vulkan/DrawEngineVulkan.cpp
 	GPU/Vulkan/DrawEngineVulkan.h
-	GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp
-	GPU/Vulkan/FragmentShaderGeneratorVulkan.h
 	GPU/Vulkan/FramebufferManagerVulkan.cpp
 	GPU/Vulkan/FramebufferManagerVulkan.h
 	GPU/Vulkan/GPU_Vulkan.cpp
--- a/GPU/Common/ShaderCommon.cpp
+++ b/GPU/Common/ShaderCommon.cpp
@@ -5,6 +5,8 @@

 #include "ext/glslang/SPIRV/GlslangToSpv.h"

+#include "ShaderCommon.h"
+
 void init_resources(TBuiltInResource &Resources) {
 	Resources.maxLights = 32;
 	Resources.maxClipPlanes = 6;
@@ -98,4 +100,21 @@ void init_resources(TBuiltInResource &Resources) {
 	Resources.limits.generalSamplerIndexing = 1;
 	Resources.limits.generalVariableIndexing = 1;
 	Resources.limits.generalConstantMatrixVectorIndexing = 1;
-}
+}
+
+void GLSLShaderCompat::SetupForVulkan() {  // Assigns a fixed value to every field of GLSLShaderCompat and sets the vulkan flag.
+	fragColor0 = "fragColor0";
+	fragColor1 = "fragColor1";
+	varying_fs = "in";  // Qualifier for fragment shader inputs.
+	varying_vs = "out";
+	attribute = "in";
+	bitwiseOps = true;
+	framebufferFetchExtension = nullptr;  // Null: no framebuffer-fetch extension line is configured.
+	gles = false;
+	glslES30 = true;
+	glslVersionNumber = 450;
+	lastFragData = nullptr;
+	texture = "texture";  // Name of the texture sampling function written into generated code.
+	texelFetch = "texelFetch";
+	vulkan = true;
+}
--- a/GPU/Common/ShaderCommon.h
+++ b/GPU/Common/ShaderCommon.h
@@ -18,6 +18,7 @@
 #pragma once

 #include <cstdint>
+#include <vector>

 namespace Draw {
 	class DrawContext;
@@ -139,13 +140,20 @@ enum DoLightComputation {
 };

 struct GLSLShaderCompat {
-	const char *varying;
+	int glslVersionNumber;  // Number printed after "#version" by the GLSL fragment shader generator.
+	bool gles;  // When true the fragment generator appends " es" to #version and emits "precision lowp float;".
+	bool vulkan;  // When true the fragment generator emits its Vulkan preamble and layout-qualified declarations.
+	const char *varying_fs;  // Qualifier written before fragment shader inputs ("in" in the Vulkan setup).
+	const char *varying_vs;  // "out" in the Vulkan setup; presumably the vertex shader counterpart - its use is not shown here.
 	const char *attribute;
 	const char *fragColor0;
 	const char *fragColor1;
 	const char *texture;
 	const char *texelFetch;
 	const char *lastFragData;
+	const char *framebufferFetchExtension;  // If non-null, written verbatim as its own line in the shader header (presumably a full #extension directive).
 	bool glslES30;
 	bool bitwiseOps;
+
+	void SetupForVulkan();  // Fills all fields above with the fixed values used for Vulkan.
 };
--- a/GPU/Directx9/FragmentShaderGeneratorHLSL.cpp
+++ b/GPU/Directx9/FragmentShaderGeneratorHLSL.cpp
@@ -291,6 +291,11 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
 			WRITE(p, "  float4 v = In.v_color0 %s;\n", secondary);
 		}

+		if (enableFog) {
+			WRITE(p, "  float fogCoef = clamp(In.v_fogdepth, 0.0, 1.0);\n");
+			WRITE(p, "  v = lerp(float4(u_fogcolor, v.a), v, fogCoef);\n");
+		}
+
 		if (enableAlphaTest) {
 			if (alphaTestAgainstZero) {
 				// When testing against 0 (extremely common), we can avoid some math.
@@ -322,11 +327,6 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
 			}
 		}

-		if (enableFog) {
-			WRITE(p, "  float fogCoef = clamp(In.v_fogdepth, 0.0, 1.0);\n");
-			WRITE(p, "  v = lerp(float4(u_fogcolor, v.a), v, fogCoef);\n");
-		}
-
 		if (enableColorTest) {
 			if (colorTestAgainstZero) {
 				// When testing against 0 (common), we can avoid some math.
--- a/GPU/Directx9/VertexShaderGeneratorHLSL.cpp
+++ b/GPU/Directx9/VertexShaderGeneratorHLSL.cpp
@@ -423,29 +423,6 @@ bool GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
 				"a_w2.x", "a_w2.y", "a_w2.z", "a_w2.w",
 			};

-#if defined(USE_FOR_LOOP) && defined(USE_BONE_ARRAY)
-
-			// To loop through the weights, we unfortunately need to put them in a float array.
-			// GLSL ES sucks - no way to directly initialize an array!
-			switch (numBoneWeights) {
-			case 1: WRITE(p, "  float w[1]; w[0] = a_w1;\n"); break;
-			case 2: WRITE(p, "  float w[2]; w[0] = a_w1.x; w[1] = a_w1.y;\n"); break;
-			case 3: WRITE(p, "  float w[3]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z;\n"); break;
-			case 4: WRITE(p, "  float w[4]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w;\n"); break;
-			case 5: WRITE(p, "  float w[5]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w; w[4] = a_w2;\n"); break;
-			case 6: WRITE(p, "  float w[6]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w; w[4] = a_w2.x; w[5] = a_w2.y;\n"); break;
-			case 7: WRITE(p, "  float w[7]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w; w[4] = a_w2.x; w[5] = a_w2.y; w[6] = a_w2.z;\n"); break;
-			case 8: WRITE(p, "  float w[8]; w[0] = a_w1.x; w[1] = a_w1.y; w[2] = a_w1.z; w[3] = a_w1.w; w[4] = a_w2.x; w[5] = a_w2.y; w[6] = a_w2.z; w[7] = a_w2.w;\n"); break;
-			}
-
-			WRITE(p, "  mat4 skinMatrix = w[0] * u_bone[0];\n");
-			if (numBoneWeights > 1) {
-				WRITE(p, "  for (int i = 1; i < %i; i++) {\n", numBoneWeights);
-				WRITE(p, "    skinMatrix += w[i] * u_bone[i];\n");
-				WRITE(p, "  }\n");
-			}
-
-#else
 			if (lang == HLSL_D3D11 || lang == HLSL_D3D11_LEVEL9) {
 				if (numBoneWeights == 1)
 					WRITE(p, "  float4x3 skinMatrix = mul(In.a_w1, u_bone[0])");
@@ -471,7 +448,6 @@ bool GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
 					WRITE(p, " + mul(In.%s, u_bone%i)", weightAttr, i);
 				}
 			}
-#endif

 			WRITE(p, ";\n");

--- a/GPU/GLES/FragmentShaderGeneratorGLES.cpp
+++ b/GPU/GLES/FragmentShaderGeneratorGLES.cpp
@@ -25,6 +25,7 @@
 #include "Core/Config.h"
 #include "GPU/Common/GPUStateUtils.h"
 #include "GPU/Common/ShaderId.h"
+#include "GPU/Common/ShaderUniforms.h"
 #include "GPU/GLES/FragmentShaderGeneratorGLES.h"
 #include "GPU/GLES/FramebufferManagerGLES.h"
 #include "GPU/GLES/ShaderManagerGLES.h"
@@ -33,116 +34,55 @@

 #define WRITE p+=sprintf

-// #define DEBUG_SHADER
-
-// Missing: Z depth range
-bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, uint64_t *uniformMask, std::string *errorString) {
-	char *p = buffer;
+static const char *vulkan_glsl_preamble =  // Fixed shader header written first when compat.vulkan is set, in place of the computed "#version" line.
+"#version 450\n"
+"#extension GL_ARB_separate_shader_objects : enable\n"
+"#extension GL_ARB_shading_language_420pack : enable\n"
+"#extension GL_ARB_conservative_depth : enable\n"
+"#extension GL_ARB_shader_image_load_store : enable\n"
+"#define splat3(x) vec3(x)\n\n";  // The non-Vulkan path emits the same splat3 macro separately.

+bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLShaderCompat &compat, uint64_t *uniformMask, std::string *errorString) {
 	*uniformMask = 0;
-	// In GLSL ES 3.0, you use "in" variables instead of varying.

-	GLSLShaderCompat compat{};
-	compat.varying = "varying";
-	compat.fragColor0 = "gl_FragColor";
-	compat.fragColor1 = "fragColor1";
-	compat.texture = "texture2D";
-	compat.texelFetch = NULL;
-	compat.bitwiseOps = false;
-	compat.lastFragData = nullptr;
 	bool highpFog = false;
 	bool highpTexcoord = false;
+	bool fragmentTestCache = g_Config.bFragmentTestCache && !compat.vulkan;

-	ReplaceAlphaType stencilToAlpha = static_cast<ReplaceAlphaType>(id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2));
-
-	if (gl_extensions.IsGLES) {
-		// ES doesn't support dual source alpha :(
-		if (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300)) {
-			WRITE(p, "#version 300 es\n");  // GLSL ES 3.0
-			compat.fragColor0 = "fragColor0";
-			compat.texture = "texture";
-			compat.glslES30 = true;
-			compat.bitwiseOps = true;
-			compat.texelFetch = "texelFetch";
-
-			if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE && gl_extensions.EXT_blend_func_extended) {
-				WRITE(p, "#extension GL_EXT_blend_func_extended : require\n");
-			}
-		} else {
-			WRITE(p, "#version 100\n");  // GLSL ES 1.0
-			if (gl_extensions.EXT_gpu_shader4) {
-				WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n");
-				compat.bitwiseOps = true;
-				compat.texelFetch = "texelFetch2D";
-			}
-			if (gl_extensions.EXT_blend_func_extended) {
-				// Oldy moldy GLES, so use the fixed output name.
-				compat.fragColor1 = "gl_SecondaryFragColorEXT";
-
-				if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE && gl_extensions.EXT_blend_func_extended) {
-					WRITE(p, "#extension GL_EXT_blend_func_extended : require\n");
-				}
-			}
-		}
-
+	if (compat.gles) {
 		// PowerVR needs highp to do the fog in MHU correctly.
 		// Others don't, and some can't handle highp in the fragment shader.
 		highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false;
 		highpTexcoord = highpFog;
-
-		if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
-			if (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300) && gl_extensions.EXT_shader_framebuffer_fetch) {
-				WRITE(p, "#extension GL_EXT_shader_framebuffer_fetch : require\n");
-				compat.lastFragData = "fragColor0";
-			} else if (gl_extensions.EXT_shader_framebuffer_fetch) {
-				WRITE(p, "#extension GL_EXT_shader_framebuffer_fetch : require\n");
-				compat.lastFragData = "gl_LastFragData[0]";
-			} else if (gl_extensions.NV_shader_framebuffer_fetch) {
-				// GL_NV_shader_framebuffer_fetch is available on mobile platform and ES 2.0 only but not on desktop.
-				WRITE(p, "#extension GL_NV_shader_framebuffer_fetch : require\n");
-				compat.lastFragData = "gl_LastFragData[0]";
-			} else if (gl_extensions.ARM_shader_framebuffer_fetch) {
-				WRITE(p, "#extension GL_ARM_shader_framebuffer_fetch : require\n");
-				compat.lastFragData = "gl_LastFragColorARM";
-			}
-		}
-
-		WRITE(p, "precision lowp float;\n");
-	} else {
-		if (!gl_extensions.ForceGL2 || gl_extensions.IsCoreContext) {
-			if (gl_extensions.VersionGEThan(3, 3, 0)) {
-				compat.fragColor0 = "fragColor0";
-				compat.texture = "texture";
-				compat.glslES30 = true;
-				compat.bitwiseOps = true;
-				compat.texelFetch = "texelFetch";
-				WRITE(p, "#version 330\n");
-			} else if (gl_extensions.VersionGEThan(3, 0, 0)) {
-				compat.fragColor0 = "fragColor0";
-				compat.bitwiseOps = true;
-				compat.texelFetch = "texelFetch";
-				WRITE(p, "#version 130\n");
-				if (gl_extensions.EXT_gpu_shader4) {
-					WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n");
-				}
-			} else {
-				WRITE(p, "#version 110\n");
-				if (gl_extensions.EXT_gpu_shader4) {
-					WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n");
-					compat.bitwiseOps = true;
-					compat.texelFetch = "texelFetch2D";
-				}
-			}
-		}
-
-		// We remove these everywhere - GL4, GL3, Mac-forced-GL2, etc.
-		WRITE(p, "#define lowp\n");
-		WRITE(p, "#define mediump\n");
-		WRITE(p, "#define highp\n");
 	}

-	if (compat.glslES30 || gl_extensions.IsCoreContext) {
-		compat.varying = "in";
+	ReplaceAlphaType stencilToAlpha = static_cast<ReplaceAlphaType>(id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2));
+
+	char *p = buffer;
+
+	if (compat.vulkan) {
+		WRITE(p, "%s", vulkan_glsl_preamble);
+	} else {
+		WRITE(p, "#version %d%s\n", compat.glslVersionNumber, compat.gles ? " es" : "");
+
+		if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE && gl_extensions.EXT_blend_func_extended) {
+			WRITE(p, "#extension GL_EXT_blend_func_extended : require\n");
+		}
+		if (gl_extensions.EXT_gpu_shader4) {
+			WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n");
+		}
+		if (compat.framebufferFetchExtension) {
+			WRITE(p, "%s\n", compat.framebufferFetchExtension);
+		}
+		if (!compat.gles) {
+			WRITE(p, "#define lowp\n");
+			WRITE(p, "#define mediump\n");
+			WRITE(p, "#define highp\n");
+		} else {
+			WRITE(p, "precision lowp float;\n");
+		}
+
+		WRITE(p, "#define splat3(x) vec3(x)\n");
 	}

 	bool lmode = id.Bit(FS_BIT_LMODE);
@@ -172,118 +112,167 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, uint64_t *uni
 	GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4);
 	GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4);
 	GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3);
+	StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4);

 	bool isModeClear = id.Bit(FS_BIT_CLEARMODE);

-	if (shaderDepal && gl_extensions.IsGLES) {
-		WRITE(p, "precision highp int;\n");
-	}
-
 	const char *shading = "";
-	if (compat.glslES30)
+	if (compat.glslES30 || compat.vulkan)
 		shading = doFlatShading ? "flat" : "";

-	if (doTexture)
-		WRITE(p, "uniform sampler2D tex;\n");
+	bool earlyFragmentTests = ((!enableAlphaTest && !enableColorTest) || testForceToZero) && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
+	bool useAdrenoBugWorkaround = id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL);

-	if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) {
-		*uniformMask |= DIRTY_SHADERBLEND;
-		if (!gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) && replaceBlend == REPLACE_BLEND_COPY_FBO) {
-			if (!compat.texelFetch) {
-				WRITE(p, "uniform vec2 u_fbotexSize;\n");
-			}
-			WRITE(p, "uniform sampler2D fbotex;\n");
+	if (compat.vulkan) {
+		if (earlyFragmentTests) {
+			WRITE(p, "layout (early_fragment_tests) in;\n");
+		} else if (useAdrenoBugWorkaround && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
+			WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n");
 		}
-		if (replaceBlendFuncA >= GE_SRCBLEND_FIXA) {
-			WRITE(p, "uniform vec3 u_blendFixA;\n");
-		}
-		if (replaceBlendFuncB >= GE_DSTBLEND_FIXB) {
-			WRITE(p, "uniform vec3 u_blendFixB;\n");
-		}
-	}

-	if (needShaderTexClamp && doTexture) {
-		*uniformMask |= DIRTY_TEXCLAMP;
-		WRITE(p, "uniform vec4 u_texclamp;\n");
-		if (id.Bit(FS_BIT_TEXTURE_AT_OFFSET)) {
-			WRITE(p, "uniform vec2 u_texclampoff;\n");
+		WRITE(p, "layout (std140, set = 0, binding = 3) uniform baseUBO {\n%s};\n", ub_baseStr);
+		if (doTexture) {
+			WRITE(p, "layout (binding = 0) uniform sampler2D tex;\n");
 		}
-	}

-	if (enableAlphaTest || enableColorTest) {
-		if (g_Config.bFragmentTestCache) {
-			WRITE(p, "uniform sampler2D testtex;\n");
-		} else {
-			*uniformMask |= DIRTY_ALPHACOLORREF;
-			WRITE(p, "uniform vec4 u_alphacolorref;\n");
-			if (compat.bitwiseOps && ((enableColorTest && !colorTestAgainstZero) || (enableAlphaTest && !alphaTestAgainstZero))) {
-				*uniformMask |= DIRTY_ALPHACOLORMASK;
-				WRITE(p, "uniform ivec4 u_alphacolormask;\n");
+		if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) {
+			if (replaceBlend == REPLACE_BLEND_COPY_FBO) {
+				WRITE(p, "layout (binding = 1) uniform sampler2D fbotex;\n");
 			}
 		}
-	}

-	if (shaderDepal) {
-		WRITE(p, "uniform sampler2D pal;\n");
-		WRITE(p, "uniform int u_depal;\n");
-		*uniformMask |= DIRTY_DEPAL;
-	}
+		if (shaderDepal) {
+			WRITE(p, "layout (binding = 2) uniform sampler2D pal;\n");
+		}

-	StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4);
-	if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) {
-		*uniformMask |= DIRTY_STENCILREPLACEVALUE;
-		WRITE(p, "uniform float u_stencilReplaceValue;\n");
-	}
-	if (doTexture && texFunc == GE_TEXFUNC_BLEND) {
-		*uniformMask |= DIRTY_TEXENV;
-		WRITE(p, "uniform vec3 u_texenv;\n");
-	}
+		WRITE(p, "layout (location = 1) %s in vec4 v_color0;\n", shading);
+		if (lmode)
+			WRITE(p, "layout (location = 2) %s in vec3 v_color1;\n", shading);
+		if (enableFog) {
+			WRITE(p, "layout (location = 3) in float v_fogdepth;\n");
+		}
+		if (doTexture) {
+			WRITE(p, "layout (location = 0) in vec3 v_texcoord;\n");
+		}

-	WRITE(p, "%s %s vec4 v_color0;\n", shading, compat.varying);
-	if (lmode)
-		WRITE(p, "%s %s vec3 v_color1;\n", shading, compat.varying);
-	if (enableFog) {
-		*uniformMask |= DIRTY_FOGCOLOR;
-		WRITE(p, "uniform vec3 u_fogcolor;\n");
-		WRITE(p, "%s %s float v_fogdepth;\n", compat.varying, highpFog ? "highp" : "mediump");
-	}
-	if (doTexture) {
-		WRITE(p, "%s %s vec3 v_texcoord;\n", compat.varying, highpTexcoord ? "highp" : "mediump");
-	}
-
-	if (!g_Config.bFragmentTestCache) {
 		if (enableAlphaTest && !alphaTestAgainstZero) {
-			if (compat.bitwiseOps) {
-				WRITE(p, "int roundAndScaleTo255i(in float x) { return int(floor(x * 255.0 + 0.5)); }\n");
-			} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
-				WRITE(p, "float roundTo255thf(in mediump float x) { mediump float y = x + (0.5/255.0); return y - fract(y * 255.0) * (1.0 / 255.0); }\n");
-			} else {
-				WRITE(p, "float roundAndScaleTo255f(in float x) { return floor(x * 255.0 + 0.5); }\n");
-			}
+			WRITE(p, "int roundAndScaleTo255i(in float x) { return int(floor(x * 255.0 + 0.5)); }\n");
 		}
 		if (enableColorTest && !colorTestAgainstZero) {
-			if (compat.bitwiseOps) {
-				WRITE(p, "ivec3 roundAndScaleTo255iv(in vec3 x) { return ivec3(floor(x * 255.0 + 0.5)); }\n");
-			} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
-				WRITE(p, "vec3 roundTo255thv(in vec3 x) { vec3 y = x + (0.5/255.0); return y - fract(y * 255.0) * (1.0 / 255.0); }\n");
-			} else {
-				WRITE(p, "vec3 roundAndScaleTo255v(in vec3 x) { return floor(x * 255.0 + 0.5); }\n");
+			WRITE(p, "ivec3 roundAndScaleTo255iv(in vec3 x) { return ivec3(floor(x * 255.0 + 0.5)); }\n");
+		}
+
+		WRITE(p, "layout (location = 0, index = 0) out vec4 fragColor0;\n");
+		if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) {
+			WRITE(p, "layout (location = 0, index = 1) out vec4 fragColor1;\n");
+		}
+	} else {
+		if (shaderDepal && gl_extensions.IsGLES) {
+			WRITE(p, "precision highp int;\n");
+		}
+
+		if (doTexture)
+			WRITE(p, "uniform sampler2D tex;\n");
+
+		if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) {
+			*uniformMask |= DIRTY_SHADERBLEND;
+			if (!gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH) && replaceBlend == REPLACE_BLEND_COPY_FBO) {
+				if (!compat.texelFetch) {
+					WRITE(p, "uniform vec2 u_fbotexSize;\n");
+				}
+				WRITE(p, "uniform sampler2D fbotex;\n");
+			}
+			if (replaceBlendFuncA >= GE_SRCBLEND_FIXA) {
+				WRITE(p, "uniform vec3 u_blendFixA;\n");
+			}
+			if (replaceBlendFuncB >= GE_DSTBLEND_FIXB) {
+				WRITE(p, "uniform vec3 u_blendFixB;\n");
 			}
 		}
-	}

-	if (!strcmp(compat.fragColor0, "fragColor0")) {
-		const char *qualifierColor0 = "out";
-		if (compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) {
-			qualifierColor0 = "inout";
+		if (needShaderTexClamp && doTexture) {
+			*uniformMask |= DIRTY_TEXCLAMP;
+			WRITE(p, "uniform vec4 u_texclamp;\n");
+			if (id.Bit(FS_BIT_TEXTURE_AT_OFFSET)) {
+				WRITE(p, "uniform vec2 u_texclampoff;\n");
+			}
 		}
-		// Output the output color definitions.
-		if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) {
-			WRITE(p, "%s vec4 fragColor0;\n", qualifierColor0);
-			WRITE(p, "out vec4 fragColor1;\n");
-		} else {
-			WRITE(p, "%s vec4 fragColor0;\n", qualifierColor0);
+
+		if (enableAlphaTest || enableColorTest) {
+			if (g_Config.bFragmentTestCache) {
+				WRITE(p, "uniform sampler2D testtex;\n");
+			} else {
+				*uniformMask |= DIRTY_ALPHACOLORREF;
+				WRITE(p, "uniform vec4 u_alphacolorref;\n");
+				if (compat.bitwiseOps && ((enableColorTest && !colorTestAgainstZero) || (enableAlphaTest && !alphaTestAgainstZero))) {
+					*uniformMask |= DIRTY_ALPHACOLORMASK;
+					WRITE(p, "uniform ivec4 u_alphacolormask;\n");
+				}
+			}
 		}
+
+		if (shaderDepal) {
+			WRITE(p, "uniform sampler2D pal;\n");
+			WRITE(p, "uniform int u_depal_mask_shift_off_fmt;\n");
+			*uniformMask |= DIRTY_DEPAL;
+		}
+
+		if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) {
+			*uniformMask |= DIRTY_STENCILREPLACEVALUE;
+			WRITE(p, "uniform float u_stencilReplaceValue;\n");
+		}
+		if (doTexture && texFunc == GE_TEXFUNC_BLEND) {
+			*uniformMask |= DIRTY_TEXENV;
+			WRITE(p, "uniform vec3 u_texenv;\n");
+		}
+
+		WRITE(p, "%s %s vec4 v_color0;\n", shading, compat.varying_fs);
+		if (lmode)
+			WRITE(p, "%s %s vec3 v_color1;\n", shading, compat.varying_fs);
+		if (enableFog) {
+			*uniformMask |= DIRTY_FOGCOLOR;
+			WRITE(p, "uniform vec3 u_fogcolor;\n");
+			WRITE(p, "%s %s float v_fogdepth;\n", compat.varying_fs, highpFog ? "highp" : "mediump");
+		}
+		if (doTexture) {
+			WRITE(p, "%s %s vec3 v_texcoord;\n", compat.varying_fs, highpTexcoord ? "highp" : "mediump");
+		}
+
+		if (!g_Config.bFragmentTestCache) {
+			if (enableAlphaTest && !alphaTestAgainstZero) {
+				if (compat.bitwiseOps) {
+					WRITE(p, "int roundAndScaleTo255i(in float x) { return int(floor(x * 255.0 + 0.5)); }\n");
+				} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
+					WRITE(p, "float roundTo255thf(in mediump float x) { mediump float y = x + (0.5/255.0); return y - fract(y * 255.0) * (1.0 / 255.0); }\n");
+				} else {
+					WRITE(p, "float roundAndScaleTo255f(in float x) { return floor(x * 255.0 + 0.5); }\n");
+				}
+			}
+			if (enableColorTest && !colorTestAgainstZero) {
+				if (compat.bitwiseOps) {
+					WRITE(p, "ivec3 roundAndScaleTo255iv(in vec3 x) { return ivec3(floor(x * 255.0 + 0.5)); }\n");
+				} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
+					WRITE(p, "vec3 roundTo255thv(in vec3 x) { vec3 y = x + (0.5/255.0); return y - fract(y * 255.0) * (1.0 / 255.0); }\n");
+				} else {
+					WRITE(p, "vec3 roundAndScaleTo255v(in vec3 x) { return floor(x * 255.0 + 0.5); }\n");
+				}
+			}
+		}
+
+		if (!strcmp(compat.fragColor0, "fragColor0")) {
+			const char *qualifierColor0 = "out";
+			if (compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) {
+				qualifierColor0 = "inout";
+			}
+			// Output the output color definitions.
+			if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) {
+				WRITE(p, "%s vec4 fragColor0;\n", qualifierColor0);
+				WRITE(p, "out vec4 fragColor1;\n");
+			} else {
+				WRITE(p, "%s vec4 fragColor0;\n", qualifierColor0);
+			}
+		}
+
 	}

 	// PowerVR needs a custom modulo function. For some reason, this has far higher precision than the builtin one.
@@ -292,7 +281,6 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, uint64_t *uni
 	}

 	WRITE(p, "void main() {\n");
-
 	if (isModeClear) {
 		// Clear mode does not allow any fancy shading.
 		WRITE(p, "  vec4 v = v_color0;\n");
@@ -367,7 +355,7 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, uint64_t *uni
 				}
 				WRITE(p, "  vec2 tsize = vec2(textureSize(tex, 0));\n");
 				WRITE(p, "  vec2 fraction;\n");
-				WRITE(p, "  bool bilinear = (u_depal >> 31) != 0;\n");
+				WRITE(p, "  bool bilinear = (u_depal_mask_shift_off_fmt >> 31) != 0;\n");
 				WRITE(p, "  if (bilinear) {\n");
 				WRITE(p, "    uv_round = uv * tsize - vec2(0.5, 0.5);\n");
 				WRITE(p, "    fraction = fract(uv_round);\n");
@@ -379,57 +367,57 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, uint64_t *uni
 				WRITE(p, "  vec4 t1 = %sOffset(tex, uv_round, ivec2(1, 0));\n", compat.texture);
 				WRITE(p, "  vec4 t2 = %sOffset(tex, uv_round, ivec2(0, 1));\n", compat.texture);
 				WRITE(p, "  vec4 t3 = %sOffset(tex, uv_round, ivec2(1, 1));\n", compat.texture);
-				WRITE(p, "  int depalMask = (u_depal & 0xFF);\n");
-				WRITE(p, "  int depalShift = ((u_depal >> 8) & 0xFF);\n");
-				WRITE(p, "  int depalOffset = (((u_depal >> 16) & 0xFF) << 4);\n");
-				WRITE(p, "  int depalFmt = ((u_depal >> 24) & 0x3);\n");
-				WRITE(p, "  ivec4 col; int index0; int index1; int index2; int index3;\n");
+				WRITE(p, "  uint depalMask = (u_depal_mask_shift_off_fmt & 0xFF);\n");
+				WRITE(p, "  uint depalShift = (u_depal_mask_shift_off_fmt >> 8) & 0xFF;\n");
+				WRITE(p, "  uint depalOffset = ((u_depal_mask_shift_off_fmt >> 16) & 0xFF) << 4;\n");
+				WRITE(p, "  uint depalFmt = (u_depal_mask_shift_off_fmt >> 24) & 0x3;\n");
+				WRITE(p, "  uvec4 col; uint index0; uint index1; uint index2; uint index3;\n");
 				WRITE(p, "  switch (depalFmt) {\n");  // We might want to include fmt in the shader ID if this is a performance issue.
 				WRITE(p, "  case 0:\n");  // 565
-				WRITE(p, "    col = ivec4(t.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
+				WRITE(p, "    col = uvec4(t.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
 				WRITE(p, "    index0 = (col.b << 11) | (col.g << 5) | (col.r);\n");
 				WRITE(p, "    if (bilinear) {\n");
-				WRITE(p, "      col = ivec4(t1.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
+				WRITE(p, "      col = uvec4(t1.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
 				WRITE(p, "      index1 = (col.b << 11) | (col.g << 5) | (col.r);\n");
-				WRITE(p, "      col = ivec4(t2.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
+				WRITE(p, "      col = uvec4(t2.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
 				WRITE(p, "      index2 = (col.b << 11) | (col.g << 5) | (col.r);\n");
-				WRITE(p, "      col = ivec4(t3.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
+				WRITE(p, "      col = uvec4(t3.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
 				WRITE(p, "      index3 = (col.b << 11) | (col.g << 5) | (col.r);\n");
 				WRITE(p, "    }\n");
 				WRITE(p, "    break;\n");
 				WRITE(p, "  case 1:\n");  // 5551
-				WRITE(p, "    col = ivec4(t.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
+				WRITE(p, "    col = uvec4(t.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
 				WRITE(p, "    index0 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n");
 				WRITE(p, "    if (bilinear) {\n");
-				WRITE(p, "      col = ivec4(t1.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
+				WRITE(p, "      col = uvec4(t1.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
 				WRITE(p, "      index1 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n");
-				WRITE(p, "      col = ivec4(t2.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
+				WRITE(p, "      col = uvec4(t2.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
 				WRITE(p, "      index2 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n");
-				WRITE(p, "      col = ivec4(t3.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
+				WRITE(p, "      col = uvec4(t3.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
 				WRITE(p, "      index3 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n");
 				WRITE(p, "    }\n");
 				WRITE(p, "    break;\n");
 				WRITE(p, "  case 2:\n");  // 4444
-				WRITE(p, "    col = ivec4(t.rgba * vec4(15.99, 15.99, 15.99, 15.99));\n");
+				WRITE(p, "    col = uvec4(t.rgba * 15.99);\n");
 				WRITE(p, "    index0 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n");
 				WRITE(p, "    if (bilinear) {\n");
-				WRITE(p, "      col = ivec4(t1.rgba * vec4(15.99, 15.99, 15.99, 15.99));\n");
+				WRITE(p, "      col = uvec4(t1.rgba * 15.99);\n");
 				WRITE(p, "      index1 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n");
-				WRITE(p, "      col = ivec4(t2.rgba * vec4(15.99, 15.99, 15.99, 15.99));\n");
+				WRITE(p, "      col = uvec4(t2.rgba * 15.99);\n");
 				WRITE(p, "      index2 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n");
-				WRITE(p, "      col = ivec4(t3.rgba * vec4(15.99, 15.99, 15.99, 15.99));\n");
+				WRITE(p, "      col = uvec4(t3.rgba * 15.99);\n");
 				WRITE(p, "      index3 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n");
 				WRITE(p, "    }\n");
 				WRITE(p, "    break;\n");
 				WRITE(p, "  case 3:\n");  // 8888
-				WRITE(p, "    col = ivec4(t.rgba * vec4(255.99, 255.99, 255.99, 255.99));\n");
+				WRITE(p, "    col = uvec4(t.rgba * 255.99);\n");
 				WRITE(p, "    index0 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n");
 				WRITE(p, "    if (bilinear) {\n");
-				WRITE(p, "      col = ivec4(t1.rgba * vec4(255.99, 255.99, 255.99, 255.99));\n");
+				WRITE(p, "      col = uvec4(t1.rgba * 255.99);\n");
 				WRITE(p, "      index1 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n");
-				WRITE(p, "      col = ivec4(t2.rgba * vec4(255.99, 255.99, 255.99, 255.99));\n");
+				WRITE(p, "      col = uvec4(t2.rgba * 255.99);\n");
 				WRITE(p, "      index2 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n");
-				WRITE(p, "      col = ivec4(t3.rgba * vec4(255.99, 255.99, 255.99, 255.99));\n");
+				WRITE(p, "      col = uvec4(t3.rgba * 255.99);\n");
 				WRITE(p, "      index3 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n");
 				WRITE(p, "    }\n");
 				WRITE(p, "    break;\n");
@@ -525,7 +513,7 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, uint64_t *uni
 		// Texture access is at half texels [0.5/256, 255.5/256], but colors are normalized [0, 255].
 		// So we have to scale to account for the difference.
 		std::string alphaTestXCoord = "0";
-		if (g_Config.bFragmentTestCache) {
+		if (fragmentTestCache) {
 			if (enableColorTest && !colorTestAgainstZero) {
 				WRITE(p, "  vec4 vScale256 = v * %f + %f;\n", 255.0 / 256.0, 0.5 / 256.0);
 				alphaTestXCoord = "vScale256.a";
@@ -551,7 +539,7 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, uint64_t *uni
 					// Maybe we could discard the drawcall, but it's pretty rare.  Let's just statically discard here.
 					WRITE(p, "  %s\n", discardStatement);
 				}
-			} else if (g_Config.bFragmentTestCache) {
+			} else if (fragmentTestCache) {
 				WRITE(p, "  float aResult = %s(testtex, vec2(%s, 0)).a;\n", compat.texture, alphaTestXCoord.c_str());
 				WRITE(p, "  if (aResult < 0.5) %s\n", discardStatement);
 			} else {
@@ -605,9 +593,14 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, uint64_t *uni
 					if (compat.bitwiseOps) {
 						// Apparently GLES3 does not support vector bitwise ops.
 						WRITE(p, "  ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
-						const char *maskedFragColor = "ivec3(v_scaled.r & u_alphacolormask.r, v_scaled.g & u_alphacolormask.g, v_scaled.b & u_alphacolormask.b)";
-						const char *maskedColorRef = "ivec3(int(u_alphacolorref.r) & u_alphacolormask.r, int(u_alphacolorref.g) & u_alphacolormask.g, int(u_alphacolorref.b) & u_alphacolormask.b)";
-						WRITE(p, "  if (%s %s %s) %s\n", maskedFragColor, colorTestFuncs[colorTestFunc], maskedColorRef, discardStatement);
+						if (compat.vulkan) {
+							// TODO: Use this for GL as well?
+							WRITE(p, "  if ((v_scaled & u_alphacolormask.rgb) %s (u_alphacolorref.rgb & u_alphacolormask.rgb)) %s\n", colorTestFuncs[colorTestFunc], discardStatement);
+						} else {
+							const char *maskedFragColor = "ivec3(v_scaled.r & u_alphacolormask.r, v_scaled.g & u_alphacolormask.g, v_scaled.b & u_alphacolormask.b)";
+							const char *maskedColorRef = "ivec3(int(u_alphacolorref.r) & u_alphacolormask.r, int(u_alphacolorref.g) & u_alphacolormask.g, int(u_alphacolorref.b) & u_alphacolormask.b)";
+							WRITE(p, "  if (%s %s %s) %s\n", maskedFragColor, colorTestFuncs[colorTestFunc], maskedColorRef, discardStatement);
+						}
 					} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
 						WRITE(p, "  if (roundTo255thv(v.rgb) %s u_alphacolorref.rgb) %s\n", colorTestFuncs[colorTestFunc], discardStatement);
 					} else {
@@ -642,6 +635,11 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, uint64_t *uni
 			default:                            srcFactor = "u_blendFixA"; break;
 			}

+			if (!strcmp(srcFactor, "ERROR")) {
+				*errorString = "Bad replaceblend src factor";
+				return false;
+			}
+
 			WRITE(p, "  v.rgb = v.rgb * %s;\n", srcFactor);
 		}

@@ -685,7 +683,7 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, uint64_t *uni
 			case GE_DSTBLEND_DOUBLEDSTALPHA:    dstFactor = "vec3(destColor.a * 2.0)"; break;
 			case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a * 2.0)"; break;
 			case GE_DSTBLEND_FIXB:              dstFactor = "u_blendFixB"; break;
-			default:                            srcFactor = "u_blendFixB"; break;
+			default:                            dstFactor = "u_blendFixB"; break;
 			}

 			switch (replaceBlendEq) {
@@ -707,6 +705,9 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, uint64_t *uni
 			case GE_BLENDMODE_ABSDIFF:
 				WRITE(p, "  v.rgb = abs(v.rgb - destColor.rgb);\n");
 				break;
+			default:
+				*errorString = "Bad replace blend eq";
+				return false;
 			}
 		}

@ -804,6 +805,10 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, uint64_t *uni
 			WRITE(p, "  z = (1.0/65535.0) * floor(z * 65535.0);\n");
 		}
 		WRITE(p, "  gl_FragDepth = z;\n");
+	} else if (!earlyFragmentTests && useAdrenoBugWorkaround) {
+		// Adreno (and possibly MESA/others) apply early frag tests even with discard in the shader.
+		// Writing depth prevents the bug, even with depth_unchanged specified.
+		WRITE(p, "  gl_FragDepth = gl_FragCoord.z;\n");
 	}

 	WRITE(p, "}\n");
--- a/GPU/GLES/FragmentShaderGeneratorGLES.h
+++ b/GPU/GLES/FragmentShaderGeneratorGLES.h
@ -17,6 +17,8 @@

 #pragma once

+#include "GPU/Common/ShaderCommon.h"
+
 struct FShaderID;

-bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, uint64_t *uniformMask, std::string *errorString);
+bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLShaderCompat &compat, uint64_t *uniformMask, std::string *errorString);
--- a/GPU/GLES/ShaderManagerGLES.cpp
+++ b/GPU/GLES/ShaderManagerGLES.cpp
@ -166,7 +166,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
 	queries.push_back({ &u_tess_weights_u, "u_tess_weights_u" });
 	queries.push_back({ &u_tess_weights_v, "u_tess_weights_v" });
 	queries.push_back({ &u_spline_counts, "u_spline_counts" });
-	queries.push_back({ &u_depal, "u_depal" });
+	queries.push_back({ &u_depal_mask_shift_off_fmt, "u_depal_mask_shift_off_fmt" });

 	attrMask = vs->GetAttrMask();
 	availableUniforms = vs->GetUniformMask() | fs->GetUniformMask();
@ -298,7 +298,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu
 		uint32_t val = BytesToUint32(indexMask, indexShift, indexOffset, format);
 		// Poke in a bilinear filter flag in the top bit.
 		val |= gstate.isMagnifyFilteringEnabled() << 31;
-		render_->SetUniformI1(&u_depal, val);
+		render_->SetUniformI1(&u_depal_mask_shift_off_fmt, val);
 	}

 	// Update any dirty uniforms before we draw
@ -576,12 +576,93 @@ ShaderManagerGLES::ShaderManagerGLES(Draw::DrawContext *draw)
 	codeBuffer_ = new char[16384];
 	lastFSID_.set_invalid();
 	lastVSID_.set_invalid();
+	DetectShaderLanguage();
 }

 ShaderManagerGLES::~ShaderManagerGLES() {
 	delete [] codeBuffer_;
 }

+// Fills compat_ with the GLSL dialect settings (version number, qualifier keywords,
+// output/texture function names, framebuffer fetch setup) that match the current
+// GL context, as described by gl_extensions and gstate_c. Called from the constructor.
+void ShaderManagerGLES::DetectShaderLanguage() {
+	GLSLShaderCompat &compat = compat_;
+
+	// Baseline: oldest dialect (GLSL 1.10 / GLSL ES 1.00 style names). The branches below upgrade it.
+	compat.attribute = "attribute";
+	compat.varying_vs = "varying";
+	compat.varying_fs = "varying";
+	compat.fragColor0 = "gl_FragColor";
+	compat.fragColor1 = "fragColor1";
+	compat.texture = "texture2D";
+	compat.texelFetch = nullptr;
+	compat.bitwiseOps = false;
+	compat.lastFragData = nullptr;
+	compat.framebufferFetchExtension = nullptr;
+	compat.glslES30 = false;
+	compat.gles = gl_extensions.IsGLES;
+	// Desktop GL with ForceGL2 on a non-core context assigns no version below. The generators
+	// emit "#version %d" unconditionally, so fall back to 110, which is what GLSL assumes
+	// when no #version directive is present, instead of leaving this at 0.
+	compat.glslVersionNumber = 110;
+
+	if (compat.gles) {
+		if (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300)) {
+			compat.glslVersionNumber = 300;  // GLSL ES 3.0
+			compat.fragColor0 = "fragColor0";
+			compat.texture = "texture";
+			compat.glslES30 = true;
+			compat.bitwiseOps = true;
+			compat.texelFetch = "texelFetch";
+		} else {
+			compat.glslVersionNumber = 100;  // GLSL ES 1.0
+			if (gl_extensions.EXT_gpu_shader4) {
+				compat.bitwiseOps = true;
+				compat.texelFetch = "texelFetch2D";
+			}
+			if (gl_extensions.EXT_blend_func_extended) {
+				// Oldy moldy GLES, so use the fixed output name.
+				compat.fragColor1 = "gl_SecondaryFragColorEXT";
+			}
+		}
+	} else {
+		// Desktop GL. When GL2 is forced on a non-core context, keep the baseline values.
+		if (!gl_extensions.ForceGL2 || gl_extensions.IsCoreContext) {
+			if (gl_extensions.VersionGEThan(3, 3, 0)) {
+				compat.glslVersionNumber = 330;
+				compat.fragColor0 = "fragColor0";
+				compat.texture = "texture";
+				compat.glslES30 = true;
+				compat.bitwiseOps = true;
+				compat.texelFetch = "texelFetch";
+			} else if (gl_extensions.VersionGEThan(3, 0, 0)) {
+				compat.glslVersionNumber = 130;
+				compat.fragColor0 = "fragColor0";
+				compat.bitwiseOps = true;
+				compat.texelFetch = "texelFetch";
+			} else {
+				compat.glslVersionNumber = 110;
+				if (gl_extensions.EXT_gpu_shader4) {
+					compat.bitwiseOps = true;
+					compat.texelFetch = "texelFetch2D";
+				}
+			}
+		}
+	}
+
+	// Select the framebuffer fetch extension directive and the matching lastFragData identifier.
+	// The EXT variant is preferred, then NV, then ARM.
+	if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
+		if (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300) && gl_extensions.EXT_shader_framebuffer_fetch) {
+			compat.framebufferFetchExtension = "#extension GL_EXT_shader_framebuffer_fetch : require";
+			compat.lastFragData = "fragColor0";
+		} else if (gl_extensions.EXT_shader_framebuffer_fetch) {
+			compat.framebufferFetchExtension = "#extension GL_EXT_shader_framebuffer_fetch : require";
+			compat.lastFragData = "gl_LastFragData[0]";
+		} else if (gl_extensions.NV_shader_framebuffer_fetch) {
+			// GL_NV_shader_framebuffer_fetch is available on mobile platform and ES 2.0 only but not on desktop.
+			compat.framebufferFetchExtension = "#extension GL_NV_shader_framebuffer_fetch : require";
+			compat.lastFragData = "gl_LastFragData[0]";
+		} else if (gl_extensions.ARM_shader_framebuffer_fetch) {
+			compat.framebufferFetchExtension = "#extension GL_ARM_shader_framebuffer_fetch : require";
+			compat.lastFragData = "gl_LastFragColorARM";
+		}
+	}
+
+	// Use in/out qualifiers instead of attribute/varying when glslES30 is set or on a core context.
+	if (compat.glslES30 || gl_extensions.IsCoreContext) {
+		compat.varying_vs = "out";
+		compat.varying_fs = "in";
+		compat.attribute = "in";
+	}
+}
+
 void ShaderManagerGLES::Clear() {
 	DirtyLastShader();
 	for (auto iter = linkedShaderCache_.begin(); iter != linkedShaderCache_.end(); ++iter) {
@ -630,7 +711,7 @@ void ShaderManagerGLES::DirtyLastShader() {
 Shader *ShaderManagerGLES::CompileFragmentShader(FShaderID FSID) {
 	uint64_t uniformMask;
 	std::string errorString;
-	if (!GenerateFragmentShaderGLSL(FSID, codeBuffer_, &uniformMask, &errorString)) {
+	if (!GenerateFragmentShaderGLSL(FSID, codeBuffer_, compat_, &uniformMask, &errorString)) {
 		ERROR_LOG(G3D, "Shader gen error: %s", errorString.c_str());
 		return nullptr;
 	}
@ -643,7 +724,7 @@ Shader *ShaderManagerGLES::CompileVertexShader(VShaderID VSID) {
 	uint32_t attrMask;
 	uint64_t uniformMask;
 	std::string errorString;
-	if (!GenerateVertexShaderGLSL(VSID, codeBuffer_, &attrMask, &uniformMask, &errorString)) {
+	if (!GenerateVertexShaderGLSL(VSID, codeBuffer_, compat_, &attrMask, &uniformMask, &errorString)) {
 		ERROR_LOG(G3D, "Shader gen error: %s", errorString.c_str());
 		return nullptr;
 	}
--- a/GPU/GLES/ShaderManagerGLES.h
+++ b/GPU/GLES/ShaderManagerGLES.h
@ -88,7 +88,7 @@ public:

 	// Shader depal
 	int u_pal;  // the texture
-	int u_depal;  // the params
+	int u_depal_mask_shift_off_fmt;  // the params

 	// Fragment processing inputs
 	int u_alphacolorref;
@ -183,6 +183,7 @@ private:
 	void Clear();
 	Shader *CompileFragmentShader(FShaderID id);
 	Shader *CompileVertexShader(VShaderID id);
+	void DetectShaderLanguage();

 	struct LinkedShaderCacheEntry {
 		LinkedShaderCacheEntry(Shader *vs_, Shader *fs_, LinkedShader *ls_)
@ -195,6 +196,7 @@ private:
 	typedef std::vector<LinkedShaderCacheEntry> LinkedShaderCache;

 	GLRenderManager *render_;
+	GLSLShaderCompat compat_{};
 	LinkedShaderCache linkedShaderCache_;

 	bool lastVShaderSame_;
--- a/GPU/GLES/VertexShaderGeneratorGLES.cpp
+++ b/GPU/GLES/VertexShaderGeneratorGLES.cpp
@ -87,73 +87,35 @@ static const char * const boneWeightInDecl[9] = {
 // TODO: Skip all this if we can actually get a 16-bit depth buffer along with stencil, which
 // is a bit of a rare configuration, although quite common on mobile.

-bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, uint32_t *attrMask, uint64_t *uniformMask, std::string *errorString) {
-	char *p = buffer;
+bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShaderCompat &compat, uint32_t *attrMask, uint64_t *uniformMask, std::string *errorString) {
 	*attrMask = 0;
 	*uniformMask = 0;
-	// #define USE_FOR_LOOP

-	// In GLSL ES 3.0, you use "out" variables instead.
-	GLSLShaderCompat compat{};
-	compat.glslES30 = false;
-	compat.varying = "varying";
-	compat.attribute = "attribute";
-	const char * const * boneWeightDecl = boneWeightAttrDecl;
-	compat.texelFetch = NULL;
+	char *p = buffer;
+	WRITE(p, "#version %d%s\n", compat.glslVersionNumber, compat.gles ? " es" : "");
+
 	bool highpFog = false;
 	bool highpTexcoord = false;
-
-	if (gl_extensions.IsGLES) {
-		if (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300)) {
-			WRITE(p, "#version 300 es\n");
-			compat.glslES30 = true;
-			compat.texelFetch = "texelFetch";
-		} else {
-			WRITE(p, "#version 100\n");  // GLSL ES 1.0
-			if (gl_extensions.EXT_gpu_shader4) {
-				WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n");
-				compat.texelFetch = "texelFetch2D";
-			}
-		}
-		WRITE(p, "precision highp float;\n");
-
+	if (compat.gles) {
 		// PowerVR needs highp to do the fog in MHU correctly.
 		// Others don't, and some can't handle highp in the fragment shader.
 		highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false;
 		highpTexcoord = highpFog;
-	} else {
-		if (!gl_extensions.ForceGL2 || gl_extensions.IsCoreContext) {
-			if (gl_extensions.VersionGEThan(3, 3, 0)) {
-				compat.glslES30 = true;
-				WRITE(p, "#version 330\n");
-				compat.texelFetch = "texelFetch";
-			} else if (gl_extensions.VersionGEThan(3, 0, 0)) {
-				WRITE(p, "#version 130\n");
-				if (gl_extensions.EXT_gpu_shader4) {
-					WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n");
-					compat.texelFetch = "texelFetch";
-				}
-			} else {
-				WRITE(p, "#version 110\n");
-				if (gl_extensions.EXT_gpu_shader4) {
-					WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n");
-					compat.texelFetch = "texelFetch2D";
-				}
-			}
-		}
+	}

-		// We remove these everywhere - GL4, GL3, Mac-forced-GL2, etc.
+	if (gl_extensions.EXT_gpu_shader4) {
+		WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n");
+	}
+
+	WRITE(p, "#define splat3(x) vec3(x)\n");
+
+	if (compat.gles) {
+		WRITE(p, "precision highp float;\n");
+	} else {
 		WRITE(p, "#define lowp\n");
 		WRITE(p, "#define mediump\n");
 		WRITE(p, "#define highp\n");
 	}
-	WRITE(p, "#define splat3(x) vec3(x)\n");
-
-	if (compat.glslES30 || gl_extensions.IsCoreContext) {
-		compat.attribute = "in";
-		compat.varying = "out";
-		boneWeightDecl = boneWeightInDecl;
-	}

 	bool isModeThrough = id.Bit(VS_BIT_IS_THROUGH);
 	bool lmode = id.Bit(VS_BIT_LMODE);
@ -206,6 +168,10 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, uint32_t *attrM
 	int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2);
 	if (enableBones) {
 		numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3);
+		const char * const * boneWeightDecl = boneWeightAttrDecl;
+		if (!strcmp(compat.attribute, "in")) {
+			boneWeightDecl = boneWeightInDecl;
+		}
 		WRITE(p, "%s", boneWeightDecl[numBoneWeights]);
 		*attrMask |= 1 << ATTR_W1;
 		if (numBoneWeights >= 5)
@ -336,21 +302,21 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, uint32_t *attrM
 		*uniformMask |= DIRTY_CULLRANGE;
 	}

-	WRITE(p, "%s%s lowp vec4 v_color0;\n", shading, compat.varying);
+	WRITE(p, "%s%s lowp vec4 v_color0;\n", shading, compat.varying_vs);
 	if (lmode) {
-		WRITE(p, "%s%s lowp vec3 v_color1;\n", shading, compat.varying);
+		WRITE(p, "%s%s lowp vec3 v_color1;\n", shading, compat.varying_vs);
 	}

 	if (doTexture) {
-		WRITE(p, "%s %s vec3 v_texcoord;\n", compat.varying, highpTexcoord ? "highp" : "mediump");
+		WRITE(p, "%s %s vec3 v_texcoord;\n", compat.varying_vs, highpTexcoord ? "highp" : "mediump");
 	}

 	if (enableFog) {
 		// See the fragment shader generator
 		if (highpFog) {
-			WRITE(p, "%s highp float v_fogdepth;\n", compat.varying);
+			WRITE(p, "%s highp float v_fogdepth;\n", compat.varying_vs);
 		} else {
-			WRITE(p, "%s mediump float v_fogdepth;\n", compat.varying);
+			WRITE(p, "%s mediump float v_fogdepth;\n", compat.varying_vs);
 		}
 	}

@ -518,30 +484,6 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, uint32_t *attrM
 				"w2.x", "w2.y", "w2.z", "w2.w",
 			};

-#if defined(USE_FOR_LOOP) && defined(USE_BONE_ARRAY)
-
-			// To loop through the weights, we unfortunately need to put them in a float array.
-			// GLSL ES sucks - no way to directly initialize an array!
-			switch (numBoneWeights) {
-			case 1: WRITE(p, "  float w[1]; w[0] = w1;\n"); break;
-			case 2: WRITE(p, "  float w[2]; w[0] = w1.x; w[1] = w1.y;\n"); break;
-			case 3: WRITE(p, "  float w[3]; w[0] = w1.x; w[1] = w1.y; w[2] = w1.z;\n"); break;
-			case 4: WRITE(p, "  float w[4]; w[0] = w1.x; w[1] = w1.y; w[2] = w1.z; w[3] = w1.w;\n"); break;
-			case 5: WRITE(p, "  float w[5]; w[0] = w1.x; w[1] = w1.y; w[2] = w1.z; w[3] = w1.w; w[4] = w2;\n"); break;
-			case 6: WRITE(p, "  float w[6]; w[0] = w1.x; w[1] = w1.y; w[2] = w1.z; w[3] = w1.w; w[4] = w2.x; w[5] = w2.y;\n"); break;
-			case 7: WRITE(p, "  float w[7]; w[0] = w1.x; w[1] = w1.y; w[2] = w1.z; w[3] = w1.w; w[4] = w2.x; w[5] = w2.y; w[6] = w2.z;\n"); break;
-			case 8: WRITE(p, "  float w[8]; w[0] = w1.x; w[1] = w1.y; w[2] = w1.z; w[3] = w1.w; w[4] = w2.x; w[5] = w2.y; w[6] = w2.z; w[7] = w2.w;\n"); break;
-			}
-
-			WRITE(p, "  mat4 skinMatrix = w[0] * u_bone[0];\n");
-			if (numBoneWeights > 1) {
-				WRITE(p, "  for (int i = 1; i < %i; i++) {\n", numBoneWeights);
-				WRITE(p, "    skinMatrix += w[i] * u_bone[i];\n");
-				WRITE(p, "  }\n");
-			}
-
-#else
-
 #ifdef USE_BONE_ARRAY
 			if (numBoneWeights == 1)
 				WRITE(p, "  mat4 skinMatrix = w1 * u_bone[0]");
@ -568,8 +510,6 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, uint32_t *attrM
 				if (numBoneWeights == 5 && i == 4) weightAttr = "w2";
 				WRITE(p, " + %s * u_bone%i", weightAttr, i);
 			}
-#endif
-
 #endif

 			WRITE(p, ";\n");
--- a/GPU/GLES/VertexShaderGeneratorGLES.h
+++ b/GPU/GLES/VertexShaderGeneratorGLES.h
@ -19,8 +19,6 @@

 #include "Common/CommonTypes.h"

-// #define USE_BONE_ARRAY
-
 struct VShaderID;

-bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, uint32_t *attrMask, uint64_t *uniformMask, std::string *errorString);
+bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShaderCompat &compat, uint32_t *attrMask, uint64_t *uniformMask, std::string *errorString);
--- a/GPU/GPU.vcxproj
+++ b/GPU/GPU.vcxproj
@ -476,7 +476,6 @@
    <ClInclude Include="Vulkan\DebugVisVulkan.h" />
    <ClInclude Include="Vulkan\DepalettizeShaderVulkan.h" />
    <ClInclude Include="Vulkan\DrawEngineVulkan.h" />
-    <ClInclude Include="Vulkan\FragmentShaderGeneratorVulkan.h" />
    <ClInclude Include="Vulkan\FramebufferManagerVulkan.h" />
    <ClInclude Include="Vulkan\GPU_Vulkan.h" />
    <ClInclude Include="Vulkan\PipelineManagerVulkan.h" />
@ -667,7 +666,6 @@
    <ClCompile Include="Vulkan\DebugVisVulkan.cpp" />
    <ClCompile Include="Vulkan\DepalettizeShaderVulkan.cpp" />
    <ClCompile Include="Vulkan\DrawEngineVulkan.cpp" />
-    <ClCompile Include="Vulkan\FragmentShaderGeneratorVulkan.cpp" />
    <ClCompile Include="Vulkan\FramebufferManagerVulkan.cpp" />
    <ClCompile Include="Vulkan\GPU_Vulkan.cpp" />
    <ClCompile Include="Vulkan\PipelineManagerVulkan.cpp" />
--- a/GPU/GPU.vcxproj.filters
+++ b/GPU/GPU.vcxproj.filters
@ -150,9 +150,6 @@
    <ClInclude Include="Vulkan\DrawEngineVulkan.h">
      <Filter>Vulkan</Filter>
    </ClInclude>
-    <ClInclude Include="Vulkan\FragmentShaderGeneratorVulkan.h">
-      <Filter>Vulkan</Filter>
-    </ClInclude>
    <ClInclude Include="Vulkan\FramebufferManagerVulkan.h">
      <Filter>Vulkan</Filter>
    </ClInclude>
@ -416,9 +413,6 @@
    <ClCompile Include="Vulkan\DrawEngineVulkan.cpp">
      <Filter>Vulkan</Filter>
    </ClCompile>
-    <ClCompile Include="Vulkan\FragmentShaderGeneratorVulkan.cpp">
-      <Filter>Vulkan</Filter>
-    </ClCompile>
    <ClCompile Include="Vulkan\GPU_Vulkan.cpp">
      <Filter>Vulkan</Filter>
    </ClCompile>
--- a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp
+++ b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp
@ -1,602 +0,0 @@
-// Copyright (c) 2012- PPSSPP Project.
-
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, version 2.0 or later versions.
-
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License 2.0 for more details.
-
-// A copy of the GPL 2.0 should have been included with the program.
-// If not, see http://www.gnu.org/licenses/
-
-// Official git repository and contact information can be found at
-// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
-
-#include <cstdio>
-#include <sstream>
-
-#include "Common/GPU/OpenGL/GLFeatures.h"
-
-#include "Common/Log.h"
-#include "Common/StringUtils.h"
-#include "Core/Reporting.h"
-#include "Core/Config.h"
-#include "GPU/Common/GPUStateUtils.h"
-#include "GPU/Common/ShaderId.h"
-#include "GPU/Vulkan/FragmentShaderGeneratorVulkan.h"
-#include "GPU/Vulkan/FramebufferManagerVulkan.h"
-#include "GPU/Vulkan/ShaderManagerVulkan.h"
-#include "GPU/Vulkan/PipelineManagerVulkan.h"
-
-#include "GPU/ge_constants.h"
-#include "GPU/GPUState.h"
-
-static const char *vulkan_glsl_preamble =
-	"#version 450\n"
-	"#extension GL_ARB_separate_shader_objects : enable\n"
-	"#extension GL_ARB_shading_language_420pack : enable\n"
-	"#extension GL_ARB_conservative_depth : enable\n"
-	"#extension GL_ARB_shader_image_load_store : enable\n\n";
-
-#define WRITE p+=sprintf
-
-bool GenerateFragmentShaderVulkanGLSL(const FShaderID &id, char *buffer, uint32_t vulkanVendorId, std::string *errorString) {
-	char *p = buffer;
-
-	const char *lastFragData = nullptr;
-
-	WRITE(p, "%s", vulkan_glsl_preamble);
-
-	bool lmode = id.Bit(FS_BIT_LMODE);
-	bool doTexture = id.Bit(FS_BIT_DO_TEXTURE);
-	bool enableFog = id.Bit(FS_BIT_ENABLE_FOG);
-	bool enableAlphaTest = id.Bit(FS_BIT_ALPHA_TEST);
-
-	bool alphaTestAgainstZero = id.Bit(FS_BIT_ALPHA_AGAINST_ZERO);
-	bool testForceToZero = id.Bit(FS_BIT_TEST_DISCARD_TO_ZERO);
-	bool enableColorTest = id.Bit(FS_BIT_COLOR_TEST);
-	bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO);
-	bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE);
-	bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ);
-	bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA);
-	bool doFlatShading = id.Bit(FS_BIT_FLATSHADE);
-	bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL);
-
-	GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3);
-	GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2);
-	bool needShaderTexClamp = id.Bit(FS_BIT_SHADER_TEX_CLAMP);
-
-	GETexFunc texFunc = (GETexFunc)id.Bits(FS_BIT_TEXFUNC, 3);
-	bool textureAtOffset = id.Bit(FS_BIT_TEXTURE_AT_OFFSET);
-
-	ReplaceBlendType replaceBlend = static_cast<ReplaceBlendType>(id.Bits(FS_BIT_REPLACE_BLEND, 3));
-	ReplaceAlphaType stencilToAlpha = static_cast<ReplaceAlphaType>(id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2));
-
-	GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4);
-	GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4);
-	GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3);
-	StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4);
-
-	bool isModeClear = id.Bit(FS_BIT_CLEARMODE);
-
-	const char *shading = doFlatShading ? "flat" : "";
-	bool earlyFragmentTests = ((!enableAlphaTest && !enableColorTest) || testForceToZero) && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
-	bool useAdrenoBugWorkaround = id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL);
-
-	if (earlyFragmentTests) {
-		WRITE(p, "layout (early_fragment_tests) in;\n");
-	} else if (useAdrenoBugWorkaround && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
-		WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n");
-	}
-
-	WRITE(p, "layout (std140, set = 0, binding = 3) uniform baseUBO {\n%s};\n", ub_baseStr);
-	if (doTexture) {
-		WRITE(p, "layout (binding = 0) uniform sampler2D tex;\n");
-	}
-
-	if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) {
-		if (replaceBlend == REPLACE_BLEND_COPY_FBO) {
-			WRITE(p, "layout (binding = 1) uniform sampler2D fbotex;\n");
-		}
-	}
-
-	if (shaderDepal) {
-		WRITE(p, "layout (binding = 2) uniform sampler2D pal;\n");
-	}
-
-	WRITE(p, "layout (location = 1) %s in vec4 v_color0;\n", shading);
-	if (lmode)
-		WRITE(p, "layout (location = 2) %s in vec3 v_color1;\n", shading);
-	if (enableFog) {
-		WRITE(p, "layout (location = 3) in float v_fogdepth;\n");
-	}
-	if (doTexture) {
-		WRITE(p, "layout (location = 0) in vec3 v_texcoord;\n");
-	}
-
-	if (enableAlphaTest && !alphaTestAgainstZero) {
-		WRITE(p, "int roundAndScaleTo255i(in float x) { return int(floor(x * 255.0 + 0.5)); }\n");
-	}
-	if (enableColorTest && !colorTestAgainstZero) {
-		WRITE(p, "ivec3 roundAndScaleTo255iv(in vec3 x) { return ivec3(floor(x * 255.0 + 0.5)); }\n");
-	}
-
-	WRITE(p, "layout (location = 0, index = 0) out vec4 fragColor0;\n");
-	if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) {
-		WRITE(p, "layout (location = 0, index = 1) out vec4 fragColor1;\n");
-	}
-
-	// PowerVR needs a custom modulo function. For some reason, this has far higher precision than the builtin one.
-	if ((gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) && needShaderTexClamp) {
-		WRITE(p, "float mymod(float a, float b) { return a - b * floor(a / b); }\n");
-	}
-
-	WRITE(p, "void main() {\n");
-	if (isModeClear) {
-		// Clear mode does not allow any fancy shading.
-		WRITE(p, "  vec4 v = v_color0;\n");
-	} else {
-		const char *secondary = "";
-		// Secondary color for specular on top of texture
-		if (lmode) {
-			WRITE(p, "  vec4 s = vec4(v_color1, 0.0);\n");
-			secondary = " + s";
-		} else {
-			secondary = "";
-		}
-
-		if (doTexture) {
-			const char *texcoord = "v_texcoord";
-			// TODO: Not sure the right way to do this for projection.
-			// This path destroys resolution on older PowerVR no matter what I do, so we disable it on SGX 540 and lesser, and live with the consequences.
-			if (needShaderTexClamp && !(gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_TERRIBLE)) {
-				// We may be clamping inside a larger surface (tex = 64x64, buffer=480x272).
-				// We may also be wrapping in such a surface, or either one in a too-small surface.
-				// Obviously, clamping to a smaller surface won't work.  But better to clamp to something.
-				std::string ucoord = "v_texcoord.x";
-				std::string vcoord = "v_texcoord.y";
-				if (doTextureProjection) {
-					ucoord = "(v_texcoord.x / v_texcoord.z)";
-					vcoord = "(v_texcoord.y / v_texcoord.z)";
-				}
-
-				std::string modulo = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? "mymod" : "mod";
-
-				if (id.Bit(FS_BIT_CLAMP_S)) {
-					ucoord = "clamp(" + ucoord + ", u_texclamp.z, u_texclamp.x - u_texclamp.z)";
-				} else {
-					ucoord = modulo + "(" + ucoord + ", u_texclamp.x)";
-				}
-				if (id.Bit(FS_BIT_CLAMP_T)) {
-					vcoord = "clamp(" + vcoord + ", u_texclamp.w, u_texclamp.y - u_texclamp.w)";
-				} else {
-					vcoord = modulo + "(" + vcoord + ", u_texclamp.y)";
-				}
-				if (textureAtOffset) {
-					ucoord = "(" + ucoord + " + u_texclampoff.x)";
-					vcoord = "(" + vcoord + " + u_texclampoff.y)";
-				}
-
-				WRITE(p, "  vec2 fixedcoord = vec2(%s, %s);\n", ucoord.c_str(), vcoord.c_str());
-				texcoord = "fixedcoord";
-				// We already projected it.
-				doTextureProjection = false;
-			}
-
-			if (!shaderDepal) {
-				if (doTextureProjection) {
-					WRITE(p, "  vec4 t = textureProj(tex, %s);\n", texcoord);
-				} else {
-					WRITE(p, "  vec4 t = texture(tex, %s.xy);\n", texcoord);
-				}
-			} else {
-				if (doTextureProjection) {
-					// We don't use textureProj because we need to manually offset from the divided coordinate to do filtering here.
-					// On older hardware it has the advantage of higher resolution math, but such old hardware can't run Vulkan.
-					WRITE(p, "  vec2 uv = %s.xy/%s.z;\n  vec2 uv_round;\n", texcoord, texcoord);
-				} else {
-					WRITE(p, "  vec2 uv = %s.xy;\n  vec2 uv_round;\n", texcoord);
-				}
-				WRITE(p, "  vec2 tsize = textureSize(tex, 0);\n");
-				WRITE(p, "  vec2 fraction;\n");
-				WRITE(p, "  bool bilinear = (u_depal_mask_shift_off_fmt >> 31) != 0;\n");
-				WRITE(p, "  if (bilinear) {\n");
-				WRITE(p, "    uv_round = uv * tsize - vec2(0.5, 0.5);\n");
-				WRITE(p, "    fraction = fract(uv_round);\n");
-				WRITE(p, "    uv_round = (uv_round - fraction + vec2(0.5, 0.5)) / tsize;\n");  // We want to take our four point samples at pixel centers.
-				WRITE(p, "  } else {\n");
-				WRITE(p, "    uv_round = uv;\n");
-				WRITE(p, "  }\n");
-				WRITE(p, "  vec4 t = texture(tex, uv_round);\n");
-				WRITE(p, "  vec4 t1 = textureOffset(tex, uv_round, ivec2(1, 0));\n");
-				WRITE(p, "  vec4 t2 = textureOffset(tex, uv_round, ivec2(0, 1));\n");
-				WRITE(p, "  vec4 t3 = textureOffset(tex, uv_round, ivec2(1, 1));\n");
-				WRITE(p, "  uint depalMask = (u_depal_mask_shift_off_fmt & 0xFF);\n");
-				WRITE(p, "  uint depalShift = (u_depal_mask_shift_off_fmt >> 8) & 0xFF;\n");
-				WRITE(p, "  uint depalOffset = ((u_depal_mask_shift_off_fmt >> 16) & 0xFF) << 4;\n");
-				WRITE(p, "  uint depalFmt = (u_depal_mask_shift_off_fmt >> 24) & 0x3;\n");
-				WRITE(p, "  uvec4 col; uint index0; uint index1; uint index2; uint index3;\n");
-				WRITE(p, "  switch (depalFmt) {\n");  // We might want to include fmt in the shader ID if this is a performance issue.
-				WRITE(p, "  case 0:\n");  // 565
-				WRITE(p, "    col = uvec4(t.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
-				WRITE(p, "    index0 = (col.b << 11) | (col.g << 5) | (col.r);\n");
-				WRITE(p, "    if (bilinear) {\n");
-				WRITE(p, "      col = uvec4(t1.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
-				WRITE(p, "      index1 = (col.b << 11) | (col.g << 5) | (col.r);\n");
-				WRITE(p, "      col = uvec4(t2.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
-				WRITE(p, "      index2 = (col.b << 11) | (col.g << 5) | (col.r);\n");
-				WRITE(p, "      col = uvec4(t3.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
-				WRITE(p, "      index3 = (col.b << 11) | (col.g << 5) | (col.r);\n");
-				WRITE(p, "    }\n");
-				WRITE(p, "    break;\n");
-				WRITE(p, "  case 1:\n");  // 5551
-				WRITE(p, "    col = uvec4(t.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
-				WRITE(p, "    index0 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n");
-				WRITE(p, "    if (bilinear) {\n");
-				WRITE(p, "      col = uvec4(t1.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
-				WRITE(p, "      index1 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n");
-				WRITE(p, "      col = uvec4(t2.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
-				WRITE(p, "      index2 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n");
-				WRITE(p, "      col = uvec4(t3.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
-				WRITE(p, "      index3 = (col.a << 15) | (col.b << 10) | (col.g << 5) | (col.r);\n");
-				WRITE(p, "    }\n");
-				WRITE(p, "    break;\n");
-				WRITE(p, "  case 2:\n");  // 4444
-				WRITE(p, "    col = uvec4(t.rgba * 15.99);\n");
-				WRITE(p, "    index0 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n");
-				WRITE(p, "    if (bilinear) {\n");
-				WRITE(p, "      col = uvec4(t1.rgba * 15.99);\n");
-				WRITE(p, "      index1 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n");
-				WRITE(p, "      col = uvec4(t2.rgba * 15.99);\n");
-				WRITE(p, "      index2 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n");
-				WRITE(p, "      col = uvec4(t3.rgba * 15.99);\n");
-				WRITE(p, "      index3 = (col.a << 12) | (col.b << 8) | (col.g << 4) | (col.r);\n");
-				WRITE(p, "    }\n");
-				WRITE(p, "    break;\n");
-				WRITE(p, "  case 3:\n");  // 8888
-				WRITE(p, "    col = uvec4(t.rgba * 255.99);\n");
-				WRITE(p, "    index0 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n");
-				WRITE(p, "    if (bilinear) {\n");
-				WRITE(p, "      col = uvec4(t1.rgba * 255.99);\n");
-				WRITE(p, "      index1 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n");
-				WRITE(p, "      col = uvec4(t2.rgba * 255.99);\n");
-				WRITE(p, "      index2 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n");
-				WRITE(p, "      col = uvec4(t3.rgba * 255.99);\n");
-				WRITE(p, "      index3 = (col.a << 24) | (col.b << 16) | (col.g << 8) | (col.r);\n");
-				WRITE(p, "    }\n");
-				WRITE(p, "    break;\n");
-				WRITE(p, "  };\n");
-				WRITE(p, "  index0 = ((index0 >> depalShift) & depalMask) | depalOffset;\n");
-				WRITE(p, "  t = texelFetch(pal, ivec2(index0, 0), 0);\n");
-				WRITE(p, "  if (bilinear && !(index0 == index1 && index1 == index2 && index2 == index3)) {\n");
-				WRITE(p, "    index1 = ((index1 >> depalShift) & depalMask) | depalOffset;\n");
-				WRITE(p, "    index2 = ((index2 >> depalShift) & depalMask) | depalOffset;\n");
-				WRITE(p, "    index3 = ((index3 >> depalShift) & depalMask) | depalOffset;\n");
-				WRITE(p, "    t1 = texelFetch(pal, ivec2(index1, 0), 0);\n");
-				WRITE(p, "    t2 = texelFetch(pal, ivec2(index2, 0), 0);\n");
-				WRITE(p, "    t3 = texelFetch(pal, ivec2(index3, 0), 0);\n");
-				WRITE(p, "    t = mix(t, t1, fraction.x);\n");
-				WRITE(p, "    t2 = mix(t2, t3, fraction.x);\n");
-				WRITE(p, "    t = mix(t, t2, fraction.y);\n");
-				WRITE(p, "  }\n");
-			}
-
-			if (texFunc != GE_TEXFUNC_REPLACE || !doTextureAlpha)
-				WRITE(p, "  vec4 p = v_color0;\n");
-
-			if (doTextureAlpha) { // texfmt == RGBA
-				switch (texFunc) {
-				case GE_TEXFUNC_MODULATE:
-					WRITE(p, "  vec4 v = p * t%s;\n", secondary);
-					break;
-
-				case GE_TEXFUNC_DECAL:
-					WRITE(p, "  vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a)%s;\n", secondary);
-					break;
-
-				case GE_TEXFUNC_BLEND:
-					WRITE(p, "  vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a)%s;\n", secondary);
-					break;
-
-				case GE_TEXFUNC_REPLACE:
-					WRITE(p, "  vec4 v = t%s;\n", secondary);
-					break;
-
-				case GE_TEXFUNC_ADD:
-				case GE_TEXFUNC_UNKNOWN1:
-				case GE_TEXFUNC_UNKNOWN2:
-				case GE_TEXFUNC_UNKNOWN3:
-					WRITE(p, "  vec4 v = vec4(p.rgb + t.rgb, p.a * t.a)%s;\n", secondary);
-					break;
-				default:
-					WRITE(p, "  vec4 v = p;\n"); break;
-				}
-			} else { // texfmt == RGB
-				switch (texFunc) {
-				case GE_TEXFUNC_MODULATE:
-					WRITE(p, "  vec4 v = vec4(t.rgb * p.rgb, p.a)%s;\n", secondary);
-					break;
-
-				case GE_TEXFUNC_DECAL:
-					WRITE(p, "  vec4 v = vec4(t.rgb, p.a)%s;\n", secondary);
-					break;
-
-				case GE_TEXFUNC_BLEND:
-					WRITE(p, "  vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a)%s;\n", secondary);
-					break;
-
-				case GE_TEXFUNC_REPLACE:
-					WRITE(p, "  vec4 v = vec4(t.rgb, p.a)%s;\n", secondary);
-					break;
-
-				case GE_TEXFUNC_ADD:
-				case GE_TEXFUNC_UNKNOWN1:
-				case GE_TEXFUNC_UNKNOWN2:
-				case GE_TEXFUNC_UNKNOWN3:
-					WRITE(p, "  vec4 v = vec4(p.rgb + t.rgb, p.a)%s;\n", secondary); break;
-				default:
-					WRITE(p, "  vec4 v = p;\n"); break;
-				}
-			}
-
-			if (enableColorDoubling) {
-				// This happens before fog is applied.
-				WRITE(p, "  v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n");
-			}
-		} else {
-			// No texture mapping
-			WRITE(p, "  vec4 v = v_color0 %s;\n", secondary);
-		}
-
-		// Texture access is at half texels [0.5/256, 255.5/256], but colors are normalized [0, 255].
-		// So we have to scale to account for the difference.
-		std::string alphaTestXCoord = "0";
-
-		const char *discardStatement = testForceToZero ? "v.a = 0.0;" : "discard;";
-		if (enableAlphaTest) {
-			if (alphaTestAgainstZero) {
-				// When testing against 0 (extremely common), we can avoid some math.
-				// 0.002 is approximately half of 1.0 / 255.0.
-				if (alphaTestFunc == GE_COMP_NOTEQUAL || alphaTestFunc == GE_COMP_GREATER) {
-					WRITE(p, "  if (v.a < 0.002) %s\n", discardStatement);
-				} else if (alphaTestFunc != GE_COMP_NEVER) {
-					// Anything else is a test for == 0.  Happens sometimes, actually...
-					WRITE(p, "  if (v.a > 0.002) %s\n", discardStatement);
-				} else {
-					// NEVER has been logged as used by games, although it makes little sense - statically failing.
-					// Maybe we could discard the drawcall, but it's pretty rare.  Let's just statically discard here.
-					WRITE(p, "  %s\n", discardStatement);
-				}
-			} else {
-				const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " };
-				if (alphaTestFuncs[alphaTestFunc][0] != '#') {
-					WRITE(p, "  if ((roundAndScaleTo255i(v.a) & u_alphacolormask.a) %s u_alphacolorref.a) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement);
-				} else {
-					// This means NEVER.  See above.
-					WRITE(p, "  %s\n", discardStatement);
-				}
-			}
-		}
-
-		if (enableFog) {
-			WRITE(p, "  float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
-			WRITE(p, "  v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
-			// WRITE(p, "  v.x = v_depth;\n");
-		}
-
-		if (enableColorTest) {
-			if (colorTestAgainstZero) {
-				// When testing against 0 (common), we can avoid some math.
-				// Have my doubts that this special case is actually worth it, but whatever.
-				// 0.002 is approximately half of 1.0 / 255.0.
-				if (colorTestFunc == GE_COMP_NOTEQUAL) {
-					WRITE(p, "  if (v.r + v.g + v.b < 0.002) %s\n", discardStatement);
-				} else if (colorTestFunc != GE_COMP_NEVER) {
-					// Anything else is a test for == 0.
-					WRITE(p, "  if (v.r + v.g + v.b > 0.002) %s\n", discardStatement);
-				} else {
-					// NEVER has been logged as used by games, although it makes little sense - statically failing.
-					// Maybe we could discard the drawcall, but it's pretty rare.  Let's just statically discard here.
-					WRITE(p, "  %s\n", discardStatement);
-				}
-			} else {
-				const char *colorTestFuncs[] = { "#", "#", " != ", " == " };
-				if (colorTestFuncs[colorTestFunc][0] != '#') {
-					WRITE(p, "  ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
-					WRITE(p, "  if ((v_scaled & u_alphacolormask.rgb) %s (u_alphacolorref.rgb & u_alphacolormask.rgb)) %s\n", colorTestFuncs[colorTestFunc], discardStatement);
-				} else {
-					WRITE(p, "  %s\n", discardStatement);
-				}
-			}
-		}
-
-		if (replaceBlend == REPLACE_BLEND_2X_SRC) {
-			WRITE(p, "  v.rgb = v.rgb * 2.0;\n");
-		}
-
-		if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
-			const char *srcFactor = "ERROR";
-			switch (replaceBlendFuncA) {
-			case GE_SRCBLEND_DSTCOLOR:          srcFactor = "ERROR"; break;
-			case GE_SRCBLEND_INVDSTCOLOR:       srcFactor = "ERROR"; break;
-			case GE_SRCBLEND_SRCALPHA:          srcFactor = "vec3(v.a)"; break;
-			case GE_SRCBLEND_INVSRCALPHA:       srcFactor = "vec3(1.0 - v.a)"; break;
-			case GE_SRCBLEND_DSTALPHA:          srcFactor = "ERROR"; break;
-			case GE_SRCBLEND_INVDSTALPHA:       srcFactor = "ERROR"; break;
-			case GE_SRCBLEND_DOUBLESRCALPHA:    srcFactor = "vec3(v.a * 2.0)"; break;
-			case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break;
-			case GE_SRCBLEND_DOUBLEDSTALPHA:    srcFactor = "ERROR"; break;
-			case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "ERROR"; break;
-			case GE_SRCBLEND_FIXA:              srcFactor = "u_blendFixA"; break;
-			}
-
-			if (!strcmp(srcFactor, "ERROR")) {
-				*errorString = "Bad replaceblend src factor";
-				return false;
-			}
-
-			WRITE(p, "  v.rgb = v.rgb * %s;\n", srcFactor);
-		}
-
-		if (replaceBlend == REPLACE_BLEND_COPY_FBO) {
-			WRITE(p, "  lowp vec4 destColor = texelFetch(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n");
-
-			const char *srcFactor = "vec3(1.0)";
-			const char *dstFactor = "vec3(0.0)";
-
-			switch (replaceBlendFuncA) {
-			case GE_SRCBLEND_DSTCOLOR:          srcFactor = "destColor.rgb"; break;
-			case GE_SRCBLEND_INVDSTCOLOR:       srcFactor = "(vec3(1.0) - destColor.rgb)"; break;
-			case GE_SRCBLEND_SRCALPHA:          srcFactor = "vec3(v.a)"; break;
-			case GE_SRCBLEND_INVSRCALPHA:       srcFactor = "vec3(1.0 - v.a)"; break;
-			case GE_SRCBLEND_DSTALPHA:          srcFactor = "vec3(destColor.a)"; break;
-			case GE_SRCBLEND_INVDSTALPHA:       srcFactor = "vec3(1.0 - destColor.a)"; break;
-			case GE_SRCBLEND_DOUBLESRCALPHA:    srcFactor = "vec3(v.a * 2.0)"; break;
-			case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break;
-			case GE_SRCBLEND_DOUBLEDSTALPHA:    srcFactor = "vec3(destColor.a * 2.0)"; break;
-			case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a * 2.0)"; break;
-			case GE_SRCBLEND_FIXA:              srcFactor = "u_blendFixA"; break;
-			}
-			switch (replaceBlendFuncB) {
-			case GE_DSTBLEND_SRCCOLOR:          dstFactor = "v.rgb"; break;
-			case GE_DSTBLEND_INVSRCCOLOR:       dstFactor = "(vec3(1.0) - v.rgb)"; break;
-			case GE_DSTBLEND_SRCALPHA:          dstFactor = "vec3(v.a)"; break;
-			case GE_DSTBLEND_INVSRCALPHA:       dstFactor = "vec3(1.0 - v.a)"; break;
-			case GE_DSTBLEND_DSTALPHA:          dstFactor = "vec3(destColor.a)"; break;
-			case GE_DSTBLEND_INVDSTALPHA:       dstFactor = "vec3(1.0 - destColor.a)"; break;
-			case GE_DSTBLEND_DOUBLESRCALPHA:    dstFactor = "vec3(v.a * 2.0)"; break;
-			case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "vec3(1.0 - v.a * 2.0)"; break;
-			case GE_DSTBLEND_DOUBLEDSTALPHA:    dstFactor = "vec3(destColor.a * 2.0)"; break;
-			case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a * 2.0)"; break;
-			case GE_DSTBLEND_FIXB:              dstFactor = "u_blendFixB"; break;
-			}
-
-			switch (replaceBlendEq) {
-			case GE_BLENDMODE_MUL_AND_ADD:
-				WRITE(p, "  v.rgb = v.rgb * %s + destColor.rgb * %s;\n", srcFactor, dstFactor);
-				break;
-			case GE_BLENDMODE_MUL_AND_SUBTRACT:
-				WRITE(p, "  v.rgb = v.rgb * %s - destColor.rgb * %s;\n", srcFactor, dstFactor);
-				break;
-			case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
-				WRITE(p, "  v.rgb = destColor.rgb * %s - v.rgb * %s;\n", dstFactor, srcFactor);
-				break;
-			case GE_BLENDMODE_MIN:
-				WRITE(p, "  v.rgb = min(v.rgb, destColor.rgb);\n");
-				break;
-			case GE_BLENDMODE_MAX:
-				WRITE(p, "  v.rgb = max(v.rgb, destColor.rgb);\n");
-				break;
-			case GE_BLENDMODE_ABSDIFF:
-				WRITE(p, "  v.rgb = abs(v.rgb - destColor.rgb);\n");
-				break;
-			}
-		}
-
-		if (replaceBlend == REPLACE_BLEND_2X_ALPHA || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
-			WRITE(p, "  v.a = v.a * 2.0;\n");
-		}
-	}
-
-	std::string replacedAlpha = "0.0";
-	char replacedAlphaTemp[64] = "";
-	if (stencilToAlpha != REPLACE_ALPHA_NO) {
-		switch (replaceAlphaWithStencilType) {
-		case STENCIL_VALUE_UNIFORM:
-			replacedAlpha = "u_stencilReplaceValue";
-			break;
-
-		case STENCIL_VALUE_ZERO:
-			replacedAlpha = "0.0";
-			break;
-
-		case STENCIL_VALUE_ONE:
-		case STENCIL_VALUE_INVERT:
-			// In invert, we subtract by one, but we want to output one here.
-			replacedAlpha = "1.0";
-			break;
-
-		case STENCIL_VALUE_INCR_4:
-		case STENCIL_VALUE_DECR_4:
-			// We're adding/subtracting, just by the smallest value in 4-bit.
-			snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 15.0);
-			replacedAlpha = replacedAlphaTemp;
-			break;
-
-		case STENCIL_VALUE_INCR_8:
-		case STENCIL_VALUE_DECR_8:
-			// We're adding/subtracting, just by the smallest value in 8-bit.
-			snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 255.0);
-			replacedAlpha = replacedAlphaTemp;
-			break;
-
-		case STENCIL_VALUE_KEEP:
-			// Do nothing. We'll mask out the alpha using color mask.
-			break;
-		}
-	}
-
-	switch (stencilToAlpha) {
-	case REPLACE_ALPHA_DUALSOURCE:
-		WRITE(p, "  fragColor0 = vec4(v.rgb, %s);\n", replacedAlpha.c_str());
-		WRITE(p, "  fragColor1 = vec4(0.0, 0.0, 0.0, v.a);\n");
-		break;
-
-	case REPLACE_ALPHA_YES:
-		WRITE(p, "  fragColor0 = vec4(v.rgb, %s);\n", replacedAlpha.c_str());
-		break;
-
-	case REPLACE_ALPHA_NO:
-		WRITE(p, "  fragColor0 = v;\n");
-		break;
-
-	default:
-		*errorString = "Bad stencil-to-alpha type, corrupt ID?";
-		return false;
-	}
-
-	LogicOpReplaceType replaceLogicOpType = (LogicOpReplaceType)id.Bits(FS_BIT_REPLACE_LOGIC_OP_TYPE, 2);
-	switch (replaceLogicOpType) {
-	case LOGICOPTYPE_ONE:
-		WRITE(p, "  fragColor0.rgb = vec3(1.0, 1.0, 1.0);\n");
-		break;
-	case LOGICOPTYPE_INVERT:
-		WRITE(p, "  fragColor0.rgb = vec3(1.0, 1.0, 1.0) - fragColor0.rgb;\n");
-		break;
-	case LOGICOPTYPE_NORMAL:
-		break;
-
-	default:
-		*errorString = "Bad logic op type, corrupt ID?";
-		return false;
-	}
-
-	if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
-		const double scale = DepthSliceFactor() * 65535.0;
-
-		WRITE(p, "  highp float z = gl_FragCoord.z;\n");
-		if (gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
-			// We center the depth with an offset, but only its fraction matters.
-			// When (DepthSliceFactor() - 1) is odd, it will be 0.5, otherwise 0.
-			if (((int)(DepthSliceFactor() - 1.0f) & 1) == 1) {
-				WRITE(p, "  z = (floor((z * %f) - (1.0 / 2.0)) + (1.0 / 2.0)) * (1.0 / %f);\n", scale, scale);
-			} else {
-				WRITE(p, "  z = floor(z * %f) * (1.0 / %f);\n", scale, scale);
-			}
-		} else {
-			WRITE(p, "  z = (1.0/65535.0) * floor(z * 65535.0);\n");
-		}
-		WRITE(p, "  gl_FragDepth = z;\n");
-	} else if (!earlyFragmentTests && useAdrenoBugWorkaround) {
-		// Adreno (and possibly MESA/others) apply early frag tests even with discard in the shader.
-		// Writing depth prevents the bug, even with depth_unchanged specified.
-		WRITE(p, "  gl_FragDepth = gl_FragCoord.z;\n");
-	}
-
-	WRITE(p, "}\n");
-
-	return true;
-}
--- a/GPU/Vulkan/FragmentShaderGeneratorVulkan.h
+++ b/GPU/Vulkan/FragmentShaderGeneratorVulkan.h
@ -1,23 +0,0 @@
-#pragma once
-// Copyright (c) 2012- PPSSPP Project.
-
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, version 2.0 or later versions.
-
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License 2.0 for more details.
-
-// A copy of the GPL 2.0 should have been included with the program.
-// If not, see http://www.gnu.org/licenses/
-
-// Official git repository and contact information can be found at
-// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
-
-#pragma once
-
-struct FShaderID;
-
-bool GenerateFragmentShaderVulkanGLSL(const FShaderID &id, char *buffer, uint32_t vulkanVendorId, std::string *errorString);
--- a/GPU/Vulkan/ShaderManagerVulkan.cpp
+++ b/GPU/Vulkan/ShaderManagerVulkan.cpp
@ -39,7 +39,7 @@
 #include "GPU/Vulkan/ShaderManagerVulkan.h"
 #include "GPU/Vulkan/DrawEngineVulkan.h"
 #include "GPU/Vulkan/FramebufferManagerVulkan.h"
-#include "GPU/Vulkan/FragmentShaderGeneratorVulkan.h"
+#include "GPU/GLES/FragmentShaderGeneratorGLES.h"
 #include "GPU/Vulkan/VertexShaderGeneratorVulkan.h"

 VulkanFragmentShader::VulkanFragmentShader(VulkanContext *vulkan, FShaderID id, const char *code)
@ -168,6 +168,8 @@ ShaderManagerVulkan::ShaderManagerVulkan(Draw::DrawContext *draw, VulkanContext
 	static_assert(sizeof(ub_base) <= 512, "ub_base grew too big");
 	static_assert(sizeof(ub_lights) <= 512, "ub_lights grew too big");
 	static_assert(sizeof(ub_bones) <= 384, "ub_bones grew too big");
+
+	compat_.SetupForVulkan();
 }

 ShaderManagerVulkan::~ShaderManagerVulkan() {
@ -271,10 +273,11 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader

 	VulkanFragmentShader *fs = fsCache_.Get(FSID);
 	if (!fs) {
-		uint32_t vendorID = vulkan_->GetPhysicalDeviceProperties().properties.vendorID;
+		// uint32_t vendorID = vulkan_->GetPhysicalDeviceProperties().properties.vendorID;
 		// Fragment shader not in cache. Let's compile it.
 		std::string genErrorString;
-		bool success = GenerateFragmentShaderVulkanGLSL(FSID, codeBuffer_, vendorID, &genErrorString);
+		uint64_t uniformMask = 0;  // Not used
+		bool success = GenerateFragmentShaderGLSL(FSID, codeBuffer_, compat_, &uniformMask, &genErrorString);
 		_assert_(success);
 		fs = new VulkanFragmentShader(vulkan_, FSID, codeBuffer_);
 		fsCache_.Insert(FSID, fs);
@ -399,6 +402,8 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) {
 		vsCache_.Insert(id, vs);
 	}
 	uint32_t vendorID = vulkan_->GetPhysicalDeviceProperties().properties.vendorID;
+	GLSLShaderCompat compat{};
+	compat.SetupForVulkan();
 	for (int i = 0; i < header.numFragmentShaders; i++) {
 		FShaderID id;
 		if (fread(&id, sizeof(id), 1, f) != 1) {
@ -406,7 +411,8 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) {
 			break;
 		}
 		std::string genErrorString;
-		if (!GenerateFragmentShaderVulkanGLSL(id, codeBuffer_, vendorID, &genErrorString)) {
+		uint64_t uniformMask = 0;
+		if (!GenerateFragmentShaderGLSL(id, codeBuffer_, compat, &uniformMask, &genErrorString)) {
 			return false;
 		}
 		VulkanFragmentShader *fs = new VulkanFragmentShader(vulkan_, id, codeBuffer_);
--- a/GPU/Vulkan/ShaderManagerVulkan.h
+++ b/GPU/Vulkan/ShaderManagerVulkan.h
@ -25,7 +25,7 @@
 #include "GPU/Common/ShaderCommon.h"
 #include "GPU/Common/ShaderId.h"
 #include "GPU/Vulkan/VertexShaderGeneratorVulkan.h"
-#include "GPU/Vulkan/FragmentShaderGeneratorVulkan.h"
+#include "GPU/GLES/FragmentShaderGeneratorGLES.h"
 #include "GPU/Vulkan/VulkanUtil.h"
 #include "Common/Math/lin/matrix4x4.h"
 #include "GPU/Common/ShaderUniforms.h"
@ -131,6 +131,7 @@ private:
 	void Clear();

 	VulkanContext *vulkan_;
+	GLSLShaderCompat compat_{};

 	typedef DenseHashMap<FShaderID, VulkanFragmentShader *, nullptr> FSCache;
 	FSCache fsCache_;
--- a/GPU/Vulkan/StencilBufferVulkan.cpp
+++ b/GPU/Vulkan/StencilBufferVulkan.cpp
@ -22,7 +22,6 @@
 #include "Core/Reporting.h"
 #include "GPU/Common/StencilCommon.h"
 #include "GPU/Vulkan/FramebufferManagerVulkan.h"
-#include "GPU/Vulkan/FragmentShaderGeneratorVulkan.h"
 #include "GPU/Vulkan/ShaderManagerVulkan.h"
 #include "GPU/Vulkan/TextureCacheVulkan.h"
 #include "GPU/Vulkan/VulkanUtil.h"
--- a/GPU/Vulkan/TextureCacheVulkan.cpp
+++ b/GPU/Vulkan/TextureCacheVulkan.cpp
@ -45,7 +45,6 @@
 #include "GPU/Common/TextureDecoder.h"
 #include "GPU/Vulkan/TextureCacheVulkan.h"
 #include "GPU/Vulkan/FramebufferManagerVulkan.h"
-#include "GPU/Vulkan/FragmentShaderGeneratorVulkan.h"
 #include "GPU/Vulkan/DepalettizeShaderVulkan.h"
 #include "GPU/Vulkan/ShaderManagerVulkan.h"
 #include "GPU/Vulkan/DrawEngineVulkan.h"
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@ -125,7 +125,6 @@ VULKAN_FILES := \
  $(SRC)/Common/GPU/Vulkan/VulkanDebug.cpp \
  $(SRC)/Common/GPU/Vulkan/VulkanImage.cpp \
  $(SRC)/Common/GPU/Vulkan/VulkanMemory.cpp \
-  $(SRC)/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp \
  $(SRC)/GPU/Vulkan/DrawEngineVulkan.cpp \
  $(SRC)/GPU/Vulkan/FramebufferManagerVulkan.cpp \
  $(SRC)/GPU/Vulkan/GPU_Vulkan.cpp \
--- a/libretro/Makefile.common
+++ b/libretro/Makefile.common
@ -639,7 +639,6 @@ SOURCES_C += \
 SOURCES_CXX += \
 	$(GPUDIR)/Vulkan/DepalettizeShaderVulkan.cpp \
 	$(GPUDIR)/Vulkan/DrawEngineVulkan.cpp \
-	$(GPUDIR)/Vulkan/FragmentShaderGeneratorVulkan.cpp \
 	$(GPUDIR)/Vulkan/FramebufferManagerVulkan.cpp \
 	$(GPUDIR)/Vulkan/GPU_Vulkan.cpp \
 	$(GPUDIR)/Vulkan/PipelineManagerVulkan.cpp \
--- a/unittest/TestShaderGenerators.cpp
+++ b/unittest/TestShaderGenerators.cpp
@ -1,3 +1,5 @@
+#include <algorithm>
+
 #include "Common/StringUtils.h"

 #include "GPU/Common/ShaderId.h"
@ -6,7 +8,6 @@

 #include "GPU/Vulkan/VulkanContext.h"

-#include "GPU/Vulkan/FragmentShaderGeneratorVulkan.h"
 #include "GPU/Directx9/FragmentShaderGeneratorHLSL.h"
 #include "GPU/GLES/FragmentShaderGeneratorGLES.h"

@ -20,6 +21,7 @@
 #include "GPU/D3D9/D3DCompilerLoader.h"
 #include "GPU/D3D9/D3D9ShaderCompiler.h"

+
 bool GenerateFShader(FShaderID id, char *buffer, ShaderLanguage lang, std::string *errorString) {
 	switch (lang) {
 	case ShaderLanguage::HLSL_D3D11:
@ -29,7 +31,12 @@ bool GenerateFShader(FShaderID id, char *buffer, ShaderLanguage lang, std::strin
 		// TODO: Need a device :(  Returning false here so it doesn't get tried.
 		return false;
 	case ShaderLanguage::GLSL_VULKAN:
-		return GenerateFragmentShaderVulkanGLSL(id, buffer, 0, errorString);
+	{
+		GLSLShaderCompat compat{};
+		compat.SetupForVulkan();
+		uint64_t uniformMask = 0;  // Result is not read here; zero-initialized defensively.
+		return GenerateFragmentShaderGLSL(id, buffer, compat, &uniformMask, errorString);
+	}
 	case ShaderLanguage::GLSL_140:
 	case ShaderLanguage::GLSL_300:
 		// TODO: Need a device - except that maybe glslang could be used to verify these ....
@ -75,24 +82,50 @@ bool TestCompileShader(const char *buffer, ShaderLanguage lang, bool vertex) {
 		return result;
 	}
 	case ShaderLanguage::GLSL_140:
-
 		return false;
 	case ShaderLanguage::GLSL_300:
-
 		return false;
+
 	default:
 		return false;
 	}
 }

+// Stupidest diff ever: prints the first differing line of a and b with some context. Only the common line range is compared.
+void PrintDiff(const char *a, const char *b) {
+	std::vector<std::string> a_lines;
+	std::vector<std::string> b_lines;
+	SplitString(a, '\n', a_lines);
+	SplitString(b, '\n', b_lines);
+	for (size_t i = 0; i < a_lines.size() && i < b_lines.size(); i++) {
+		if (a_lines[i] != b_lines[i]) {
+			// Up to four lines of leading context; identical in both inputs, so print a's copy.
+			for (size_t j = std::max((int)i - 4, 0); j < i; j++) {
+				printf("%s\n", a_lines[j].c_str());
+			}
+			printf("DIFF found at line %d:\n", (int)i);
+			printf("a: %s\n", a_lines[i].c_str());
+			printf("b: %s\n", b_lines[i].c_str());
+			printf("...continues...\n");
+			for (size_t j = i; j < i + 4 && j < a_lines.size() && j < b_lines.size(); j++) {  // Either input may end first.
+				printf("a: %s\n", a_lines[j].c_str());
+				printf("b: %s\n", b_lines[j].c_str());
+			}
+			printf("==================\n");
+			return;
+		}
+	}
+}
+
+
 bool TestShaderGenerators() {
 	LoadD3D11();
 	init_glslang();
 	LoadD3DCompilerDynamic();

 	ShaderLanguage languages[] = {
-		ShaderLanguage::HLSL_D3D11,
 		ShaderLanguage::GLSL_VULKAN,
+		ShaderLanguage::HLSL_D3D11,
 		ShaderLanguage::GLSL_140,
 		ShaderLanguage::GLSL_300,
 		ShaderLanguage::HLSL_DX9,
@ -106,7 +139,7 @@ bool TestShaderGenerators() {
 	}
 	GMRng rng;
 	int successes = 0;
-	int count = 200;
+	int count = 700;

 	// Generate a bunch of random fragment shader IDs, try to generate shader source.
 	// Then compile it and check that it's ok.
@ -118,16 +151,31 @@ bool TestShaderGenerators() {
 		id.d[1] = top;

 		bool generateSuccess[numLanguages]{};
+		std::string genErrorString[numLanguages];

 		for (int j = 0; j < numLanguages; j++) {
-			std::string genErrorString;
-			generateSuccess[j] = GenerateFShader(id, buffer[j], languages[j], &genErrorString);
-			if (!genErrorString.empty()) {
-				printf("%s\n", genErrorString.c_str());
+			generateSuccess[j] = GenerateFShader(id, buffer[j], languages[j], &genErrorString[j]);
+			if (!genErrorString[j].empty()) {
+				printf("%s\n", genErrorString[j].c_str());
 			}
 			// We ignore the contents of the error string here, not even gonna try to compile if it errors.
 		}

+		/*
+		// KEEPING FOR REUSE LATER: Defunct temporary test: Compare GLSL-in-Vulkan-mode vs Vulkan
+		if (generateSuccess[0] != generateSuccess[1]) {
+			printf("mismatching success! %s %s\n", genErrorString[0].c_str(), genErrorString[1].c_str());
+			printf("%s\n", buffer[0]);
+			printf("%s\n", buffer[1]);
+			return 1;
+		}
+		if (generateSuccess[0] && strcmp(buffer[0], buffer[1])) {
+			printf("mismatching shaders!\n");
+			PrintDiff(buffer[0], buffer[1]);
+			return 1;
+		}
+		*/
+
 		// Now that we have the strings ready for easy comparison (buffer,4 in the watch window),
 		// let's try to compile them.
 		for (int j = 0; j < numLanguages; j++) {