From c2a6090c3f53ff3c94adaf4cd4c64157d3043f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 30 Oct 2020 10:22:51 +0100 Subject: [PATCH] More D3D9 fixes. --- GPU/Common/ShaderId.h | 7 +- GPU/Directx9/FragmentShaderGeneratorHLSL.h | 15 +-- GPU/GLES/FragmentShaderGeneratorGLES.cpp | 146 +++++++++++++++++---- GPU/GLES/FragmentShaderGeneratorGLES.h | 18 +++ GPU/GLES/VertexShaderGeneratorGLES.cpp | 2 +- unittest/TestShaderGenerators.cpp | 7 +- 6 files changed, 149 insertions(+), 46 deletions(-) diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index 60eba9e460..e6cf46be68 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -164,10 +164,9 @@ protected: } } void SetBits(int bit, int count, int value) { - if (value != 0) { - const int mask = (1 << count) - 1; - d[bit >> 5] |= (value & mask) << (bit & 31); - } + const int mask = (1 << count) - 1; + const int shifted_mask = mask << (bit & 31); + d[bit >> 5] = (d[bit >> 5] & ~shifted_mask) | ((value & mask) << (bit & 31)); } }; diff --git a/GPU/Directx9/FragmentShaderGeneratorHLSL.h b/GPU/Directx9/FragmentShaderGeneratorHLSL.h index 5dcaa52e54..a3ec82cf14 100644 --- a/GPU/Directx9/FragmentShaderGeneratorHLSL.h +++ b/GPU/Directx9/FragmentShaderGeneratorHLSL.h @@ -19,19 +19,6 @@ #include "GPU/Common/ShaderId.h" #include "GPU/Common/ShaderCommon.h" +#include "GPU/GLES/FragmentShaderGeneratorGLES.h" bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguage lang, std::string *errorString); - -#define CONST_PS_TEXENV 0 -#define CONST_PS_ALPHACOLORREF 1 -#define CONST_PS_ALPHACOLORMASK 2 -#define CONST_PS_FOGCOLOR 3 -#define CONST_PS_STENCILREPLACE 4 -#define CONST_PS_BLENDFIXA 5 -#define CONST_PS_BLENDFIXB 6 -#define CONST_PS_FBOTEXSIZE 7 -#define CONST_PS_TEXCLAMP 8 -#define CONST_PS_TEXCLAMPOFF 9 - -// For stencil upload -#define CONST_PS_STENCILVALUE 10 diff --git a/GPU/GLES/FragmentShaderGeneratorGLES.cpp b/GPU/GLES/FragmentShaderGeneratorGLES.cpp index 
e190bb70a5..fdb795f6c6 100644 --- a/GPU/GLES/FragmentShaderGeneratorGLES.cpp +++ b/GPU/GLES/FragmentShaderGeneratorGLES.cpp @@ -202,29 +202,75 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) { WRITE(p, "layout (location = 0, index = 1) out vec4 fragColor1;\n"); } - } else if (compat.shaderLanguage == ShaderLanguage::HLSL_D3D11) { - WRITE(p, "SamplerState samp : register(s0);\n"); - WRITE(p, "Texture2D tex : register(t0);\n"); - if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) { - if (replaceBlend == REPLACE_BLEND_COPY_FBO) { - // No sampler required, we Load - WRITE(p, "Texture2D fboTex : register(t1);\n"); + } else if (compat.shaderLanguage == HLSL_D3D11 || compat.shaderLanguage == HLSL_D3D9) { + if (compat.shaderLanguage == HLSL_D3D9) { + if (doTexture) + WRITE(p, "sampler tex : register(s0);\n"); + if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) { + if (replaceBlend == REPLACE_BLEND_COPY_FBO) { + WRITE(p, "vec2 u_fbotexSize : register(c%i);\n", CONST_PS_FBOTEXSIZE); + WRITE(p, "sampler fbotex : register(s1);\n"); + } + if (replaceBlendFuncA >= GE_SRCBLEND_FIXA) { + WRITE(p, "float3 u_blendFixA : register(c%i);\n", CONST_PS_BLENDFIXA); + } + if (replaceBlendFuncB >= GE_DSTBLEND_FIXB) { + WRITE(p, "float3 u_blendFixB : register(c%i);\n", CONST_PS_BLENDFIXB); + } } + if (gstate_c.needShaderTexClamp && doTexture) { + WRITE(p, "vec4 u_texclamp : register(c%i);\n", CONST_PS_TEXCLAMP); + if (textureAtOffset) { + WRITE(p, "vec2 u_texclampoff : register(c%i);\n", CONST_PS_TEXCLAMPOFF); + } + } + + if (enableAlphaTest || enableColorTest) { + WRITE(p, "vec4 u_alphacolorref : register(c%i);\n", CONST_PS_ALPHACOLORREF); + WRITE(p, "vec4 u_alphacolormask : register(c%i);\n", CONST_PS_ALPHACOLORMASK); + } + if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) { + WRITE(p, "float u_stencilReplaceValue : register(c%i);\n", CONST_PS_STENCILREPLACE); + } 
+ if (doTexture && texFunc == GE_TEXFUNC_BLEND) { + WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV); + } + if (enableFog) { + WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR); + } + } else { + WRITE(p, "SamplerState samp : register(s0);\n"); + WRITE(p, "Texture2D tex : register(t0);\n"); + if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) { + if (replaceBlend == REPLACE_BLEND_COPY_FBO) { + // No sampler required, we Load + WRITE(p, "Texture2D fboTex : register(t1);\n"); + } + } + WRITE(p, "cbuffer base : register(b0) {\n%s};\n", cb_baseStr); } - WRITE(p, "cbuffer base : register(b0) {\n%s};\n", cb_baseStr); if (enableAlphaTest) { - WRITE(p, "int roundAndScaleTo255i(float x) { return int(floor(x * 255.0f + 0.5f)); }\n"); + if (compat.shaderLanguage == HLSL_D3D11) { + WRITE(p, "int roundAndScaleTo255i(float x) { return int(floor(x * 255.0f + 0.5f)); }\n"); + } else { + // D3D11 level 9 gets to take this path. + WRITE(p, "float roundAndScaleTo255f(float x) { return floor(x * 255.0f + 0.5f); }\n"); + } } if (enableColorTest) { - WRITE(p, "uvec3 roundAndScaleTo255iv(float3 x) { return uvec3(floor(x * 255.0f + 0.5f)); }\n"); + if (compat.shaderLanguage == HLSL_D3D11) { + WRITE(p, "uvec3 roundAndScaleTo255iv(float3 x) { return uvec3(floor(x * 255.0f + 0.5f)); }\n"); + } else { + WRITE(p, "vec3 roundAndScaleTo255v(float3 x) { return floor(x * 255.0f + 0.5f); }\n"); + } } WRITE(p, "struct PS_IN {\n"); if (doTexture) { WRITE(p, " vec3 v_texcoord: TEXCOORD0;\n"); } - const char *colorInterpolation = doFlatShading ? "nointerpolation " : ""; + const char *colorInterpolation = doFlatShading && compat.shaderLanguage == HLSL_D3D11 ? 
"nointerpolation " : ""; WRITE(p, " %svec4 v_color0: COLOR0;\n", colorInterpolation); if (lmode) { WRITE(p, " vec3 v_color1: COLOR1;\n"); @@ -232,22 +278,60 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha if (enableFog) { WRITE(p, " float v_fogdepth: TEXCOORD1;\n"); } - if ((replaceBlend == REPLACE_BLEND_COPY_FBO) || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) { + if (compat.shaderLanguage == HLSL_D3D11 && ((replaceBlend == REPLACE_BLEND_COPY_FBO) || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT))) { WRITE(p, " vec4 pixelPos : SV_POSITION;\n"); } WRITE(p, "};\n"); - WRITE(p, "struct PS_OUT {\n"); - if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) { - WRITE(p, " vec4 target : SV_Target0;\n"); - WRITE(p, " vec4 target1 : SV_Target1;\n"); - } else { - WRITE(p, " vec4 target : SV_Target;\n"); + if (compat.shaderLanguage == HLSL_D3D11) { + WRITE(p, "struct PS_OUT {\n"); + if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) { + WRITE(p, " vec4 target : SV_Target0;\n"); + WRITE(p, " vec4 target1 : SV_Target1;\n"); + } else { + WRITE(p, " vec4 target : SV_Target;\n"); + } + if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) { + WRITE(p, " float depth : SV_DEPTH;\n"); + } + WRITE(p, "};\n"); } - if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) { - WRITE(p, " float depth : SV_DEPTH;\n"); + } else if (compat.shaderLanguage == HLSL_D3D9) { + if (doTexture) + WRITE(p, "sampler tex : register(s0);\n"); + if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) { + if (replaceBlend == REPLACE_BLEND_COPY_FBO) { + WRITE(p, "vec2 u_fbotexSize : register(c%i);\n", CONST_PS_FBOTEXSIZE); + WRITE(p, "sampler fbotex : register(s1);\n"); + } + if (replaceBlendFuncA >= GE_SRCBLEND_FIXA) { + WRITE(p, "float3 u_blendFixA : register(c%i);\n", CONST_PS_BLENDFIXA); + } + if (replaceBlendFuncB >= GE_DSTBLEND_FIXB) { + WRITE(p, "float3 u_blendFixB : register(c%i);\n", CONST_PS_BLENDFIXB); + } } - WRITE(p, "};\n"); + if 
(gstate_c.needShaderTexClamp && doTexture) { + WRITE(p, "vec4 u_texclamp : register(c%i);\n", CONST_PS_TEXCLAMP); + if (textureAtOffset) { + WRITE(p, "vec2 u_texclampoff : register(c%i);\n", CONST_PS_TEXCLAMPOFF); + } + } + + if (enableAlphaTest || enableColorTest) { + WRITE(p, "vec4 u_alphacolorref : register(c%i);\n", CONST_PS_ALPHACOLORREF); + WRITE(p, "vec4 u_alphacolormask : register(c%i);\n", CONST_PS_ALPHACOLORMASK); + } + if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) { + WRITE(p, "float u_stencilReplaceValue : register(c%i);\n", CONST_PS_STENCILREPLACE); + } + if (doTexture && texFunc == GE_TEXFUNC_BLEND) { + WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV); + } + if (enableFog) { + WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR); + } + } else if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) { if (shaderDepal && gl_extensions.IsGLES) { WRITE(p, "precision highp int;\n"); @@ -362,9 +446,11 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha WRITE(p, "float mymod(float a, float b) { return a - b * floor(a / b); }\n"); } - if (compat.shaderLanguage == HLSL_D3D11 || compat.shaderLanguage == HLSL_D3D9) { + if (compat.shaderLanguage == HLSL_D3D11) { WRITE(p, "PS_OUT main( PS_IN In ) {\n"); - WRITE(p, " PS_OUT outfragment;\n"); + WRITE(p, " PS_OUT outfragment;\n"); + } else if (compat.shaderLanguage == HLSL_D3D9) { + WRITE(p, "vec4 main( PS_IN In ) : COLOR {\n"); } else { WRITE(p, "void main() {\n"); } @@ -433,13 +519,19 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha } else { WRITE(p, " vec4 t = tex.Sample(samp, %s.xy)%s;\n", texcoord.c_str(), bgraTexture ? ".bgra" : ""); } + } else if (compat.shaderLanguage == HLSL_D3D9) { + if (doTextureProjection) { + WRITE(p, " vec4 t = tex2Dproj(tex, vec4(v_texcoord.x, v_texcoord.y, 0, v_texcoord.z))%s;\n", bgraTexture ? 
".bgra" : ""); + } else { + WRITE(p, " vec4 t = tex2D(tex, %s.xy)%s;\n", texcoord.c_str(), bgraTexture ? ".bgra" : ""); + } } else { if (doTextureProjection) { WRITE(p, " vec4 t = %sProj(tex, %s);\n", compat.texture, texcoord.c_str()); } else { WRITE(p, " vec4 t = %s(tex, %s.xy);\n", compat.texture, texcoord.c_str()); } - } + } } else { if (doTextureProjection) { // We don't use textureProj because we need better control and it's probably not much of a savings anyway. @@ -918,12 +1010,14 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) { WRITE(p, " %s = vec4(v.rgb, %s);\n", compat.fragColor0, replacedAlpha.c_str()); WRITE(p, " %s = vec4(0.0, 0.0, 0.0, v.a);\n", compat.fragColor1); - } else { + } else if (compat.shaderLanguage != HLSL_D3D9) { WRITE(p, " %s = v;\n", compat.fragColor0); } - if (compat.shaderLanguage == HLSL_D3D11 || compat.shaderLanguage == HLSL_D3D9) { + if (compat.shaderLanguage == HLSL_D3D11) { WRITE(p, " return outfragment;\n"); + } else if (compat.shaderLanguage == HLSL_D3D9) { + WRITE(p, " return v;\n"); } WRITE(p, "}\n"); diff --git a/GPU/GLES/FragmentShaderGeneratorGLES.h b/GPU/GLES/FragmentShaderGeneratorGLES.h index fde1158e93..a4c743ca04 100644 --- a/GPU/GLES/FragmentShaderGeneratorGLES.h +++ b/GPU/GLES/FragmentShaderGeneratorGLES.h @@ -21,4 +21,22 @@ struct FShaderID; +// D3D9 constants + + +#define CONST_PS_TEXENV 0 +#define CONST_PS_ALPHACOLORREF 1 +#define CONST_PS_ALPHACOLORMASK 2 +#define CONST_PS_FOGCOLOR 3 +#define CONST_PS_STENCILREPLACE 4 +#define CONST_PS_BLENDFIXA 5 +#define CONST_PS_BLENDFIXB 6 +#define CONST_PS_FBOTEXSIZE 7 +#define CONST_PS_TEXCLAMP 8 +#define CONST_PS_TEXCLAMPOFF 9 + +// For stencil upload +#define CONST_PS_STENCILVALUE 10 + + bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLShaderCompat &compat, uint64_t *uniformMask, std::string *errorString); diff --git a/GPU/GLES/VertexShaderGeneratorGLES.cpp 
b/GPU/GLES/VertexShaderGeneratorGLES.cpp index 76faa4af05..b4c3a6d160 100644 --- a/GPU/GLES/VertexShaderGeneratorGLES.cpp +++ b/GPU/GLES/VertexShaderGeneratorGLES.cpp @@ -132,7 +132,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade if (compat.gles) { WRITE(p, "precision highp float;\n"); - } else if (!compat.shaderLanguage == GLSL_VULKAN) { + } else if (compat.shaderLanguage != GLSL_VULKAN) { WRITE(p, "#define lowp\n"); WRITE(p, "#define mediump\n"); WRITE(p, "#define highp\n"); diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index 4008d75606..e14ee6465d 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -4,6 +4,7 @@ #include "GPU/Common/ShaderId.h" #include "GPU/Common/ShaderCommon.h" +#include "GPU/Common/GPUStateUtils.h" #include "Common/Data/Random/Rng.h" #include "GPU/Vulkan/VulkanContext.h" @@ -147,7 +148,7 @@ void PrintDiff(const char *a, const char *b) { printf("a: %s\n", a_lines[i].c_str()); printf("b: %s\n", b_lines[i].c_str()); printf("...continues...\n"); - for (size_t j = i + 1; j < i + 5 && j < a_lines.size(); j++) { + for (size_t j = i + 1; j < i + 5 && j < a_lines.size() && j < b_lines.size(); j++) { printf("a: %s\n", a_lines[j].c_str()); printf("b: %s\n", b_lines[j].c_str()); } @@ -196,6 +197,10 @@ bool TestShaderGenerators() { id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, false); id.SetBit(FS_BIT_SHADER_DEPAL, false); + // DX9 disabling: + if (static_cast<ReplaceAlphaType>(id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2)) == ReplaceAlphaType::REPLACE_ALPHA_DUALSOURCE) + continue; + bool generateSuccess[numLanguages]{}; std::string genErrorString[numLanguages];