From 22b26ffc09d290c3fe6a6ae15424293691bbbf2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 23 Oct 2020 00:44:35 +0200 Subject: [PATCH 1/9] Time for the vertex shaders. Set up a test, start eliminating differences. --- GPU/Common/ShaderCommon.h | 15 + GPU/GLES/ShaderManagerGLES.cpp | 2 +- GPU/GLES/VertexShaderGeneratorGLES.cpp | 429 +++++++++++++-------- GPU/Vulkan/PipelineManagerVulkan.h | 14 +- GPU/Vulkan/ShaderManagerVulkan.cpp | 5 +- GPU/Vulkan/VertexShaderGeneratorVulkan.cpp | 1 + unittest/TestShaderGenerators.cpp | 105 +++-- 7 files changed, 349 insertions(+), 222 deletions(-) diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index b7eb56e93c..acc0de74f5 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -31,6 +31,8 @@ enum ShaderLanguage { HLSL_DX9, HLSL_D3D11, HLSL_D3D11_LEVEL9, + + TEST_GLSL_VULKAN, }; enum DebugShaderType { @@ -157,3 +159,16 @@ struct GLSLShaderCompat { void SetupForVulkan(); }; + +// PSP vertex format. +enum class PspAttributeLocation { + POSITION = 0, + TEXCOORD = 1, + NORMAL = 2, + W1 = 3, + W2 = 4, + COLOR0 = 5, + COLOR1 = 6, + + COUNT +}; diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index ccae98787a..4b903f9191 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -264,7 +264,7 @@ static void SetFloatUniform4(GLRenderManager *render, GLint *uniform, float data static void SetMatrix4x3(GLRenderManager *render, GLint *uniform, const float *m4x3) { float m4x4[16]; - ConvertMatrix4x3To4x4(m4x4, m4x3); + ConvertMatrix4x3To4x4Transposed(m4x4, m4x3); render->SetUniformM4x4(uniform, m4x4); } diff --git a/GPU/GLES/VertexShaderGeneratorGLES.cpp b/GPU/GLES/VertexShaderGeneratorGLES.cpp index b2fde269c8..2582853992 100644 --- a/GPU/GLES/VertexShaderGeneratorGLES.cpp +++ b/GPU/GLES/VertexShaderGeneratorGLES.cpp @@ -20,15 +20,15 @@ #include #include "Common/GPU/OpenGL/GLFeatures.h" - #include "Common/StringUtils.h" +#include "Core/Config.h" #include "GPU/ge_constants.h" #include "GPU/GPUState.h" -#include "Core/Config.h" +#include "GPU/Common/ShaderId.h" +#include "GPU/Common/ShaderUniforms.h" +#include "GPU/Common/VertexDecoderCommon.h" #include "GPU/GLES/VertexShaderGeneratorGLES.h" #include "GPU/GLES/ShaderManagerGLES.h" -#include "GPU/Common/ShaderId.h" -#include "GPU/Common/VertexDecoderCommon.h" #undef WRITE @@ -87,31 +87,52 @@ static const char * const boneWeightInDecl[9] = { // TODO: Skip all this if we can actually get a 16-bit depth buffer along with stencil, which // is a bit of a rare configuration, although quite common on mobile. +static const char * const boneWeightDecl[9] = { + "#ERROR#", + "layout(location = 3) in float w1;\n", + "layout(location = 3) in vec2 w1;\n", + "layout(location = 3) in vec3 w1;\n", + "layout(location = 3) in vec4 w1;\n", + "layout(location = 3) in vec4 w1;\nlayout(location = 4) in float w2;\n", + "layout(location = 3) in vec4 w1;\nlayout(location = 4) in vec2 w2;\n", + "layout(location = 3) in vec4 w1;\nlayout(location = 4) in vec3 w2;\n", + "layout(location = 3) in vec4 w1;\nlayout(location = 4) in vec4 w2;\n", +}; + +static const char *vulkan_glsl_preamble = +"#version 450\n" +"#extension GL_ARB_separate_shader_objects : enable\n" +"#extension GL_ARB_shading_language_420pack : enable\n" +"#define splat3(x) vec3(x)\n\n"; + bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShaderCompat &compat, uint32_t *attrMask, uint64_t *uniformMask, std::string *errorString) { *attrMask = 0; *uniformMask = 0; - char *p = buffer; - WRITE(p, "#version %d%s\n", compat.glslVersionNumber, compat.gles ? " es" : ""); - bool highpFog = false; bool highpTexcoord = false; - if (compat.gles) { - // PowerVR needs highp to do the fog in MHU correctly. - // Others don't, and some can't handle highp in the fragment shader. - highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false; - highpTexcoord = highpFog; + + char *p = buffer; + if (compat.vulkan) { + WRITE(p, "%s", vulkan_glsl_preamble); + } else { + if (compat.gles) { + // PowerVR needs highp to do the fog in MHU correctly. + // Others don't, and some can't handle highp in the fragment shader. + highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false; + highpTexcoord = highpFog; + } + WRITE(p, "#version %d%s\n", compat.glslVersionNumber, compat.gles ? " es" : ""); + WRITE(p, "#define splat3(x) vec3(x)\n"); } - if (gl_extensions.EXT_gpu_shader4) { + if (!compat.vulkan && gl_extensions.EXT_gpu_shader4) { WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n"); } - WRITE(p, "#define splat3(x) vec3(x)\n"); - if (compat.gles) { WRITE(p, "precision highp float;\n"); - } else { + } else if (!compat.vulkan) { WRITE(p, "#define lowp\n"); WRITE(p, "#define mediump\n"); WRITE(p, "#define highp\n"); @@ -120,7 +141,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade bool isModeThrough = id.Bit(VS_BIT_IS_THROUGH); bool lmode = id.Bit(VS_BIT_LMODE); bool doTexture = id.Bit(VS_BIT_DO_TEXTURE); - bool doTextureProjection = id.Bit(VS_BIT_DO_TEXTURE_TRANSFORM); + bool doTextureTransform = id.Bit(VS_BIT_DO_TEXTURE_TRANSFORM); GETexMapMode uvGenMode = static_cast(id.Bits(VS_BIT_UVGEN_MODE, 2)); @@ -148,8 +169,17 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS); bool flipNormalTess = id.Bit(VS_BIT_NORM_REVERSE_TESS); + if (compat.vulkan) { + WRITE(p, "\n"); + WRITE(p, "layout (std140, set = 0, binding = 3) uniform baseVars {\n%s};\n", ub_baseStr); + if (enableLighting || doShadeMapping) + WRITE(p, "layout (std140, set = 0, binding = 4) uniform lightVars {\n%s};\n", ub_vs_lightsStr); + if (enableBones) + WRITE(p, "layout (std140, set = 0, binding = 5) uniform boneVars {\n%s};\n", ub_vs_bonesStr); + } + const char *shading = ""; - if (compat.glslES30) + if (compat.glslES30 || compat.vulkan) shading = doFlatShading ? "flat " : ""; DoLightComputation doLight[4] = { LIGHT_OFF, LIGHT_OFF, LIGHT_OFF, LIGHT_OFF }; @@ -166,157 +196,204 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade int numBoneWeights = 0; int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2); - if (enableBones) { - numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3); - const char * const * boneWeightDecl = boneWeightAttrDecl; - if (!strcmp(compat.attribute, "in")) { - boneWeightDecl = boneWeightInDecl; - } - WRITE(p, "%s", boneWeightDecl[numBoneWeights]); - *attrMask |= 1 << ATTR_W1; - if (numBoneWeights >= 5) - *attrMask |= 1 << ATTR_W2; - } - - if (useHWTransform) - WRITE(p, "%s vec3 position;\n", compat.attribute); - else - WRITE(p, "%s vec4 position;\n", compat.attribute); // need to pass the fog coord in w - *attrMask |= 1 << ATTR_POSITION; - - if (useHWTransform && hasNormal) { - WRITE(p, "%s mediump vec3 normal;\n", compat.attribute); - *attrMask |= 1 << ATTR_NORMAL; - } - bool texcoordVec3In = false; - if (doTexture && hasTexcoord) { - if (!useHWTransform && doTextureProjection && !isModeThrough) { - WRITE(p, "%s vec3 texcoord;\n", compat.attribute); - texcoordVec3In = true; - } else { - WRITE(p, "%s vec2 texcoord;\n", compat.attribute); - } - *attrMask |= 1 << ATTR_TEXCOORD; - } - if (hasColor) { - WRITE(p, "%s lowp vec4 color0;\n", compat.attribute); - *attrMask |= 1 << ATTR_COLOR0; - if (lmode && !useHWTransform) { // only software transform supplies color1 as vertex data - WRITE(p, "%s lowp vec3 color1;\n", compat.attribute); - *attrMask |= 1 << ATTR_COLOR1; - } - } + bool scaleUV = false; - if (isModeThrough) { - WRITE(p, "uniform mat4 u_proj_through;\n"); - *uniformMask |= DIRTY_PROJTHROUGHMATRIX; - } else { - WRITE(p, "uniform mat4 u_proj;\n"); - *uniformMask |= DIRTY_PROJMATRIX; - // Add all the uniforms we'll need to transform properly. - } - - bool scaleUV = !isModeThrough && (uvGenMode == GE_TEXMAP_TEXTURE_COORDS || uvGenMode == GE_TEXMAP_UNKNOWN); - - if (useHWTransform) { - // When transforming by hardware, we need a great deal more uniforms... - WRITE(p, "uniform mat4 u_world;\n"); - WRITE(p, "uniform mat4 u_view;\n"); - *uniformMask |= DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX; - if (doTextureProjection) { - WRITE(p, "uniform mediump mat4 u_texmtx;\n"); - *uniformMask |= DIRTY_TEXMATRIX; - } + if (compat.vulkan) { if (enableBones) { -#ifdef USE_BONE_ARRAY - WRITE(p, "uniform mediump mat4 u_bone[%i];\n", numBoneWeights); - *uniformMask |= DIRTY_BONE_UNIFORMS; -#else - for (int i = 0; i < numBoneWeights; i++) { - WRITE(p, "uniform mat4 u_bone%i;\n", i); - *uniformMask |= DIRTY_BONEMATRIX0 << i; - } -#endif + numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3); + WRITE(p, "%s", boneWeightDecl[numBoneWeights]); } + + if (useHWTransform) + WRITE(p, "layout (location = %d) in vec3 position;\n", (int)PspAttributeLocation::POSITION); + else + // we pass the fog coord in w + WRITE(p, "layout (location = %d) in vec4 position;\n", (int)PspAttributeLocation::POSITION); + + if (useHWTransform && hasNormal) + WRITE(p, "layout (location = %d) in vec3 normal;\n", (int)PspAttributeLocation::NORMAL); + + bool texcoordInVec3 = false; + if (doTexture && hasTexcoord) { + if (!useHWTransform && doTextureTransform && !isModeThrough) { + WRITE(p, "layout (location = %d) in vec3 texcoord;\n", (int)PspAttributeLocation::TEXCOORD); + texcoordInVec3 = true; + } else + WRITE(p, "layout (location = %d) in vec2 texcoord;\n", (int)PspAttributeLocation::TEXCOORD); + } + if (hasColor) { + WRITE(p, "layout (location = %d) in vec4 color0;\n", (int)PspAttributeLocation::COLOR0); + if (lmode && !useHWTransform) // only software transform supplies color1 as vertex data + WRITE(p, "layout (location = %d) in vec3 color1;\n", (int)PspAttributeLocation::COLOR1); + } + + WRITE(p, "layout (location = 1) %sout vec4 v_color0;\n", shading); + if (lmode) { + WRITE(p, "layout (location = 2) %sout vec3 v_color1;\n", shading); + } + if (doTexture) { - WRITE(p, "uniform vec4 u_uvscaleoffset;\n"); - *uniformMask |= DIRTY_UVSCALEOFFSET; + WRITE(p, "layout (location = 0) out vec3 v_texcoord;\n"); } - for (int i = 0; i < 4; i++) { - if (doLight[i] != LIGHT_OFF) { - // This is needed for shade mapping - WRITE(p, "uniform vec3 u_lightpos%i;\n", i); - *uniformMask |= DIRTY_LIGHT0 << i; + + if (enableFog) { + // See the fragment shader generator + WRITE(p, "layout (location = 3) out float v_fogdepth;\n"); + } + } else { + if (enableBones) { + numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3); + const char * const * boneWeightDecl = boneWeightAttrDecl; + if (!strcmp(compat.attribute, "in")) { + boneWeightDecl = boneWeightInDecl; } - if (doLight[i] == LIGHT_FULL) { - *uniformMask |= DIRTY_LIGHT0 << i; - GELightType type = static_cast(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2)); - GELightComputation comp = static_cast(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2)); + WRITE(p, "%s", boneWeightDecl[numBoneWeights]); + *attrMask |= 1 << ATTR_W1; + if (numBoneWeights >= 5) + *attrMask |= 1 << ATTR_W2; + } - if (type != GE_LIGHTTYPE_DIRECTIONAL) - WRITE(p, "uniform mediump vec3 u_lightatt%i;\n", i); + if (useHWTransform) + WRITE(p, "%s vec3 position;\n", compat.attribute); + else + WRITE(p, "%s vec4 position;\n", compat.attribute); // need to pass the fog coord in w + *attrMask |= 1 << ATTR_POSITION; - if (type == GE_LIGHTTYPE_SPOT || type == GE_LIGHTTYPE_UNKNOWN) { - WRITE(p, "uniform mediump vec3 u_lightdir%i;\n", i); - WRITE(p, "uniform mediump vec2 u_lightangle_spotCoef%i;\n", i); - } - WRITE(p, "uniform lowp vec3 u_lightambient%i;\n", i); - WRITE(p, "uniform lowp vec3 u_lightdiffuse%i;\n", i); + if (useHWTransform && hasNormal) { + WRITE(p, "%s mediump vec3 normal;\n", compat.attribute); + *attrMask |= 1 << ATTR_NORMAL; + } - if (comp == GE_LIGHTCOMP_BOTH) { - WRITE(p, "uniform lowp vec3 u_lightspecular%i;\n", i); - } + if (doTexture && hasTexcoord) { + if (!useHWTransform && doTextureTransform && !isModeThrough) { + WRITE(p, "%s vec3 texcoord;\n", compat.attribute); + texcoordVec3In = true; + } else { + WRITE(p, "%s vec2 texcoord;\n", compat.attribute); + } + *attrMask |= 1 << ATTR_TEXCOORD; + } + if (hasColor) { + WRITE(p, "%s lowp vec4 color0;\n", compat.attribute); + *attrMask |= 1 << ATTR_COLOR0; + if (lmode && !useHWTransform) { // only software transform supplies color1 as vertex data + WRITE(p, "%s lowp vec3 color1;\n", compat.attribute); + *attrMask |= 1 << ATTR_COLOR1; } } - if (enableLighting) { - WRITE(p, "uniform lowp vec4 u_ambient;\n"); - *uniformMask |= DIRTY_AMBIENT; - if ((matUpdate & 2) == 0 || !hasColor) { - WRITE(p, "uniform lowp vec3 u_matdiffuse;\n"); - *uniformMask |= DIRTY_MATDIFFUSE; - } - WRITE(p, "uniform lowp vec4 u_matspecular;\n"); // Specular coef is contained in alpha - WRITE(p, "uniform lowp vec3 u_matemissive;\n"); - *uniformMask |= DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE; - } - } - if (useHWTransform || !hasColor) { - WRITE(p, "uniform lowp vec4 u_matambientalpha;\n"); // matambient + matalpha - *uniformMask |= DIRTY_MATAMBIENTALPHA; - } - if (enableFog) { - WRITE(p, "uniform highp vec2 u_fogcoef;\n"); - *uniformMask |= DIRTY_FOGCOEF; - } - - if (!isModeThrough && gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { - WRITE(p, "uniform highp vec4 u_depthRange;\n"); - *uniformMask |= DIRTY_DEPTHRANGE; - } - - if (!isModeThrough) { - WRITE(p, "uniform highp vec4 u_cullRangeMin;\n"); - WRITE(p, "uniform highp vec4 u_cullRangeMax;\n"); - *uniformMask |= DIRTY_CULLRANGE; - } - - WRITE(p, "%s%s lowp vec4 v_color0;\n", shading, compat.varying_vs); - if (lmode) { - WRITE(p, "%s%s lowp vec3 v_color1;\n", shading, compat.varying_vs); - } - - if (doTexture) { - WRITE(p, "%s %s vec3 v_texcoord;\n", compat.varying_vs, highpTexcoord ? "highp" : "mediump"); - } - - if (enableFog) { - // See the fragment shader generator - if (highpFog) { - WRITE(p, "%s highp float v_fogdepth;\n", compat.varying_vs); + if (isModeThrough) { + WRITE(p, "uniform mat4 u_proj_through;\n"); + *uniformMask |= DIRTY_PROJTHROUGHMATRIX; } else { - WRITE(p, "%s mediump float v_fogdepth;\n", compat.varying_vs); + WRITE(p, "uniform mat4 u_proj;\n"); + *uniformMask |= DIRTY_PROJMATRIX; + // Add all the uniforms we'll need to transform properly. + } + + scaleUV = !isModeThrough && (uvGenMode == GE_TEXMAP_TEXTURE_COORDS || uvGenMode == GE_TEXMAP_UNKNOWN); + + if (useHWTransform) { + // When transforming by hardware, we need a great deal more uniforms... + // TODO: Use 4x3 matrices where possible. Though probably doesn't matter much. + WRITE(p, "uniform mat4 u_world;\n"); + WRITE(p, "uniform mat4 u_view;\n"); + *uniformMask |= DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX; + if (doTextureTransform) { + WRITE(p, "uniform mediump mat4 u_texmtx;\n"); + *uniformMask |= DIRTY_TEXMATRIX; + } + if (enableBones) { +#ifdef USE_BONE_ARRAY + WRITE(p, "uniform mediump mat4 u_bone[%i];\n", numBoneWeights); + *uniformMask |= DIRTY_BONE_UNIFORMS; +#else + for (int i = 0; i < numBoneWeights; i++) { + WRITE(p, "uniform mat4 u_bone%i;\n", i); + *uniformMask |= DIRTY_BONEMATRIX0 << i; + } +#endif + } + if (doTexture) { + WRITE(p, "uniform vec4 u_uvscaleoffset;\n"); + *uniformMask |= DIRTY_UVSCALEOFFSET; + } + for (int i = 0; i < 4; i++) { + if (doLight[i] != LIGHT_OFF) { + // This is needed for shade mapping + WRITE(p, "uniform vec3 u_lightpos%i;\n", i); + *uniformMask |= DIRTY_LIGHT0 << i; + } + if (doLight[i] == LIGHT_FULL) { + *uniformMask |= DIRTY_LIGHT0 << i; + GELightType type = static_cast(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2)); + GELightComputation comp = static_cast(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2)); + + if (type != GE_LIGHTTYPE_DIRECTIONAL) + WRITE(p, "uniform mediump vec3 u_lightatt%i;\n", i); + + if (type == GE_LIGHTTYPE_SPOT || type == GE_LIGHTTYPE_UNKNOWN) { + WRITE(p, "uniform mediump vec3 u_lightdir%i;\n", i); + WRITE(p, "uniform mediump vec2 u_lightangle_spotCoef%i;\n", i); + } + WRITE(p, "uniform lowp vec3 u_lightambient%i;\n", i); + WRITE(p, "uniform lowp vec3 u_lightdiffuse%i;\n", i); + + if (comp == GE_LIGHTCOMP_BOTH) { + WRITE(p, "uniform lowp vec3 u_lightspecular%i;\n", i); + } + } + } + if (enableLighting) { + WRITE(p, "uniform lowp vec4 u_ambient;\n"); + *uniformMask |= DIRTY_AMBIENT; + if ((matUpdate & 2) == 0 || !hasColor) { + WRITE(p, "uniform lowp vec3 u_matdiffuse;\n"); + *uniformMask |= DIRTY_MATDIFFUSE; + } + WRITE(p, "uniform lowp vec4 u_matspecular;\n"); // Specular coef is contained in alpha + WRITE(p, "uniform lowp vec3 u_matemissive;\n"); + *uniformMask |= DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE; + } + } + + if (useHWTransform || !hasColor) { + WRITE(p, "uniform lowp vec4 u_matambientalpha;\n"); // matambient + matalpha + *uniformMask |= DIRTY_MATAMBIENTALPHA; + } + if (enableFog) { + WRITE(p, "uniform highp vec2 u_fogcoef;\n"); + *uniformMask |= DIRTY_FOGCOEF; + } + + if (!isModeThrough && gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { + WRITE(p, "uniform highp vec4 u_depthRange;\n"); + *uniformMask |= DIRTY_DEPTHRANGE; + } + + if (!isModeThrough) { + WRITE(p, "uniform highp vec4 u_cullRangeMin;\n"); + WRITE(p, "uniform highp vec4 u_cullRangeMax;\n"); + *uniformMask |= DIRTY_CULLRANGE; + } + + WRITE(p, "%s%s lowp vec4 v_color0;\n", shading, compat.varying_vs); + if (lmode) { + WRITE(p, "%s%s lowp vec3 v_color1;\n", shading, compat.varying_vs); + } + + if (doTexture) { + WRITE(p, "%s %s vec3 v_texcoord;\n", compat.varying_vs, highpTexcoord ? "highp" : "mediump"); + } + + if (enableFog) { + // See the fragment shader generator + if (highpFog) { + WRITE(p, "%s highp float v_fogdepth;\n", compat.varying_vs); + } else { + WRITE(p, "%s mediump float v_fogdepth;\n", compat.varying_vs); + } } } @@ -336,11 +413,33 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade if (doBezier || doSpline) { *uniformMask |= DIRTY_BEZIERSPLINE; - WRITE(p, "uniform sampler2D u_tess_points;\n"); // Control Points - WRITE(p, "uniform sampler2D u_tess_weights_u;\n"); - WRITE(p, "uniform sampler2D u_tess_weights_v;\n"); + if (compat.vulkan) { + WRITE(p, "struct TessData {\n"); + WRITE(p, " vec4 pos;\n"); + WRITE(p, " vec4 uv;\n"); + WRITE(p, " vec4 color;\n"); + WRITE(p, "};\n"); + WRITE(p, "layout (std430, set = 0, binding = 6) readonly buffer s_tess_data {\n"); + WRITE(p, " TessData data[];\n"); + WRITE(p, "} tess_data;\n"); - WRITE(p, "uniform int u_spline_counts;\n"); + WRITE(p, "layout (std430) struct TessWeight {\n"); + WRITE(p, " vec4 basis;\n"); + WRITE(p, " vec4 deriv;\n"); + WRITE(p, "};\n"); + WRITE(p, "layout (std430, set = 0, binding = 7) readonly buffer s_tess_weights_u {\n"); + WRITE(p, " TessWeight data[];\n"); + WRITE(p, "} tess_weights_u;\n"); + WRITE(p, "layout (std430, set = 0, binding = 8) readonly buffer s_tess_weights_v {\n"); + WRITE(p, " TessWeight data[];\n"); + WRITE(p, "} tess_weights_v;\n"); + } else { + WRITE(p, "uniform sampler2D u_tess_points;\n"); // Control Points + WRITE(p, "uniform sampler2D u_tess_weights_u;\n"); + WRITE(p, "uniform sampler2D u_tess_weights_v;\n"); + + WRITE(p, "uniform int u_spline_counts;\n"); + } for (int i = 2; i <= 4; i++) { // Define 3 types vec2, vec3, vec4 @@ -461,17 +560,17 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade WRITE(p, " Tess tess;\n"); WRITE(p, " tessellate(tess);\n"); - WRITE(p, " vec3 worldpos = (u_world * vec4(tess.pos.xyz, 1.0)).xyz;\n"); + WRITE(p, " vec3 worldpos = (vec4(tess.pos.xyz, 1.0) * u_world).xyz;\n"); if (hasNormalTess) { - WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%stess.nrm, 0.0)).xyz);\n", flipNormalTess ? "-" : ""); + WRITE(p, " mediump vec3 worldnormal = normalize((vec4(%stess.nrm, 0.0) * u_world).xyz);\n", flipNormalTess ? "-" : ""); } else { WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n"); } } else { // No skinning, just standard T&L. - WRITE(p, " vec3 worldpos = (u_world * vec4(position.xyz, 1.0)).xyz;\n"); + WRITE(p, " vec3 worldpos = (vec4(position.xyz, 1.0) * u_world).xyz;\n"); if (hasNormal) - WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%snormal, 0.0)).xyz);\n", flipNormal ? "-" : ""); + WRITE(p, " mediump vec3 worldnormal = normalize((vec4(%snormal, 0.0) * u_world).xyz);\n", flipNormal ? "-" : ""); else WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n"); } @@ -526,7 +625,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(skinnednormal, 0.0)).xyz);\n"); } - WRITE(p, " vec4 viewPos = u_view * vec4(worldpos, 1.0);\n"); + WRITE(p, " vec4 viewPos = vec4(worldpos, 1.0) * u_view;\n"); // Final view and projection transforms. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { @@ -742,7 +841,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade break; } // Transform by texture matrix. XYZ as we are doing projection mapping. - WRITE(p, " v_texcoord = (u_texmtx * %s).xyz * vec3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str()); + WRITE(p, " v_texcoord = (%s * u_texmtx).xyz * vec3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str()); } break; diff --git a/GPU/Vulkan/PipelineManagerVulkan.h b/GPU/Vulkan/PipelineManagerVulkan.h index bb747bd29b..8646d5b8bf 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.h +++ b/GPU/Vulkan/PipelineManagerVulkan.h @@ -22,24 +22,12 @@ #include "GPU/Common/VertexDecoderCommon.h" #include "GPU/Common/ShaderId.h" +#include "GPU/Common/ShaderCommon.h" #include "GPU/Vulkan/VulkanUtil.h" #include "GPU/Vulkan/StateMappingVulkan.h" #include "GPU/Vulkan/VulkanQueueRunner.h" -// PSP vertex format. -enum class PspAttributeLocation { - POSITION = 0, - TEXCOORD = 1, - NORMAL = 2, - W1 = 3, - W2 = 4, - COLOR0 = 5, - COLOR1 = 6, - - COUNT -}; - struct VulkanPipelineKey { VulkanPipelineRasterStateKey raster; // prim is included here VkRenderPass renderPass; diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 8d20b9ca58..2624db88e4 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -387,6 +387,8 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) { if (header.featureFlags != gstate_c.featureFlags) return false; + GLSLShaderCompat compat{}; + compat.SetupForVulkan(); for (int i = 0; i < header.numVertexShaders; i++) { VShaderID id; if (fread(&id, sizeof(id), 1, f) != 1) { @@ -402,8 +404,7 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) { vsCache_.Insert(id, vs); } uint32_t vendorID = vulkan_->GetPhysicalDeviceProperties().properties.vendorID; - GLSLShaderCompat compat{}; - compat.SetupForVulkan(); + for (int i = 0; i < header.numFragmentShaders; i++) { FShaderID id; if (fread(&id, sizeof(id), 1, f) != 1) { diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index f18ea66729..76a613b35d 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -205,6 +205,7 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri WRITE(p, "}\n\n"); } + // Hardware tessellation if (doBezier || doSpline) { WRITE(p, "struct TessData {\n"); WRITE(p, " vec4 pos;\n"); diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index 2c4ffa936b..88b41f3536 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -41,6 +41,8 @@ bool GenerateFShader(FShaderID id, char *buffer, ShaderLanguage lang, std::strin case ShaderLanguage::GLSL_300: // TODO: Need a device - except that maybe glslang could be used to verify these .... return false; + case ShaderLanguage::TEST_GLSL_VULKAN: + return false; default: return false; } @@ -57,6 +59,14 @@ bool GenerateVShader(VShaderID id, char *buffer, ShaderLanguage lang, std::strin // return DX9::GenerateFragmentShaderHLSL(id, buffer, ShaderLanguage::HLSL_DX9); case ShaderLanguage::GLSL_VULKAN: return GenerateVertexShaderVulkanGLSL(id, buffer, errorString); + case ShaderLanguage::TEST_GLSL_VULKAN: + { + GLSLShaderCompat compat{}; + compat.SetupForVulkan(); + uint32_t attrMask; + uint64_t uniformMask; + return GenerateVertexShaderGLSL(id, buffer, compat, &attrMask, &uniformMask, errorString); + } default: return false; } @@ -124,6 +134,7 @@ bool TestShaderGenerators() { LoadD3DCompilerDynamic(); ShaderLanguage languages[] = { + ShaderLanguage::TEST_GLSL_VULKAN, ShaderLanguage::GLSL_VULKAN, ShaderLanguage::HLSL_D3D11, ShaderLanguage::GLSL_140, @@ -141,6 +152,59 @@ bool TestShaderGenerators() { int successes = 0; int count = 700; + // Generate a bunch of random vertex shader IDs, try to generate shader source. + // Then compile it and check that it's ok. + for (int i = 0; i < count; i++) { + uint32_t bottom = rng.R32(); + uint32_t top = rng.R32(); + VShaderID id; + id.d[0] = bottom; + id.d[1] = top; + + // Skip testing beziers for now. I'll deal with those bugs later. + id.SetBit(VS_BIT_BEZIER, false); + id.SetBit(VS_BIT_SPLINE, false); + + bool generateSuccess[numLanguages]{}; + std::string genErrorString[numLanguages]; + + for (int j = 0; j < numLanguages; j++) { + generateSuccess[j] = GenerateVShader(id, buffer[j], languages[j], &genErrorString[j]); + if (!genErrorString[j].empty()) { + printf("%s\n", genErrorString[j].c_str()); + } + } + + if (generateSuccess[0] != generateSuccess[1]) { + printf("mismatching success! %s %s\n", genErrorString[0].c_str(), genErrorString[1].c_str()); + printf("%s\n", buffer[0]); + printf("%s\n", buffer[1]); + return 1; + } + if (generateSuccess[0] && strcmp(buffer[0], buffer[1])) { + printf("mismatching shaders!\n"); + PrintDiff(buffer[0], buffer[1]); + return 1; + } + + // Now that we have the strings ready for easy comparison (buffer,4 in the watch window), + // let's try to compile them. + for (int j = 0; j < numLanguages; j++) { + if (generateSuccess[j]) { + if (!TestCompileShader(buffer[j], languages[j], true)) { + printf("Error compiling vertex shader:\n\n%s\n\n", LineNumberString(buffer[j]).c_str()); + return false; + } + successes++; + } + } + } + + printf("%d/%d vertex shaders generated (it's normal that it's not all, there are invalid bit combos)\n", successes, count * numLanguages); + + successes = 0; + count = 200; + // Generate a bunch of random fragment shader IDs, try to generate shader source. // Then compile it and check that it's ok. for (int i = 0; i < count; i++) { @@ -194,47 +258,6 @@ bool TestShaderGenerators() { successes = 0; count = 200; - // Generate a bunch of random vertex shader IDs, try to generate shader source. - // Then compile it and check that it's ok. - for (int i = 0; i < count; i++) { - uint32_t bottom = rng.R32(); - uint32_t top = rng.R32(); - VShaderID id; - id.d[0] = bottom; - id.d[1] = top; - - // Skip testing beziers for now. I'll deal with those bugs later. - id.SetBit(VS_BIT_BEZIER, false); - id.SetBit(VS_BIT_SPLINE, false); - - bool generateSuccess[numLanguages]{}; - - for (int j = 0; j < numLanguages; j++) { - std::string genErrorString; - generateSuccess[j] = GenerateVShader(id, buffer[j], languages[j], &genErrorString); - if (!genErrorString.empty()) { - printf("%s\n", genErrorString.c_str()); - } - } - - // Now that we have the strings ready for easy comparison (buffer,4 in the watch window), - // let's try to compile them. - for (int j = 0; j < numLanguages; j++) { - if (generateSuccess[j]) { - if (!TestCompileShader(buffer[j], languages[j], true)) { - printf("Error compiling vertex shader:\n\n%s\n\n", LineNumberString(buffer[j]).c_str()); - return false; - } - successes++; - } - } - } - - printf("%d/%d vertex shaders generated (it's normal that it's not all, there are invalid bit combos)\n", successes, count * numLanguages); - - successes = 0; - count = 200; - for (int i = 0; i < numLanguages; i++) { delete[] buffer[i]; } From 057fd9f8a3f29afe725d0f2a0797d2f56ad0041b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 23 Oct 2020 09:01:56 +0200 Subject: [PATCH 2/9] Bridge more vertex shader differences --- GPU/Common/ShaderCommon.cpp | 1 + GPU/Common/ShaderCommon.h | 1 + GPU/Common/ShaderUniforms.h | 4 ++-- GPU/Directx9/VertexShaderGeneratorHLSL.cpp | 4 ++-- GPU/GLES/ShaderManagerGLES.cpp | 1 + GPU/GLES/VertexShaderGeneratorGLES.cpp | 12 +++++++----- GPU/Vulkan/VertexShaderGeneratorVulkan.cpp | 8 ++++---- unittest/TestShaderGenerators.cpp | 7 ++++--- 8 files changed, 22 insertions(+), 16 deletions(-) diff --git a/GPU/Common/ShaderCommon.cpp b/GPU/Common/ShaderCommon.cpp index e48f733437..e264d97467 100644 --- a/GPU/Common/ShaderCommon.cpp +++ b/GPU/Common/ShaderCommon.cpp @@ -117,4 +117,5 @@ void GLSLShaderCompat::SetupForVulkan() { texture = "texture"; texelFetch = "texelFetch"; vulkan = true; + forceMatrix4x4 = false; } diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index acc0de74f5..a8d03563b1 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -156,6 +156,7 @@ struct GLSLShaderCompat { const char *framebufferFetchExtension; bool glslES30; bool bitwiseOps; + bool forceMatrix4x4; void SetupForVulkan(); }; diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index 5acf973588..96be41af09 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -50,7 +50,7 @@ R"( mat4 u_proj; mat4 u_proj_through; mat3x4 u_view; mat3x4 u_world; - mat3x4 u_tex; + mat3x4 u_texmtx; vec4 u_uvscaleoffset; vec4 u_depthRange; vec2 u_fogcoef; @@ -78,7 +78,7 @@ R"( float4x4 u_proj; float4x4 u_proj_through; float4x3 u_view; float4x3 u_world; - float4x3 u_tex; + float4x3 u_texmtx; float4 u_uvscaleoffset; float4 u_depthRange; float2 u_fogcoef; diff --git a/GPU/Directx9/VertexShaderGeneratorHLSL.cpp b/GPU/Directx9/VertexShaderGeneratorHLSL.cpp index 78cb8d1265..12a60caffd 100644 --- a/GPU/Directx9/VertexShaderGeneratorHLSL.cpp +++ b/GPU/Directx9/VertexShaderGeneratorHLSL.cpp @@ -121,7 +121,7 @@ bool GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage WRITE(p, "float4x3 u_world : register(c%i);\n", CONST_VS_WORLD); WRITE(p, "float4x3 u_view : register(c%i);\n", CONST_VS_VIEW); if (doTextureTransform) - WRITE(p, "float4x3 u_tex : register(c%i);\n", CONST_VS_TEXMTX); + WRITE(p, "float4x3 u_texmtx : register(c%i);\n", CONST_VS_TEXMTX); if (enableBones) { #ifdef USE_BONE_ARRAY WRITE(p, "float4x3 u_bone[%i] : register(c%i);\n", numBones, CONST_VS_BONE0); @@ -687,7 +687,7 @@ bool GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage break; } // Transform by texture matrix. XYZ as we are doing projection mapping. - WRITE(p, " Out.v_texcoord.xyz = mul(%s, u_tex) * float3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str()); + WRITE(p, " Out.v_texcoord.xyz = mul(%s, u_texmtx) * float3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str()); } break; diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 4b903f9191..9a7d10e7ef 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -595,6 +595,7 @@ void ShaderManagerGLES::DetectShaderLanguage() { compat.bitwiseOps = false; compat.lastFragData = nullptr; compat.gles = gl_extensions.IsGLES; + compat.forceMatrix4x4 = true; if (compat.gles) { if (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300)) { diff --git a/GPU/GLES/VertexShaderGeneratorGLES.cpp b/GPU/GLES/VertexShaderGeneratorGLES.cpp index 2582853992..c1916601e8 100644 --- a/GPU/GLES/VertexShaderGeneratorGLES.cpp +++ b/GPU/GLES/VertexShaderGeneratorGLES.cpp @@ -197,7 +197,6 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade int numBoneWeights = 0; int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2); bool texcoordVec3In = false; - bool scaleUV = false; if (compat.vulkan) { if (enableBones) { @@ -292,8 +291,6 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade // Add all the uniforms we'll need to transform properly. } - scaleUV = !isModeThrough && (uvGenMode == GE_TEXMAP_TEXTURE_COORDS || uvGenMode == GE_TEXMAP_UNKNOWN); - if (useHWTransform) { // When transforming by hardware, we need a great deal more uniforms... // TODO: Use 4x3 matrices where possible. Though probably doesn't matter much. @@ -625,7 +622,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(skinnednormal, 0.0)).xyz);\n"); } - WRITE(p, " vec4 viewPos = vec4(worldpos, 1.0) * u_view;\n"); + WRITE(p, " vec4 viewPos = vec4((vec4(worldpos, 1.0) * u_view).xyz, 1.0);\n"); // Final view and projection transforms. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { @@ -787,6 +784,8 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade WRITE(p, " v_color1 = vec3(0.0);\n"); } + bool scaleUV = !isModeThrough && (uvGenMode == GE_TEXMAP_TEXTURE_COORDS || uvGenMode == GE_TEXMAP_UNKNOWN); + // Step 3: UV generation if (doTexture) { switch (uvGenMode) { @@ -795,7 +794,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade if (scaleUV) { if (hasTexcoord) { if (doBezier || doSpline) - WRITE(p, " v_texcoord = vec3(tess.tex * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n"); + WRITE(p, " v_texcoord = vec3(tess.tex.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n"); else WRITE(p, " v_texcoord = vec3(texcoord.xy * u_uvscaleoffset.xy, 0.0);\n"); } else { @@ -876,6 +875,9 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade WRITE(p, " }\n"); } WRITE(p, " gl_Position = outPos;\n"); + if (compat.vulkan) { + WRITE(p, " gl_PointSize = 1.0;\n"); + } WRITE(p, "}\n"); return true; diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index 76a613b35d..22613b0594 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -346,9 +346,9 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri } } else { // No skinning, just standard T&L. - WRITE(p, " vec3 worldpos = vec4(position.xyz, 1.0) * u_world;\n"); + WRITE(p, " vec3 worldpos = (vec4(position.xyz, 1.0) * u_world).xyz;\n"); if (hasNormal) - WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snormal, 0.0) * u_world);\n", flipNormal ? "-" : ""); + WRITE(p, " mediump vec3 worldnormal = normalize((vec4(%snormal, 0.0) * u_world).xyz);\n", flipNormal ? "-" : ""); else WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n"); } @@ -382,7 +382,7 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri WRITE(p, " mediump vec3 worldnormal = normalize(vec4(skinnednormal, 0.0) * u_world);\n"); } - WRITE(p, " vec4 viewPos = vec4(vec4(worldpos, 1.0) * u_view, 1.0);\n"); + WRITE(p, " vec4 viewPos = vec4((vec4(worldpos, 1.0) * u_view).xyz, 1.0);\n"); // Final view and projection transforms. if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) { @@ -595,7 +595,7 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri break; } // Transform by texture matrix. XYZ as we are doing projection mapping. - WRITE(p, " v_texcoord = (%s * u_tex).xyz * vec3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str()); + WRITE(p, " v_texcoord = (%s * u_texmtx).xyz * vec3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str()); } break; diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index 88b41f3536..c2d4f5d8d9 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -95,7 +95,8 @@ bool TestCompileShader(const char *buffer, ShaderLanguage lang, bool vertex) { return false; case ShaderLanguage::GLSL_300: return false; - + case ShaderLanguage::TEST_GLSL_VULKAN: + return true; default: return false; } @@ -117,7 +118,7 @@ void PrintDiff(const char *a, const char *b) { printf("a: %s\n", a_lines[i].c_str()); printf("b: %s\n", b_lines[i].c_str()); printf("...continues...\n"); - for (size_t j = i; j < i + 4 && j < a_lines.size(); j++) { + for (size_t j = i + 1; j < i + 5 && j < a_lines.size(); j++) { printf("a: %s\n", a_lines[j].c_str()); printf("b: %s\n", b_lines[j].c_str()); } @@ -182,7 +183,7 @@ bool TestShaderGenerators() { return 1; } if (generateSuccess[0] && strcmp(buffer[0], buffer[1])) { - printf("mismatching shaders!\n"); + printf("mismatching shaders! a=glsl b=vulkan\n"); PrintDiff(buffer[0], buffer[1]); return 1; } From a8f4a4d7497bdbb3b92c188e2a18c48374b9d5c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 23 Oct 2020 18:37:37 +0200 Subject: [PATCH 3/9] Fix more vertex shader differences --- GPU/Common/ShaderUniforms.h | 3 +-- GPU/GLES/ShaderManagerGLES.cpp | 2 +- GPU/GLES/VertexShaderGeneratorGLES.cpp | 31 +++++++--------------- GPU/Vulkan/VertexShaderGeneratorVulkan.cpp | 17 ++++++------ 4 files changed, 19 insertions(+), 34 deletions(-) diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index 96be41af09..fd13956eec 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -194,10 +194,9 @@ struct UB_VS_Bones { }; static const char *ub_vs_bonesStr = -R"( mat3x4 u_bone[8]; +R"( mat3x4 u_bone0; mat3x4 u_bone1; mat3x4 u_bone2; mat3x4 u_bone3; mat3x4 u_bone4; mat3x4 u_bone5; mat3x4 u_bone6; mat3x4 u_bone7; mat3x4 u_bone8; )"; -// HLSL code is shared so these names are changed to match those in DX9. static const char *cb_vs_bonesStr = R"( float4x3 u_bone[8]; )"; diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 9a7d10e7ef..5d5af4f4a8 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -495,7 +495,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu float bonetemp[16]; for (int i = 0; i < numBones; i++) { if (dirty & (DIRTY_BONEMATRIX0 << i)) { - ConvertMatrix4x3To4x4(bonetemp, gstate.boneMatrix + 12 * i); + ConvertMatrix4x3To4x4Transposed(bonetemp, gstate.boneMatrix + 12 * i); render_->SetUniformM4x4(&u_bone[i], bonetemp); } } diff --git a/GPU/GLES/VertexShaderGeneratorGLES.cpp b/GPU/GLES/VertexShaderGeneratorGLES.cpp index c1916601e8..317c54fbd1 100644 --- a/GPU/GLES/VertexShaderGeneratorGLES.cpp +++ b/GPU/GLES/VertexShaderGeneratorGLES.cpp @@ -580,25 +580,14 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade "w2.x", "w2.y", "w2.z", "w2.w", }; -#ifdef USE_BONE_ARRAY - if (numBoneWeights == 1) - WRITE(p, " mat4 skinMatrix = w1 * u_bone[0]"); - else - WRITE(p, " mat4 skinMatrix = w1.x * u_bone[0]"); - for (int i = 1; i < numBoneWeights; i++) { - const char *weightAttr = boneWeightAttr[i]; - // workaround for "cant do .x of scalar" issue - if (numBoneWeights == 1 && i == 0) weightAttr = "w1"; - if (numBoneWeights == 5 && i == 4) weightAttr = "w2"; - WRITE(p, " + %s * u_bone[%i]", weightAttr, i); - } -#else + const char *boneMatrix = compat.forceMatrix4x4 ? "mat4" : "mat3x4"; + // Uncomment this to screw up bone shaders to check the vertex shader software fallback // WRITE(p, "THIS SHOULD ERROR! #error"); if (numBoneWeights == 1) - WRITE(p, " mat4 skinMatrix = w1 * u_bone0"); + WRITE(p, " %s skinMatrix = w1 * u_bone0", boneMatrix); else - WRITE(p, " mat4 skinMatrix = w1.x * u_bone0"); + WRITE(p, " %s skinMatrix = w1.x * u_bone0", boneMatrix); for (int i = 1; i < numBoneWeights; i++) { const char *weightAttr = boneWeightAttr[i]; // workaround for "cant do .x of scalar" issue @@ -606,20 +595,18 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade if (numBoneWeights == 5 && i == 4) weightAttr = "w2"; WRITE(p, " + %s * u_bone%i", weightAttr, i); } -#endif WRITE(p, ";\n"); - // Trying to simplify this results in bugs in LBP... - WRITE(p, " vec3 skinnedpos = (skinMatrix * vec4(position, 1.0)).xyz %s;\n", factor); - WRITE(p, " vec3 worldpos = (u_world * vec4(skinnedpos, 1.0)).xyz;\n"); + WRITE(p, " vec3 skinnedpos = (vec4(position, 1.0) * skinMatrix).xyz %s;\n", factor); + WRITE(p, " vec3 worldpos = (vec4(skinnedpos, 1.0) * u_world).xyz;\n"); if (hasNormal) { - WRITE(p, " mediump vec3 skinnednormal = (skinMatrix * vec4(%snormal, 0.0)).xyz %s;\n", flipNormal ? "-" : "", factor); + WRITE(p, " mediump vec3 skinnednormal = (vec4(%snormal, 0.0) * skinMatrix).xyz %s;\n", flipNormal ? "-" : "", factor); } else { - WRITE(p, " mediump vec3 skinnednormal = (skinMatrix * vec4(0.0, 0.0, %s1.0, 0.0)).xyz %s;\n", flipNormal ? "-" : "", factor); + WRITE(p, " mediump vec3 skinnednormal = (vec4(0.0, 0.0, %s1.0, 0.0) * skinMatrix).xyz %s;\n", flipNormal ? "-" : "", factor); } - WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(skinnednormal, 0.0)).xyz);\n"); + WRITE(p, " mediump vec3 worldnormal = normalize((vec4(skinnednormal, 0.0) * u_world).xyz);\n"); } WRITE(p, " vec4 viewPos = vec4((vec4(worldpos, 1.0) * u_view).xyz, 1.0);\n"); diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index 22613b0594..cb295d4f6a 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -361,25 +361,24 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri "w2.x", "w2.y", "w2.z", "w2.w", }; - WRITE(p, " mat3x4 skinMatrix = w1.x * u_bone[0];\n"); + WRITE(p, " mat3x4 skinMatrix = w1.x * u_bone0"); if (numBoneWeights > 1) { for (int i = 1; i < numBoneWeights; i++) { - WRITE(p, " skinMatrix += %s * u_bone[%i];\n", boneWeightAttr[i], i); + WRITE(p, " + %s * u_bone%d", boneWeightAttr[i], i); } } WRITE(p, ";\n"); - // Trying to simplify this results in bugs in LBP... - WRITE(p, " vec3 skinnedpos = (vec4(position, 1.0) * skinMatrix) %s;\n", factor); - WRITE(p, " vec3 worldpos = vec4(skinnedpos, 1.0) * u_world;\n"); + WRITE(p, " vec3 skinnedpos = (vec4(position, 1.0) * skinMatrix).xyz %s;\n", factor); + WRITE(p, " vec3 worldpos = (vec4(skinnedpos, 1.0) * u_world).xyz;\n"); if (hasNormal) { - WRITE(p, " mediump vec3 skinnednormal = vec4(%snormal, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor); + WRITE(p, " mediump vec3 skinnednormal = (vec4(%snormal, 0.0) * skinMatrix).xyz %s;\n", flipNormal ? "-" : "", factor); } else { - WRITE(p, " mediump vec3 skinnednormal = vec4(0.0, 0.0, %s1.0, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor); + WRITE(p, " mediump vec3 skinnednormal = (vec4(0.0, 0.0, %s1.0, 0.0) * skinMatrix).xyz %s;\n", flipNormal ? "-" : "", factor); } - WRITE(p, " mediump vec3 worldnormal = normalize(vec4(skinnednormal, 0.0) * u_world);\n"); + WRITE(p, " mediump vec3 worldnormal = normalize((vec4(skinnednormal, 0.0) * u_world).xyz);\n"); } WRITE(p, " vec4 viewPos = vec4((vec4(worldpos, 1.0) * u_view).xyz, 1.0);\n"); @@ -408,7 +407,7 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri bool distanceNeeded = false; if (enableLighting) { - WRITE(p, " vec4 lightSum0 = u_ambient * %s + vec4(u_matemissive, 0.0);\n", ambientStr); + WRITE(p, " lowp vec4 lightSum0 = u_ambient * %s + vec4(u_matemissive, 0.0);\n", ambientStr); for (int i = 0; i < 4; i++) { GELightType type = static_cast(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2)); From 7532116b6928a9f891ec25ddc40068dc17f4ec18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 23 Oct 2020 22:32:11 +0200 Subject: [PATCH 4/9] Fix additional minor differences --- GPU/GLES/VertexShaderGeneratorGLES.cpp | 15 +++++---- GPU/Vulkan/VertexShaderGeneratorVulkan.cpp | 39 +++++++++++++--------- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/GPU/GLES/VertexShaderGeneratorGLES.cpp b/GPU/GLES/VertexShaderGeneratorGLES.cpp index 317c54fbd1..eca75bb28e 100644 --- a/GPU/GLES/VertexShaderGeneratorGLES.cpp +++ b/GPU/GLES/VertexShaderGeneratorGLES.cpp @@ -196,7 +196,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade int numBoneWeights = 0; int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2); - bool texcoordVec3In = false; + bool texcoordInVec3 = false; if (compat.vulkan) { if (enableBones) { @@ -213,7 +213,6 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade if (useHWTransform && hasNormal) WRITE(p, "layout (location = %d) in vec3 normal;\n", (int)PspAttributeLocation::NORMAL); - bool texcoordInVec3 = false; if (doTexture && hasTexcoord) { if (!useHWTransform && doTextureTransform && !isModeThrough) { WRITE(p, "layout (location = %d) in vec3 texcoord;\n", (int)PspAttributeLocation::TEXCOORD); @@ -267,7 +266,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade if (doTexture && hasTexcoord) { if (!useHWTransform && doTextureTransform && !isModeThrough) { WRITE(p, "%s vec3 texcoord;\n", compat.attribute); - texcoordVec3In = true; + texcoordInVec3 = true; } else { WRITE(p, "%s vec2 texcoord;\n", compat.attribute); } @@ -521,7 +520,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade if (!useHWTransform) { // Simple pass-through of vertex data to fragment shader if (doTexture) { - if (texcoordVec3In) { + if (texcoordInVec3) { WRITE(p, " v_texcoord = texcoord;\n"); } else { WRITE(p, " v_texcoord = vec3(texcoord, 1.0);\n"); @@ -584,15 +583,17 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade // Uncomment this to screw up bone shaders to check the vertex shader software fallback // WRITE(p, "THIS SHOULD ERROR! #error"); - if (numBoneWeights == 1) + if (numBoneWeights == 1 && !compat.vulkan) WRITE(p, " %s skinMatrix = w1 * u_bone0", boneMatrix); else WRITE(p, " %s skinMatrix = w1.x * u_bone0", boneMatrix); for (int i = 1; i < numBoneWeights; i++) { const char *weightAttr = boneWeightAttr[i]; // workaround for "cant do .x of scalar" issue - if (numBoneWeights == 1 && i == 0) weightAttr = "w1"; - if (numBoneWeights == 5 && i == 4) weightAttr = "w2"; + if (!compat.vulkan) { + if (numBoneWeights == 1 && i == 0) weightAttr = "w1"; + if (numBoneWeights == 5 && i == 4) weightAttr = "w2"; + } WRITE(p, " + %s * u_bone%i", weightAttr, i); } diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index cb295d4f6a..ad56a542e9 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -406,6 +406,7 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri bool specularIsZero = true; bool distanceNeeded = false; + bool anySpots = false; if (enableLighting) { WRITE(p, " lowp vec4 lightSum0 = u_ambient * %s + vec4(u_matemissive, 0.0);\n", ambientStr); @@ -419,18 +420,24 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri specularIsZero = false; if (type != GE_LIGHTTYPE_DIRECTIONAL) distanceNeeded = true; + if (type == GE_LIGHTTYPE_SPOT || type == GE_LIGHTTYPE_UNKNOWN) + anySpots = true; } if (!specularIsZero) { - WRITE(p, " vec3 lightSum1 = vec3(0.0);\n"); + WRITE(p, " lowp vec3 lightSum1 = vec3(0.0);\n"); } if (!diffuseIsZero) { WRITE(p, " vec3 toLight;\n"); - WRITE(p, " vec3 diffuse;\n"); + WRITE(p, " lowp vec3 diffuse;\n"); } if (distanceNeeded) { WRITE(p, " float distance;\n"); - WRITE(p, " float lightScale;\n"); + WRITE(p, " lowp float lightScale;\n"); + } + WRITE(p, " mediump float ldot;\n"); + if (anySpots) { + WRITE(p, " lowp float angle;\n"); } } @@ -455,14 +462,14 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri bool doSpecular = comp == GE_LIGHTCOMP_BOTH; bool poweredDiffuse = comp == GE_LIGHTCOMP_ONLYPOWDIFFUSE; - WRITE(p, " mediump float dot%i = dot(toLight, worldnormal);\n", i); + WRITE(p, " ldot = dot(toLight, worldnormal);\n"); if (poweredDiffuse) { // pow(0.0, 0.0) may be undefined, but the PSP seems to treat it as 1.0. // Seen in Tales of the World: Radiant Mythology (#2424.) WRITE(p, " if (u_matspecular.a <= 0.0) {\n"); - WRITE(p, " dot%i = 1.0;\n", i); + WRITE(p, " ldot = 1.0;\n"); WRITE(p, " } else {\n"); - WRITE(p, " dot%i = pow(max(dot%i, 0.0), u_matspecular.a);\n", i, i); + WRITE(p, " ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); WRITE(p, " }\n"); } @@ -478,9 +485,9 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri break; case GE_LIGHTTYPE_SPOT: case GE_LIGHTTYPE_UNKNOWN: - WRITE(p, " float angle%i = length(u_lightdir%d) == 0.0 ? 0.0 : dot(normalize(u_lightdir%d), toLight);\n", i, i, i); - WRITE(p, " if (angle%i >= u_lightangle_spotCoef%d.x) {\n", i, i); - WRITE(p, " lightScale = clamp(1.0 / dot(u_lightatt%d, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%d.y <= 0.0 ? 1.0 : pow(angle%i, u_lightangle_spotCoef%d.y));\n", i, i, i, i); + WRITE(p, " angle = length(u_lightdir%d) == 0.0 ? 0.0 : dot(normalize(u_lightdir%d), toLight);\n", i, i); + WRITE(p, " if (angle >= u_lightangle_spotCoef%d.x) {\n", i); + WRITE(p, " lightScale = clamp(1.0 / dot(u_lightatt%d, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%d.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%d.y));\n", i, i, i); WRITE(p, " } else {\n"); WRITE(p, " lightScale = 0.0;\n"); WRITE(p, " }\n"); @@ -490,17 +497,17 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri break; } - WRITE(p, " diffuse = (u_lightdiffuse%d * %s) * max(dot%i, 0.0);\n", i, diffuseStr, i); + WRITE(p, " diffuse = (u_lightdiffuse%d * %s) * max(ldot, 0.0);\n", i, diffuseStr); if (doSpecular) { - WRITE(p, " if (dot%i >= 0.0) {\n", i); - WRITE(p, " dot%i = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n", i); + WRITE(p, " if (ldot >= 0.0) {\n"); + WRITE(p, " ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n"); WRITE(p, " if (u_matspecular.a <= 0.0) {\n"); - WRITE(p, " dot%i = 1.0;\n", i); + WRITE(p, " ldot = 1.0;\n"); WRITE(p, " } else {\n"); - WRITE(p, " dot%i = pow(max(dot%i, 0.0), u_matspecular.a);\n", i, i); + WRITE(p, " ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); WRITE(p, " }\n"); - WRITE(p, " if (dot%i > 0.0)\n", i); - WRITE(p, " lightSum1 += u_lightspecular%d * %s * dot%i %s;\n", i, specularStr, i, timesLightScale); + WRITE(p, " if (ldot > 0.0)\n"); + WRITE(p, " lightSum1 += u_lightspecular%d * %s * ldot %s;\n", i, specularStr, timesLightScale); WRITE(p, " }\n"); } WRITE(p, " lightSum0.rgb += (u_lightambient%d * %s.rgb + diffuse)%s;\n", i, ambientStr, timesLightScale); From 6055350a2c341c3461fb68dd3b752d7f0bf36039 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 23 Oct 2020 23:41:26 +0200 Subject: [PATCH 5/9] Initial work on fixing tess --- GPU/Common/ShaderCommon.cpp | 1 + GPU/Common/ShaderCommon.h | 1 + GPU/GLES/VertexShaderGeneratorGLES.cpp | 66 +++++++++++++++------- GPU/Vulkan/VertexShaderGeneratorVulkan.cpp | 2 +- unittest/TestShaderGenerators.cpp | 4 -- 5 files changed, 49 insertions(+), 25 deletions(-) diff --git a/GPU/Common/ShaderCommon.cpp b/GPU/Common/ShaderCommon.cpp index e264d97467..04c4a12d4d 100644 --- a/GPU/Common/ShaderCommon.cpp +++ b/GPU/Common/ShaderCommon.cpp @@ -118,4 +118,5 @@ void GLSLShaderCompat::SetupForVulkan() { texelFetch = "texelFetch"; vulkan = true; forceMatrix4x4 = false; + coefsFromBuffers = true; } diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index a8d03563b1..4fc01fb715 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -157,6 +157,7 @@ struct GLSLShaderCompat { bool glslES30; bool bitwiseOps; bool forceMatrix4x4; + bool coefsFromBuffers; void SetupForVulkan(); }; diff --git a/GPU/GLES/VertexShaderGeneratorGLES.cpp b/GPU/GLES/VertexShaderGeneratorGLES.cpp index eca75bb28e..f7b1bad4b2 100644 --- a/GPU/GLES/VertexShaderGeneratorGLES.cpp +++ b/GPU/GLES/VertexShaderGeneratorGLES.cpp @@ -419,7 +419,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade WRITE(p, " TessData data[];\n"); WRITE(p, "} tess_data;\n"); - WRITE(p, "layout (std430) struct TessWeight {\n"); + WRITE(p, "struct TessWeight {\n"); WRITE(p, " vec4 basis;\n"); WRITE(p, " vec4 deriv;\n"); WRITE(p, "};\n"); @@ -450,7 +450,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade WRITE(p, "}\n"); } - if (!gl_extensions.VersionGEThan(3, 0, 0)) { // For glsl version 1.10 + if (compat.glslVersionNumber < 130) { // For glsl version 1.10 WRITE(p, "mat4 outerProduct(vec4 u, vec4 v) {\n"); WRITE(p, " return mat4(u * v[0], u * v[1], u * v[2], u * v[3]);\n"); WRITE(p, "}\n"); @@ -473,23 +473,43 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade WRITE(p, " vec3 _pos[16];\n"); WRITE(p, " vec2 _tex[16];\n"); WRITE(p, " vec4 _col[16];\n"); - WRITE(p, " int index_u, index_v;\n"); - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - WRITE(p, " index_u = (%i + point_pos.x);\n", j); - WRITE(p, " index_v = (%i + point_pos.y);\n", i); - WRITE(p, " _pos[%i] = %s(u_tess_points, ivec2(index_u, index_v), 0).xyz;\n", i * 4 + j, compat.texelFetch); - if (doTexture && hasTexcoordTess) - WRITE(p, " _tex[%i] = %s(u_tess_points, ivec2(index_u + u_spline_counts, index_v), 0).xy;\n", i * 4 + j, compat.texelFetch); - if (hasColorTess) - WRITE(p, " _col[%i] = %s(u_tess_points, ivec2(index_u + u_spline_counts * 2, index_v), 0).rgba;\n", i * 4 + j, compat.texelFetch); + if (compat.coefsFromBuffers) { + WRITE(p, " int index;\n"); + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + WRITE(p, " index = (%i + point_pos.y) * int(u_spline_counts) + (%i + point_pos.x);\n", i, j); + WRITE(p, " _pos[%i] = tess_data.data[index].pos.xyz;\n", i * 4 + j); + if (doTexture && hasTexcoordTess) + WRITE(p, " _tex[%i] = tess_data.data[index].uv.xy;\n", i * 4 + j); + if (hasColorTess) + WRITE(p, " _col[%i] = tess_data.data[index].color;\n", i * 4 + j); + } } - } - // Basis polynomials as weight coefficients - WRITE(p, " vec4 basis_u = %s(u_tess_weights_u, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.x * 2, 0)"); - WRITE(p, " vec4 basis_v = %s(u_tess_weights_v, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.y * 2, 0)"); - WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n"); + // Basis polynomials as weight coefficients + WRITE(p, " vec4 basis_u = tess_weights_u.data[weight_idx.x].basis;\n"); + WRITE(p, " vec4 basis_v = tess_weights_v.data[weight_idx.y].basis;\n"); + WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n"); + + } else { + WRITE(p, " int index_u, index_v;\n"); + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + WRITE(p, " index_u = (%i + point_pos.x);\n", j); + WRITE(p, " index_v = (%i + point_pos.y);\n", i); + WRITE(p, " _pos[%i] = %s(u_tess_points, ivec2(index_u, index_v), 0).xyz;\n", i * 4 + j, compat.texelFetch); + if (doTexture && hasTexcoordTess) + WRITE(p, " _tex[%i] = %s(u_tess_points, ivec2(index_u + u_spline_counts, index_v), 0).xy;\n", i * 4 + j, compat.texelFetch); + if (hasColorTess) + WRITE(p, " _col[%i] = %s(u_tess_points, ivec2(index_u + u_spline_counts * 2, index_v), 0).rgba;\n", i * 4 + j, compat.texelFetch); + } + } + + // Basis polynomials as weight coefficients + WRITE(p, " vec4 basis_u = %s(u_tess_weights_u, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.x * 2, 0)"); + WRITE(p, " vec4 basis_v = %s(u_tess_weights_v, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.y * 2, 0)"); + WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n"); + } // Tessellate WRITE(p, " tess.pos = tess_sample(_pos, basis);\n"); @@ -504,9 +524,15 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade else WRITE(p, " tess.col = u_matambientalpha;\n"); if (hasNormalTess) { - // Derivatives as weight coefficients - WRITE(p, " vec4 deriv_u = %s(u_tess_weights_u, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.x * 2 + 1, 0)"); - WRITE(p, " vec4 deriv_v = %s(u_tess_weights_v, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.y * 2 + 1, 0)"); + if (compat.coefsFromBuffers) { + // Derivatives as weight coefficients + WRITE(p, " vec4 deriv_u = tess_weights_u.data[weight_idx.x].deriv;\n"); + WRITE(p, " vec4 deriv_v = tess_weights_v.data[weight_idx.y].deriv;\n"); + } else { + // Derivatives as weight coefficients + WRITE(p, " vec4 deriv_u = %s(u_tess_weights_u, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.x * 2 + 1, 0)"); + WRITE(p, " vec4 deriv_v = %s(u_tess_weights_v, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.y * 2 + 1, 0)"); + } WRITE(p, " vec3 du = tess_sample(_pos, outerProduct(deriv_u, basis_v));\n"); WRITE(p, " vec3 dv = tess_sample(_pos, outerProduct(basis_u, deriv_v));\n"); diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index ad56a542e9..734f85033a 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -216,7 +216,7 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri WRITE(p, " TessData data[];\n"); WRITE(p, "} tess_data;\n"); - WRITE(p, "layout (std430) struct TessWeight {\n"); + WRITE(p, "struct TessWeight {\n"); WRITE(p, " vec4 basis;\n"); WRITE(p, " vec4 deriv;\n"); WRITE(p, "};\n"); diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index c2d4f5d8d9..98af754f21 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -162,10 +162,6 @@ bool TestShaderGenerators() { id.d[0] = bottom; id.d[1] = top; - // Skip testing beziers for now. I'll deal with those bugs later. - id.SetBit(VS_BIT_BEZIER, false); - id.SetBit(VS_BIT_SPLINE, false); - bool generateSuccess[numLanguages]{}; std::string genErrorString[numLanguages]; From 69c8912b9371a753a0c4b5e3c579729bfff29064 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 25 Oct 2020 08:24:19 +0100 Subject: [PATCH 6/9] More tess fixes --- GPU/Directx9/VertexShaderGeneratorHLSL.cpp | 4 ++-- GPU/GLES/VertexShaderGeneratorGLES.cpp | 5 +++-- GPU/Vulkan/VertexShaderGeneratorVulkan.cpp | 8 ++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/GPU/Directx9/VertexShaderGeneratorHLSL.cpp b/GPU/Directx9/VertexShaderGeneratorHLSL.cpp index 12a60caffd..9b88902800 100644 --- a/GPU/Directx9/VertexShaderGeneratorHLSL.cpp +++ b/GPU/Directx9/VertexShaderGeneratorHLSL.cpp @@ -70,8 +70,8 @@ bool GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE); int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3); - bool doBezier = id.Bit(VS_BIT_BEZIER); - bool doSpline = id.Bit(VS_BIT_SPLINE); + bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones && useHWTransform; + bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones && useHWTransform; bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS); bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS); bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS); diff --git a/GPU/GLES/VertexShaderGeneratorGLES.cpp b/GPU/GLES/VertexShaderGeneratorGLES.cpp index f7b1bad4b2..66b0b90f23 100644 --- a/GPU/GLES/VertexShaderGeneratorGLES.cpp +++ b/GPU/GLES/VertexShaderGeneratorGLES.cpp @@ -162,8 +162,9 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE); int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3); - bool doBezier = id.Bit(VS_BIT_BEZIER); - bool doSpline = id.Bit(VS_BIT_SPLINE); + // Apparently we don't support bezier/spline together with bones. + bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones; + bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones; bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS); bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS); bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS); diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index 734f85033a..9f0135b896 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -121,8 +121,8 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE); int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3); - bool doBezier = id.Bit(VS_BIT_BEZIER); - bool doSpline = id.Bit(VS_BIT_SPLINE); + bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones && useHWTransform; + bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones && useHWTransform; bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS); bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS); bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS); @@ -338,9 +338,9 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri WRITE(p, " Tess tess;\n"); WRITE(p, " tessellate(tess);\n"); - WRITE(p, " vec3 worldpos = vec4(tess.pos.xyz, 1.0) * u_world;\n"); + WRITE(p, " vec3 worldpos = (vec4(tess.pos.xyz, 1.0) * u_world).xyz;\n"); if (hasNormalTess) { - WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%stess.nrm, 0.0) * u_world);\n", flipNormalTess ? "-" : ""); + WRITE(p, " mediump vec3 worldnormal = normalize((vec4(%stess.nrm, 0.0) * u_world).xyz);\n", flipNormalTess ? "-" : ""); } else { WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n"); } From fb2ac3a67e9dbe6e67375f241e335219baedb334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 25 Oct 2020 08:33:01 +0100 Subject: [PATCH 7/9] Additional fixes --- GPU/Directx9/VertexShaderGeneratorHLSL.cpp | 5 +++++ GPU/GLES/VertexShaderGeneratorGLES.cpp | 9 +++++++-- GPU/Vulkan/VertexShaderGeneratorVulkan.cpp | 7 +++++++ unittest/TestShaderGenerators.cpp | 6 +++--- 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/GPU/Directx9/VertexShaderGeneratorHLSL.cpp b/GPU/Directx9/VertexShaderGeneratorHLSL.cpp index 9b88902800..4279290ed0 100644 --- a/GPU/Directx9/VertexShaderGeneratorHLSL.cpp +++ b/GPU/Directx9/VertexShaderGeneratorHLSL.cpp @@ -72,6 +72,11 @@ bool GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones && useHWTransform; bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones && useHWTransform; + if ((doBezier || doSpline) && !hasNormal) { + // Bad usage. + *errorString = "Invalid flags - tess requires normal."; + return false; + } bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS); bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS); bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS); diff --git a/GPU/GLES/VertexShaderGeneratorGLES.cpp b/GPU/GLES/VertexShaderGeneratorGLES.cpp index 66b0b90f23..c5d0bf8623 100644 --- a/GPU/GLES/VertexShaderGeneratorGLES.cpp +++ b/GPU/GLES/VertexShaderGeneratorGLES.cpp @@ -163,8 +163,13 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3); // Apparently we don't support bezier/spline together with bones. - bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones; - bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones; + bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones && useHWTransform; + bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones && useHWTransform; + if ((doBezier || doSpline) && !hasNormal) { + // Bad usage. + *errorString = "Invalid flags - tess requires normal."; + return false; + } bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS); bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS); bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS); diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp index 9f0135b896..03b9ade092 100644 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp @@ -123,6 +123,13 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones && useHWTransform; bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones && useHWTransform; + + if ((doBezier || doSpline) && !hasNormal) { + // Bad usage. + *errorString = "Invalid flags - tess requires normal."; + return false; + } + bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS); bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS); bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS); diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index 98af754f21..6693dc01ac 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -173,15 +173,15 @@ bool TestShaderGenerators() { } if (generateSuccess[0] != generateSuccess[1]) { - printf("mismatching success! %s %s\n", genErrorString[0].c_str(), genErrorString[1].c_str()); + printf("mismatching success! '%s' '%s'\n", genErrorString[0].c_str(), genErrorString[1].c_str()); printf("%s\n", buffer[0]); printf("%s\n", buffer[1]); - return 1; + return false; } if (generateSuccess[0] && strcmp(buffer[0], buffer[1])) { printf("mismatching shaders! a=glsl b=vulkan\n"); PrintDiff(buffer[0], buffer[1]); - return 1; + return false; } // Now that we have the strings ready for easy comparison (buffer,4 in the watch window), From 2977c56a120fe3fae475ea69860b2d3ab4b7a034 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 23 Oct 2020 22:40:51 +0200 Subject: [PATCH 8/9] Complete the vertex shader merge, deleting the Vulkan-specific vertex shader generator. --- CMakeLists.txt | 2 -- GPU/Common/ShaderCommon.h | 2 -- GPU/GPU.vcxproj | 2 -- GPU/GPU.vcxproj.filters | 6 ------ GPU/Vulkan/ShaderManagerVulkan.cpp | 11 ++++++++--- GPU/Vulkan/ShaderManagerVulkan.h | 2 +- GPU/Vulkan/VertexShaderGeneratorVulkan.h | 5 ----- android/jni/Android.mk | 1 - libretro/Makefile.common | 1 - unittest/TestShaderGenerators.cpp | 12 +++--------- 10 files changed, 12 insertions(+), 32 deletions(-) delete mode 100644 GPU/Vulkan/VertexShaderGeneratorVulkan.h diff --git a/CMakeLists.txt b/CMakeLists.txt index bd958b7090..adfad87ca3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1278,8 +1278,6 @@ set(GPU_VULKAN GPU/Vulkan/TextureCacheVulkan.h GPU/Vulkan/TextureScalerVulkan.cpp GPU/Vulkan/TextureScalerVulkan.h - GPU/Vulkan/VertexShaderGeneratorVulkan.cpp - GPU/Vulkan/VertexShaderGeneratorVulkan.h GPU/Vulkan/VulkanUtil.cpp GPU/Vulkan/VulkanUtil.h ) diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index 4fc01fb715..0c6d8e53ad 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -31,8 +31,6 @@ enum ShaderLanguage { HLSL_DX9, HLSL_D3D11, HLSL_D3D11_LEVEL9, - - TEST_GLSL_VULKAN, }; enum DebugShaderType { diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 90e56a8dbd..e63b0babdb 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -483,7 +483,6 @@ - @@ -674,7 +673,6 @@ - diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index daea75b1a3..4df5470e9b 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -171,9 +171,6 @@ Vulkan - - Vulkan - Vulkan @@ -431,9 +428,6 @@ Vulkan - - Vulkan - Vulkan diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 2624db88e4..84909a4d8a 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -40,7 +40,7 @@ #include "GPU/Vulkan/DrawEngineVulkan.h" #include "GPU/Vulkan/FramebufferManagerVulkan.h" #include "GPU/GLES/FragmentShaderGeneratorGLES.h" -#include "GPU/Vulkan/VertexShaderGeneratorVulkan.h" +#include "GPU/GLES/VertexShaderGeneratorGLES.h" VulkanFragmentShader::VulkanFragmentShader(VulkanContext *vulkan, FShaderID id, const char *code) : vulkan_(vulkan), id_(id), failed_(false), module_(0) { @@ -265,7 +265,10 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader if (!vs) { // Vertex shader not in cache. Let's compile it. std::string genErrorString; - GenerateVertexShaderVulkanGLSL(VSID, codeBuffer_, &genErrorString); + uint64_t uniformMask = 0; // Not used + uint32_t attributeMask = 0; // Not used + bool success = GenerateVertexShaderGLSL(VSID, codeBuffer_, compat_, &attributeMask, &uniformMask, &genErrorString); + _assert_(success); vs = new VulkanVertexShader(vulkan_, VSID, codeBuffer_, useHWTransform); vsCache_.Insert(VSID, vs); } @@ -397,7 +400,9 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) { } bool useHWTransform = id.Bit(VS_BIT_USE_HW_TRANSFORM); std::string genErrorString; - if (!GenerateVertexShaderVulkanGLSL(id, codeBuffer_, &genErrorString)) { + uint32_t attributeMask = 0; + uint64_t uniformMask = 0; + if (!GenerateVertexShaderGLSL(id, codeBuffer_, compat, &attributeMask, &uniformMask, &genErrorString)) { return false; } VulkanVertexShader *vs = new VulkanVertexShader(vulkan_, id, codeBuffer_, useHWTransform); diff --git a/GPU/Vulkan/ShaderManagerVulkan.h b/GPU/Vulkan/ShaderManagerVulkan.h index ddd2629422..874cd698dd 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.h +++ b/GPU/Vulkan/ShaderManagerVulkan.h @@ -24,7 +24,7 @@ #include "Common/GPU/Vulkan/VulkanMemory.h" #include "GPU/Common/ShaderCommon.h" #include "GPU/Common/ShaderId.h" -#include "GPU/Vulkan/VertexShaderGeneratorVulkan.h" +#include "GPU/GLES/VertexShaderGeneratorGLES.h" #include "GPU/GLES/FragmentShaderGeneratorGLES.h" #include "GPU/Vulkan/VulkanUtil.h" #include "Common/Math/lin/matrix4x4.h" diff --git a/GPU/Vulkan/VertexShaderGeneratorVulkan.h b/GPU/Vulkan/VertexShaderGeneratorVulkan.h deleted file mode 100644 index 51f2b56d55..0000000000 --- a/GPU/Vulkan/VertexShaderGeneratorVulkan.h +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -#include "GPU/Common/ShaderId.h" - -bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::string *errorString); diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 96f52d33ab..9a9bad4827 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -135,7 +135,6 @@ VULKAN_FILES := \ $(SRC)/GPU/Vulkan/TextureCacheVulkan.cpp \ $(SRC)/GPU/Vulkan/TextureScalerVulkan.cpp \ $(SRC)/GPU/Vulkan/DepalettizeShaderVulkan.cpp \ - $(SRC)/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp \ $(SRC)/GPU/Vulkan/VulkanUtil.cpp \ $(SRC)/GPU/Vulkan/DebugVisVulkan.cpp #endif diff --git a/libretro/Makefile.common b/libretro/Makefile.common index 29e24d164c..db8dbcd34f 100644 --- a/libretro/Makefile.common +++ b/libretro/Makefile.common @@ -647,7 +647,6 @@ SOURCES_CXX += \ $(GPUDIR)/Vulkan/StencilBufferVulkan.cpp \ $(GPUDIR)/Vulkan/TextureCacheVulkan.cpp \ $(GPUDIR)/Vulkan/TextureScalerVulkan.cpp \ - $(GPUDIR)/Vulkan/VertexShaderGeneratorVulkan.cpp \ $(GPUDIR)/Vulkan/VulkanUtil.cpp \ $(LIBRETRODIR)/LibretroVulkanContext.cpp \ $(LIBRETRODIR)/libretro_vulkan.cpp diff --git a/unittest/TestShaderGenerators.cpp b/unittest/TestShaderGenerators.cpp index 6693dc01ac..29e9b98c87 100644 --- a/unittest/TestShaderGenerators.cpp +++ b/unittest/TestShaderGenerators.cpp @@ -11,7 +11,6 @@ #include "GPU/Directx9/FragmentShaderGeneratorHLSL.h" #include "GPU/GLES/FragmentShaderGeneratorGLES.h" -#include "GPU/Vulkan/VertexShaderGeneratorVulkan.h" #include "GPU/Directx9/VertexShaderGeneratorHLSL.h" #include "GPU/GLES/VertexShaderGeneratorGLES.h" @@ -41,8 +40,6 @@ bool GenerateFShader(FShaderID id, char *buffer, ShaderLanguage lang, std::strin case ShaderLanguage::GLSL_300: // TODO: Need a device - except that maybe glslang could be used to verify these .... return false; - case ShaderLanguage::TEST_GLSL_VULKAN: - return false; default: return false; } @@ -58,8 +55,6 @@ bool GenerateVShader(VShaderID id, char *buffer, ShaderLanguage lang, std::strin return false; // return DX9::GenerateFragmentShaderHLSL(id, buffer, ShaderLanguage::HLSL_DX9); case ShaderLanguage::GLSL_VULKAN: - return GenerateVertexShaderVulkanGLSL(id, buffer, errorString); - case ShaderLanguage::TEST_GLSL_VULKAN: { GLSLShaderCompat compat{}; compat.SetupForVulkan(); @@ -95,8 +90,6 @@ bool TestCompileShader(const char *buffer, ShaderLanguage lang, bool vertex) { return false; case ShaderLanguage::GLSL_300: return false; - case ShaderLanguage::TEST_GLSL_VULKAN: - return true; default: return false; } @@ -135,7 +128,6 @@ bool TestShaderGenerators() { LoadD3DCompilerDynamic(); ShaderLanguage languages[] = { - ShaderLanguage::TEST_GLSL_VULKAN, ShaderLanguage::GLSL_VULKAN, ShaderLanguage::HLSL_D3D11, ShaderLanguage::GLSL_140, @@ -171,7 +163,8 @@ bool TestShaderGenerators() { printf("%s\n", genErrorString[j].c_str()); } } - + /* + // KEEPING FOR REUSE LATER: Defunct temporary test: Compare GLSL-in-Vulkan-mode vs Vulkan if (generateSuccess[0] != generateSuccess[1]) { printf("mismatching success! '%s' '%s'\n", genErrorString[0].c_str(), genErrorString[1].c_str()); printf("%s\n", buffer[0]); @@ -183,6 +176,7 @@ bool TestShaderGenerators() { PrintDiff(buffer[0], buffer[1]); return false; } + */ // Now that we have the strings ready for easy comparison (buffer,4 in the watch window), // let's try to compile them. From 148d3b72e5e73a6681b93d92e0e5155fc2f53c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 23 Oct 2020 23:30:02 +0200 Subject: [PATCH 9/9] Use splat3(x) liberally (converts to vec3(x) on GLES, vec3(x,x,x) on D3D) Prep for merging the HLSL and GLSL generators. --- GPU/Directx9/FragmentShaderGeneratorHLSL.cpp | 34 ++++++------- GPU/Directx9/VertexShaderGeneratorHLSL.cpp | 4 +- GPU/GLES/FragmentShaderGeneratorGLES.cpp | 53 +++++++++++--------- 3 files changed, 47 insertions(+), 44 deletions(-) diff --git a/GPU/Directx9/FragmentShaderGeneratorHLSL.cpp b/GPU/Directx9/FragmentShaderGeneratorHLSL.cpp index a48f1d17e9..88a0c6b289 100644 --- a/GPU/Directx9/FragmentShaderGeneratorHLSL.cpp +++ b/GPU/Directx9/FragmentShaderGeneratorHLSL.cpp @@ -372,15 +372,15 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag switch (replaceBlendFuncA) { case GE_SRCBLEND_DSTCOLOR: srcFactor = "ERROR"; break; case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "ERROR"; break; - case GE_SRCBLEND_SRCALPHA: srcFactor = "float3(v.a, v.a, v.a)"; break; - case GE_SRCBLEND_INVSRCALPHA: srcFactor = "float3(1.0 - v.a, 1.0 - v.a, 1.0 - v.a)"; break; + case GE_SRCBLEND_SRCALPHA: srcFactor = "splat3(v.a)"; break; + case GE_SRCBLEND_INVSRCALPHA: srcFactor = "splat3(1.0 - v.a)"; break; case GE_SRCBLEND_DSTALPHA: srcFactor = "ERROR"; break; case GE_SRCBLEND_INVDSTALPHA: srcFactor = "ERROR"; break; - case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "float3(v.a * 2.0, v.a * 2.0, v.a * 2.0)"; break; - case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "float3(1.0 - v.a * 2.0, 1.0 - v.a * 2.0, 1.0 - v.a * 2.0)"; break; + case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "splat3(v.a * 2.0)"; break; + case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "splat3(1.0 - v.a * 2.0)"; break; // PRE_SRC for REPLACE_BLEND_PRE_SRC_2X_ALPHA means "double the src." // It's close to the same, but clamping can still be an issue. - case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "float3(2.0, 2.0, 2.0)"; break; + case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "splat3(2.0)"; break; case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "ERROR"; break; case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break; default: srcFactor = "u_blendFixA"; break; @@ -401,29 +401,29 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag switch (replaceBlendFuncA) { case GE_SRCBLEND_DSTCOLOR: srcFactor = "destColor.rgb"; break; - case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(float3(1.0, 1.0, 1.0) - destColor.rgb)"; break; + case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(splat3(1.0) - destColor.rgb)"; break; case GE_SRCBLEND_SRCALPHA: srcFactor = "v.aaa"; break; - case GE_SRCBLEND_INVSRCALPHA: srcFactor = "float3(1.0, 1.0, 1.0) - v.aaa"; break; + case GE_SRCBLEND_INVSRCALPHA: srcFactor = "splat3(1.0) - v.aaa"; break; case GE_SRCBLEND_DSTALPHA: srcFactor = "float3(destColor.aaa)"; break; - case GE_SRCBLEND_INVDSTALPHA: srcFactor = "float3(1.0, 1.0, 1.0) - destColor.aaa"; break; + case GE_SRCBLEND_INVDSTALPHA: srcFactor = "splat3(1.0) - destColor.aaa"; break; case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "v.aaa * 2.0"; break; - case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "float3(1.0, 1.0, 1.0) - v.aaa * 2.0"; break; + case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "splat3(1.0) - v.aaa * 2.0"; break; case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "destColor.aaa * 2.0"; break; - case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "float3(1.0, 1.0, 1.0) - destColor.aaa * 2.0"; break; + case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "splat3(1.0) - destColor.aaa * 2.0"; break; case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break; default: srcFactor = "u_blendFixA"; break; } switch (replaceBlendFuncB) { case GE_DSTBLEND_SRCCOLOR: dstFactor = "v.rgb"; break; - case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(float3(1.0, 1.0, 1.0) - v.rgb)"; break; + case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(splat3(1.0) - v.rgb)"; break; case GE_DSTBLEND_SRCALPHA: dstFactor = "v.aaa"; break; - case GE_DSTBLEND_INVSRCALPHA: dstFactor = "float3(1.0, 1.0, 1.0) - v.aaa"; break; + case GE_DSTBLEND_INVSRCALPHA: dstFactor = "splat3(1.0) - v.aaa"; break; case GE_DSTBLEND_DSTALPHA: dstFactor = "destColor.aaa"; break; - case GE_DSTBLEND_INVDSTALPHA: dstFactor = "float3(1.0, 1.0, 1.0) - destColor.aaa"; break; + case GE_DSTBLEND_INVDSTALPHA: dstFactor = "splat3(1.0) - destColor.aaa"; break; case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "v.aaa * 2.0"; break; - case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "float3(1.0, 1.0, 1.0) - v.aaa * 2.0"; break; + case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "splat3(1.0) - v.aaa * 2.0"; break; case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "destColor.aaa * 2.0"; break; - case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "float3(1.0, 1.0, 1.0) - destColor.aaa * 2.0"; break; + case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "splat3(1.0) - destColor.aaa * 2.0"; break; case GE_DSTBLEND_FIXB: dstFactor = "u_blendFixB"; break; default: dstFactor = "u_blendFixB"; break; } @@ -512,10 +512,10 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag LogicOpReplaceType replaceLogicOpType = (LogicOpReplaceType)id.Bits(FS_BIT_REPLACE_LOGIC_OP_TYPE, 2); switch (replaceLogicOpType) { case LOGICOPTYPE_ONE: - WRITE(p, " v.rgb = float3(1.0, 1.0, 1.0);\n"); + WRITE(p, " v.rgb = splat3(1.0);\n"); break; case LOGICOPTYPE_INVERT: - WRITE(p, " v.rgb = float3(1.0, 1.0, 1.0) - v.rgb;\n"); + WRITE(p, " v.rgb = splat3(1.0) - v.rgb;\n"); break; case LOGICOPTYPE_NORMAL: break; diff --git a/GPU/Directx9/VertexShaderGeneratorHLSL.cpp b/GPU/Directx9/VertexShaderGeneratorHLSL.cpp index 4279290ed0..8425b16f5a 100644 --- a/GPU/Directx9/VertexShaderGeneratorHLSL.cpp +++ b/GPU/Directx9/VertexShaderGeneratorHLSL.cpp @@ -636,7 +636,7 @@ bool GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage WRITE(p, " Out.v_color0 = u_matambientalpha;\n"); } if (lmode) - WRITE(p, " Out.v_color1 = float3(0, 0, 0);\n"); + WRITE(p, " Out.v_color1 = splat3(0.0);\n"); } // Step 3: UV generation @@ -651,7 +651,7 @@ bool GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage else WRITE(p, " Out.v_texcoord = float3(In.texcoord.xy * u_uvscaleoffset.xy, 0.0);\n"); } else { - WRITE(p, " Out.v_texcoord = float3(0.0, 0.0, 0.0);\n"); + WRITE(p, " Out.v_texcoord = splat3(0.0);\n"); } } else { if (hasTexcoord) { diff --git a/GPU/GLES/FragmentShaderGeneratorGLES.cpp b/GPU/GLES/FragmentShaderGeneratorGLES.cpp index 391dd950bd..fb6215e5bc 100644 --- a/GPU/GLES/FragmentShaderGeneratorGLES.cpp +++ b/GPU/GLES/FragmentShaderGeneratorGLES.cpp @@ -62,6 +62,9 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha if (compat.vulkan) { WRITE(p, "%s", vulkan_glsl_preamble); + WRITE(p, "#define lowp\n"); + WRITE(p, "#define mediump\n"); + WRITE(p, "#define highp\n"); } else { WRITE(p, "#version %d%s\n", compat.glslVersionNumber, compat.gles ? " es" : ""); @@ -621,15 +624,15 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha switch (replaceBlendFuncA) { case GE_SRCBLEND_DSTCOLOR: srcFactor = "ERROR"; break; case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "ERROR"; break; - case GE_SRCBLEND_SRCALPHA: srcFactor = "vec3(v.a)"; break; - case GE_SRCBLEND_INVSRCALPHA: srcFactor = "vec3(1.0 - v.a)"; break; + case GE_SRCBLEND_SRCALPHA: srcFactor = "splat3(v.a)"; break; + case GE_SRCBLEND_INVSRCALPHA: srcFactor = "splat3(1.0 - v.a)"; break; case GE_SRCBLEND_DSTALPHA: srcFactor = "ERROR"; break; case GE_SRCBLEND_INVDSTALPHA: srcFactor = "ERROR"; break; - case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a * 2.0)"; break; - case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break; + case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "splat3(v.a * 2.0)"; break; + case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "splat3(1.0 - v.a * 2.0)"; break; // PRE_SRC for REPLACE_BLEND_PRE_SRC_2X_ALPHA means "double the src." // It's close to the same, but clamping can still be an issue. - case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "vec3(2.0)"; break; + case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "splat3(2.0)"; break; case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "ERROR"; break; case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break; default: srcFactor = "u_blendFixA"; break; @@ -654,34 +657,34 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch); } - const char *srcFactor = "vec3(1.0)"; - const char *dstFactor = "vec3(0.0)"; + const char *srcFactor = "splat3(1.0)"; + const char *dstFactor = "splat3(0.0)"; switch (replaceBlendFuncA) { case GE_SRCBLEND_DSTCOLOR: srcFactor = "destColor.rgb"; break; - case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(vec3(1.0) - destColor.rgb)"; break; - case GE_SRCBLEND_SRCALPHA: srcFactor = "vec3(v.a)"; break; - case GE_SRCBLEND_INVSRCALPHA: srcFactor = "vec3(1.0 - v.a)"; break; - case GE_SRCBLEND_DSTALPHA: srcFactor = "vec3(destColor.a)"; break; - case GE_SRCBLEND_INVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a)"; break; - case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a * 2.0)"; break; - case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break; - case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "vec3(destColor.a * 2.0)"; break; - case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a * 2.0)"; break; + case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(splat3(1.0) - destColor.rgb)"; break; + case GE_SRCBLEND_SRCALPHA: srcFactor = "splat3(v.a)"; break; + case GE_SRCBLEND_INVSRCALPHA: srcFactor = "splat3(1.0 - v.a)"; break; + case GE_SRCBLEND_DSTALPHA: srcFactor = "splat3(destColor.a)"; break; + case GE_SRCBLEND_INVDSTALPHA: srcFactor = "splat3(1.0 - destColor.a)"; break; + case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "splat3(v.a * 2.0)"; break; + case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "splat3(1.0 - v.a * 2.0)"; break; + case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "splat3(destColor.a * 2.0)"; break; + case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "splat3(1.0 - destColor.a * 2.0)"; break; case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break; default: srcFactor = "u_blendFixA"; break; } switch (replaceBlendFuncB) { case GE_DSTBLEND_SRCCOLOR: dstFactor = "v.rgb"; break; - case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(vec3(1.0) - v.rgb)"; break; - case GE_DSTBLEND_SRCALPHA: dstFactor = "vec3(v.a)"; break; - case GE_DSTBLEND_INVSRCALPHA: dstFactor = "vec3(1.0 - v.a)"; break; - case GE_DSTBLEND_DSTALPHA: dstFactor = "vec3(destColor.a)"; break; - case GE_DSTBLEND_INVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a)"; break; - case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "vec3(v.a * 2.0)"; break; - case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "vec3(1.0 - v.a * 2.0)"; break; - case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "vec3(destColor.a * 2.0)"; break; - case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a * 2.0)"; break; + case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(splat3(1.0) - v.rgb)"; break; + case GE_DSTBLEND_SRCALPHA: dstFactor = "splat3(v.a)"; break; + case GE_DSTBLEND_INVSRCALPHA: dstFactor = "splat3(1.0 - v.a)"; break; + case GE_DSTBLEND_DSTALPHA: dstFactor = "splat3(destColor.a)"; break; + case GE_DSTBLEND_INVDSTALPHA: dstFactor = "splat3(1.0 - destColor.a)"; break; + case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "splat3(v.a * 2.0)"; break; + case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "splat3(1.0 - v.a * 2.0)"; break; + case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "splat3(destColor.a * 2.0)"; break; + case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "splat3(1.0 - destColor.a * 2.0)"; break; case GE_DSTBLEND_FIXB: dstFactor = "u_blendFixB"; break; default: dstFactor = "u_blendFixB"; break; }