Merge pull request #13580 from hrydgard/merge-glsl-vertex-shadergens

Merge GLSL vertex shader generators
This commit is contained in:
Henrik Rydgård 2020-10-26 09:52:57 +01:00 committed by GitHub
commit 5c619287e3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 534 additions and 383 deletions

View file

@ -1278,8 +1278,6 @@ set(GPU_VULKAN
GPU/Vulkan/TextureCacheVulkan.h
GPU/Vulkan/TextureScalerVulkan.cpp
GPU/Vulkan/TextureScalerVulkan.h
GPU/Vulkan/VertexShaderGeneratorVulkan.cpp
GPU/Vulkan/VertexShaderGeneratorVulkan.h
GPU/Vulkan/VulkanUtil.cpp
GPU/Vulkan/VulkanUtil.h
)

View file

@ -117,4 +117,6 @@ void GLSLShaderCompat::SetupForVulkan() {
texture = "texture";
texelFetch = "texelFetch";
vulkan = true;
forceMatrix4x4 = false;
coefsFromBuffers = true;
}

View file

@ -154,6 +154,21 @@ struct GLSLShaderCompat {
const char *framebufferFetchExtension;
bool glslES30;
bool bitwiseOps;
bool forceMatrix4x4;
bool coefsFromBuffers;
void SetupForVulkan();
};
// Attribute slots of the PSP vertex format.
// The numeric values are used as the `layout (location = N)` indices when
// vertex shader inputs are generated for Vulkan-flavoured GLSL.
enum class PspAttributeLocation : int {
	POSITION = 0,  // vertex position
	TEXCOORD = 1,  // texture coordinate
	NORMAL = 2,    // vertex normal
	W1 = 3,        // first group of bone weights
	W2 = 4,        // second group of bone weights
	COLOR0 = 5,    // primary vertex color
	COLOR1 = 6,    // secondary vertex color
	COUNT          // number of slots above (7); not an attribute itself
};

View file

@ -50,7 +50,7 @@ R"( mat4 u_proj;
mat4 u_proj_through;
mat3x4 u_view;
mat3x4 u_world;
mat3x4 u_tex;
mat3x4 u_texmtx;
vec4 u_uvscaleoffset;
vec4 u_depthRange;
vec2 u_fogcoef;
@ -78,7 +78,7 @@ R"( float4x4 u_proj;
float4x4 u_proj_through;
float4x3 u_view;
float4x3 u_world;
float4x3 u_tex;
float4x3 u_texmtx;
float4 u_uvscaleoffset;
float4 u_depthRange;
float2 u_fogcoef;
@ -194,10 +194,9 @@ struct UB_VS_Bones {
};
static const char *ub_vs_bonesStr =
R"( mat3x4 u_bone[8];
R"( mat3x4 u_bone0; mat3x4 u_bone1; mat3x4 u_bone2; mat3x4 u_bone3; mat3x4 u_bone4; mat3x4 u_bone5; mat3x4 u_bone6; mat3x4 u_bone7; mat3x4 u_bone8;
)";
// HLSL-syntax declaration of the bone matrices: a single array of eight
// float4x3 values named u_bone.
// HLSL code is shared so these names are changed to match those in DX9.
static const char *cb_vs_bonesStr =
R"( float4x3 u_bone[8];
)";

View file

@ -372,15 +372,15 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
switch (replaceBlendFuncA) {
case GE_SRCBLEND_DSTCOLOR: srcFactor = "ERROR"; break;
case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "ERROR"; break;
case GE_SRCBLEND_SRCALPHA: srcFactor = "float3(v.a, v.a, v.a)"; break;
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "float3(1.0 - v.a, 1.0 - v.a, 1.0 - v.a)"; break;
case GE_SRCBLEND_SRCALPHA: srcFactor = "splat3(v.a)"; break;
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "splat3(1.0 - v.a)"; break;
case GE_SRCBLEND_DSTALPHA: srcFactor = "ERROR"; break;
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "ERROR"; break;
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "float3(v.a * 2.0, v.a * 2.0, v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "float3(1.0 - v.a * 2.0, 1.0 - v.a * 2.0, 1.0 - v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "splat3(v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "splat3(1.0 - v.a * 2.0)"; break;
// PRE_SRC for REPLACE_BLEND_PRE_SRC_2X_ALPHA means "double the src."
// It's close to the same, but clamping can still be an issue.
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "float3(2.0, 2.0, 2.0)"; break;
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "splat3(2.0)"; break;
case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "ERROR"; break;
case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break;
default: srcFactor = "u_blendFixA"; break;
@ -401,29 +401,29 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
switch (replaceBlendFuncA) {
case GE_SRCBLEND_DSTCOLOR: srcFactor = "destColor.rgb"; break;
case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(float3(1.0, 1.0, 1.0) - destColor.rgb)"; break;
case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(splat3(1.0) - destColor.rgb)"; break;
case GE_SRCBLEND_SRCALPHA: srcFactor = "v.aaa"; break;
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "float3(1.0, 1.0, 1.0) - v.aaa"; break;
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "splat3(1.0) - v.aaa"; break;
case GE_SRCBLEND_DSTALPHA: srcFactor = "float3(destColor.aaa)"; break;
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "float3(1.0, 1.0, 1.0) - destColor.aaa"; break;
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "splat3(1.0) - destColor.aaa"; break;
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "v.aaa * 2.0"; break;
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "float3(1.0, 1.0, 1.0) - v.aaa * 2.0"; break;
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "splat3(1.0) - v.aaa * 2.0"; break;
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "destColor.aaa * 2.0"; break;
case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "float3(1.0, 1.0, 1.0) - destColor.aaa * 2.0"; break;
case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "splat3(1.0) - destColor.aaa * 2.0"; break;
case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break;
default: srcFactor = "u_blendFixA"; break;
}
switch (replaceBlendFuncB) {
case GE_DSTBLEND_SRCCOLOR: dstFactor = "v.rgb"; break;
case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(float3(1.0, 1.0, 1.0) - v.rgb)"; break;
case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(splat3(1.0) - v.rgb)"; break;
case GE_DSTBLEND_SRCALPHA: dstFactor = "v.aaa"; break;
case GE_DSTBLEND_INVSRCALPHA: dstFactor = "float3(1.0, 1.0, 1.0) - v.aaa"; break;
case GE_DSTBLEND_INVSRCALPHA: dstFactor = "splat3(1.0) - v.aaa"; break;
case GE_DSTBLEND_DSTALPHA: dstFactor = "destColor.aaa"; break;
case GE_DSTBLEND_INVDSTALPHA: dstFactor = "float3(1.0, 1.0, 1.0) - destColor.aaa"; break;
case GE_DSTBLEND_INVDSTALPHA: dstFactor = "splat3(1.0) - destColor.aaa"; break;
case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "v.aaa * 2.0"; break;
case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "float3(1.0, 1.0, 1.0) - v.aaa * 2.0"; break;
case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "splat3(1.0) - v.aaa * 2.0"; break;
case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "destColor.aaa * 2.0"; break;
case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "float3(1.0, 1.0, 1.0) - destColor.aaa * 2.0"; break;
case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "splat3(1.0) - destColor.aaa * 2.0"; break;
case GE_DSTBLEND_FIXB: dstFactor = "u_blendFixB"; break;
default: dstFactor = "u_blendFixB"; break;
}
@ -512,10 +512,10 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
LogicOpReplaceType replaceLogicOpType = (LogicOpReplaceType)id.Bits(FS_BIT_REPLACE_LOGIC_OP_TYPE, 2);
switch (replaceLogicOpType) {
case LOGICOPTYPE_ONE:
WRITE(p, " v.rgb = float3(1.0, 1.0, 1.0);\n");
WRITE(p, " v.rgb = splat3(1.0);\n");
break;
case LOGICOPTYPE_INVERT:
WRITE(p, " v.rgb = float3(1.0, 1.0, 1.0) - v.rgb;\n");
WRITE(p, " v.rgb = splat3(1.0) - v.rgb;\n");
break;
case LOGICOPTYPE_NORMAL:
break;

View file

@ -70,8 +70,13 @@ bool GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE);
int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3);
bool doBezier = id.Bit(VS_BIT_BEZIER);
bool doSpline = id.Bit(VS_BIT_SPLINE);
bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones && useHWTransform;
bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones && useHWTransform;
if ((doBezier || doSpline) && !hasNormal) {
// Bad usage.
*errorString = "Invalid flags - tess requires normal.";
return false;
}
bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS);
bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS);
bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS);
@ -121,7 +126,7 @@ bool GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
WRITE(p, "float4x3 u_world : register(c%i);\n", CONST_VS_WORLD);
WRITE(p, "float4x3 u_view : register(c%i);\n", CONST_VS_VIEW);
if (doTextureTransform)
WRITE(p, "float4x3 u_tex : register(c%i);\n", CONST_VS_TEXMTX);
WRITE(p, "float4x3 u_texmtx : register(c%i);\n", CONST_VS_TEXMTX);
if (enableBones) {
#ifdef USE_BONE_ARRAY
WRITE(p, "float4x3 u_bone[%i] : register(c%i);\n", numBones, CONST_VS_BONE0);
@ -631,7 +636,7 @@ bool GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
WRITE(p, " Out.v_color0 = u_matambientalpha;\n");
}
if (lmode)
WRITE(p, " Out.v_color1 = float3(0, 0, 0);\n");
WRITE(p, " Out.v_color1 = splat3(0.0);\n");
}
// Step 3: UV generation
@ -646,7 +651,7 @@ bool GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
else
WRITE(p, " Out.v_texcoord = float3(In.texcoord.xy * u_uvscaleoffset.xy, 0.0);\n");
} else {
WRITE(p, " Out.v_texcoord = float3(0.0, 0.0, 0.0);\n");
WRITE(p, " Out.v_texcoord = splat3(0.0);\n");
}
} else {
if (hasTexcoord) {
@ -687,7 +692,7 @@ bool GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
break;
}
// Transform by texture matrix. XYZ as we are doing projection mapping.
WRITE(p, " Out.v_texcoord.xyz = mul(%s, u_tex) * float3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str());
WRITE(p, " Out.v_texcoord.xyz = mul(%s, u_texmtx) * float3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str());
}
break;

View file

@ -62,6 +62,9 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha
if (compat.vulkan) {
WRITE(p, "%s", vulkan_glsl_preamble);
WRITE(p, "#define lowp\n");
WRITE(p, "#define mediump\n");
WRITE(p, "#define highp\n");
} else {
WRITE(p, "#version %d%s\n", compat.glslVersionNumber, compat.gles ? " es" : "");
@ -621,15 +624,15 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha
switch (replaceBlendFuncA) {
case GE_SRCBLEND_DSTCOLOR: srcFactor = "ERROR"; break;
case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "ERROR"; break;
case GE_SRCBLEND_SRCALPHA: srcFactor = "vec3(v.a)"; break;
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "vec3(1.0 - v.a)"; break;
case GE_SRCBLEND_SRCALPHA: srcFactor = "splat3(v.a)"; break;
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "splat3(1.0 - v.a)"; break;
case GE_SRCBLEND_DSTALPHA: srcFactor = "ERROR"; break;
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "ERROR"; break;
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "splat3(v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "splat3(1.0 - v.a * 2.0)"; break;
// PRE_SRC for REPLACE_BLEND_PRE_SRC_2X_ALPHA means "double the src."
// It's close to the same, but clamping can still be an issue.
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "vec3(2.0)"; break;
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "splat3(2.0)"; break;
case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "ERROR"; break;
case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break;
default: srcFactor = "u_blendFixA"; break;
@ -654,34 +657,34 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha
WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch);
}
const char *srcFactor = "vec3(1.0)";
const char *dstFactor = "vec3(0.0)";
const char *srcFactor = "splat3(1.0)";
const char *dstFactor = "splat3(0.0)";
switch (replaceBlendFuncA) {
case GE_SRCBLEND_DSTCOLOR: srcFactor = "destColor.rgb"; break;
case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(vec3(1.0) - destColor.rgb)"; break;
case GE_SRCBLEND_SRCALPHA: srcFactor = "vec3(v.a)"; break;
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "vec3(1.0 - v.a)"; break;
case GE_SRCBLEND_DSTALPHA: srcFactor = "vec3(destColor.a)"; break;
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a)"; break;
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "vec3(destColor.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a * 2.0)"; break;
case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(splat3(1.0) - destColor.rgb)"; break;
case GE_SRCBLEND_SRCALPHA: srcFactor = "splat3(v.a)"; break;
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "splat3(1.0 - v.a)"; break;
case GE_SRCBLEND_DSTALPHA: srcFactor = "splat3(destColor.a)"; break;
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "splat3(1.0 - destColor.a)"; break;
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "splat3(v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "splat3(1.0 - v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "splat3(destColor.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "splat3(1.0 - destColor.a * 2.0)"; break;
case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break;
default: srcFactor = "u_blendFixA"; break;
}
switch (replaceBlendFuncB) {
case GE_DSTBLEND_SRCCOLOR: dstFactor = "v.rgb"; break;
case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(vec3(1.0) - v.rgb)"; break;
case GE_DSTBLEND_SRCALPHA: dstFactor = "vec3(v.a)"; break;
case GE_DSTBLEND_INVSRCALPHA: dstFactor = "vec3(1.0 - v.a)"; break;
case GE_DSTBLEND_DSTALPHA: dstFactor = "vec3(destColor.a)"; break;
case GE_DSTBLEND_INVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a)"; break;
case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "vec3(v.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "vec3(1.0 - v.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "vec3(destColor.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a * 2.0)"; break;
case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(splat3(1.0) - v.rgb)"; break;
case GE_DSTBLEND_SRCALPHA: dstFactor = "splat3(v.a)"; break;
case GE_DSTBLEND_INVSRCALPHA: dstFactor = "splat3(1.0 - v.a)"; break;
case GE_DSTBLEND_DSTALPHA: dstFactor = "splat3(destColor.a)"; break;
case GE_DSTBLEND_INVDSTALPHA: dstFactor = "splat3(1.0 - destColor.a)"; break;
case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "splat3(v.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "splat3(1.0 - v.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "splat3(destColor.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "splat3(1.0 - destColor.a * 2.0)"; break;
case GE_DSTBLEND_FIXB: dstFactor = "u_blendFixB"; break;
default: dstFactor = "u_blendFixB"; break;
}

View file

@ -264,7 +264,7 @@ static void SetFloatUniform4(GLRenderManager *render, GLint *uniform, float data
static void SetMatrix4x3(GLRenderManager *render, GLint *uniform, const float *m4x3) {
float m4x4[16];
ConvertMatrix4x3To4x4(m4x4, m4x3);
ConvertMatrix4x3To4x4Transposed(m4x4, m4x3);
render->SetUniformM4x4(uniform, m4x4);
}
@ -495,7 +495,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu
float bonetemp[16];
for (int i = 0; i < numBones; i++) {
if (dirty & (DIRTY_BONEMATRIX0 << i)) {
ConvertMatrix4x3To4x4(bonetemp, gstate.boneMatrix + 12 * i);
ConvertMatrix4x3To4x4Transposed(bonetemp, gstate.boneMatrix + 12 * i);
render_->SetUniformM4x4(&u_bone[i], bonetemp);
}
}
@ -595,6 +595,7 @@ void ShaderManagerGLES::DetectShaderLanguage() {
compat.bitwiseOps = false;
compat.lastFragData = nullptr;
compat.gles = gl_extensions.IsGLES;
compat.forceMatrix4x4 = true;
if (compat.gles) {
if (gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300)) {

View file

@ -20,15 +20,15 @@
#include <locale.h>
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/StringUtils.h"
#include "Core/Config.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUState.h"
#include "Core/Config.h"
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/ShaderUniforms.h"
#include "GPU/Common/VertexDecoderCommon.h"
#include "GPU/GLES/VertexShaderGeneratorGLES.h"
#include "GPU/GLES/ShaderManagerGLES.h"
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/VertexDecoderCommon.h"
#undef WRITE
@ -87,31 +87,52 @@ static const char * const boneWeightInDecl[9] = {
// TODO: Skip all this if we can actually get a 16-bit depth buffer along with stencil, which
// is a bit of a rare configuration, although quite common on mobile.
// Vertex input declarations for skinning weights, indexed by the number of
// bone weights in use (1..8). The first four weights are packed into w1 at
// location 3; any further weights go into w2 at location 4. These locations
// equal PspAttributeLocation::W1 and PspAttributeLocation::W2.
static const char * const boneWeightDecl[9] = {
	// Index 0 is a placeholder: the weight count used as index starts at 1.
	"#ERROR#",

	// 1-4 weights: w1 alone, widened as needed.
	"layout(location = 3) in float w1;\n",
	"layout(location = 3) in vec2 w1;\n",
	"layout(location = 3) in vec3 w1;\n",
	"layout(location = 3) in vec4 w1;\n",

	// 5-8 weights: w1 is a full vec4 and w2 carries the remainder.
	"layout(location = 3) in vec4 w1;\n"
	"layout(location = 4) in float w2;\n",

	"layout(location = 3) in vec4 w1;\n"
	"layout(location = 4) in vec2 w2;\n",

	"layout(location = 3) in vec4 w1;\n"
	"layout(location = 4) in vec3 w2;\n",

	"layout(location = 3) in vec4 w1;\n"
	"layout(location = 4) in vec4 w2;\n",
};
// Header text written first when generating GLSL for Vulkan (compat.vulkan):
// the GLSL version, two extension enables, and the splat3() helper macro that
// expands to a vec3 constructor. The text ends with an empty line.
static const char *vulkan_glsl_preamble = R"(#version 450
#extension GL_ARB_separate_shader_objects : enable
#extension GL_ARB_shading_language_420pack : enable
#define splat3(x) vec3(x)

)";
bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShaderCompat &compat, uint32_t *attrMask, uint64_t *uniformMask, std::string *errorString) {
*attrMask = 0;
*uniformMask = 0;
char *p = buffer;
WRITE(p, "#version %d%s\n", compat.glslVersionNumber, compat.gles ? " es" : "");
bool highpFog = false;
bool highpTexcoord = false;
if (compat.gles) {
// PowerVR needs highp to do the fog in MHU correctly.
// Others don't, and some can't handle highp in the fragment shader.
highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false;
highpTexcoord = highpFog;
char *p = buffer;
if (compat.vulkan) {
WRITE(p, "%s", vulkan_glsl_preamble);
} else {
if (compat.gles) {
// PowerVR needs highp to do the fog in MHU correctly.
// Others don't, and some can't handle highp in the fragment shader.
highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false;
highpTexcoord = highpFog;
}
WRITE(p, "#version %d%s\n", compat.glslVersionNumber, compat.gles ? " es" : "");
WRITE(p, "#define splat3(x) vec3(x)\n");
}
if (gl_extensions.EXT_gpu_shader4) {
if (!compat.vulkan && gl_extensions.EXT_gpu_shader4) {
WRITE(p, "#extension GL_EXT_gpu_shader4 : enable\n");
}
WRITE(p, "#define splat3(x) vec3(x)\n");
if (compat.gles) {
WRITE(p, "precision highp float;\n");
} else {
} else if (!compat.vulkan) {
WRITE(p, "#define lowp\n");
WRITE(p, "#define mediump\n");
WRITE(p, "#define highp\n");
@ -120,7 +141,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
bool isModeThrough = id.Bit(VS_BIT_IS_THROUGH);
bool lmode = id.Bit(VS_BIT_LMODE);
bool doTexture = id.Bit(VS_BIT_DO_TEXTURE);
bool doTextureProjection = id.Bit(VS_BIT_DO_TEXTURE_TRANSFORM);
bool doTextureTransform = id.Bit(VS_BIT_DO_TEXTURE_TRANSFORM);
GETexMapMode uvGenMode = static_cast<GETexMapMode>(id.Bits(VS_BIT_UVGEN_MODE, 2));
@ -141,15 +162,30 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE);
int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3);
bool doBezier = id.Bit(VS_BIT_BEZIER);
bool doSpline = id.Bit(VS_BIT_SPLINE);
// Apparently we don't support bezier/spline together with bones.
bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones && useHWTransform;
bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones && useHWTransform;
if ((doBezier || doSpline) && !hasNormal) {
// Bad usage.
*errorString = "Invalid flags - tess requires normal.";
return false;
}
bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS);
bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS);
bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS);
bool flipNormalTess = id.Bit(VS_BIT_NORM_REVERSE_TESS);
if (compat.vulkan) {
WRITE(p, "\n");
WRITE(p, "layout (std140, set = 0, binding = 3) uniform baseVars {\n%s};\n", ub_baseStr);
if (enableLighting || doShadeMapping)
WRITE(p, "layout (std140, set = 0, binding = 4) uniform lightVars {\n%s};\n", ub_vs_lightsStr);
if (enableBones)
WRITE(p, "layout (std140, set = 0, binding = 5) uniform boneVars {\n%s};\n", ub_vs_bonesStr);
}
const char *shading = "";
if (compat.glslES30)
if (compat.glslES30 || compat.vulkan)
shading = doFlatShading ? "flat " : "";
DoLightComputation doLight[4] = { LIGHT_OFF, LIGHT_OFF, LIGHT_OFF, LIGHT_OFF };
@ -166,157 +202,200 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
int numBoneWeights = 0;
int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2);
if (enableBones) {
numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3);
const char * const * boneWeightDecl = boneWeightAttrDecl;
if (!strcmp(compat.attribute, "in")) {
boneWeightDecl = boneWeightInDecl;
}
WRITE(p, "%s", boneWeightDecl[numBoneWeights]);
*attrMask |= 1 << ATTR_W1;
if (numBoneWeights >= 5)
*attrMask |= 1 << ATTR_W2;
}
bool texcoordInVec3 = false;
if (useHWTransform)
WRITE(p, "%s vec3 position;\n", compat.attribute);
else
WRITE(p, "%s vec4 position;\n", compat.attribute); // need to pass the fog coord in w
*attrMask |= 1 << ATTR_POSITION;
if (useHWTransform && hasNormal) {
WRITE(p, "%s mediump vec3 normal;\n", compat.attribute);
*attrMask |= 1 << ATTR_NORMAL;
}
bool texcoordVec3In = false;
if (doTexture && hasTexcoord) {
if (!useHWTransform && doTextureProjection && !isModeThrough) {
WRITE(p, "%s vec3 texcoord;\n", compat.attribute);
texcoordVec3In = true;
} else {
WRITE(p, "%s vec2 texcoord;\n", compat.attribute);
}
*attrMask |= 1 << ATTR_TEXCOORD;
}
if (hasColor) {
WRITE(p, "%s lowp vec4 color0;\n", compat.attribute);
*attrMask |= 1 << ATTR_COLOR0;
if (lmode && !useHWTransform) { // only software transform supplies color1 as vertex data
WRITE(p, "%s lowp vec3 color1;\n", compat.attribute);
*attrMask |= 1 << ATTR_COLOR1;
}
}
if (isModeThrough) {
WRITE(p, "uniform mat4 u_proj_through;\n");
*uniformMask |= DIRTY_PROJTHROUGHMATRIX;
} else {
WRITE(p, "uniform mat4 u_proj;\n");
*uniformMask |= DIRTY_PROJMATRIX;
// Add all the uniforms we'll need to transform properly.
}
bool scaleUV = !isModeThrough && (uvGenMode == GE_TEXMAP_TEXTURE_COORDS || uvGenMode == GE_TEXMAP_UNKNOWN);
if (useHWTransform) {
// When transforming by hardware, we need a great deal more uniforms...
WRITE(p, "uniform mat4 u_world;\n");
WRITE(p, "uniform mat4 u_view;\n");
*uniformMask |= DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX;
if (doTextureProjection) {
WRITE(p, "uniform mediump mat4 u_texmtx;\n");
*uniformMask |= DIRTY_TEXMATRIX;
}
if (compat.vulkan) {
if (enableBones) {
#ifdef USE_BONE_ARRAY
WRITE(p, "uniform mediump mat4 u_bone[%i];\n", numBoneWeights);
*uniformMask |= DIRTY_BONE_UNIFORMS;
#else
for (int i = 0; i < numBoneWeights; i++) {
WRITE(p, "uniform mat4 u_bone%i;\n", i);
*uniformMask |= DIRTY_BONEMATRIX0 << i;
}
#endif
numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3);
WRITE(p, "%s", boneWeightDecl[numBoneWeights]);
}
if (useHWTransform)
WRITE(p, "layout (location = %d) in vec3 position;\n", (int)PspAttributeLocation::POSITION);
else
// we pass the fog coord in w
WRITE(p, "layout (location = %d) in vec4 position;\n", (int)PspAttributeLocation::POSITION);
if (useHWTransform && hasNormal)
WRITE(p, "layout (location = %d) in vec3 normal;\n", (int)PspAttributeLocation::NORMAL);
if (doTexture && hasTexcoord) {
if (!useHWTransform && doTextureTransform && !isModeThrough) {
WRITE(p, "layout (location = %d) in vec3 texcoord;\n", (int)PspAttributeLocation::TEXCOORD);
texcoordInVec3 = true;
} else
WRITE(p, "layout (location = %d) in vec2 texcoord;\n", (int)PspAttributeLocation::TEXCOORD);
}
if (hasColor) {
WRITE(p, "layout (location = %d) in vec4 color0;\n", (int)PspAttributeLocation::COLOR0);
if (lmode && !useHWTransform) // only software transform supplies color1 as vertex data
WRITE(p, "layout (location = %d) in vec3 color1;\n", (int)PspAttributeLocation::COLOR1);
}
WRITE(p, "layout (location = 1) %sout vec4 v_color0;\n", shading);
if (lmode) {
WRITE(p, "layout (location = 2) %sout vec3 v_color1;\n", shading);
}
if (doTexture) {
WRITE(p, "uniform vec4 u_uvscaleoffset;\n");
*uniformMask |= DIRTY_UVSCALEOFFSET;
WRITE(p, "layout (location = 0) out vec3 v_texcoord;\n");
}
for (int i = 0; i < 4; i++) {
if (doLight[i] != LIGHT_OFF) {
// This is needed for shade mapping
WRITE(p, "uniform vec3 u_lightpos%i;\n", i);
*uniformMask |= DIRTY_LIGHT0 << i;
if (enableFog) {
// See the fragment shader generator
WRITE(p, "layout (location = 3) out float v_fogdepth;\n");
}
} else {
if (enableBones) {
numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3);
const char * const * boneWeightDecl = boneWeightAttrDecl;
if (!strcmp(compat.attribute, "in")) {
boneWeightDecl = boneWeightInDecl;
}
if (doLight[i] == LIGHT_FULL) {
*uniformMask |= DIRTY_LIGHT0 << i;
GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2));
GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2));
WRITE(p, "%s", boneWeightDecl[numBoneWeights]);
*attrMask |= 1 << ATTR_W1;
if (numBoneWeights >= 5)
*attrMask |= 1 << ATTR_W2;
}
if (type != GE_LIGHTTYPE_DIRECTIONAL)
WRITE(p, "uniform mediump vec3 u_lightatt%i;\n", i);
if (useHWTransform)
WRITE(p, "%s vec3 position;\n", compat.attribute);
else
WRITE(p, "%s vec4 position;\n", compat.attribute); // need to pass the fog coord in w
*attrMask |= 1 << ATTR_POSITION;
if (type == GE_LIGHTTYPE_SPOT || type == GE_LIGHTTYPE_UNKNOWN) {
WRITE(p, "uniform mediump vec3 u_lightdir%i;\n", i);
WRITE(p, "uniform mediump vec2 u_lightangle_spotCoef%i;\n", i);
}
WRITE(p, "uniform lowp vec3 u_lightambient%i;\n", i);
WRITE(p, "uniform lowp vec3 u_lightdiffuse%i;\n", i);
if (useHWTransform && hasNormal) {
WRITE(p, "%s mediump vec3 normal;\n", compat.attribute);
*attrMask |= 1 << ATTR_NORMAL;
}
if (comp == GE_LIGHTCOMP_BOTH) {
WRITE(p, "uniform lowp vec3 u_lightspecular%i;\n", i);
}
if (doTexture && hasTexcoord) {
if (!useHWTransform && doTextureTransform && !isModeThrough) {
WRITE(p, "%s vec3 texcoord;\n", compat.attribute);
texcoordInVec3 = true;
} else {
WRITE(p, "%s vec2 texcoord;\n", compat.attribute);
}
*attrMask |= 1 << ATTR_TEXCOORD;
}
if (hasColor) {
WRITE(p, "%s lowp vec4 color0;\n", compat.attribute);
*attrMask |= 1 << ATTR_COLOR0;
if (lmode && !useHWTransform) { // only software transform supplies color1 as vertex data
WRITE(p, "%s lowp vec3 color1;\n", compat.attribute);
*attrMask |= 1 << ATTR_COLOR1;
}
}
if (enableLighting) {
WRITE(p, "uniform lowp vec4 u_ambient;\n");
*uniformMask |= DIRTY_AMBIENT;
if ((matUpdate & 2) == 0 || !hasColor) {
WRITE(p, "uniform lowp vec3 u_matdiffuse;\n");
*uniformMask |= DIRTY_MATDIFFUSE;
}
WRITE(p, "uniform lowp vec4 u_matspecular;\n"); // Specular coef is contained in alpha
WRITE(p, "uniform lowp vec3 u_matemissive;\n");
*uniformMask |= DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE;
}
}
if (useHWTransform || !hasColor) {
WRITE(p, "uniform lowp vec4 u_matambientalpha;\n"); // matambient + matalpha
*uniformMask |= DIRTY_MATAMBIENTALPHA;
}
if (enableFog) {
WRITE(p, "uniform highp vec2 u_fogcoef;\n");
*uniformMask |= DIRTY_FOGCOEF;
}
if (!isModeThrough && gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, "uniform highp vec4 u_depthRange;\n");
*uniformMask |= DIRTY_DEPTHRANGE;
}
if (!isModeThrough) {
WRITE(p, "uniform highp vec4 u_cullRangeMin;\n");
WRITE(p, "uniform highp vec4 u_cullRangeMax;\n");
*uniformMask |= DIRTY_CULLRANGE;
}
WRITE(p, "%s%s lowp vec4 v_color0;\n", shading, compat.varying_vs);
if (lmode) {
WRITE(p, "%s%s lowp vec3 v_color1;\n", shading, compat.varying_vs);
}
if (doTexture) {
WRITE(p, "%s %s vec3 v_texcoord;\n", compat.varying_vs, highpTexcoord ? "highp" : "mediump");
}
if (enableFog) {
// See the fragment shader generator
if (highpFog) {
WRITE(p, "%s highp float v_fogdepth;\n", compat.varying_vs);
if (isModeThrough) {
WRITE(p, "uniform mat4 u_proj_through;\n");
*uniformMask |= DIRTY_PROJTHROUGHMATRIX;
} else {
WRITE(p, "%s mediump float v_fogdepth;\n", compat.varying_vs);
WRITE(p, "uniform mat4 u_proj;\n");
*uniformMask |= DIRTY_PROJMATRIX;
// Add all the uniforms we'll need to transform properly.
}
if (useHWTransform) {
// When transforming by hardware, we need a great deal more uniforms...
// TODO: Use 4x3 matrices where possible. Though probably doesn't matter much.
WRITE(p, "uniform mat4 u_world;\n");
WRITE(p, "uniform mat4 u_view;\n");
*uniformMask |= DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX;
if (doTextureTransform) {
WRITE(p, "uniform mediump mat4 u_texmtx;\n");
*uniformMask |= DIRTY_TEXMATRIX;
}
if (enableBones) {
#ifdef USE_BONE_ARRAY
WRITE(p, "uniform mediump mat4 u_bone[%i];\n", numBoneWeights);
*uniformMask |= DIRTY_BONE_UNIFORMS;
#else
for (int i = 0; i < numBoneWeights; i++) {
WRITE(p, "uniform mat4 u_bone%i;\n", i);
*uniformMask |= DIRTY_BONEMATRIX0 << i;
}
#endif
}
if (doTexture) {
WRITE(p, "uniform vec4 u_uvscaleoffset;\n");
*uniformMask |= DIRTY_UVSCALEOFFSET;
}
for (int i = 0; i < 4; i++) {
if (doLight[i] != LIGHT_OFF) {
// This is needed for shade mapping
WRITE(p, "uniform vec3 u_lightpos%i;\n", i);
*uniformMask |= DIRTY_LIGHT0 << i;
}
if (doLight[i] == LIGHT_FULL) {
*uniformMask |= DIRTY_LIGHT0 << i;
GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2));
GELightComputation comp = static_cast<GELightComputation>(id.Bits(VS_BIT_LIGHT0_COMP + 4 * i, 2));
if (type != GE_LIGHTTYPE_DIRECTIONAL)
WRITE(p, "uniform mediump vec3 u_lightatt%i;\n", i);
if (type == GE_LIGHTTYPE_SPOT || type == GE_LIGHTTYPE_UNKNOWN) {
WRITE(p, "uniform mediump vec3 u_lightdir%i;\n", i);
WRITE(p, "uniform mediump vec2 u_lightangle_spotCoef%i;\n", i);
}
WRITE(p, "uniform lowp vec3 u_lightambient%i;\n", i);
WRITE(p, "uniform lowp vec3 u_lightdiffuse%i;\n", i);
if (comp == GE_LIGHTCOMP_BOTH) {
WRITE(p, "uniform lowp vec3 u_lightspecular%i;\n", i);
}
}
}
if (enableLighting) {
WRITE(p, "uniform lowp vec4 u_ambient;\n");
*uniformMask |= DIRTY_AMBIENT;
if ((matUpdate & 2) == 0 || !hasColor) {
WRITE(p, "uniform lowp vec3 u_matdiffuse;\n");
*uniformMask |= DIRTY_MATDIFFUSE;
}
WRITE(p, "uniform lowp vec4 u_matspecular;\n"); // Specular coef is contained in alpha
WRITE(p, "uniform lowp vec3 u_matemissive;\n");
*uniformMask |= DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE;
}
}
if (useHWTransform || !hasColor) {
WRITE(p, "uniform lowp vec4 u_matambientalpha;\n"); // matambient + matalpha
*uniformMask |= DIRTY_MATAMBIENTALPHA;
}
if (enableFog) {
WRITE(p, "uniform highp vec2 u_fogcoef;\n");
*uniformMask |= DIRTY_FOGCOEF;
}
if (!isModeThrough && gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, "uniform highp vec4 u_depthRange;\n");
*uniformMask |= DIRTY_DEPTHRANGE;
}
if (!isModeThrough) {
WRITE(p, "uniform highp vec4 u_cullRangeMin;\n");
WRITE(p, "uniform highp vec4 u_cullRangeMax;\n");
*uniformMask |= DIRTY_CULLRANGE;
}
WRITE(p, "%s%s lowp vec4 v_color0;\n", shading, compat.varying_vs);
if (lmode) {
WRITE(p, "%s%s lowp vec3 v_color1;\n", shading, compat.varying_vs);
}
if (doTexture) {
WRITE(p, "%s %s vec3 v_texcoord;\n", compat.varying_vs, highpTexcoord ? "highp" : "mediump");
}
if (enableFog) {
// See the fragment shader generator
if (highpFog) {
WRITE(p, "%s highp float v_fogdepth;\n", compat.varying_vs);
} else {
WRITE(p, "%s mediump float v_fogdepth;\n", compat.varying_vs);
}
}
}
@ -336,11 +415,33 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
if (doBezier || doSpline) {
*uniformMask |= DIRTY_BEZIERSPLINE;
WRITE(p, "uniform sampler2D u_tess_points;\n"); // Control Points
WRITE(p, "uniform sampler2D u_tess_weights_u;\n");
WRITE(p, "uniform sampler2D u_tess_weights_v;\n");
if (compat.vulkan) {
WRITE(p, "struct TessData {\n");
WRITE(p, " vec4 pos;\n");
WRITE(p, " vec4 uv;\n");
WRITE(p, " vec4 color;\n");
WRITE(p, "};\n");
WRITE(p, "layout (std430, set = 0, binding = 6) readonly buffer s_tess_data {\n");
WRITE(p, " TessData data[];\n");
WRITE(p, "} tess_data;\n");
WRITE(p, "uniform int u_spline_counts;\n");
WRITE(p, "struct TessWeight {\n");
WRITE(p, " vec4 basis;\n");
WRITE(p, " vec4 deriv;\n");
WRITE(p, "};\n");
WRITE(p, "layout (std430, set = 0, binding = 7) readonly buffer s_tess_weights_u {\n");
WRITE(p, " TessWeight data[];\n");
WRITE(p, "} tess_weights_u;\n");
WRITE(p, "layout (std430, set = 0, binding = 8) readonly buffer s_tess_weights_v {\n");
WRITE(p, " TessWeight data[];\n");
WRITE(p, "} tess_weights_v;\n");
} else {
WRITE(p, "uniform sampler2D u_tess_points;\n"); // Control Points
WRITE(p, "uniform sampler2D u_tess_weights_u;\n");
WRITE(p, "uniform sampler2D u_tess_weights_v;\n");
WRITE(p, "uniform int u_spline_counts;\n");
}
for (int i = 2; i <= 4; i++) {
// Define 3 types vec2, vec3, vec4
@ -355,7 +456,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
WRITE(p, "}\n");
}
if (!gl_extensions.VersionGEThan(3, 0, 0)) { // For glsl version 1.10
if (compat.glslVersionNumber < 130) { // For glsl version 1.10
WRITE(p, "mat4 outerProduct(vec4 u, vec4 v) {\n");
WRITE(p, " return mat4(u * v[0], u * v[1], u * v[2], u * v[3]);\n");
WRITE(p, "}\n");
@ -378,23 +479,43 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
WRITE(p, " vec3 _pos[16];\n");
WRITE(p, " vec2 _tex[16];\n");
WRITE(p, " vec4 _col[16];\n");
WRITE(p, " int index_u, index_v;\n");
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
WRITE(p, " index_u = (%i + point_pos.x);\n", j);
WRITE(p, " index_v = (%i + point_pos.y);\n", i);
WRITE(p, " _pos[%i] = %s(u_tess_points, ivec2(index_u, index_v), 0).xyz;\n", i * 4 + j, compat.texelFetch);
if (doTexture && hasTexcoordTess)
WRITE(p, " _tex[%i] = %s(u_tess_points, ivec2(index_u + u_spline_counts, index_v), 0).xy;\n", i * 4 + j, compat.texelFetch);
if (hasColorTess)
WRITE(p, " _col[%i] = %s(u_tess_points, ivec2(index_u + u_spline_counts * 2, index_v), 0).rgba;\n", i * 4 + j, compat.texelFetch);
if (compat.coefsFromBuffers) {
WRITE(p, " int index;\n");
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
WRITE(p, " index = (%i + point_pos.y) * int(u_spline_counts) + (%i + point_pos.x);\n", i, j);
WRITE(p, " _pos[%i] = tess_data.data[index].pos.xyz;\n", i * 4 + j);
if (doTexture && hasTexcoordTess)
WRITE(p, " _tex[%i] = tess_data.data[index].uv.xy;\n", i * 4 + j);
if (hasColorTess)
WRITE(p, " _col[%i] = tess_data.data[index].color;\n", i * 4 + j);
}
}
}
// Basis polynomials as weight coefficients
WRITE(p, " vec4 basis_u = %s(u_tess_weights_u, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.x * 2, 0)");
WRITE(p, " vec4 basis_v = %s(u_tess_weights_v, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.y * 2, 0)");
WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n");
// Basis polynomials as weight coefficients
WRITE(p, " vec4 basis_u = tess_weights_u.data[weight_idx.x].basis;\n");
WRITE(p, " vec4 basis_v = tess_weights_v.data[weight_idx.y].basis;\n");
WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n");
} else {
WRITE(p, " int index_u, index_v;\n");
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
WRITE(p, " index_u = (%i + point_pos.x);\n", j);
WRITE(p, " index_v = (%i + point_pos.y);\n", i);
WRITE(p, " _pos[%i] = %s(u_tess_points, ivec2(index_u, index_v), 0).xyz;\n", i * 4 + j, compat.texelFetch);
if (doTexture && hasTexcoordTess)
WRITE(p, " _tex[%i] = %s(u_tess_points, ivec2(index_u + u_spline_counts, index_v), 0).xy;\n", i * 4 + j, compat.texelFetch);
if (hasColorTess)
WRITE(p, " _col[%i] = %s(u_tess_points, ivec2(index_u + u_spline_counts * 2, index_v), 0).rgba;\n", i * 4 + j, compat.texelFetch);
}
}
// Basis polynomials as weight coefficients
WRITE(p, " vec4 basis_u = %s(u_tess_weights_u, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.x * 2, 0)");
WRITE(p, " vec4 basis_v = %s(u_tess_weights_v, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.y * 2, 0)");
WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n");
}
// Tessellate
WRITE(p, " tess.pos = tess_sample(_pos, basis);\n");
@ -409,9 +530,15 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
else
WRITE(p, " tess.col = u_matambientalpha;\n");
if (hasNormalTess) {
// Derivatives as weight coefficients
WRITE(p, " vec4 deriv_u = %s(u_tess_weights_u, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.x * 2 + 1, 0)");
WRITE(p, " vec4 deriv_v = %s(u_tess_weights_v, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.y * 2 + 1, 0)");
if (compat.coefsFromBuffers) {
// Derivatives as weight coefficients
WRITE(p, " vec4 deriv_u = tess_weights_u.data[weight_idx.x].deriv;\n");
WRITE(p, " vec4 deriv_v = tess_weights_v.data[weight_idx.y].deriv;\n");
} else {
// Derivatives as weight coefficients
WRITE(p, " vec4 deriv_u = %s(u_tess_weights_u, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.x * 2 + 1, 0)");
WRITE(p, " vec4 deriv_v = %s(u_tess_weights_v, %s, 0);\n", compat.texelFetch, "ivec2(weight_idx.y * 2 + 1, 0)");
}
WRITE(p, " vec3 du = tess_sample(_pos, outerProduct(deriv_u, basis_v));\n");
WRITE(p, " vec3 dv = tess_sample(_pos, outerProduct(basis_u, deriv_v));\n");
@ -425,7 +552,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
if (!useHWTransform) {
// Simple pass-through of vertex data to fragment shader
if (doTexture) {
if (texcoordVec3In) {
if (texcoordInVec3) {
WRITE(p, " v_texcoord = texcoord;\n");
} else {
WRITE(p, " v_texcoord = vec3(texcoord, 1.0);\n");
@ -461,17 +588,17 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
WRITE(p, " Tess tess;\n");
WRITE(p, " tessellate(tess);\n");
WRITE(p, " vec3 worldpos = (u_world * vec4(tess.pos.xyz, 1.0)).xyz;\n");
WRITE(p, " vec3 worldpos = (vec4(tess.pos.xyz, 1.0) * u_world).xyz;\n");
if (hasNormalTess) {
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%stess.nrm, 0.0)).xyz);\n", flipNormalTess ? "-" : "");
WRITE(p, " mediump vec3 worldnormal = normalize((vec4(%stess.nrm, 0.0) * u_world).xyz);\n", flipNormalTess ? "-" : "");
} else {
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
} else {
// No skinning, just standard T&L.
WRITE(p, " vec3 worldpos = (u_world * vec4(position.xyz, 1.0)).xyz;\n");
WRITE(p, " vec3 worldpos = (vec4(position.xyz, 1.0) * u_world).xyz;\n");
if (hasNormal)
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%snormal, 0.0)).xyz);\n", flipNormal ? "-" : "");
WRITE(p, " mediump vec3 worldnormal = normalize((vec4(%snormal, 0.0) * u_world).xyz);\n", flipNormal ? "-" : "");
else
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
@ -484,49 +611,38 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
"w2.x", "w2.y", "w2.z", "w2.w",
};
#ifdef USE_BONE_ARRAY
if (numBoneWeights == 1)
WRITE(p, " mat4 skinMatrix = w1 * u_bone[0]");
else
WRITE(p, " mat4 skinMatrix = w1.x * u_bone[0]");
for (int i = 1; i < numBoneWeights; i++) {
const char *weightAttr = boneWeightAttr[i];
// workaround for "cant do .x of scalar" issue
if (numBoneWeights == 1 && i == 0) weightAttr = "w1";
if (numBoneWeights == 5 && i == 4) weightAttr = "w2";
WRITE(p, " + %s * u_bone[%i]", weightAttr, i);
}
#else
const char *boneMatrix = compat.forceMatrix4x4 ? "mat4" : "mat3x4";
// Uncomment this to screw up bone shaders to check the vertex shader software fallback
// WRITE(p, "THIS SHOULD ERROR! #error");
if (numBoneWeights == 1)
WRITE(p, " mat4 skinMatrix = w1 * u_bone0");
if (numBoneWeights == 1 && !compat.vulkan)
WRITE(p, " %s skinMatrix = w1 * u_bone0", boneMatrix);
else
WRITE(p, " mat4 skinMatrix = w1.x * u_bone0");
WRITE(p, " %s skinMatrix = w1.x * u_bone0", boneMatrix);
for (int i = 1; i < numBoneWeights; i++) {
const char *weightAttr = boneWeightAttr[i];
// workaround for "cant do .x of scalar" issue
if (numBoneWeights == 1 && i == 0) weightAttr = "w1";
if (numBoneWeights == 5 && i == 4) weightAttr = "w2";
if (!compat.vulkan) {
if (numBoneWeights == 1 && i == 0) weightAttr = "w1";
if (numBoneWeights == 5 && i == 4) weightAttr = "w2";
}
WRITE(p, " + %s * u_bone%i", weightAttr, i);
}
#endif
WRITE(p, ";\n");
// Trying to simplify this results in bugs in LBP...
WRITE(p, " vec3 skinnedpos = (skinMatrix * vec4(position, 1.0)).xyz %s;\n", factor);
WRITE(p, " vec3 worldpos = (u_world * vec4(skinnedpos, 1.0)).xyz;\n");
WRITE(p, " vec3 skinnedpos = (vec4(position, 1.0) * skinMatrix).xyz %s;\n", factor);
WRITE(p, " vec3 worldpos = (vec4(skinnedpos, 1.0) * u_world).xyz;\n");
if (hasNormal) {
WRITE(p, " mediump vec3 skinnednormal = (skinMatrix * vec4(%snormal, 0.0)).xyz %s;\n", flipNormal ? "-" : "", factor);
WRITE(p, " mediump vec3 skinnednormal = (vec4(%snormal, 0.0) * skinMatrix).xyz %s;\n", flipNormal ? "-" : "", factor);
} else {
WRITE(p, " mediump vec3 skinnednormal = (skinMatrix * vec4(0.0, 0.0, %s1.0, 0.0)).xyz %s;\n", flipNormal ? "-" : "", factor);
WRITE(p, " mediump vec3 skinnednormal = (vec4(0.0, 0.0, %s1.0, 0.0) * skinMatrix).xyz %s;\n", flipNormal ? "-" : "", factor);
}
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(skinnednormal, 0.0)).xyz);\n");
WRITE(p, " mediump vec3 worldnormal = normalize((vec4(skinnednormal, 0.0) * u_world).xyz);\n");
}
WRITE(p, " vec4 viewPos = u_view * vec4(worldpos, 1.0);\n");
WRITE(p, " vec4 viewPos = vec4((vec4(worldpos, 1.0) * u_view).xyz, 1.0);\n");
// Final view and projection transforms.
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
@ -688,6 +804,8 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
WRITE(p, " v_color1 = vec3(0.0);\n");
}
bool scaleUV = !isModeThrough && (uvGenMode == GE_TEXMAP_TEXTURE_COORDS || uvGenMode == GE_TEXMAP_UNKNOWN);
// Step 3: UV generation
if (doTexture) {
switch (uvGenMode) {
@ -696,7 +814,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
if (scaleUV) {
if (hasTexcoord) {
if (doBezier || doSpline)
WRITE(p, " v_texcoord = vec3(tess.tex * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
WRITE(p, " v_texcoord = vec3(tess.tex.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
else
WRITE(p, " v_texcoord = vec3(texcoord.xy * u_uvscaleoffset.xy, 0.0);\n");
} else {
@ -742,7 +860,7 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
break;
}
// Transform by texture matrix. XYZ as we are doing projection mapping.
WRITE(p, " v_texcoord = (u_texmtx * %s).xyz * vec3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str());
WRITE(p, " v_texcoord = (%s * u_texmtx).xyz * vec3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str());
}
break;
@ -777,6 +895,9 @@ bool GenerateVertexShaderGLSL(const VShaderID &id, char *buffer, const GLSLShade
WRITE(p, " }\n");
}
WRITE(p, " gl_Position = outPos;\n");
if (compat.vulkan) {
WRITE(p, " gl_PointSize = 1.0;\n");
}
WRITE(p, "}\n");
return true;

View file

@ -483,7 +483,6 @@
<ClInclude Include="Vulkan\StateMappingVulkan.h" />
<ClInclude Include="Vulkan\TextureCacheVulkan.h" />
<ClInclude Include="Vulkan\TextureScalerVulkan.h" />
<ClInclude Include="Vulkan\VertexShaderGeneratorVulkan.h" />
<ClInclude Include="Vulkan\VulkanUtil.h" />
</ItemGroup>
<ItemGroup>
@ -674,7 +673,6 @@
<ClCompile Include="Vulkan\StencilBufferVulkan.cpp" />
<ClCompile Include="Vulkan\TextureCacheVulkan.cpp" />
<ClCompile Include="Vulkan\TextureScalerVulkan.cpp" />
<ClCompile Include="Vulkan\VertexShaderGeneratorVulkan.cpp" />
<ClCompile Include="Vulkan\VulkanUtil.cpp" />
</ItemGroup>
<ItemGroup>

View file

@ -171,9 +171,6 @@
<ClInclude Include="Vulkan\TextureScalerVulkan.h">
<Filter>Vulkan</Filter>
</ClInclude>
<ClInclude Include="Vulkan\VertexShaderGeneratorVulkan.h">
<Filter>Vulkan</Filter>
</ClInclude>
<ClInclude Include="Vulkan\VulkanUtil.h">
<Filter>Vulkan</Filter>
</ClInclude>
@ -431,9 +428,6 @@
<ClCompile Include="Vulkan\TextureScalerVulkan.cpp">
<Filter>Vulkan</Filter>
</ClCompile>
<ClCompile Include="Vulkan\VertexShaderGeneratorVulkan.cpp">
<Filter>Vulkan</Filter>
</ClCompile>
<ClCompile Include="Vulkan\VulkanUtil.cpp">
<Filter>Vulkan</Filter>
</ClCompile>

View file

@ -22,24 +22,12 @@
#include "GPU/Common/VertexDecoderCommon.h"
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/ShaderCommon.h"
#include "GPU/Vulkan/VulkanUtil.h"
#include "GPU/Vulkan/StateMappingVulkan.h"
#include "GPU/Vulkan/VulkanQueueRunner.h"
// Attribute slots of the PSP vertex format.
// Enumerators are numbered consecutively starting at zero, so COUNT ends up
// equal to the number of slots listed before it.
enum class PspAttributeLocation {
	POSITION = 0,
	TEXCOORD,  // 1
	NORMAL,    // 2
	W1,        // 3
	W2,        // 4
	COLOR0,    // 5
	COLOR1,    // 6
	COUNT,     // 7
};
struct VulkanPipelineKey {
VulkanPipelineRasterStateKey raster; // prim is included here
VkRenderPass renderPass;

View file

@ -40,7 +40,7 @@
#include "GPU/Vulkan/DrawEngineVulkan.h"
#include "GPU/Vulkan/FramebufferManagerVulkan.h"
#include "GPU/GLES/FragmentShaderGeneratorGLES.h"
#include "GPU/Vulkan/VertexShaderGeneratorVulkan.h"
#include "GPU/GLES/VertexShaderGeneratorGLES.h"
VulkanFragmentShader::VulkanFragmentShader(VulkanContext *vulkan, FShaderID id, const char *code)
: vulkan_(vulkan), id_(id), failed_(false), module_(0) {
@ -265,7 +265,10 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader
if (!vs) {
// Vertex shader not in cache. Let's compile it.
std::string genErrorString;
GenerateVertexShaderVulkanGLSL(VSID, codeBuffer_, &genErrorString);
uint64_t uniformMask = 0; // Not used
uint32_t attributeMask = 0; // Not used
bool success = GenerateVertexShaderGLSL(VSID, codeBuffer_, compat_, &attributeMask, &uniformMask, &genErrorString);
_assert_(success);
vs = new VulkanVertexShader(vulkan_, VSID, codeBuffer_, useHWTransform);
vsCache_.Insert(VSID, vs);
}
@ -387,6 +390,8 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) {
if (header.featureFlags != gstate_c.featureFlags)
return false;
GLSLShaderCompat compat{};
compat.SetupForVulkan();
for (int i = 0; i < header.numVertexShaders; i++) {
VShaderID id;
if (fread(&id, sizeof(id), 1, f) != 1) {
@ -395,15 +400,16 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) {
}
bool useHWTransform = id.Bit(VS_BIT_USE_HW_TRANSFORM);
std::string genErrorString;
if (!GenerateVertexShaderVulkanGLSL(id, codeBuffer_, &genErrorString)) {
uint32_t attributeMask = 0;
uint64_t uniformMask = 0;
if (!GenerateVertexShaderGLSL(id, codeBuffer_, compat, &attributeMask, &uniformMask, &genErrorString)) {
return false;
}
VulkanVertexShader *vs = new VulkanVertexShader(vulkan_, id, codeBuffer_, useHWTransform);
vsCache_.Insert(id, vs);
}
uint32_t vendorID = vulkan_->GetPhysicalDeviceProperties().properties.vendorID;
GLSLShaderCompat compat{};
compat.SetupForVulkan();
for (int i = 0; i < header.numFragmentShaders; i++) {
FShaderID id;
if (fread(&id, sizeof(id), 1, f) != 1) {

View file

@ -24,7 +24,7 @@
#include "Common/GPU/Vulkan/VulkanMemory.h"
#include "GPU/Common/ShaderCommon.h"
#include "GPU/Common/ShaderId.h"
#include "GPU/Vulkan/VertexShaderGeneratorVulkan.h"
#include "GPU/GLES/VertexShaderGeneratorGLES.h"
#include "GPU/GLES/FragmentShaderGeneratorGLES.h"
#include "GPU/Vulkan/VulkanUtil.h"
#include "Common/Math/lin/matrix4x4.h"

View file

@ -121,8 +121,15 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri
bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE);
int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3);
bool doBezier = id.Bit(VS_BIT_BEZIER);
bool doSpline = id.Bit(VS_BIT_SPLINE);
bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones && useHWTransform;
bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones && useHWTransform;
if ((doBezier || doSpline) && !hasNormal) {
// Bad usage.
*errorString = "Invalid flags - tess requires normal.";
return false;
}
bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS);
bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS);
bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS);
@ -205,6 +212,7 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri
WRITE(p, "}\n\n");
}
// Hardware tessellation
if (doBezier || doSpline) {
WRITE(p, "struct TessData {\n");
WRITE(p, " vec4 pos;\n");
@ -215,7 +223,7 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri
WRITE(p, " TessData data[];\n");
WRITE(p, "} tess_data;\n");
WRITE(p, "layout (std430) struct TessWeight {\n");
WRITE(p, "struct TessWeight {\n");
WRITE(p, " vec4 basis;\n");
WRITE(p, " vec4 deriv;\n");
WRITE(p, "};\n");
@ -337,17 +345,17 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri
WRITE(p, " Tess tess;\n");
WRITE(p, " tessellate(tess);\n");
WRITE(p, " vec3 worldpos = vec4(tess.pos.xyz, 1.0) * u_world;\n");
WRITE(p, " vec3 worldpos = (vec4(tess.pos.xyz, 1.0) * u_world).xyz;\n");
if (hasNormalTess) {
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%stess.nrm, 0.0) * u_world);\n", flipNormalTess ? "-" : "");
WRITE(p, " mediump vec3 worldnormal = normalize((vec4(%stess.nrm, 0.0) * u_world).xyz);\n", flipNormalTess ? "-" : "");
} else {
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
} else {
// No skinning, just standard T&L.
WRITE(p, " vec3 worldpos = vec4(position.xyz, 1.0) * u_world;\n");
WRITE(p, " vec3 worldpos = (vec4(position.xyz, 1.0) * u_world).xyz;\n");
if (hasNormal)
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snormal, 0.0) * u_world);\n", flipNormal ? "-" : "");
WRITE(p, " mediump vec3 worldnormal = normalize((vec4(%snormal, 0.0) * u_world).xyz);\n", flipNormal ? "-" : "");
else
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
@ -360,28 +368,27 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri
"w2.x", "w2.y", "w2.z", "w2.w",
};
WRITE(p, " mat3x4 skinMatrix = w1.x * u_bone[0];\n");
WRITE(p, " mat3x4 skinMatrix = w1.x * u_bone0");
if (numBoneWeights > 1) {
for (int i = 1; i < numBoneWeights; i++) {
WRITE(p, " skinMatrix += %s * u_bone[%i];\n", boneWeightAttr[i], i);
WRITE(p, " + %s * u_bone%d", boneWeightAttr[i], i);
}
}
WRITE(p, ";\n");
// Trying to simplify this results in bugs in LBP...
WRITE(p, " vec3 skinnedpos = (vec4(position, 1.0) * skinMatrix) %s;\n", factor);
WRITE(p, " vec3 worldpos = vec4(skinnedpos, 1.0) * u_world;\n");
WRITE(p, " vec3 skinnedpos = (vec4(position, 1.0) * skinMatrix).xyz %s;\n", factor);
WRITE(p, " vec3 worldpos = (vec4(skinnedpos, 1.0) * u_world).xyz;\n");
if (hasNormal) {
WRITE(p, " mediump vec3 skinnednormal = vec4(%snormal, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor);
WRITE(p, " mediump vec3 skinnednormal = (vec4(%snormal, 0.0) * skinMatrix).xyz %s;\n", flipNormal ? "-" : "", factor);
} else {
WRITE(p, " mediump vec3 skinnednormal = vec4(0.0, 0.0, %s1.0, 0.0) * skinMatrix %s;\n", flipNormal ? "-" : "", factor);
WRITE(p, " mediump vec3 skinnednormal = (vec4(0.0, 0.0, %s1.0, 0.0) * skinMatrix).xyz %s;\n", flipNormal ? "-" : "", factor);
}
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(skinnednormal, 0.0) * u_world);\n");
WRITE(p, " mediump vec3 worldnormal = normalize((vec4(skinnednormal, 0.0) * u_world).xyz);\n");
}
WRITE(p, " vec4 viewPos = vec4(vec4(worldpos, 1.0) * u_view, 1.0);\n");
WRITE(p, " vec4 viewPos = vec4((vec4(worldpos, 1.0) * u_view).xyz, 1.0);\n");
// Final view and projection transforms.
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
@ -406,8 +413,9 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri
bool specularIsZero = true;
bool distanceNeeded = false;
bool anySpots = false;
if (enableLighting) {
WRITE(p, " vec4 lightSum0 = u_ambient * %s + vec4(u_matemissive, 0.0);\n", ambientStr);
WRITE(p, " lowp vec4 lightSum0 = u_ambient * %s + vec4(u_matemissive, 0.0);\n", ambientStr);
for (int i = 0; i < 4; i++) {
GELightType type = static_cast<GELightType>(id.Bits(VS_BIT_LIGHT0_TYPE + 4 * i, 2));
@ -419,18 +427,24 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri
specularIsZero = false;
if (type != GE_LIGHTTYPE_DIRECTIONAL)
distanceNeeded = true;
if (type == GE_LIGHTTYPE_SPOT || type == GE_LIGHTTYPE_UNKNOWN)
anySpots = true;
}
if (!specularIsZero) {
WRITE(p, " vec3 lightSum1 = vec3(0.0);\n");
WRITE(p, " lowp vec3 lightSum1 = vec3(0.0);\n");
}
if (!diffuseIsZero) {
WRITE(p, " vec3 toLight;\n");
WRITE(p, " vec3 diffuse;\n");
WRITE(p, " lowp vec3 diffuse;\n");
}
if (distanceNeeded) {
WRITE(p, " float distance;\n");
WRITE(p, " float lightScale;\n");
WRITE(p, " lowp float lightScale;\n");
}
WRITE(p, " mediump float ldot;\n");
if (anySpots) {
WRITE(p, " lowp float angle;\n");
}
}
@ -455,14 +469,14 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri
bool doSpecular = comp == GE_LIGHTCOMP_BOTH;
bool poweredDiffuse = comp == GE_LIGHTCOMP_ONLYPOWDIFFUSE;
WRITE(p, " mediump float dot%i = dot(toLight, worldnormal);\n", i);
WRITE(p, " ldot = dot(toLight, worldnormal);\n");
if (poweredDiffuse) {
// pow(0.0, 0.0) may be undefined, but the PSP seems to treat it as 1.0.
// Seen in Tales of the World: Radiant Mythology (#2424.)
WRITE(p, " if (u_matspecular.a <= 0.0) {\n");
WRITE(p, " dot%i = 1.0;\n", i);
WRITE(p, " ldot = 1.0;\n");
WRITE(p, " } else {\n");
WRITE(p, " dot%i = pow(max(dot%i, 0.0), u_matspecular.a);\n", i, i);
WRITE(p, " ldot = pow(max(ldot, 0.0), u_matspecular.a);\n");
WRITE(p, " }\n");
}
@ -478,9 +492,9 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri
break;
case GE_LIGHTTYPE_SPOT:
case GE_LIGHTTYPE_UNKNOWN:
WRITE(p, " float angle%i = length(u_lightdir%d) == 0.0 ? 0.0 : dot(normalize(u_lightdir%d), toLight);\n", i, i, i);
WRITE(p, " if (angle%i >= u_lightangle_spotCoef%d.x) {\n", i, i);
WRITE(p, " lightScale = clamp(1.0 / dot(u_lightatt%d, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%d.y <= 0.0 ? 1.0 : pow(angle%i, u_lightangle_spotCoef%d.y));\n", i, i, i, i);
WRITE(p, " angle = length(u_lightdir%d) == 0.0 ? 0.0 : dot(normalize(u_lightdir%d), toLight);\n", i, i);
WRITE(p, " if (angle >= u_lightangle_spotCoef%d.x) {\n", i);
WRITE(p, " lightScale = clamp(1.0 / dot(u_lightatt%d, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%d.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%d.y));\n", i, i, i);
WRITE(p, " } else {\n");
WRITE(p, " lightScale = 0.0;\n");
WRITE(p, " }\n");
@ -490,17 +504,17 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri
break;
}
WRITE(p, " diffuse = (u_lightdiffuse%d * %s) * max(dot%i, 0.0);\n", i, diffuseStr, i);
WRITE(p, " diffuse = (u_lightdiffuse%d * %s) * max(ldot, 0.0);\n", i, diffuseStr);
if (doSpecular) {
WRITE(p, " if (dot%i >= 0.0) {\n", i);
WRITE(p, " dot%i = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n", i);
WRITE(p, " if (ldot >= 0.0) {\n");
WRITE(p, " ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n");
WRITE(p, " if (u_matspecular.a <= 0.0) {\n");
WRITE(p, " dot%i = 1.0;\n", i);
WRITE(p, " ldot = 1.0;\n");
WRITE(p, " } else {\n");
WRITE(p, " dot%i = pow(max(dot%i, 0.0), u_matspecular.a);\n", i, i);
WRITE(p, " ldot = pow(max(ldot, 0.0), u_matspecular.a);\n");
WRITE(p, " }\n");
WRITE(p, " if (dot%i > 0.0)\n", i);
WRITE(p, " lightSum1 += u_lightspecular%d * %s * dot%i %s;\n", i, specularStr, i, timesLightScale);
WRITE(p, " if (ldot > 0.0)\n");
WRITE(p, " lightSum1 += u_lightspecular%d * %s * ldot %s;\n", i, specularStr, timesLightScale);
WRITE(p, " }\n");
}
WRITE(p, " lightSum0.rgb += (u_lightambient%d * %s.rgb + diffuse)%s;\n", i, ambientStr, timesLightScale);
@ -594,7 +608,7 @@ bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::stri
break;
}
// Transform by texture matrix. XYZ as we are doing projection mapping.
WRITE(p, " v_texcoord = (%s * u_tex).xyz * vec3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str());
WRITE(p, " v_texcoord = (%s * u_texmtx).xyz * vec3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str());
}
break;

View file

@ -1,5 +0,0 @@
#pragma once
#include "GPU/Common/ShaderId.h"
// Generates the Vulkan-flavoured GLSL vertex shader source for the shader
// described by `id`, writing the text into `buffer`.
//
// Returns false when generation is refused (for example on an invalid
// combination of flags in `id`); in that case a human-readable reason is
// stored in *errorString.
//
// NOTE(review): the required capacity of `buffer` is not expressed in this
// signature - confirm against the callers' code buffer size.
// NOTE(review): std::string is used here without including <string>;
// presumably it arrives via ShaderId.h - confirm.
bool GenerateVertexShaderVulkanGLSL(const VShaderID &id, char *buffer, std::string *errorString);

View file

@ -135,7 +135,6 @@ VULKAN_FILES := \
$(SRC)/GPU/Vulkan/TextureCacheVulkan.cpp \
$(SRC)/GPU/Vulkan/TextureScalerVulkan.cpp \
$(SRC)/GPU/Vulkan/DepalettizeShaderVulkan.cpp \
$(SRC)/GPU/Vulkan/VertexShaderGeneratorVulkan.cpp \
$(SRC)/GPU/Vulkan/VulkanUtil.cpp \
$(SRC)/GPU/Vulkan/DebugVisVulkan.cpp
#endif

View file

@ -647,7 +647,6 @@ SOURCES_CXX += \
$(GPUDIR)/Vulkan/StencilBufferVulkan.cpp \
$(GPUDIR)/Vulkan/TextureCacheVulkan.cpp \
$(GPUDIR)/Vulkan/TextureScalerVulkan.cpp \
$(GPUDIR)/Vulkan/VertexShaderGeneratorVulkan.cpp \
$(GPUDIR)/Vulkan/VulkanUtil.cpp \
$(LIBRETRODIR)/LibretroVulkanContext.cpp \
$(LIBRETRODIR)/libretro_vulkan.cpp

View file

@ -11,7 +11,6 @@
#include "GPU/Directx9/FragmentShaderGeneratorHLSL.h"
#include "GPU/GLES/FragmentShaderGeneratorGLES.h"
#include "GPU/Vulkan/VertexShaderGeneratorVulkan.h"
#include "GPU/Directx9/VertexShaderGeneratorHLSL.h"
#include "GPU/GLES/VertexShaderGeneratorGLES.h"
@ -56,7 +55,13 @@ bool GenerateVShader(VShaderID id, char *buffer, ShaderLanguage lang, std::strin
return false;
// return DX9::GenerateFragmentShaderHLSL(id, buffer, ShaderLanguage::HLSL_DX9);
case ShaderLanguage::GLSL_VULKAN:
return GenerateVertexShaderVulkanGLSL(id, buffer, errorString);
{
	// Vulkan reuses the shared GLSL vertex shader generator, configured
	// through a compat struct set up for Vulkan.
	GLSLShaderCompat compat{};
	compat.SetupForVulkan();
	// The masks are outputs this test does not consume. The generator ORs
	// dirty bits into *uniformMask, so start both from zero rather than
	// from indeterminate values.
	uint32_t attrMask = 0;
	uint64_t uniformMask = 0;
	return GenerateVertexShaderGLSL(id, buffer, compat, &attrMask, &uniformMask, errorString);
}
default:
return false;
}
@ -85,7 +90,6 @@ bool TestCompileShader(const char *buffer, ShaderLanguage lang, bool vertex) {
return false;
case ShaderLanguage::GLSL_300:
return false;
default:
return false;
}
@ -107,7 +111,7 @@ void PrintDiff(const char *a, const char *b) {
printf("a: %s\n", a_lines[i].c_str());
printf("b: %s\n", b_lines[i].c_str());
printf("...continues...\n");
for (size_t j = i; j < i + 4 && j < a_lines.size(); j++) {
for (size_t j = i + 1; j < i + 5 && j < a_lines.size(); j++) {
printf("a: %s\n", a_lines[j].c_str());
printf("b: %s\n", b_lines[j].c_str());
}
@ -141,6 +145,57 @@ bool TestShaderGenerators() {
int successes = 0;
int count = 700;
// Generate a bunch of random vertex shader IDs, try to generate shader source.
// Then compile it and check that it's ok.
for (int i = 0; i < count; i++) {
uint32_t bottom = rng.R32();
uint32_t top = rng.R32();
VShaderID id;
id.d[0] = bottom;
id.d[1] = top;
bool generateSuccess[numLanguages]{};
std::string genErrorString[numLanguages];
for (int j = 0; j < numLanguages; j++) {
generateSuccess[j] = GenerateVShader(id, buffer[j], languages[j], &genErrorString[j]);
if (!genErrorString[j].empty()) {
printf("%s\n", genErrorString[j].c_str());
}
}
/*
// KEEPING FOR REUSE LATER: Defunct temporary test: Compare GLSL-in-Vulkan-mode vs Vulkan
if (generateSuccess[0] != generateSuccess[1]) {
printf("mismatching success! '%s' '%s'\n", genErrorString[0].c_str(), genErrorString[1].c_str());
printf("%s\n", buffer[0]);
printf("%s\n", buffer[1]);
return false;
}
if (generateSuccess[0] && strcmp(buffer[0], buffer[1])) {
printf("mismatching shaders! a=glsl b=vulkan\n");
PrintDiff(buffer[0], buffer[1]);
return false;
}
*/
// Now that we have the strings ready for easy comparison (buffer,4 in the watch window),
// let's try to compile them.
for (int j = 0; j < numLanguages; j++) {
if (generateSuccess[j]) {
if (!TestCompileShader(buffer[j], languages[j], true)) {
printf("Error compiling vertex shader:\n\n%s\n\n", LineNumberString(buffer[j]).c_str());
return false;
}
successes++;
}
}
}
printf("%d/%d vertex shaders generated (it's normal that it's not all, there are invalid bit combos)\n", successes, count * numLanguages);
successes = 0;
count = 200;
// Generate a bunch of random fragment shader IDs, try to generate shader source.
// Then compile it and check that it's ok.
for (int i = 0; i < count; i++) {
@ -194,47 +249,6 @@ bool TestShaderGenerators() {
successes = 0;
count = 200;
// Generate a bunch of random vertex shader IDs, try to generate shader source.
// Then compile it and check that it's ok.
for (int i = 0; i < count; i++) {
uint32_t bottom = rng.R32();
uint32_t top = rng.R32();
VShaderID id;
id.d[0] = bottom;
id.d[1] = top;
// Skip testing beziers for now. I'll deal with those bugs later.
id.SetBit(VS_BIT_BEZIER, false);
id.SetBit(VS_BIT_SPLINE, false);
bool generateSuccess[numLanguages]{};
for (int j = 0; j < numLanguages; j++) {
std::string genErrorString;
generateSuccess[j] = GenerateVShader(id, buffer[j], languages[j], &genErrorString);
if (!genErrorString.empty()) {
printf("%s\n", genErrorString.c_str());
}
}
// Now that we have the strings ready for easy comparison (buffer,4 in the watch window),
// let's try to compile them.
for (int j = 0; j < numLanguages; j++) {
if (generateSuccess[j]) {
if (!TestCompileShader(buffer[j], languages[j], true)) {
printf("Error compiling vertex shader:\n\n%s\n\n", LineNumberString(buffer[j]).c_str());
return false;
}
successes++;
}
}
}
printf("%d/%d vertex shaders generated (it's normal that it's not all, there are invalid bit combos)\n", successes, count * numLanguages);
successes = 0;
count = 200;
for (int i = 0; i < numLanguages; i++) {
delete[] buffer[i];
}