Evolve the HLSL and GLSL fragment shader generators even closer together.

This commit is contained in:
Henrik Rydgård 2020-10-29 00:38:52 +01:00
parent b4a76a9f09
commit b070ed45e9
4 changed files with 56 additions and 37 deletions

View file

@ -923,6 +923,11 @@ static std::string surface_transforms_to_string(VkSurfaceTransformFlagsKHR trans
bool VulkanContext::InitSwapchain() {
VkResult res = vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_devices_[physical_device_], surface_, &surfCapabilities_);
if (res == VK_ERROR_SURFACE_LOST_KHR) {
// Not much to do.
ERROR_LOG(G3D, "VK: Surface lost in InitSwapchain");
return false;
}
_dbg_assert_(res == VK_SUCCESS);
uint32_t presentModeCount;
res = vkGetPhysicalDeviceSurfacePresentModesKHR(physical_devices_[physical_device_], surface_, &presentModeCount, nullptr);

View file

@ -36,7 +36,8 @@ const char *hlsl_preamble =
"#define uvec3 uint3\n"
"#define ivec3 int3\n"
"#define splat3(x) float3(x, x, x)\n"
"#define usplat3(x) uvec3(x, x, x)\n";
"#define mix lerp\n"
"#define mod(x, y) fmod(x, y)\n";
const char *hlsl_d3d11_preamble =
"#define DISCARD discard\n"
@ -228,12 +229,12 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
if (id.Bit(FS_BIT_CLAMP_S)) {
ucoord = "clamp(" + ucoord + ", u_texclamp.z, u_texclamp.x - u_texclamp.z)";
} else {
ucoord = "fmod(" + ucoord + ", u_texclamp.x)";
ucoord = "mod(" + ucoord + ", u_texclamp.x)";
}
if (id.Bit(FS_BIT_CLAMP_T)) {
vcoord = "clamp(" + vcoord + ", u_texclamp.w, u_texclamp.y - u_texclamp.w)";
} else {
vcoord = "fmod(" + vcoord + ", u_texclamp.y)";
vcoord = "mod(" + vcoord + ", u_texclamp.y)";
}
if (textureAtOffset) {
ucoord = "(" + ucoord + " + u_texclampoff.x)";
@ -259,16 +260,18 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
WRITE(p, " vec4 t = tex2D(tex, %s.xy)%s;\n", texcoord, bgraTexture ? ".bgra" : "");
}
}
WRITE(p, " vec4 p = In.v_color0;\n");
if (texFunc != GE_TEXFUNC_REPLACE || !doTextureAlpha) {
WRITE(p, " vec4 p = In.v_color0;\n");
}
if (doTextureAlpha) { // texfmt == RGBA
switch (texFunc) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " vec4 v = p * t%s;\n", secondary); break;
case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(lerp(p.rgb, t.rgb, t.a), p.a)%s;\n", secondary); break;
WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a)%s;\n", secondary); break;
case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(lerp(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a)%s;\n", secondary); break;
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a)%s;\n", secondary); break;
case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 v = t%s;\n", secondary); break;
case GE_TEXFUNC_ADD:
@ -287,7 +290,7 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(t.rgb, p.a)%s;\n", secondary); break;
case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(lerp(p.rgb, u_texenv.rgb, t.rgb), p.a)%s;\n", secondary); break;
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a)%s;\n", secondary); break;
case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 v = vec4(t.rgb, p.a)%s;\n", secondary); break;
case GE_TEXFUNC_ADD:
@ -311,7 +314,7 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
if (enableFog) {
WRITE(p, " float fogCoef = clamp(In.v_fogdepth, 0.0, 1.0);\n");
WRITE(p, " v = lerp(vec4(u_fogcolor, v.a), v, fogCoef);\n");
WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
}
const char *discardStatement = testForceToZero ? "v.a = 0.0;" : "DISCARD;";
@ -334,10 +337,10 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
if (alphaTestFuncs[alphaTestFunc][0] != '#') {
// TODO: Rewrite this to use clip() appropriately (like, clip(v.a - u_alphacolorref.a))
if (lang == HLSL_D3D11) {
WRITE(p, " if ((roundAndScaleTo255i(v.a) & u_alphacolormask.a) %s u_alphacolorref.a) discard;\n", alphaTestFuncs[alphaTestFunc]);
WRITE(p, " if ((roundAndScaleTo255i(v.a) & u_alphacolormask.a) %s int(u_alphacolorref.a)) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement);
} else {
// TODO: Use a texture to lookup bitwise ops?
WRITE(p, " if (roundAndScaleTo255f(v.a) %s u_alphacolorref.a) DISCARD;\n", alphaTestFuncs[alphaTestFunc]);
WRITE(p, " if (roundAndScaleTo255f(v.a) %s u_alphacolorref.a) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement);
}
} else {
// This means NEVER. See above.
@ -351,10 +354,10 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
// When testing against 0 (common), we can avoid some math.
// 0.002 is approximately half of 1.0 / 255.0.
if (colorTestFunc == GE_COMP_NOTEQUAL) {
WRITE(p, " if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) DISCARD;\n");
WRITE(p, " if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) %s\n", discardStatement);
} else if (colorTestFunc != GE_COMP_NEVER) {
// Anything else is a test for == 0.
WRITE(p, " if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) DISCARD;\n");
WRITE(p, " if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) %s\n", discardStatement);
} else {
// NEVER has been logged as used by games, although it makes little sense - statically failing.
// Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here.
@ -365,15 +368,15 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
if (colorTestFuncs[colorTestFunc][0] != '#') {
const char *test = colorTestFuncs[colorTestFunc];
if (lang == HLSL_D3D11) {
WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
WRITE(p, " ivec3 v_masked = v_scaled & u_alphacolormask.rgb;\n");
WRITE(p, " ivec3 colorTestRef = u_alphacolorref.rgb & u_alphacolormask.rgb;\n");
WRITE(p, " uvec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
WRITE(p, " uvec3 v_masked = v_scaled & u_alphacolormask.rgb;\n");
WRITE(p, " uvec3 colorTestRef = u_alphacolorref.rgb & u_alphacolormask.rgb;\n");
// We have to test the components separately, or we get incorrect results. See #10629.
WRITE(p, " if (v_masked.r %s colorTestRef.r && v_masked.g %s colorTestRef.g && v_masked.b %s colorTestRef.b) DISCARD;\n", test, test, test);
WRITE(p, " if (v_masked.r %s colorTestRef.r && v_masked.g %s colorTestRef.g && v_masked.b %s colorTestRef.b) %s\n", test, test, test, discardStatement);
} else {
// TODO: Use a texture to lookup bitwise ops instead?
WRITE(p, " vec3 colortest = roundAndScaleTo255v(v.rgb);\n");
WRITE(p, " if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b)) DISCARD;\n", test, test, test);
WRITE(p, " if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b)) %s\n", test, test, test, discardStatement);
}
}
else {
@ -422,7 +425,7 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
case GE_SRCBLEND_DSTCOLOR: srcFactor = "destColor.rgb"; break;
case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(splat3(1.0) - destColor.rgb)"; break;
case GE_SRCBLEND_SRCALPHA: srcFactor = "v.aaa"; break;
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "splat3(1.0) - v.aaa"; break;
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "splat3(1.0 - v.a)"; break;
case GE_SRCBLEND_DSTALPHA: srcFactor = "destColor.aaa"; break;
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "splat3(1.0) - destColor.aaa"; break;
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "v.aaa * 2.0"; break;

View file

@ -667,7 +667,15 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha
} else {
const char *colorTestFuncs[] = { "#", "#", " != ", " == " };
if (colorTestFuncs[colorTestFunc][0] != '#') {
if (compat.bitwiseOps) {
// TODO: Unify these paths better.
if (compat.d3d11) {
const char *test = colorTestFuncs[colorTestFunc];
WRITE(p, " uvec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
WRITE(p, " uvec3 v_masked = v_scaled & u_alphacolormask.rgb;\n");
WRITE(p, " uvec3 colorTestRef = u_alphacolorref.rgb & u_alphacolormask.rgb;\n");
// We have to test the components separately, or we get incorrect results. See #10629.
WRITE(p, " if (v_masked.r %s colorTestRef.r && v_masked.g %s colorTestRef.g && v_masked.b %s colorTestRef.b) %s\n", test, test, test, discardStatement);
} else if (compat.bitwiseOps) {
// Apparently GLES3 does not support vector bitwise ops.
WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
if (compat.vulkan) {
@ -723,7 +731,9 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha
if (replaceBlend == REPLACE_BLEND_COPY_FBO) {
// If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit.
// We can just read the prev value more directly.
if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
if (compat.d3d11) {
WRITE(p, " vec4 destColor = fboTex.Load(int3((int)In.pixelPos.x, (int)In.pixelPos.y, 0));\n");
} else if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData);
} else if (!compat.texelFetch) {
WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture);
@ -731,34 +741,34 @@ bool GenerateFragmentShaderGLSL(const FShaderID &id, char *buffer, const GLSLSha
WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch);
}
const char *srcFactor = "splat3(1.0)";
const char *dstFactor = "splat3(0.0)";
const char *srcFactor = nullptr;
const char *dstFactor = nullptr;
switch (replaceBlendFuncA) {
case GE_SRCBLEND_DSTCOLOR: srcFactor = "destColor.rgb"; break;
case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(splat3(1.0) - destColor.rgb)"; break;
case GE_SRCBLEND_SRCALPHA: srcFactor = "splat3(v.a)"; break;
case GE_SRCBLEND_SRCALPHA: srcFactor = "v.aaa"; break;
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "splat3(1.0 - v.a)"; break;
case GE_SRCBLEND_DSTALPHA: srcFactor = "splat3(destColor.a)"; break;
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "splat3(1.0 - destColor.a)"; break;
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "splat3(v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "splat3(1.0 - v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "splat3(destColor.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "splat3(1.0 - destColor.a * 2.0)"; break;
case GE_SRCBLEND_DSTALPHA: srcFactor = "destColor.aaa"; break;
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "splat3(1.0) - destColor.aaa"; break;
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "v.aaa * 2.0"; break;
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "splat3(1.0) - v.aaa * 2.0"; break;
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "destColor.aaa * 2.0"; break;
case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "splat3(1.0) - destColor.aaa * 2.0"; break;
case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break;
default: srcFactor = "u_blendFixA"; break;
}
switch (replaceBlendFuncB) {
case GE_DSTBLEND_SRCCOLOR: dstFactor = "v.rgb"; break;
case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(splat3(1.0) - v.rgb)"; break;
case GE_DSTBLEND_SRCALPHA: dstFactor = "splat3(v.a)"; break;
case GE_DSTBLEND_INVSRCALPHA: dstFactor = "splat3(1.0 - v.a)"; break;
case GE_DSTBLEND_DSTALPHA: dstFactor = "splat3(destColor.a)"; break;
case GE_DSTBLEND_INVDSTALPHA: dstFactor = "splat3(1.0 - destColor.a)"; break;
case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "splat3(v.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "splat3(1.0 - v.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "splat3(destColor.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "splat3(1.0 - destColor.a * 2.0)"; break;
case GE_DSTBLEND_SRCALPHA: dstFactor = "v.aaa"; break;
case GE_DSTBLEND_INVSRCALPHA: dstFactor = "splat3(1.0) - v.aaa"; break;
case GE_DSTBLEND_DSTALPHA: dstFactor = "destColor.aaa"; break;
case GE_DSTBLEND_INVDSTALPHA: dstFactor = "splat3(1.0) - destColor.aaa"; break;
case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "v.aaa * 2.0"; break;
case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "splat3(1.0) - v.aaa * 2.0"; break;
case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "destColor.aaa * 2.0"; break;
case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "splat3(1.0) - destColor.aaa * 2.0"; break;
case GE_DSTBLEND_FIXB: dstFactor = "u_blendFixB"; break;
default: dstFactor = "u_blendFixB"; break;
}

View file

@ -173,6 +173,7 @@ bool TestShaderGenerators() {
// bits we don't need to test because they are irrelevant on d3d11
id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, false);
id.SetBit(FS_BIT_SHADER_DEPAL, false);
bool generateSuccess[numLanguages]{};
std::string genErrorString[numLanguages];