Merge pull request #16763 from hrydgard/texalpha-shaderflag

Shader generator: Move FS_TEX_ALPHA to a uniform bool.
This commit is contained in:
Henrik Rydgård 2023-01-10 11:09:25 +01:00 committed by GitHub
commit bef50f9fdd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 71 additions and 85 deletions

View file

@ -1327,7 +1327,7 @@ bool PPGeImage::Load() {
unsigned char *textureData;
int success;
if (filename_.empty()) {
success = pngLoadPtr(Memory::GetPointerRange(png_, size_), size_, &width_, &height_, &textureData);
success = pngLoadPtr(Memory::GetPointerRange(png_, (u32)size_), size_, &width_, &height_, &textureData);
} else {
std::vector<u8> pngData;
if (pspFileSystem.ReadEntireFile(filename_, pngData) < 0) {

View file

@ -105,7 +105,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO);
bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE);
bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ);
bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA);
if (texture3D && arrayTexture) {
*errorString = "Invalid combination of 3D texture and array texture, shouldn't happen";
@ -257,8 +256,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) {
WRITE(p, "float u_stencilReplaceValue : register(c%i);\n", CONST_PS_STENCILREPLACE);
}
if (doTexture && texFunc == GE_TEXFUNC_BLEND) {
WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV);
if (doTexture) {
if (texFunc == GE_TEXFUNC_BLEND) {
WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV);
}
WRITE(p, "float u_texNoAlpha : register(c%i);\n", CONST_PS_TEX_NO_ALPHA);
}
WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR);
if (texture3D) {
@ -351,6 +353,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
} else {
WRITE(p, "uniform sampler2D tex;\n");
}
*uniformMask |= DIRTY_TEXALPHA;
WRITE(p, "uniform float u_texNoAlpha;\n");
}
if (readFramebufferTex) {
@ -817,64 +821,38 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
break;
}
if (texFunc != GE_TEXFUNC_REPLACE || !doTextureAlpha)
WRITE(p, " vec4 p = v_color0;\n");
WRITE(p, " vec4 p = v_color0;\n");
if (doTextureAlpha) { // texfmt == RGBA
switch (texFunc) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " vec4 v = p * t + s\n;");
break;
case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a) + s;\n");
break;
case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n");
break;
case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 v = t + s;\n");
break;
case GE_TEXFUNC_ADD:
case GE_TEXFUNC_UNKNOWN1:
case GE_TEXFUNC_UNKNOWN2:
case GE_TEXFUNC_UNKNOWN3:
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a) + s;\n");
break;
default:
WRITE(p, " vec4 v = p;\n"); break;
}
} else { // texfmt == RGB
switch (texFunc) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " vec4 v = vec4(t.rgb * p.rgb, p.a) + s;\n");
break;
case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(t.rgb, p.a) + s;\n");
break;
case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a) + s;\n");
break;
case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 v = vec4(t.rgb, p.a) + s;\n");
break;
case GE_TEXFUNC_ADD:
case GE_TEXFUNC_UNKNOWN1:
case GE_TEXFUNC_UNKNOWN2:
case GE_TEXFUNC_UNKNOWN3:
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a) + s;\n"); break;
default:
WRITE(p, " vec4 v = p;\n"); break;
}
if (texFunc != GE_TEXFUNC_REPLACE) {
WRITE(p, " t.a = max(t.a, u_texNoAlpha);\n");
}
switch (texFunc) {
case GE_TEXFUNC_MODULATE:
WRITE(p, " vec4 v = p * t + s;\n");
break;
case GE_TEXFUNC_DECAL:
WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a) + s;\n");
break;
case GE_TEXFUNC_BLEND:
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n");
break;
case GE_TEXFUNC_REPLACE:
WRITE(p, " vec4 r = t;\n");
WRITE(p, " r.a = mix(r.a, p.a, u_texNoAlpha);\n");
WRITE(p, " vec4 v = r + s;\n");
break;
case GE_TEXFUNC_ADD:
case GE_TEXFUNC_UNKNOWN1:
case GE_TEXFUNC_UNKNOWN2:
case GE_TEXFUNC_UNKNOWN3:
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a) + s;\n");
break;
default:
// Doesn't happen
WRITE(p, " vec4 v = p + s;\n"); break;
break;
}
if (enableColorDoubling) {
// This happens before fog is applied.
WRITE(p, " v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n");

View file

@ -23,7 +23,7 @@
struct FShaderID;
// D3D9 constants
// D3D9 float constants
#define CONST_PS_TEXENV 0
#define CONST_PS_ALPHACOLORREF 1
@ -36,9 +36,13 @@ struct FShaderID;
#define CONST_PS_TEXCLAMP 8
#define CONST_PS_TEXCLAMPOFF 9
#define CONST_PS_MIPBIAS 10
#define CONST_PS_TEX_NO_ALPHA 11
// For stencil upload
#define CONST_PS_STENCILVALUE 11
#define BCONST_PS_STENCILVALUE 12
// D3D9 bool constants, they have their own register space.
// Can technically be deduced from the fragment shader ID, but this is safer.
enum class FragmentShaderFlags : u32 {

View file

@ -90,11 +90,11 @@ enum : uint64_t {
DIRTY_MIPBIAS = 1ULL << 37,
DIRTY_LIGHT_CONTROL = 1ULL << 38,
// space for 1 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS.
DIRTY_TEXALPHA = 1ULL << 39,
DIRTY_BONE_UNIFORMS = 0xFF000000ULL,
DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFFULL,
DIRTY_ALL_UNIFORMS = 0xFFFFFFFFFFULL,
DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3,
// Other dirty elements that aren't uniforms!

View file

@ -192,7 +192,6 @@ std::string FragmentShaderDesc(const FShaderID &id) {
if (id.Bit(FS_BIT_CLEARMODE)) desc << "Clear ";
if (id.Bit(FS_BIT_DO_TEXTURE)) desc << (id.Bit(FS_BIT_3D_TEXTURE) ? "Tex3D " : "Tex ");
if (id.Bit(FS_BIT_DO_TEXTURE_PROJ)) desc << "TexProj ";
if (id.Bit(FS_BIT_TEXALPHA)) desc << "TexAlpha ";
if (id.Bit(FS_BIT_TEXTURE_AT_OFFSET)) desc << "TexOffs ";
if (id.Bit(FS_BIT_COLOR_DOUBLE)) desc << "2x ";
if (id.Bit(FS_BIT_FLATSHADE)) desc << "Flat ";
@ -291,7 +290,6 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled();
bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix, gstate.getUVProjMode()));
bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
ShaderDepalMode shaderDepalMode = gstate_c.shaderDepalMode;
@ -303,16 +301,9 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
SimulateLogicOpType simulateLogicOpType = pipelineState.blendState.simulateLogicOpType;
ReplaceAlphaType stencilToAlpha = pipelineState.blendState.replaceAlphaWithStencil;
// All texfuncs except replace are the same for RGB as for RGBA with full alpha.
// Note that checking this means that we must dirty the fragment shader ID whenever textureFullAlpha changes.
if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE) {
doTextureAlpha = false;
}
if (gstate.isTextureMapEnabled()) {
id.SetBit(FS_BIT_DO_TEXTURE);
id.SetBits(FS_BIT_TEXFUNC, 3, gstate.getTextureFunction());
id.SetBit(FS_BIT_TEXALPHA, doTextureAlpha & 1); // rgb or rgba
if (gstate_c.needShaderTexClamp) {
bool textureAtOffset = gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0;
// 4 bits total.

View file

@ -6,9 +6,8 @@
#include "Common/CommonFuncs.h"
// TODO: There will be additional bits, indicating that groups of these will be
// sent to the shader and processed there. This will cut down the number of shaders ("ubershader approach")
// This is probably only really worth doing for lighting and bones.
// VS_BIT_LIGHT_UBERSHADER indicates that some groups of these will be
// sent to the shader and processed there. This cuts down the number of shaders ("ubershader approach").
enum VShaderBit : uint8_t {
VS_BIT_LMODE = 0,
VS_BIT_IS_THROUGH = 1,
@ -68,7 +67,7 @@ enum FShaderBit : uint8_t {
FS_BIT_CLEARMODE = 0,
FS_BIT_DO_TEXTURE = 1,
FS_BIT_TEXFUNC = 2, // 3 bits
FS_BIT_TEXALPHA = 5,
// 1 bit free at position 5
FS_BIT_3D_TEXTURE = 6,
FS_BIT_SHADER_TEX_CLAMP = 7,
FS_BIT_CLAMP_S = 8,

View file

@ -198,8 +198,12 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
}
}
if (dirtyUniforms & DIRTY_TEXALPHA) {
ub->texNoAlpha = gstate.isTextureAlphaUsed() ? 0.0f : 1.0f;
}
if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {
ub->stencil = (float)gstate.getStencilTestRef() * (1.0 / 255.0);
ub->stencilReplaceValue = (float)gstate.getStencilTestRef() * (1.0 / 255.0);
}
// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting

View file

@ -9,7 +9,7 @@
enum : uint64_t {
DIRTY_BASE_UNIFORMS =
DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF |
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEFENABLE | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE |
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEFENABLE | DIRTY_TEXENV | DIRTY_TEXALPHA | DIRTY_STENCILREPLACEVALUE |
DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_COLORWRITEMASK | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA |
DIRTY_BEZIERSPLINE | DIRTY_DEPAL,
DIRTY_LIGHT_UNIFORMS =
@ -17,7 +17,7 @@ enum : uint64_t {
DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
};
// Currently 448 bytes.
// Currently 496 bytes.
// Every line here is a 4-float.
struct alignas(16) UB_VS_FS_Base {
float proj[16];
@ -35,10 +35,11 @@ struct alignas(16) UB_VS_FS_Base {
// Fragment data
float fogColor[3]; uint32_t alphaColorRef;
float texEnvColor[3]; uint32_t colorTestMask;
float blendFixA[3]; float stencil;
float blendFixA[3]; float stencilReplaceValue;
float blendFixB[3]; float rotation;
float texClamp[4];
float texClampOffset[2]; float fogCoef[2];
float texNoAlpha; float pad[3];
// VR stuff is to go here, later. For normal drawing, we can then get away
// with just uploading the first 448 bytes of the struct (up to and including fogCoef).
};
@ -65,6 +66,7 @@ R"( mat4 u_proj;
vec4 u_texclamp;
vec2 u_texclampoff;
vec2 u_fogcoef;
float u_texNoAlpha; float pad0; float pad1; float pad2;
)";
// 512 bytes. Would like to shrink more. Some colors only have 8-bit precision and we expand

View file

@ -261,7 +261,7 @@ static void ConvertProjMatrixToD3DThrough(Matrix4x4 &in) {
in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(1.0f, 1.0f, 0.5f));
}
const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP | DIRTY_MIPBIAS;
const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_TEXALPHA | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP | DIRTY_MIPBIAS;
void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) {
if (dirtyUniforms & DIRTY_TEXENV) {
@ -279,7 +279,10 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) {
if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {
PSSetFloat(CONST_PS_STENCILREPLACE, (float)gstate.getStencilTestRef() * (1.0f / 255.0f));
}
if (dirtyUniforms & DIRTY_TEXALPHA) {
// NOTE: Reversed value, more efficient in shader.
PSSetFloat(CONST_PS_TEX_NO_ALPHA, gstate.isTextureAlphaUsed() ? 0.0f : 1.0f);
}
if (dirtyUniforms & DIRTY_SHADERBLEND) {
PSSetColorUniform3(CONST_PS_BLENDFIXA, gstate.getFixA());
PSSetColorUniform3(CONST_PS_BLENDFIXB, gstate.getFixB());

View file

@ -152,6 +152,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
queries.push_back({ &u_uvscaleoffset, "u_uvscaleoffset" });
queries.push_back({ &u_texclamp, "u_texclamp" });
queries.push_back({ &u_texclampoff, "u_texclampoff" });
queries.push_back({ &u_texNoAlpha, "u_texNoAlpha" });
queries.push_back({ &u_lightControl, "u_lightControl" });
for (int i = 0; i < 4; i++) {
@ -440,6 +441,9 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin
if (dirty & DIRTY_TEXENV) {
SetColorUniform3(render_, &u_texenv, gstate.texenvcolor);
}
if (dirty & DIRTY_TEXALPHA) {
render_->SetUniformF1(&u_texNoAlpha, gstate.isTextureAlphaUsed() ? 0.0f : 1.0f);
}
if (dirty & DIRTY_ALPHACOLORREF) {
if (shaderLanguage.bitwiseOps) {
render_->SetUniformUI1(&u_alphacolorref, gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24));
@ -945,7 +949,7 @@ enum class CacheDetectFlags {
};
#define CACHE_HEADER_MAGIC 0x83277592
#define CACHE_VERSION 21
#define CACHE_VERSION 22
struct CacheHeader {
uint32_t magic;
uint32_t version;

View file

@ -101,6 +101,7 @@ public:
int u_uvscaleoffset;
int u_texclamp;
int u_texclampoff;
int u_texNoAlpha;
// Lighting
int u_lightControl;

View file

@ -94,7 +94,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
// Raster state for Direct3D 9, uncommon.
{ GE_CMD_SHADEMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE },
{ GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXALPHA },
{ GE_CMD_COLORTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_ALPHATESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_COLORTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },

View file

@ -516,7 +516,7 @@ enum class VulkanCacheDetectFlags {
};
#define CACHE_HEADER_MAGIC 0xff51f420
#define CACHE_VERSION 35
#define CACHE_VERSION 36
struct VulkanCacheHeader {
uint32_t magic;
uint32_t version;