Don't expand alphaColorRef to 128 bits (an ivec4) on backends where we don't need to.

This commit is contained in:
Henrik Rydgård 2022-10-10 17:16:52 +02:00
parent 69bc7b060f
commit aec22491fe
5 changed files with 24 additions and 19 deletions

View file

@ -353,7 +353,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, "uniform sampler2D testtex;\n");
} else {
*uniformMask |= DIRTY_ALPHACOLORREF;
WRITE(p, "uniform vec4 u_alphacolorref;\n");
if (compat.bitwiseOps) {
WRITE(p, "uniform uint u_alphacolorref;\n");
} else {
WRITE(p, "uniform vec4 u_alphacolorref;\n");
}
if (compat.bitwiseOps && ((enableColorTest && !colorTestAgainstZero) || (enableAlphaTest && !alphaTestAgainstZero))) {
*uniformMask |= DIRTY_ALPHACOLORMASK;
WRITE(p, "uniform uint u_alphacolormask;\n");
@ -882,7 +886,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " };
if (alphaTestFuncs[alphaTestFunc][0] != '#') {
if (compat.bitwiseOps) {
WRITE(p, " if ((roundAndScaleTo255i(v.a) & int(u_alphacolormask >> 24)) %s int(u_alphacolorref.a)) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement);
WRITE(p, " if ((roundAndScaleTo255i(v.a) & int(u_alphacolormask >> 24)) %s int(u_alphacolorref >> 24)) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement);
} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
// Work around bad PVR driver problem where equality check + discard just doesn't work.
if (alphaTestFunc != GE_COMP_NOTEQUAL) {
@ -946,7 +950,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
} else if (compat.bitwiseOps) {
WRITE(p, " uint v_uint = roundAndScaleTo8x4(v.rgb);\n");
WRITE(p, " uint v_masked = v_uint & u_alphacolormask;\n");
WRITE(p, " uint colorTestRef = packFloatsTo8x4(u_alphacolorref.rgb) & u_alphacolormask;\n");
WRITE(p, " uint colorTestRef = (u_alphacolorref & u_alphacolormask) & 0xFFFFFFu;\n");
WRITE(p, " if (v_masked %s colorTestRef) %s\n", test, discardStatement);
} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
WRITE(p, " if (roundTo255thv(v.rgb) %s u_alphacolorref.rgb) %s\n", test, discardStatement);

View file

@ -77,13 +77,13 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
Uint8x3ToFloat3(ub->texEnvColor, gstate.texenvcolor);
}
if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
Uint8x3ToInt4_Alpha(ub->alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
ub->alphaColorRef = gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24);
}
if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
ub->colorTestMask = gstate.getColorTestMask() | (gstate.getAlphaTestMask() << 24);
}
if (dirtyUniforms & DIRTY_FOGCOLOR) {
Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor);
Uint8x3ToFloat3(ub->fogColor, gstate.fogcolor);
}
if (dirtyUniforms & DIRTY_SHADERBLEND) {
Uint8x3ToFloat3(ub->blendFixA, gstate.getFixA());

View file

@ -17,7 +17,7 @@ enum : uint64_t {
DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
};
// Currently 480 bytes. Probably can't get to 256 (nVidia's UBO alignment, also common in other vendors).
// Currently 448 bytes.
// Every line here is a 4-float.
struct alignas(16) UB_VS_FS_Base {
float proj[16];
@ -34,9 +34,8 @@ struct alignas(16) UB_VS_FS_Base {
uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one.
uint32_t colorWriteMask; float mipBias;
// Fragment data
float fogColor[4]; // .w is unused
float fogColor[3]; uint32_t alphaColorRef;
float texEnvColor[3]; uint32_t colorTestMask;
int alphaColorRef[4];
float blendFixA[3]; float stencil;
float blendFixB[3]; float rotation;
float texClamp[4];
@ -58,10 +57,8 @@ R"( mat4 u_proj;
uint u_depal_mask_shift_off_fmt;
uint u_colorWriteMask;
float u_mipBias;
vec3 u_fogcolor;
vec3 u_texenv;
uint u_alphacolormask;
ivec4 u_alphacolorref;
vec3 u_fogcolor; uint u_alphacolorref;
vec3 u_texenv; uint u_alphacolormask;
vec3 u_blendFixA; float u_stencilReplaceValue;
vec3 u_blendFixB; float u_rotation;
vec4 u_texclamp;

View file

@ -359,7 +359,7 @@ void LinkedShader::use(const ShaderID &VSID) {
// Note that we no longer track attr masks here - we do it for the input layouts instead.
}
void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBufferedRendering) {
void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBufferedRendering, const Draw::DeviceCaps &caps) {
u64 dirty = dirtyUniforms & availableUniforms;
dirtyUniforms = 0;
@ -432,8 +432,7 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu
render_->SetUniformM4x4(&u_proj, flippedMatrix.m);
render_->SetUniformF1(&u_rotation, useBufferedRendering ? 0 : (float)g_display_rotation);
}
if (dirty & DIRTY_PROJTHROUGHMATRIX)
{
if (dirty & DIRTY_PROJTHROUGHMATRIX) {
Matrix4x4 proj_through;
if (useBufferedRendering) {
proj_through.setOrtho(0.0f, gstate_c.curRTWidth, 0.0f, gstate_c.curRTHeight, 0.0f, 1.0f);
@ -446,7 +445,12 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu
SetColorUniform3(render_, &u_texenv, gstate.texenvcolor);
}
if (dirty & DIRTY_ALPHACOLORREF) {
SetColorUniform3Alpha255(render_, &u_alphacolorref, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
if (caps.fragmentShaderInt32Supported) {
// Same as bitwiseOps really
render_->SetUniformUI1(&u_alphacolorref, gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24));
} else {
SetColorUniform3Alpha255(render_, &u_alphacolorref, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
}
}
if (dirty & DIRTY_ALPHACOLORMASK) {
render_->SetUniformUI1(&u_alphacolormask, gstate.getColorTestMask() | (gstate.getAlphaTestMask() << 24));
@ -813,7 +817,7 @@ LinkedShader *ShaderManagerGLES::ApplyFragmentShader(VShaderID VSID, Shader *vs,
}
if (lastVShaderSame_ && FSID == lastFSID_) {
lastShader_->UpdateUniforms(vertType, VSID, useBufferedRendering);
lastShader_->UpdateUniforms(vertType, VSID, useBufferedRendering, draw_->GetDeviceCaps());
return lastShader_;
}
@ -856,7 +860,7 @@ LinkedShader *ShaderManagerGLES::ApplyFragmentShader(VShaderID VSID, Shader *vs,
} else {
ls->use(VSID);
}
ls->UpdateUniforms(vertType, VSID, useBufferedRendering);
ls->UpdateUniforms(vertType, VSID, useBufferedRendering, draw_->GetDeviceCaps());
lastShader_ = ls;
return ls;

View file

@ -35,7 +35,7 @@ public:
~LinkedShader();
void use(const ShaderID &VSID);
void UpdateUniforms(u32 vertType, const ShaderID &VSID, bool useBufferedRendering);
void UpdateUniforms(u32 vertType, const ShaderID &VSID, bool useBufferedRendering, const Draw::DeviceCaps &caps);
GLRenderManager *render_;
Shader *vs_;