diff --git a/Core/Util/PPGeDraw.cpp b/Core/Util/PPGeDraw.cpp index 495e4d1249..525937a512 100644 --- a/Core/Util/PPGeDraw.cpp +++ b/Core/Util/PPGeDraw.cpp @@ -1327,7 +1327,7 @@ bool PPGeImage::Load() { unsigned char *textureData; int success; if (filename_.empty()) { - success = pngLoadPtr(Memory::GetPointerRange(png_, size_), size_, &width_, &height_, &textureData); + success = pngLoadPtr(Memory::GetPointerRange(png_, (u32)size_), size_, &width_, &height_, &textureData); } else { std::vector pngData; if (pspFileSystem.ReadEntireFile(filename_, pngData) < 0) { diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 64568fff86..0219a2391d 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -105,7 +105,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO); bool enableColorDoubling = id.Bit(FS_BIT_COLOR_DOUBLE); bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ); - bool doTextureAlpha = id.Bit(FS_BIT_TEXALPHA); if (texture3D && arrayTexture) { *errorString = "Invalid combination of 3D texture and array texture, shouldn't happen"; @@ -257,8 +256,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) { WRITE(p, "float u_stencilReplaceValue : register(c%i);\n", CONST_PS_STENCILREPLACE); } - if (doTexture && texFunc == GE_TEXFUNC_BLEND) { - WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV); + if (doTexture) { + if (texFunc == GE_TEXFUNC_BLEND) { + WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV); + } + WRITE(p, "float u_texNoAlpha : register(c%i);\n", CONST_PS_TEX_NO_ALPHA); } WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR); if (texture3D) { @@ -351,6 +353,8 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu } else { WRITE(p, "uniform sampler2D tex;\n"); } + *uniformMask |= DIRTY_TEXALPHA; + WRITE(p, "uniform float u_texNoAlpha;\n"); } if (readFramebufferTex) { @@ -817,64 +821,38 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu break; } - if (texFunc != GE_TEXFUNC_REPLACE || !doTextureAlpha) - WRITE(p, " vec4 p = v_color0;\n"); + WRITE(p, " vec4 p = v_color0;\n"); - if (doTextureAlpha) { // texfmt == RGBA - switch (texFunc) { - case GE_TEXFUNC_MODULATE: - WRITE(p, " vec4 v = p * t + s\n;"); - break; - - case GE_TEXFUNC_DECAL: - WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a) + s;\n"); - break; - - case GE_TEXFUNC_BLEND: - WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n"); - break; - - case GE_TEXFUNC_REPLACE: - WRITE(p, " vec4 v = t + s;\n"); - break; - - case GE_TEXFUNC_ADD: - case GE_TEXFUNC_UNKNOWN1: - case GE_TEXFUNC_UNKNOWN2: - case GE_TEXFUNC_UNKNOWN3: - WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a) + s;\n"); - break; - default: - WRITE(p, " vec4 v = p;\n"); break; - } - } else { // texfmt == RGB - switch (texFunc) { - case GE_TEXFUNC_MODULATE: - WRITE(p, " vec4 v = vec4(t.rgb * p.rgb, p.a) + s;\n"); - break; - - case GE_TEXFUNC_DECAL: - WRITE(p, " vec4 v = vec4(t.rgb, p.a) + s;\n"); - break; - - case GE_TEXFUNC_BLEND: - WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a) + s;\n"); - break; - - case GE_TEXFUNC_REPLACE: - WRITE(p, " vec4 v = vec4(t.rgb, p.a) + s;\n"); - break; - - case GE_TEXFUNC_ADD: - case GE_TEXFUNC_UNKNOWN1: - case GE_TEXFUNC_UNKNOWN2: - case GE_TEXFUNC_UNKNOWN3: - WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a) + s;\n"); break; - default: - WRITE(p, " vec4 v = p;\n"); break; - } + if (texFunc != GE_TEXFUNC_REPLACE) { + WRITE(p, " t.a = max(t.a, u_texNoAlpha);\n"); } + switch (texFunc) { + case GE_TEXFUNC_MODULATE: + WRITE(p, " vec4 v = p * t + s;\n"); + break; + case GE_TEXFUNC_DECAL: + WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a) + s;\n"); + break; + case GE_TEXFUNC_BLEND: + WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a) + s;\n"); + break; + case GE_TEXFUNC_REPLACE: + WRITE(p, " vec4 r = t;\n"); + WRITE(p, " r.a = mix(r.a, p.a, u_texNoAlpha);\n"); + WRITE(p, " vec4 v = r + s;\n"); + break; + case GE_TEXFUNC_ADD: + case GE_TEXFUNC_UNKNOWN1: + case GE_TEXFUNC_UNKNOWN2: + case GE_TEXFUNC_UNKNOWN3: + WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a) + s;\n"); + break; + default: + // Doesn't happen + WRITE(p, " vec4 v = p + s;\n"); break; + break; + } if (enableColorDoubling) { // This happens before fog is applied. WRITE(p, " v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n"); diff --git a/GPU/Common/FragmentShaderGenerator.h b/GPU/Common/FragmentShaderGenerator.h index 88c2c3f9d6..bd96f6dcff 100644 --- a/GPU/Common/FragmentShaderGenerator.h +++ b/GPU/Common/FragmentShaderGenerator.h @@ -23,7 +23,7 @@ struct FShaderID; -// D3D9 constants +// D3D9 float constants #define CONST_PS_TEXENV 0 #define CONST_PS_ALPHACOLORREF 1 @@ -36,9 +36,13 @@ struct FShaderID; #define CONST_PS_TEXCLAMP 8 #define CONST_PS_TEXCLAMPOFF 9 #define CONST_PS_MIPBIAS 10 +#define CONST_PS_TEX_NO_ALPHA 11 // For stencil upload -#define CONST_PS_STENCILVALUE 11 +#define BCONST_PS_STENCILVALUE 12 + +// D3D9 bool constants, they have their own register space. + // Can technically be deduced from the fragment shader ID, but this is safer. enum class FragmentShaderFlags : u32 { diff --git a/GPU/Common/ShaderCommon.h b/GPU/Common/ShaderCommon.h index e715936ddd..7743dac933 100644 --- a/GPU/Common/ShaderCommon.h +++ b/GPU/Common/ShaderCommon.h @@ -90,11 +90,11 @@ enum : uint64_t { DIRTY_MIPBIAS = 1ULL << 37, DIRTY_LIGHT_CONTROL = 1ULL << 38, - // space for 1 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS. + DIRTY_TEXALPHA = 1ULL << 39, DIRTY_BONE_UNIFORMS = 0xFF000000ULL, - DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFFULL, + DIRTY_ALL_UNIFORMS = 0xFFFFFFFFFFULL, DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3, // Other dirty elements that aren't uniforms! diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index d7da61c290..1ec1012f75 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -192,7 +192,6 @@ std::string FragmentShaderDesc(const FShaderID &id) { if (id.Bit(FS_BIT_CLEARMODE)) desc << "Clear "; if (id.Bit(FS_BIT_DO_TEXTURE)) desc << (id.Bit(FS_BIT_3D_TEXTURE) ? "Tex3D " : "Tex "); if (id.Bit(FS_BIT_DO_TEXTURE_PROJ)) desc << "TexProj "; - if (id.Bit(FS_BIT_TEXALPHA)) desc << "TexAlpha "; if (id.Bit(FS_BIT_TEXTURE_AT_OFFSET)) desc << "TexOffs "; if (id.Bit(FS_BIT_COLOR_DOUBLE)) desc << "2x "; if (id.Bit(FS_BIT_FLATSHADE)) desc << "Flat "; @@ -291,7 +290,6 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue(); bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled(); bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix, gstate.getUVProjMode())); - bool doTextureAlpha = gstate.isTextureAlphaUsed(); bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT; ShaderDepalMode shaderDepalMode = gstate_c.shaderDepalMode; @@ -303,16 +301,9 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip SimulateLogicOpType simulateLogicOpType = pipelineState.blendState.simulateLogicOpType; ReplaceAlphaType stencilToAlpha = pipelineState.blendState.replaceAlphaWithStencil; - // All texfuncs except replace are the same for RGB as for RGBA with full alpha. - // Note that checking this means that we must dirty the fragment shader ID whenever textureFullAlpha changes. - if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE) { - doTextureAlpha = false; - } - if (gstate.isTextureMapEnabled()) { id.SetBit(FS_BIT_DO_TEXTURE); id.SetBits(FS_BIT_TEXFUNC, 3, gstate.getTextureFunction()); - id.SetBit(FS_BIT_TEXALPHA, doTextureAlpha & 1); // rgb or rgba if (gstate_c.needShaderTexClamp) { bool textureAtOffset = gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0; // 4 bits total. diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index e03b27f214..62adaaf38a 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -6,9 +6,8 @@ #include "Common/CommonFuncs.h" -// TODO: There will be additional bits, indicating that groups of these will be -// sent to the shader and processed there. This will cut down the number of shaders ("ubershader approach") -// This is probably only really worth doing for lighting and bones. +// VS_BIT_LIGHT_UBERSHADER indicates that some groups of these will be +// sent to the shader and processed there. This cuts down the number of shaders ("ubershader approach"). enum VShaderBit : uint8_t { VS_BIT_LMODE = 0, VS_BIT_IS_THROUGH = 1, @@ -68,7 +67,7 @@ enum FShaderBit : uint8_t { FS_BIT_CLEARMODE = 0, FS_BIT_DO_TEXTURE = 1, FS_BIT_TEXFUNC = 2, // 3 bits - FS_BIT_TEXALPHA = 5, + // 1 bit free at position 5 FS_BIT_3D_TEXTURE = 6, FS_BIT_SHADER_TEX_CLAMP = 7, FS_BIT_CLAMP_S = 8, diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 92c22be0fe..e2baeabd1d 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -198,8 +198,12 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView } } + if (dirtyUniforms & DIRTY_TEXALPHA) { + ub->texNoAlpha = gstate.isTextureAlphaUsed() ? 0.0f : 1.0f; + } + if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) { - ub->stencil = (float)gstate.getStencilTestRef() * (1.0 / 255.0); + ub->stencilReplaceValue = (float)gstate.getStencilTestRef() * (1.0 / 255.0); } // Note - this one is not in lighting but in transformCommon as it has uses beyond lighting diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index c09c231b86..4e8a560065 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -9,7 +9,7 @@ enum : uint64_t { DIRTY_BASE_UNIFORMS = DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF | - DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEFENABLE | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE | + DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEFENABLE | DIRTY_TEXENV | DIRTY_TEXALPHA | DIRTY_STENCILREPLACEVALUE | DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_COLORWRITEMASK | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA | DIRTY_BEZIERSPLINE | DIRTY_DEPAL, DIRTY_LIGHT_UNIFORMS = @@ -17,7 +17,7 @@ enum : uint64_t { DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT, }; -// Currently 448 bytes. +// Currently 496 bytes. // Every line here is a 4-float. struct alignas(16) UB_VS_FS_Base { float proj[16]; @@ -35,10 +35,11 @@ struct alignas(16) UB_VS_FS_Base { // Fragment data float fogColor[3]; uint32_t alphaColorRef; float texEnvColor[3]; uint32_t colorTestMask; - float blendFixA[3]; float stencil; + float blendFixA[3]; float stencilReplaceValue; float blendFixB[3]; float rotation; float texClamp[4]; float texClampOffset[2]; float fogCoef[2]; + float texNoAlpha; float pad[3]; // VR stuff is to go here, later. For normal drawing, we can then get away // with just uploading the first 448 bytes of the struct (up to and including fogCoef). }; @@ -65,6 +66,7 @@ R"( mat4 u_proj; vec4 u_texclamp; vec2 u_texclampoff; vec2 u_fogcoef; + float u_texNoAlpha; float pad0; float pad1; float pad2; )"; // 512 bytes. Would like to shrink more. Some colors only have 8-bit precision and we expand diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 1f820310bd..4bb5d6b943 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -261,7 +261,7 @@ static void ConvertProjMatrixToD3DThrough(Matrix4x4 &in) { in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(1.0f, 1.0f, 0.5f)); } -const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP | DIRTY_MIPBIAS; +const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_TEXALPHA | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP | DIRTY_MIPBIAS; void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) { if (dirtyUniforms & DIRTY_TEXENV) { @@ -279,7 +279,10 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) { if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) { PSSetFloat(CONST_PS_STENCILREPLACE, (float)gstate.getStencilTestRef() * (1.0f / 255.0f)); } - + if (dirtyUniforms & DIRTY_TEXALPHA) { + // NOTE: Reversed value, more efficient in shader. + PSSetFloat(CONST_PS_TEX_NO_ALPHA, gstate.isTextureAlphaUsed() ? 0.0f : 1.0f); + } if (dirtyUniforms & DIRTY_SHADERBLEND) { PSSetColorUniform3(CONST_PS_BLENDFIXA, gstate.getFixA()); PSSetColorUniform3(CONST_PS_BLENDFIXB, gstate.getFixB()); diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 8a728d7db4..7bd7af8286 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -152,6 +152,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, queries.push_back({ &u_uvscaleoffset, "u_uvscaleoffset" }); queries.push_back({ &u_texclamp, "u_texclamp" }); queries.push_back({ &u_texclampoff, "u_texclampoff" }); + queries.push_back({ &u_texNoAlpha, "u_texNoAlpha" }); queries.push_back({ &u_lightControl, "u_lightControl" }); for (int i = 0; i < 4; i++) { @@ -440,6 +441,9 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin if (dirty & DIRTY_TEXENV) { SetColorUniform3(render_, &u_texenv, gstate.texenvcolor); } + if (dirty & DIRTY_TEXALPHA) { + render_->SetUniformF1(&u_texNoAlpha, gstate.isTextureAlphaUsed() ? 0.0f : 1.0f); + } if (dirty & DIRTY_ALPHACOLORREF) { if (shaderLanguage.bitwiseOps) { render_->SetUniformUI1(&u_alphacolorref, gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24)); @@ -945,7 +949,7 @@ enum class CacheDetectFlags { }; #define CACHE_HEADER_MAGIC 0x83277592 -#define CACHE_VERSION 21 +#define CACHE_VERSION 22 struct CacheHeader { uint32_t magic; uint32_t version; diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index 6520b1e4ad..c5665c533a 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -101,6 +101,7 @@ public: int u_uvscaleoffset; int u_texclamp; int u_texclampoff; + int u_texNoAlpha; // Lighting int u_lightControl; diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 0718862eb0..f7a8dfa7e6 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -94,7 +94,7 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE }, // Raster state for Direct3D 9, uncommon. { GE_CMD_SHADEMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE }, - { GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE }, + { GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEXALPHA }, { GE_CMD_COLORTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_ALPHATESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE }, { GE_CMD_COLORTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE }, diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 955932550b..df2495c088 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -516,7 +516,7 @@ enum class VulkanCacheDetectFlags { }; #define CACHE_HEADER_MAGIC 0xff51f420 -#define CACHE_VERSION 35 +#define CACHE_VERSION 36 struct VulkanCacheHeader { uint32_t magic; uint32_t version;