From adc94b19508eb20f76d707238b778ed3aaf8eb9f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 3 Dec 2022 12:44:02 -0800 Subject: [PATCH] softgpu: Use CLUT to optimize out blending more. This actually happens relatively often. --- GPU/Software/Rasterizer.cpp | 68 ++++++++++++++++++++++++++++++------- GPU/Software/Rasterizer.h | 3 ++ 2 files changed, 59 insertions(+), 12 deletions(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index b2c2a1e104..b9af632aa9 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -211,17 +211,65 @@ static inline RasterizerStateFlags ReplaceSamplerIDFlags(const RasterizerStateFl return updated | (RasterizerStateFlags)OptimizeSamplerIDFlags(replace); } +static bool CheckClutAlphaFull(RasterizerState *state) { + // We only need to check it once. + if (state->flags & RasterizerStateFlags::CLUT_ALPHA_CHECKED) + return !(state->flags & RasterizerStateFlags::CLUT_ALPHA_NON_FULL); + // For now, let's keep things simple. + const SamplerID &samplerID = state->samplerID; + if (samplerID.hasClutOffset || !samplerID.useSharedClut) + return false; + + uint32_t count = samplerID.TexFmt() == GE_TFMT_CLUT4 ? 16 : 256; + if (samplerID.hasClutMask) + count = std::min(count, ((samplerID.cached.clutFormat >> 8) & 0xFF) + 1); + + bool onlyFull = true; + switch (samplerID.ClutFmt()) { + case GE_CMODE_16BIT_BGR5650: + break; + + case GE_CMODE_16BIT_ABGR5551: + onlyFull = CheckAlpha16((const uint16_t *)samplerID.cached.clut, count, 0x8000) == CHECKALPHA_FULL; + break; + + case GE_CMODE_16BIT_ABGR4444: + onlyFull = CheckAlpha16((const uint16_t *)samplerID.cached.clut, count, 0xF000) == CHECKALPHA_FULL; + break; + + case GE_CMODE_32BIT_ABGR8888: + onlyFull = CheckAlpha32((const uint32_t *)samplerID.cached.clut, count, 0xFF000000) == CHECKALPHA_FULL; + break; + } + + if (!onlyFull) + state->flags |= RasterizerStateFlags::CLUT_ALPHA_NON_FULL; + state->flags |= RasterizerStateFlags::CLUT_ALPHA_CHECKED; + + return onlyFull; +} + static RasterizerStateFlags DetectStateOptimizations(RasterizerState *state) { // Note: all optimizations must be undoable. RasterizerStateFlags optimize = RasterizerStateFlags::NONE; + auto &pixelID = state->pixelID; + auto &samplerID = state->samplerID; - if (!state->pixelID.clearMode) { - auto &pixelID = state->pixelID; + bool alphaZero = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_ZERO); + bool alphaFull = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_FULL); + bool needTextureAlpha = state->enableTextures && samplerID.useTextureAlpha; + + if (!pixelID.clearMode) { auto &cached = pixelID.cached; - bool useTextureAlpha = state->enableTextures && state->samplerID.useTextureAlpha; bool alphaBlend = pixelID.alphaBlend || (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_OFF); - if (alphaBlend && !useTextureAlpha) { + if (needTextureAlpha && alphaBlend && alphaFull) { + bool usesClut = (samplerID.texfmt & 4) != 0; + if (usesClut && CheckClutAlphaFull(state)) + needTextureAlpha = false; + } + + if (alphaBlend && !needTextureAlpha) { PixelBlendFactor src = pixelID.AlphaBlendSrc(); PixelBlendFactor dst = pixelID.AlphaBlendDst(); if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_SRC) @@ -229,17 +277,15 @@ static RasterizerStateFlags DetectStateOptimizations(RasterizerState *state) { if (state->flags & RasterizerStateFlags::OPTIMIZED_BLEND_DST) dst = PixelBlendFactor::INVSRCALPHA; - bool canZero = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_ZERO); - bool canFull = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_FULL); // Okay, we may be able to convert this to a fixed value. - if (canZero || canFull) { + if (alphaZero || alphaFull) { // If it was already set and we still can, set it again. if (src == PixelBlendFactor::SRCALPHA) optimize |= RasterizerStateFlags::OPTIMIZED_BLEND_SRC; if (dst == PixelBlendFactor::INVSRCALPHA) optimize |= RasterizerStateFlags::OPTIMIZED_BLEND_DST; } - if (canFull && (src == PixelBlendFactor::SRCALPHA || src == PixelBlendFactor::ONE) && (dst == PixelBlendFactor::INVSRCALPHA || dst == PixelBlendFactor::ZERO)) { + if (alphaFull && (src == PixelBlendFactor::SRCALPHA || src == PixelBlendFactor::ONE) && (dst == PixelBlendFactor::INVSRCALPHA || dst == PixelBlendFactor::ZERO)) { optimize |= RasterizerStateFlags::OPTIMIZED_BLEND_OFF; } } @@ -253,12 +299,10 @@ static RasterizerStateFlags DetectStateOptimizations(RasterizerState *state) { } if (state->enableTextures) { - bool useTextureAlpha = state->samplerID.useTextureAlpha; - bool alphaFull = !(state->flags & RasterizerStateFlags::VERTEX_ALPHA_NON_FULL); bool colorFull = !(state->flags & RasterizerStateFlags::VERTEX_NON_FULL_WHITE); - if (colorFull && (!useTextureAlpha || alphaFull)) { + if (colorFull && (!needTextureAlpha || alphaFull)) { // Modulate is common, sometimes even with a fixed color. Replace is cheaper. - GETexFunc texFunc = state->samplerID.TexFunc(); + GETexFunc texFunc = samplerID.TexFunc(); if (state->flags & RasterizerStateFlags::OPTIMIZED_TEXREPLACE) texFunc = GE_TEXFUNC_MODULATE; diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h index ed14d7ca12..859f01f655 100644 --- a/GPU/Software/Rasterizer.h +++ b/GPU/Software/Rasterizer.h @@ -40,6 +40,9 @@ enum class RasterizerStateFlags { VERTEX_ALPHA_NON_FULL = 0x0004, VERTEX_HAS_FOG = 0x0008, + CLUT_ALPHA_CHECKED = 0x0010, + CLUT_ALPHA_NON_FULL = 0x0020, + VERTEX_FLAT_RESET = VERTEX_NON_FULL_WHITE | VERTEX_ALPHA_NON_FULL | VERTEX_ALPHA_NON_ZERO | VERTEX_HAS_FOG, OPTIMIZED = 0x0001'0000,