From 2474eb6a7264ad26d9b793e0c90d13f76dc1a0d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 4 Sep 2022 11:14:47 +0200 Subject: [PATCH] Shader bit setup and code generation for logic-ops-in-shader --- GPU/Common/FragmentShaderGenerator.cpp | 53 +++++++++++++++++++------- GPU/Common/ShaderId.cpp | 4 ++ GPU/Common/ShaderId.h | 1 + GPU/Vulkan/ShaderManagerVulkan.cpp | 2 +- 4 files changed, 45 insertions(+), 15 deletions(-) diff --git a/GPU/Common/FragmentShaderGenerator.cpp b/GPU/Common/FragmentShaderGenerator.cpp index 2173130295..93301b461c 100644 --- a/GPU/Common/FragmentShaderGenerator.cpp +++ b/GPU/Common/FragmentShaderGenerator.cpp @@ -108,11 +108,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu blueToAlpha = true; } - GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4); - GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4); - GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3); - StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4); - bool isModeClear = id.Bit(FS_BIT_CLEARMODE); const char *shading = ""; @@ -121,7 +116,16 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu bool useDiscardStencilBugWorkaround = id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL); - bool readFramebuffer = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask; + GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4); + GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4); + GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3); + StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4); + + // Distinct from the logic op simulation support. + GELogicOp replaceLogicOpType = isModeClear ? 
GE_LOGIC_COPY : (GELogicOp)id.Bits(FS_BIT_REPLACE_LOGIC_OP, 4); + bool replaceLogicOp = replaceLogicOpType != GE_LOGIC_COPY; + + bool readFramebuffer = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp; bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH); bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT); @@ -1078,16 +1082,37 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu return false; } - // Final color computed - apply color write mask. - // TODO: Maybe optimize to only do math on the affected channels? - // Or .. meh. That would require more shader bits. Though we could - // of course optimize for the common mask 0xF00000, though again, blue-to-alpha - // does a better job with that. - if (colorWriteMask) { + // Final color computed - apply logic ops and bitwise color write mask, through shader blending, if specified. + if (colorWriteMask || replaceLogicOp) { WRITE(p, " highp uint v32 = packUnorm4x8(%s);\n", compat.fragColor0); WRITE(p, " highp uint d32 = packUnorm4x8(destColor);\n"); - // Note that the mask has been flipped to the PC way - 1 means write. - WRITE(p, " v32 = (v32 & u_colorWriteMask) | (d32 & ~u_colorWriteMask);\n"); + + // v32 is both the "s" to the logical operation, and the value that we'll merge to the destination with masking later. + // d32 is the "d" to the logical operation. Constants below carry the 'u' suffix because GLSL ES has no implicit int->uint conversion. + // TODO: Do logical ops work on just RGB or also A on the PSP? + switch (replaceLogicOpType) { + case GE_LOGIC_CLEAR: p.C(" v32 = 0u;\n"); break; + case GE_LOGIC_AND: p.C(" v32 = v32 & d32;\n"); break; + case GE_LOGIC_AND_REVERSE: p.C(" v32 = v32 & ~d32;\n"); break; + case GE_LOGIC_COPY: break; // source to dest, do nothing. Will be set to this, if not used. 
+ case GE_LOGIC_AND_INVERTED: p.C(" v32 = ~v32 & d32;\n"); break; + case GE_LOGIC_NOOP: p.C(" v32 = d32;\n"); break; + case GE_LOGIC_XOR: p.C(" v32 = v32 ^ d32;\n"); break; + case GE_LOGIC_OR: p.C(" v32 = v32 | d32;\n"); break; + case GE_LOGIC_NOR: p.C(" v32 = ~(v32 | d32);\n"); break; + case GE_LOGIC_EQUIV: p.C(" v32 = ~(v32 ^ d32);\n"); break; + case GE_LOGIC_INVERTED: p.C(" v32 = ~d32;\n"); break; + case GE_LOGIC_OR_REVERSE: p.C(" v32 = v32 | ~d32;\n"); break; + case GE_LOGIC_COPY_INVERTED: p.C(" v32 = ~v32;\n"); break; + case GE_LOGIC_OR_INVERTED: p.C(" v32 = (~v32) | d32;\n"); break; + case GE_LOGIC_NAND: p.C(" v32 = ~(v32 & d32);\n"); break; + case GE_LOGIC_SET: p.C(" v32 = 0xFFFFFFFFu;\n"); break; + } + + // Note that the mask has already been flipped to the PC way - 1 means write. + if (colorWriteMask) { + WRITE(p, " v32 = (v32 & u_colorWriteMask) | (d32 & ~u_colorWriteMask);\n"); + } WRITE(p, " %s = unpackUnorm4x8(v32);\n", compat.fragColor0); } diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 53176ebcba..f67ab96b88 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -268,6 +268,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip ReplaceBlendType replaceBlend = pipelineState.blendState.replaceBlend; ReplaceAlphaType stencilToAlpha = pipelineState.blendState.replaceAlphaWithStencil; SimulateLogicOpType simulateLogicOpType = pipelineState.blendState.simulateLogicOpType; + GELogicOp replaceLogicOpType = GE_LOGIC_COPY; // All texfuncs except replace are the same for RGB as for RGBA with full alpha. // Note that checking this means that we must dirty the fragment shader ID whenever textureFullAlpha changes. @@ -325,6 +326,9 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip // 2 bits. id.SetBits(FS_BIT_SIMULATE_LOGIC_OP_TYPE, 2, simulateLogicOpType); + // 4 bits. Set to GE_LOGIC_COPY if not used, which does nothing in the shader generator. 
+ id.SetBits(FS_BIT_REPLACE_LOGIC_OP, 4, (int)replaceLogicOpType); + // If replaceBlend == REPLACE_BLEND_STANDARD (or REPLACE_BLEND_NO) nothing is done, so we kill these bits. if (replaceBlend == REPLACE_BLEND_BLUE_TO_ALPHA) { id.SetBits(FS_BIT_REPLACE_BLEND, 3, replaceBlend); diff --git a/GPU/Common/ShaderId.h b/GPU/Common/ShaderId.h index 914824d6bc..bd54e2bd82 100644 --- a/GPU/Common/ShaderId.h +++ b/GPU/Common/ShaderId.h @@ -95,6 +95,7 @@ enum FShaderBit : uint8_t { FS_BIT_COLOR_WRITEMASK = 50, FS_BIT_3D_TEXTURE = 51, FS_BIT_SHADER_SMOOTHED_DEPAL = 52, + FS_BIT_REPLACE_LOGIC_OP = 53, // 4 bits. GE_LOGIC_COPY means no-op/off. }; static inline FShaderBit operator +(FShaderBit bit, int i) { diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 52c2103a88..eb30aa4728 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -370,7 +370,7 @@ VulkanFragmentShader *ShaderManagerVulkan::GetFragmentShaderFromModule(VkShaderM // instantaneous. #define CACHE_HEADER_MAGIC 0xff51f420 -#define CACHE_VERSION 21 +#define CACHE_VERSION 22 struct VulkanCacheHeader { uint32_t magic; uint32_t version;