diff --git a/GPU/Software/DrawPixel.cpp b/GPU/Software/DrawPixel.cpp index f28e908e45..3caa911067 100644 --- a/GPU/Software/DrawPixel.cpp +++ b/GPU/Software/DrawPixel.cpp @@ -155,7 +155,7 @@ static inline void SetPixelColor(GEBufferFormat fmt, int fbStride, int x, int y, static inline bool AlphaTestPassed(const PixelFuncID &pixelID, int alpha) { const u8 ref = pixelID.alphaTestRef; if (pixelID.hasAlphaTestMask) - alpha &= gstate.getAlphaTestMask(); + alpha &= pixelID.cached.alphaTestMask; switch (pixelID.AlphaTestFunc()) { case GE_COMP_NEVER: @@ -209,7 +209,7 @@ static inline bool ColorTestPassed(const Vec3 &color) { static inline bool StencilTestPassed(const PixelFuncID &pixelID, u8 stencil) { if (pixelID.hasStencilTestMask) - stencil &= gstate.getStencilTestMask(); + stencil &= pixelID.cached.stencilTestMask; u8 ref = pixelID.stencilTestRef; switch (pixelID.StencilTestFunc()) { case GE_COMP_NEVER: @@ -239,7 +239,7 @@ static inline bool StencilTestPassed(const PixelFuncID &pixelID, u8 stencil) { return true; } -static inline u8 ApplyStencilOp(GEBufferFormat fmt, GEStencilOp op, u8 old_stencil) { +static inline u8 ApplyStencilOp(GEBufferFormat fmt, uint8_t stencilReplace, GEStencilOp op, u8 old_stencil) { switch (op) { case GE_STENCILOP_KEEP: return old_stencil; @@ -248,7 +248,7 @@ static inline u8 ApplyStencilOp(GEBufferFormat fmt, GEStencilOp op, u8 old_stenc return 0; case GE_STENCILOP_REPLACE: - return gstate.getStencilTestRef(); + return stencilReplace; case GE_STENCILOP_INVERT: return ~old_stencil; @@ -427,20 +427,21 @@ void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg colo if (pixelID.DepthClear()) SetPixelDepth(x, y, pixelID.cached.depthbufStride, z); } else if (pixelID.stencilTest) { + const uint8_t stencilReplace = pixelID.hasStencilTestMask ? pixelID.cached.stencilRef : pixelID.stencilTestRef; if (!StencilTestPassed(pixelID, stencil)) { - stencil = ApplyStencilOp(fbFormat, pixelID.SFail(), stencil); + stencil = ApplyStencilOp(fbFormat, stencilReplace, pixelID.SFail(), stencil); SetPixelStencil(fbFormat, pixelID.cached.framebufStride, targetWriteMask, x, y, stencil); return; } // Also apply depth at the same time. If disabled, same as passing. if (pixelID.DepthTestFunc() != GE_COMP_ALWAYS && !DepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) { - stencil = ApplyStencilOp(fbFormat, pixelID.ZFail(), stencil); + stencil = ApplyStencilOp(fbFormat, stencilReplace, pixelID.ZFail(), stencil); SetPixelStencil(fbFormat, pixelID.cached.framebufStride, targetWriteMask, x, y, stencil); return; } - stencil = ApplyStencilOp(fbFormat, pixelID.ZPass(), stencil); + stencil = ApplyStencilOp(fbFormat, stencilReplace, pixelID.ZPass(), stencil); } else { if (pixelID.DepthTestFunc() != GE_COMP_ALWAYS && !DepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) { return; diff --git a/GPU/Software/DrawPixelX86.cpp b/GPU/Software/DrawPixelX86.cpp index fd313cb1ee..7a0d4fbc7c 100644 --- a/GPU/Software/DrawPixelX86.cpp +++ b/GPU/Software/DrawPixelX86.cpp @@ -425,14 +425,13 @@ bool PixelJitCache::Jit_AlphaTest(const PixelFuncID &id) { } if (id.hasAlphaTestMask) { - // Unfortunate, we'll need gstate to load the mask. + // Unfortunate, we'll need pixelID to load the mask. // Note: we leave the ALPHA purpose untouched and free it, because later code may reuse. - X64Reg gstateReg = GetGState(); + X64Reg idReg = GetPixelID(); X64Reg maskedReg = regCache_.Alloc(RegCache::GEN_TEMP0); - // The mask is >> 16, so we load + 2. - MOVZX(32, 8, maskedReg, MDisp(gstateReg, offsetof(GPUgstate, alphatest) + 2)); - regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE); + MOVZX(32, 8, maskedReg, MDisp(idReg, offsetof(PixelFuncID, cached.alphaTestMask))); + UnlockPixelID(idReg); AND(32, R(maskedReg), R(alphaReg)); regCache_.Unlock(alphaReg, RegCache::GEN_SRC_ALPHA); @@ -642,11 +641,11 @@ bool PixelJitCache::Jit_StencilAndDepthTest(const PixelFuncID &id) { Describe("StencilAndDepth"); X64Reg maskedReg = stencilReg; if (id.hasStencilTestMask) { - X64Reg gstateReg = GetGState(); + X64Reg idReg = GetPixelID(); maskedReg = regCache_.Alloc(RegCache::GEN_TEMP0); MOV(32, R(maskedReg), R(stencilReg)); - AND(8, R(maskedReg), MDisp(gstateReg, offsetof(GPUgstate, stenciltest) + 2)); - regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE); + AND(8, R(maskedReg), MDisp(idReg, offsetof(PixelFuncID, cached.stencilTestMask))); + UnlockPixelID(idReg); } bool success = true; @@ -738,7 +737,6 @@ bool PixelJitCache::Jit_StencilTest(const PixelFuncID &id, RegCache::Reg stencil return true; } - bool hadGStateReg = regCache_.Has(RegCache::GEN_GSTATE); bool hadColorOffReg = regCache_.Has(RegCache::GEN_COLOR_OFF); bool hadIdReg = regCache_.Has(RegCache::GEN_ID); @@ -751,9 +749,7 @@ bool PixelJitCache::Jit_StencilTest(const PixelFuncID &id, RegCache::Reg stencil Discard(); } - // If we allocated either gstate or colorOff in the conditional, forget. - if (!hadGStateReg && regCache_.Has(RegCache::GEN_GSTATE)) - regCache_.Change(RegCache::GEN_GSTATE, RegCache::GEN_INVALID); + // If we allocated either id or colorOff in the conditional, forget. if (!hadColorOffReg && regCache_.Has(RegCache::GEN_COLOR_OFF)) regCache_.Change(RegCache::GEN_COLOR_OFF, RegCache::GEN_INVALID); if (!hadIdReg && regCache_.Has(RegCache::GEN_ID)) @@ -816,7 +812,6 @@ bool PixelJitCache::Jit_DepthTestForStencil(const PixelFuncID &id, RegCache::Reg break; } - bool hadGStateReg = regCache_.Has(RegCache::GEN_GSTATE); bool hadColorOffReg = regCache_.Has(RegCache::GEN_COLOR_OFF); bool hadIdReg = regCache_.Has(RegCache::GEN_ID); @@ -825,9 +820,7 @@ bool PixelJitCache::Jit_DepthTestForStencil(const PixelFuncID &id, RegCache::Reg success = success && Jit_WriteStencilOnly(id, stencilReg); Discard(); - // If we allocated either gstate or colorOff in the conditional, forget. - if (!hadGStateReg && regCache_.Has(RegCache::GEN_GSTATE)) - regCache_.Change(RegCache::GEN_GSTATE, RegCache::GEN_INVALID); + // If we allocated either id or colorOff in the conditional, forget. if (!hadColorOffReg && regCache_.Has(RegCache::GEN_COLOR_OFF)) regCache_.Change(RegCache::GEN_COLOR_OFF, RegCache::GEN_INVALID); if (!hadIdReg && regCache_.Has(RegCache::GEN_ID)) @@ -859,9 +852,9 @@ bool PixelJitCache::Jit_ApplyStencilOp(const PixelFuncID &id, GEStencilOp op, Re case GE_STENCILOP_REPLACE: if (id.hasStencilTestMask) { // Load the unmasked value. - X64Reg gstateReg = GetGState(); - MOVZX(32, 8, stencilReg, MDisp(gstateReg, offsetof(GPUgstate, stenciltest) + 1)); - regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE); + X64Reg idReg = GetPixelID(); + MOVZX(32, 8, stencilReg, MDisp(idReg, offsetof(PixelFuncID, cached.stencilRef))); + UnlockPixelID(idReg); } else { MOV(8, R(stencilReg), Imm8(id.stencilTestRef)); } diff --git a/GPU/Software/FuncId.cpp b/GPU/Software/FuncId.cpp index 19ab3fa436..db73995a4d 100644 --- a/GPU/Software/FuncId.cpp +++ b/GPU/Software/FuncId.cpp @@ -203,6 +203,14 @@ void ComputePixelFuncID(PixelFuncID *id) { id->cached.maxz = gstate.getDepthRangeMax(); id->cached.framebufStride = gstate.FrameBufStride(); id->cached.depthbufStride = gstate.DepthBufStride(); + + if (id->hasStencilTestMask) { + // Without the mask applied, unlike the one in the key. + id->cached.stencilRef = gstate.getStencilTestRef(); + id->cached.stencilTestMask = gstate.getStencilTestMask(); + } + if (id->hasAlphaTestMask) + id->cached.alphaTestMask = gstate.getAlphaTestMask(); } std::string DescribePixelFuncID(const PixelFuncID &id) { diff --git a/GPU/Software/FuncId.h b/GPU/Software/FuncId.h index 598f62cd78..382a48c2d6 100644 --- a/GPU/Software/FuncId.h +++ b/GPU/Software/FuncId.h @@ -57,6 +57,9 @@ struct PixelFuncID { uint16_t framebufStride; uint16_t depthbufStride; GELogicOp logicOp; + uint8_t stencilRef; + uint8_t stencilTestMask; + uint8_t alphaTestMask; } cached; union {