softgpu: Cache alpha/stencil test masks in state.

This commit is contained in:
Unknown W. Brackets 2022-01-15 11:46:07 -08:00
parent acad2640dd
commit aa9d751248
4 changed files with 31 additions and 26 deletions

View file

@ -155,7 +155,7 @@ static inline void SetPixelColor(GEBufferFormat fmt, int fbStride, int x, int y,
static inline bool AlphaTestPassed(const PixelFuncID &pixelID, int alpha) {
const u8 ref = pixelID.alphaTestRef;
if (pixelID.hasAlphaTestMask)
alpha &= gstate.getAlphaTestMask();
alpha &= pixelID.cached.alphaTestMask;
switch (pixelID.AlphaTestFunc()) {
case GE_COMP_NEVER:
@ -209,7 +209,7 @@ static inline bool ColorTestPassed(const Vec3<int> &color) {
static inline bool StencilTestPassed(const PixelFuncID &pixelID, u8 stencil) {
if (pixelID.hasStencilTestMask)
stencil &= gstate.getStencilTestMask();
stencil &= pixelID.cached.stencilTestMask;
u8 ref = pixelID.stencilTestRef;
switch (pixelID.StencilTestFunc()) {
case GE_COMP_NEVER:
@ -239,7 +239,7 @@ static inline bool StencilTestPassed(const PixelFuncID &pixelID, u8 stencil) {
return true;
}
static inline u8 ApplyStencilOp(GEBufferFormat fmt, GEStencilOp op, u8 old_stencil) {
static inline u8 ApplyStencilOp(GEBufferFormat fmt, uint8_t stencilReplace, GEStencilOp op, u8 old_stencil) {
switch (op) {
case GE_STENCILOP_KEEP:
return old_stencil;
@ -248,7 +248,7 @@ static inline u8 ApplyStencilOp(GEBufferFormat fmt, GEStencilOp op, u8 old_stenc
return 0;
case GE_STENCILOP_REPLACE:
return gstate.getStencilTestRef();
return stencilReplace;
case GE_STENCILOP_INVERT:
return ~old_stencil;
@ -427,20 +427,21 @@ void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg colo
if (pixelID.DepthClear())
SetPixelDepth(x, y, pixelID.cached.depthbufStride, z);
} else if (pixelID.stencilTest) {
const uint8_t stencilReplace = pixelID.hasStencilTestMask ? pixelID.cached.stencilRef : pixelID.stencilTestRef;
if (!StencilTestPassed(pixelID, stencil)) {
stencil = ApplyStencilOp(fbFormat, pixelID.SFail(), stencil);
stencil = ApplyStencilOp(fbFormat, stencilReplace, pixelID.SFail(), stencil);
SetPixelStencil(fbFormat, pixelID.cached.framebufStride, targetWriteMask, x, y, stencil);
return;
}
// Also apply depth at the same time. If disabled, same as passing.
if (pixelID.DepthTestFunc() != GE_COMP_ALWAYS && !DepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {
stencil = ApplyStencilOp(fbFormat, pixelID.ZFail(), stencil);
stencil = ApplyStencilOp(fbFormat, stencilReplace, pixelID.ZFail(), stencil);
SetPixelStencil(fbFormat, pixelID.cached.framebufStride, targetWriteMask, x, y, stencil);
return;
}
stencil = ApplyStencilOp(fbFormat, pixelID.ZPass(), stencil);
stencil = ApplyStencilOp(fbFormat, stencilReplace, pixelID.ZPass(), stencil);
} else {
if (pixelID.DepthTestFunc() != GE_COMP_ALWAYS && !DepthTestPassed(pixelID.DepthTestFunc(), x, y, pixelID.cached.depthbufStride, z)) {
return;

View file

@ -425,14 +425,13 @@ bool PixelJitCache::Jit_AlphaTest(const PixelFuncID &id) {
}
if (id.hasAlphaTestMask) {
// Unfortunate, we'll need gstate to load the mask.
// Unfortunate, we'll need pixelID to load the mask.
// Note: we leave the ALPHA purpose untouched and free it, because later code may reuse.
X64Reg gstateReg = GetGState();
X64Reg idReg = GetPixelID();
X64Reg maskedReg = regCache_.Alloc(RegCache::GEN_TEMP0);
// The mask is >> 16, so we load + 2.
MOVZX(32, 8, maskedReg, MDisp(gstateReg, offsetof(GPUgstate, alphatest) + 2));
regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE);
MOVZX(32, 8, maskedReg, MDisp(idReg, offsetof(PixelFuncID, cached.alphaTestMask)));
UnlockPixelID(idReg);
AND(32, R(maskedReg), R(alphaReg));
regCache_.Unlock(alphaReg, RegCache::GEN_SRC_ALPHA);
@ -642,11 +641,11 @@ bool PixelJitCache::Jit_StencilAndDepthTest(const PixelFuncID &id) {
Describe("StencilAndDepth");
X64Reg maskedReg = stencilReg;
if (id.hasStencilTestMask) {
X64Reg gstateReg = GetGState();
X64Reg idReg = GetPixelID();
maskedReg = regCache_.Alloc(RegCache::GEN_TEMP0);
MOV(32, R(maskedReg), R(stencilReg));
AND(8, R(maskedReg), MDisp(gstateReg, offsetof(GPUgstate, stenciltest) + 2));
regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE);
AND(8, R(maskedReg), MDisp(idReg, offsetof(PixelFuncID, cached.stencilTestMask)));
UnlockPixelID(idReg);
}
bool success = true;
@ -738,7 +737,6 @@ bool PixelJitCache::Jit_StencilTest(const PixelFuncID &id, RegCache::Reg stencil
return true;
}
bool hadGStateReg = regCache_.Has(RegCache::GEN_GSTATE);
bool hadColorOffReg = regCache_.Has(RegCache::GEN_COLOR_OFF);
bool hadIdReg = regCache_.Has(RegCache::GEN_ID);
@ -751,9 +749,7 @@ bool PixelJitCache::Jit_StencilTest(const PixelFuncID &id, RegCache::Reg stencil
Discard();
}
// If we allocated either gstate or colorOff in the conditional, forget.
if (!hadGStateReg && regCache_.Has(RegCache::GEN_GSTATE))
regCache_.Change(RegCache::GEN_GSTATE, RegCache::GEN_INVALID);
// If the id or colorOff register was first allocated inside the conditional, forget it.
if (!hadColorOffReg && regCache_.Has(RegCache::GEN_COLOR_OFF))
regCache_.Change(RegCache::GEN_COLOR_OFF, RegCache::GEN_INVALID);
if (!hadIdReg && regCache_.Has(RegCache::GEN_ID))
@ -816,7 +812,6 @@ bool PixelJitCache::Jit_DepthTestForStencil(const PixelFuncID &id, RegCache::Reg
break;
}
bool hadGStateReg = regCache_.Has(RegCache::GEN_GSTATE);
bool hadColorOffReg = regCache_.Has(RegCache::GEN_COLOR_OFF);
bool hadIdReg = regCache_.Has(RegCache::GEN_ID);
@ -825,9 +820,7 @@ bool PixelJitCache::Jit_DepthTestForStencil(const PixelFuncID &id, RegCache::Reg
success = success && Jit_WriteStencilOnly(id, stencilReg);
Discard();
// If we allocated either gstate or colorOff in the conditional, forget.
if (!hadGStateReg && regCache_.Has(RegCache::GEN_GSTATE))
regCache_.Change(RegCache::GEN_GSTATE, RegCache::GEN_INVALID);
// If the id or colorOff register was first allocated inside the conditional, forget it.
if (!hadColorOffReg && regCache_.Has(RegCache::GEN_COLOR_OFF))
regCache_.Change(RegCache::GEN_COLOR_OFF, RegCache::GEN_INVALID);
if (!hadIdReg && regCache_.Has(RegCache::GEN_ID))
@ -859,9 +852,9 @@ bool PixelJitCache::Jit_ApplyStencilOp(const PixelFuncID &id, GEStencilOp op, Re
case GE_STENCILOP_REPLACE:
if (id.hasStencilTestMask) {
// Load the unmasked value.
X64Reg gstateReg = GetGState();
MOVZX(32, 8, stencilReg, MDisp(gstateReg, offsetof(GPUgstate, stenciltest) + 1));
regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE);
X64Reg idReg = GetPixelID();
MOVZX(32, 8, stencilReg, MDisp(idReg, offsetof(PixelFuncID, cached.stencilRef)));
UnlockPixelID(idReg);
} else {
MOV(8, R(stencilReg), Imm8(id.stencilTestRef));
}

View file

@ -203,6 +203,14 @@ void ComputePixelFuncID(PixelFuncID *id) {
id->cached.maxz = gstate.getDepthRangeMax();
id->cached.framebufStride = gstate.FrameBufStride();
id->cached.depthbufStride = gstate.DepthBufStride();
if (id->hasStencilTestMask) {
// The cached stencilRef is stored without the mask applied, unlike the ref in the key.
id->cached.stencilRef = gstate.getStencilTestRef();
id->cached.stencilTestMask = gstate.getStencilTestMask();
}
if (id->hasAlphaTestMask)
id->cached.alphaTestMask = gstate.getAlphaTestMask();
}
std::string DescribePixelFuncID(const PixelFuncID &id) {

View file

@ -57,6 +57,9 @@ struct PixelFuncID {
uint16_t framebufStride;
uint16_t depthbufStride;
GELogicOp logicOp;
uint8_t stencilRef;
uint8_t stencilTestMask;
uint8_t alphaTestMask;
} cached;
union {