softgpu: Cache minz/maxz in draw pixel state.

This commit is contained in:
Unknown W. Brackets 2022-01-15 10:32:15 -08:00
parent 0b3f096c01
commit f1ce2e7715
6 changed files with 48 additions and 39 deletions

View file

@ -384,7 +384,7 @@ void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg colo
Vec4<int> prim_color = Vec4<int>(color_in).Clamp(0, 255);
// Depth range test - applied in clear mode, if not through mode.
if (pixelID.applyDepthRange)
if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax())
if (z < pixelID.cached.minz || z > pixelID.cached.maxz)
return;
if (pixelID.AlphaTestFunc() != GE_COMP_ALWAYS && !clearMode)

View file

@ -73,6 +73,8 @@ private:
#endif
RegCache::Reg GetGState();
RegCache::Reg GetPixelID();
void UnlockPixelID(RegCache::Reg &r);
RegCache::Reg GetConstBase();
RegCache::Reg GetZeroVec();
// Note: these may require a temporary reg.

View file

@ -147,6 +147,25 @@ RegCache::Reg PixelJitCache::GetGState() {
return regCache_.Find(RegCache::GEN_GSTATE);
}
// Returns a register holding the pixel func ID pointer.
// Release it with UnlockPixelID() once done.
RegCache::Reg PixelJitCache::GetPixelID() {
	// While the argument register is still tracked, just use it directly.
	if (regCache_.Has(RegCache::GEN_ARG_ID)) {
		return regCache_.Find(RegCache::GEN_ARG_ID);
	}

	// A previous call may have already loaded it into a general register.
	if (regCache_.Has(RegCache::GEN_ID)) {
		return regCache_.Find(RegCache::GEN_ID);
	}

	// Otherwise, grab a register and load the pointer from its slot at RSP + stackIDOffset_.
	X64Reg idReg = regCache_.Alloc(RegCache::GEN_ID);
	_assert_(stackIDOffset_ != -1);
	MOV(PTRBITS, R(idReg), MDisp(RSP, stackIDOffset_));
	return idReg;
}
// Unlocks a register obtained from GetPixelID(), using whichever purpose
// GetPixelID() would have handed out (the arg register if tracked, else GEN_ID.)
void PixelJitCache::UnlockPixelID(RegCache::Reg &r) {
	if (!regCache_.Has(RegCache::GEN_ARG_ID)) {
		// Not the argument register, so it must be the one loaded from the stack.
		regCache_.Unlock(r, RegCache::GEN_ID);
		return;
	}

	regCache_.Unlock(r, RegCache::GEN_ARG_ID);
}
RegCache::Reg PixelJitCache::GetConstBase() {
if (!regCache_.Has(RegCache::GEN_CONST_BASE)) {
X64Reg r = regCache_.Alloc(RegCache::GEN_CONST_BASE);
@ -353,31 +372,19 @@ void PixelJitCache::Discard(Gen::CCFlags cc) {
bool PixelJitCache::Jit_ApplyDepthRange(const PixelFuncID &id) {
if (id.applyDepthRange) {
Describe("ApplyDepthR");
X64Reg gstateReg = INVALID_REG;
if (!RipAccessible(&gstate.minz) || !RipAccessible(&gstate.maxz))
gstateReg = GetGState();
X64Reg maxReg = regCache_.Alloc(RegCache::GEN_TEMP0);
X64Reg argZReg = regCache_.Find(RegCache::GEN_ARG_Z);
X64Reg idReg = GetPixelID();
// For lower, we compare directly (we take care of the 32-bit case below.)
if (RipAccessible(&gstate.minz))
CMP(16, R(argZReg), M(&gstate.minz));
else
CMP(16, R(argZReg), MDisp(gstateReg, offsetof(GPUgstate, minz)));
Discard(CC_B);
// We expanded this to 32 bits, so it's convenient to compare.
CMP(32, R(argZReg), MDisp(idReg, offsetof(PixelFuncID, cached.minz)));
Discard(CC_L);
// We load the low 16 bits, but compare all 32 of z. Above handles < 0.
if (RipAccessible(&gstate.maxz))
MOVZX(32, 16, maxReg, M(&gstate.maxz));
else
MOVZX(32, 16, maxReg, MDisp(gstateReg, offsetof(GPUgstate, maxz)));
CMP(32, R(argZReg), R(maxReg));
Discard(CC_A);
CMP(32, R(argZReg), MDisp(idReg, offsetof(PixelFuncID, cached.maxz)));
Discard(CC_G);
UnlockPixelID(idReg);
regCache_.Unlock(argZReg, RegCache::GEN_ARG_Z);
if (gstateReg != INVALID_REG)
regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE);
regCache_.Release(maxReg, RegCache::GEN_TEMP0);
}
// Since this is early on, try to free up the z reg if we don't need it anymore.
@ -544,14 +551,7 @@ bool PixelJitCache::Jit_ApplyFog(const PixelFuncID &id) {
// Load fog and expand to 16 bit. Ignore the high 8 bits, which'll match up with A.
Describe("ApplyFog");
X64Reg fogColorReg = regCache_.Alloc(RegCache::VEC_TEMP1);
X64Reg idReg = INVALID_REG;
if (regCache_.Has(RegCache::GEN_ARG_ID)) {
idReg = regCache_.Find(RegCache::GEN_ARG_ID);
} else {
_assert_(stackIDOffset_ != -1);
idReg = regCache_.Alloc(RegCache::GEN_TEMP1);
MOV(PTRBITS, R(idReg), MDisp(RSP, stackIDOffset_));
}
X64Reg idReg = GetPixelID();
if (cpu_info.bSSE4_1) {
PMOVZXBW(fogColorReg, MDisp(idReg, offsetof(PixelFuncID, cached.fogColor)));
} else {
@ -560,11 +560,7 @@ bool PixelJitCache::Jit_ApplyFog(const PixelFuncID &id) {
PUNPCKLBW(fogColorReg, R(zeroReg));
regCache_.Unlock(zeroReg, RegCache::VEC_ZERO);
}
if (regCache_.Has(RegCache::GEN_ARG_ID)) {
regCache_.Unlock(idReg, RegCache::GEN_ARG_ID);
} else {
regCache_.Release(idReg, RegCache::GEN_TEMP1);
}
UnlockPixelID(idReg);
// Load a set of 255s at 16 bit into a reg for later...
X64Reg invertReg = regCache_.Alloc(RegCache::VEC_TEMP2);
@ -744,6 +740,7 @@ bool PixelJitCache::Jit_StencilTest(const PixelFuncID &id, RegCache::Reg stencil
bool hadGStateReg = regCache_.Has(RegCache::GEN_GSTATE);
bool hadColorOffReg = regCache_.Has(RegCache::GEN_COLOR_OFF);
bool hadIdReg = regCache_.Has(RegCache::GEN_ID);
bool success = true;
if (stencilReg != INVALID_REG && (!hasFixedResult || !fixedResult)) {
@ -759,6 +756,8 @@ bool PixelJitCache::Jit_StencilTest(const PixelFuncID &id, RegCache::Reg stencil
regCache_.Change(RegCache::GEN_GSTATE, RegCache::GEN_INVALID);
if (!hadColorOffReg && regCache_.Has(RegCache::GEN_COLOR_OFF))
regCache_.Change(RegCache::GEN_COLOR_OFF, RegCache::GEN_INVALID);
if (!hadIdReg && regCache_.Has(RegCache::GEN_ID))
regCache_.Change(RegCache::GEN_ID, RegCache::GEN_INVALID);
if (!hasFixedResult)
SetJumpTarget(toPass);
@ -819,6 +818,7 @@ bool PixelJitCache::Jit_DepthTestForStencil(const PixelFuncID &id, RegCache::Reg
bool hadGStateReg = regCache_.Has(RegCache::GEN_GSTATE);
bool hadColorOffReg = regCache_.Has(RegCache::GEN_COLOR_OFF);
bool hadIdReg = regCache_.Has(RegCache::GEN_ID);
bool success = true;
success = success && Jit_ApplyStencilOp(id, id.ZFail(), stencilReg);
@ -830,6 +830,8 @@ bool PixelJitCache::Jit_DepthTestForStencil(const PixelFuncID &id, RegCache::Reg
regCache_.Change(RegCache::GEN_GSTATE, RegCache::GEN_INVALID);
if (!hadColorOffReg && regCache_.Has(RegCache::GEN_COLOR_OFF))
regCache_.Change(RegCache::GEN_COLOR_OFF, RegCache::GEN_INVALID);
if (!hadIdReg && regCache_.Has(RegCache::GEN_ID))
regCache_.Change(RegCache::GEN_ID, RegCache::GEN_INVALID);
SetJumpTarget(skip);

View file

@ -199,6 +199,8 @@ void ComputePixelFuncID(PixelFuncID *id) {
if (id->applyFog) {
id->cached.fogColor = gstate.fogcolor & 0x00FFFFFF;
}
id->cached.minz = gstate.getDepthRangeMin();
id->cached.maxz = gstate.getDepthRangeMax();
id->cached.framebufStride = gstate.FrameBufStride();
id->cached.depthbufStride = gstate.DepthBufStride();
}

View file

@ -52,6 +52,8 @@ struct PixelFuncID {
uint32_t colorWriteMask{};
int8_t ditherMatrix[16]{};
uint32_t fogColor;
int minz;
int maxz;
uint16_t framebufStride;
uint16_t depthbufStride;
} cached;

View file

@ -107,12 +107,13 @@ struct RegCache {
GEN_SRC_ALPHA = 0x0100,
GEN_GSTATE = 0x0101,
GEN_CONST_BASE = 0x0102,
GEN_STENCIL = 0x0103,
GEN_COLOR_OFF = 0x0104,
GEN_DEPTH_OFF = 0x0105,
GEN_RESULT = 0x0106,
GEN_SHIFTVAL = 0x0107,
GEN_ID = 0x0102,
GEN_CONST_BASE = 0x0103,
GEN_STENCIL = 0x0104,
GEN_COLOR_OFF = 0x0105,
GEN_DEPTH_OFF = 0x0106,
GEN_RESULT = 0x0107,
GEN_SHIFTVAL = 0x0108,
GEN_ARG_X = 0x0180,
GEN_ARG_Y = 0x0181,