mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
softgpu: Cache minz/maxz in draw pixel state.
This commit is contained in:
parent
0b3f096c01
commit
f1ce2e7715
6 changed files with 48 additions and 39 deletions
|
@ -384,7 +384,7 @@ void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg colo
|
|||
Vec4<int> prim_color = Vec4<int>(color_in).Clamp(0, 255);
|
||||
// Depth range test - applied in clear mode, if not through mode.
|
||||
if (pixelID.applyDepthRange)
|
||||
if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax())
|
||||
if (z < pixelID.cached.minz || z > pixelID.cached.maxz)
|
||||
return;
|
||||
|
||||
if (pixelID.AlphaTestFunc() != GE_COMP_ALWAYS && !clearMode)
|
||||
|
|
|
@ -73,6 +73,8 @@ private:
|
|||
#endif
|
||||
|
||||
RegCache::Reg GetGState();
|
||||
RegCache::Reg GetPixelID();
|
||||
void UnlockPixelID(RegCache::Reg &r);
|
||||
RegCache::Reg GetConstBase();
|
||||
RegCache::Reg GetZeroVec();
|
||||
// Note: these may require a temporary reg.
|
||||
|
|
|
@ -147,6 +147,25 @@ RegCache::Reg PixelJitCache::GetGState() {
|
|||
return regCache_.Find(RegCache::GEN_GSTATE);
|
||||
}
|
||||
|
||||
// Returns a register holding the PixelFuncID pointer (callers use it as the base
// for offsetof(PixelFuncID, ...) memory operands.)
// Callers in this file hand the result back via UnlockPixelID() when done with it.
RegCache::Reg PixelJitCache::GetPixelID() {
|
||||
// Prefer the ID argument register when the cache still has it.
if (regCache_.Has(RegCache::GEN_ARG_ID))
|
||||
return regCache_.Find(RegCache::GEN_ARG_ID);
|
||||
// Otherwise, allocate a GEN_ID register and load the pointer from the stack on first use.
if (!regCache_.Has(RegCache::GEN_ID)) {
|
||||
X64Reg r = regCache_.Alloc(RegCache::GEN_ID);
|
||||
// NOTE(review): -1 presumably means no stack slot was set up for the ID - confirm.
_assert_(stackIDOffset_ != -1);
|
||||
MOV(PTRBITS, R(r), MDisp(RSP, stackIDOffset_));
|
||||
return r;
|
||||
}
|
||||
// A GEN_ID register already exists, so look it up instead of reloading.
return regCache_.Find(RegCache::GEN_ID);
|
||||
}
|
||||
|
||||
// Unlocks a register obtained from GetPixelID().
// Uses the same GEN_ARG_ID check as GetPixelID() to pick which purpose to unlock under.
void PixelJitCache::UnlockPixelID(RegCache::Reg &r) {
|
||||
if (regCache_.Has(RegCache::GEN_ARG_ID))
|
||||
regCache_.Unlock(r, RegCache::GEN_ARG_ID);
|
||||
else
|
||||
// No arg register in the cache, so unlock under GEN_ID.
regCache_.Unlock(r, RegCache::GEN_ID);
|
||||
}
|
||||
|
||||
RegCache::Reg PixelJitCache::GetConstBase() {
|
||||
if (!regCache_.Has(RegCache::GEN_CONST_BASE)) {
|
||||
X64Reg r = regCache_.Alloc(RegCache::GEN_CONST_BASE);
|
||||
|
@ -353,31 +372,19 @@ void PixelJitCache::Discard(Gen::CCFlags cc) {
|
|||
bool PixelJitCache::Jit_ApplyDepthRange(const PixelFuncID &id) {
|
||||
if (id.applyDepthRange) {
|
||||
Describe("ApplyDepthR");
|
||||
X64Reg gstateReg = INVALID_REG;
|
||||
if (!RipAccessible(&gstate.minz) || !RipAccessible(&gstate.maxz))
|
||||
gstateReg = GetGState();
|
||||
X64Reg maxReg = regCache_.Alloc(RegCache::GEN_TEMP0);
|
||||
X64Reg argZReg = regCache_.Find(RegCache::GEN_ARG_Z);
|
||||
X64Reg idReg = GetPixelID();
|
||||
|
||||
// For lower, we compare directly (we take care of the 32-bit case below.)
|
||||
if (RipAccessible(&gstate.minz))
|
||||
CMP(16, R(argZReg), M(&gstate.minz));
|
||||
else
|
||||
CMP(16, R(argZReg), MDisp(gstateReg, offsetof(GPUgstate, minz)));
|
||||
Discard(CC_B);
|
||||
// We expanded this to 32 bits, so it's convenient to compare.
|
||||
CMP(32, R(argZReg), MDisp(idReg, offsetof(PixelFuncID, cached.minz)));
|
||||
Discard(CC_L);
|
||||
|
||||
// We load the low 16 bits, but compare all 32 of z. Above handles < 0.
|
||||
if (RipAccessible(&gstate.maxz))
|
||||
MOVZX(32, 16, maxReg, M(&gstate.maxz));
|
||||
else
|
||||
MOVZX(32, 16, maxReg, MDisp(gstateReg, offsetof(GPUgstate, maxz)));
|
||||
CMP(32, R(argZReg), R(maxReg));
|
||||
Discard(CC_A);
|
||||
CMP(32, R(argZReg), MDisp(idReg, offsetof(PixelFuncID, cached.maxz)));
|
||||
Discard(CC_G);
|
||||
|
||||
UnlockPixelID(idReg);
|
||||
regCache_.Unlock(argZReg, RegCache::GEN_ARG_Z);
|
||||
if (gstateReg != INVALID_REG)
|
||||
regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE);
|
||||
regCache_.Release(maxReg, RegCache::GEN_TEMP0);
|
||||
}
|
||||
|
||||
// Since this is early on, try to free up the z reg if we don't need it anymore.
|
||||
|
@ -544,14 +551,7 @@ bool PixelJitCache::Jit_ApplyFog(const PixelFuncID &id) {
|
|||
// Load fog and expand to 16 bit. Ignore the high 8 bits, which'll match up with A.
|
||||
Describe("ApplyFog");
|
||||
X64Reg fogColorReg = regCache_.Alloc(RegCache::VEC_TEMP1);
|
||||
X64Reg idReg = INVALID_REG;
|
||||
if (regCache_.Has(RegCache::GEN_ARG_ID)) {
|
||||
idReg = regCache_.Find(RegCache::GEN_ARG_ID);
|
||||
} else {
|
||||
_assert_(stackIDOffset_ != -1);
|
||||
idReg = regCache_.Alloc(RegCache::GEN_TEMP1);
|
||||
MOV(PTRBITS, R(idReg), MDisp(RSP, stackIDOffset_));
|
||||
}
|
||||
X64Reg idReg = GetPixelID();
|
||||
if (cpu_info.bSSE4_1) {
|
||||
PMOVZXBW(fogColorReg, MDisp(idReg, offsetof(PixelFuncID, cached.fogColor)));
|
||||
} else {
|
||||
|
@ -560,11 +560,7 @@ bool PixelJitCache::Jit_ApplyFog(const PixelFuncID &id) {
|
|||
PUNPCKLBW(fogColorReg, R(zeroReg));
|
||||
regCache_.Unlock(zeroReg, RegCache::VEC_ZERO);
|
||||
}
|
||||
if (regCache_.Has(RegCache::GEN_ARG_ID)) {
|
||||
regCache_.Unlock(idReg, RegCache::GEN_ARG_ID);
|
||||
} else {
|
||||
regCache_.Release(idReg, RegCache::GEN_TEMP1);
|
||||
}
|
||||
UnlockPixelID(idReg);
|
||||
|
||||
// Load a set of 255s at 16 bit into a reg for later...
|
||||
X64Reg invertReg = regCache_.Alloc(RegCache::VEC_TEMP2);
|
||||
|
@ -744,6 +740,7 @@ bool PixelJitCache::Jit_StencilTest(const PixelFuncID &id, RegCache::Reg stencil
|
|||
|
||||
bool hadGStateReg = regCache_.Has(RegCache::GEN_GSTATE);
|
||||
bool hadColorOffReg = regCache_.Has(RegCache::GEN_COLOR_OFF);
|
||||
bool hadIdReg = regCache_.Has(RegCache::GEN_ID);
|
||||
|
||||
bool success = true;
|
||||
if (stencilReg != INVALID_REG && (!hasFixedResult || !fixedResult)) {
|
||||
|
@ -759,6 +756,8 @@ bool PixelJitCache::Jit_StencilTest(const PixelFuncID &id, RegCache::Reg stencil
|
|||
regCache_.Change(RegCache::GEN_GSTATE, RegCache::GEN_INVALID);
|
||||
if (!hadColorOffReg && regCache_.Has(RegCache::GEN_COLOR_OFF))
|
||||
regCache_.Change(RegCache::GEN_COLOR_OFF, RegCache::GEN_INVALID);
|
||||
if (!hadIdReg && regCache_.Has(RegCache::GEN_ID))
|
||||
regCache_.Change(RegCache::GEN_ID, RegCache::GEN_INVALID);
|
||||
|
||||
if (!hasFixedResult)
|
||||
SetJumpTarget(toPass);
|
||||
|
@ -819,6 +818,7 @@ bool PixelJitCache::Jit_DepthTestForStencil(const PixelFuncID &id, RegCache::Reg
|
|||
|
||||
bool hadGStateReg = regCache_.Has(RegCache::GEN_GSTATE);
|
||||
bool hadColorOffReg = regCache_.Has(RegCache::GEN_COLOR_OFF);
|
||||
bool hadIdReg = regCache_.Has(RegCache::GEN_ID);
|
||||
|
||||
bool success = true;
|
||||
success = success && Jit_ApplyStencilOp(id, id.ZFail(), stencilReg);
|
||||
|
@ -830,6 +830,8 @@ bool PixelJitCache::Jit_DepthTestForStencil(const PixelFuncID &id, RegCache::Reg
|
|||
regCache_.Change(RegCache::GEN_GSTATE, RegCache::GEN_INVALID);
|
||||
if (!hadColorOffReg && regCache_.Has(RegCache::GEN_COLOR_OFF))
|
||||
regCache_.Change(RegCache::GEN_COLOR_OFF, RegCache::GEN_INVALID);
|
||||
if (!hadIdReg && regCache_.Has(RegCache::GEN_ID))
|
||||
regCache_.Change(RegCache::GEN_ID, RegCache::GEN_INVALID);
|
||||
|
||||
SetJumpTarget(skip);
|
||||
|
||||
|
|
|
@ -199,6 +199,8 @@ void ComputePixelFuncID(PixelFuncID *id) {
|
|||
if (id->applyFog) {
|
||||
id->cached.fogColor = gstate.fogcolor & 0x00FFFFFF;
|
||||
}
|
||||
id->cached.minz = gstate.getDepthRangeMin();
|
||||
id->cached.maxz = gstate.getDepthRangeMax();
|
||||
id->cached.framebufStride = gstate.FrameBufStride();
|
||||
id->cached.depthbufStride = gstate.DepthBufStride();
|
||||
}
|
||||
|
|
|
@ -52,6 +52,8 @@ struct PixelFuncID {
|
|||
uint32_t colorWriteMask{};
|
||||
int8_t ditherMatrix[16]{};
|
||||
uint32_t fogColor;
|
||||
int minz;
|
||||
int maxz;
|
||||
uint16_t framebufStride;
|
||||
uint16_t depthbufStride;
|
||||
} cached;
|
||||
|
|
|
@ -107,12 +107,13 @@ struct RegCache {
|
|||
|
||||
GEN_SRC_ALPHA = 0x0100,
|
||||
GEN_GSTATE = 0x0101,
|
||||
GEN_CONST_BASE = 0x0102,
|
||||
GEN_STENCIL = 0x0103,
|
||||
GEN_COLOR_OFF = 0x0104,
|
||||
GEN_DEPTH_OFF = 0x0105,
|
||||
GEN_RESULT = 0x0106,
|
||||
GEN_SHIFTVAL = 0x0107,
|
||||
GEN_ID = 0x0102,
|
||||
GEN_CONST_BASE = 0x0103,
|
||||
GEN_STENCIL = 0x0104,
|
||||
GEN_COLOR_OFF = 0x0105,
|
||||
GEN_DEPTH_OFF = 0x0106,
|
||||
GEN_RESULT = 0x0107,
|
||||
GEN_SHIFTVAL = 0x0108,
|
||||
|
||||
GEN_ARG_X = 0x0180,
|
||||
GEN_ARG_Y = 0x0181,
|
||||
|
|
Loading…
Add table
Reference in a new issue