mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
samplerjit: Avoid RCX gymnastics with BMI2.
This commit is contained in:
parent
4cadcea6da
commit
3e4afe2a0c
1 changed files with 81 additions and 43 deletions
|
@ -184,12 +184,14 @@ NearestFunc SamplerJitCache::CompileNearest(const SamplerID &id) {
|
||||||
|
|
||||||
if (regCache_.Has(RegCache::GEN_ARG_BUFW_PTR)) {
|
if (regCache_.Has(RegCache::GEN_ARG_BUFW_PTR)) {
|
||||||
// On Linux, RCX is currently bufwptr, but we'll need it for other things.
|
// On Linux, RCX is currently bufwptr, but we'll need it for other things.
|
||||||
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
|
if (!cpu_info.bBMI2) {
|
||||||
MOV(64, R(R15), R(bufwReg));
|
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
|
||||||
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
|
MOV(64, R(R15), R(bufwReg));
|
||||||
regCache_.ForceRelease(RegCache::GEN_ARG_BUFW_PTR);
|
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
|
||||||
regCache_.ChangeReg(R15, RegCache::GEN_ARG_BUFW_PTR);
|
regCache_.ForceRelease(RegCache::GEN_ARG_BUFW_PTR);
|
||||||
regCache_.ForceRetain(RegCache::GEN_ARG_BUFW_PTR);
|
regCache_.ChangeReg(R15, RegCache::GEN_ARG_BUFW_PTR);
|
||||||
|
regCache_.ForceRetain(RegCache::GEN_ARG_BUFW_PTR);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// Let's load bufwptr/texptrptr into regs. Match Linux just for consistency - RDX is free.
|
// Let's load bufwptr/texptrptr into regs. Match Linux just for consistency - RDX is free.
|
||||||
MOV(64, R(RDX), MDisp(RSP, stackArgPos_ + 0));
|
MOV(64, R(RDX), MDisp(RSP, stackArgPos_ + 0));
|
||||||
|
@ -200,8 +202,10 @@ NearestFunc SamplerJitCache::CompileNearest(const SamplerID &id) {
|
||||||
regCache_.ForceRetain(RegCache::GEN_ARG_BUFW_PTR);
|
regCache_.ForceRetain(RegCache::GEN_ARG_BUFW_PTR);
|
||||||
}
|
}
|
||||||
// Okay, now lock RCX as a shifting reg.
|
// Okay, now lock RCX as a shifting reg.
|
||||||
regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
if (!cpu_info.bBMI2) {
|
||||||
regCache_.ForceRetain(RegCache::GEN_SHIFTVAL);
|
regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
||||||
|
regCache_.ForceRetain(RegCache::GEN_SHIFTVAL);
|
||||||
|
}
|
||||||
|
|
||||||
bool success = true;
|
bool success = true;
|
||||||
|
|
||||||
|
@ -345,7 +349,8 @@ NearestFunc SamplerJitCache::CompileNearest(const SamplerID &id) {
|
||||||
regCache_.ForceRelease(RegCache::GEN_ARG_BUFW_PTR);
|
regCache_.ForceRelease(RegCache::GEN_ARG_BUFW_PTR);
|
||||||
if (regCache_.Has(RegCache::GEN_ARG_LEVEL))
|
if (regCache_.Has(RegCache::GEN_ARG_LEVEL))
|
||||||
regCache_.ForceRelease(RegCache::GEN_ARG_LEVEL);
|
regCache_.ForceRelease(RegCache::GEN_ARG_LEVEL);
|
||||||
regCache_.ForceRelease(RegCache::GEN_SHIFTVAL);
|
if (regCache_.Has(RegCache::GEN_SHIFTVAL))
|
||||||
|
regCache_.ForceRelease(RegCache::GEN_SHIFTVAL);
|
||||||
regCache_.ForceRelease(RegCache::GEN_RESULT);
|
regCache_.ForceRelease(RegCache::GEN_RESULT);
|
||||||
|
|
||||||
if (id.hasAnyMips) {
|
if (id.hasAnyMips) {
|
||||||
|
@ -1820,7 +1825,7 @@ bool SamplerJitCache::Jit_GetDXT1Color(const SamplerID &id, int blockSize, int a
|
||||||
regCache_.Release(srcOffsetReg, RegCache::GEN_TEMP1);
|
regCache_.Release(srcOffsetReg, RegCache::GEN_TEMP1);
|
||||||
|
|
||||||
// Make sure we don't grab this as colorIndexReg.
|
// Make sure we don't grab this as colorIndexReg.
|
||||||
if (uReg != ECX)
|
if (uReg != ECX && !cpu_info.bBMI2)
|
||||||
regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
||||||
|
|
||||||
// The colorIndex is simply the 2 bits at blockPos + (v & 3), shifted right by (u & 3) twice.
|
// The colorIndex is simply the 2 bits at blockPos + (v & 3), shifted right by (u & 3) twice.
|
||||||
|
@ -1835,6 +1840,9 @@ bool SamplerJitCache::Jit_GetDXT1Color(const SamplerID &id, int blockSize, int a
|
||||||
if (uReg == ECX) {
|
if (uReg == ECX) {
|
||||||
SHR(32, R(colorIndexReg), R(CL));
|
SHR(32, R(colorIndexReg), R(CL));
|
||||||
SHR(32, R(colorIndexReg), R(CL));
|
SHR(32, R(colorIndexReg), R(CL));
|
||||||
|
} else if (cpu_info.bBMI2) {
|
||||||
|
SHRX(32, colorIndexReg, R(colorIndexReg), uReg);
|
||||||
|
SHRX(32, colorIndexReg, R(colorIndexReg), uReg);
|
||||||
} else {
|
} else {
|
||||||
bool hasRCX = regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
bool hasRCX = regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
||||||
_assert_(hasRCX);
|
_assert_(hasRCX);
|
||||||
|
@ -2017,16 +2025,21 @@ bool SamplerJitCache::Jit_ApplyDXTAlpha(const SamplerID &id) {
|
||||||
X64Reg uReg = regCache_.Find(RegCache::GEN_ARG_U);
|
X64Reg uReg = regCache_.Find(RegCache::GEN_ARG_U);
|
||||||
X64Reg vReg = regCache_.Find(RegCache::GEN_ARG_V);
|
X64Reg vReg = regCache_.Find(RegCache::GEN_ARG_V);
|
||||||
|
|
||||||
if (uReg != RCX) {
|
if (uReg != RCX && !cpu_info.bBMI2) {
|
||||||
regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
||||||
_assert_(regCache_.Has(RegCache::GEN_SHIFTVAL));
|
_assert_(regCache_.Has(RegCache::GEN_SHIFTVAL));
|
||||||
}
|
}
|
||||||
|
|
||||||
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
|
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
|
||||||
MOVZX(32, 16, temp1Reg, MComplex(srcReg, vReg, SCALE_2, 8));
|
MOVZX(32, 16, temp1Reg, MComplex(srcReg, vReg, SCALE_2, 8));
|
||||||
// Still depending on it being GEN_SHIFTVAL or GEN_ARG_U above.
|
if (cpu_info.bBMI2) {
|
||||||
LEA(32, RCX, MScaled(uReg, SCALE_4, 0));
|
LEA(32, uReg, MScaled(uReg, SCALE_4, 0));
|
||||||
SHR(32, R(temp1Reg), R(CL));
|
SHRX(32, temp1Reg, R(temp1Reg), uReg);
|
||||||
|
} else {
|
||||||
|
// Still depending on it being GEN_SHIFTVAL or GEN_ARG_U above.
|
||||||
|
LEA(32, RCX, MScaled(uReg, SCALE_4, 0));
|
||||||
|
SHR(32, R(temp1Reg), R(CL));
|
||||||
|
}
|
||||||
SHL(32, R(temp1Reg), Imm8(28));
|
SHL(32, R(temp1Reg), Imm8(28));
|
||||||
X64Reg resultReg = regCache_.Find(RegCache::GEN_RESULT);
|
X64Reg resultReg = regCache_.Find(RegCache::GEN_RESULT);
|
||||||
OR(32, R(resultReg), R(temp1Reg));
|
OR(32, R(resultReg), R(temp1Reg));
|
||||||
|
@ -2046,7 +2059,7 @@ bool SamplerJitCache::Jit_ApplyDXTAlpha(const SamplerID &id) {
|
||||||
X64Reg uReg = regCache_.Find(RegCache::GEN_ARG_U);
|
X64Reg uReg = regCache_.Find(RegCache::GEN_ARG_U);
|
||||||
X64Reg vReg = regCache_.Find(RegCache::GEN_ARG_V);
|
X64Reg vReg = regCache_.Find(RegCache::GEN_ARG_V);
|
||||||
|
|
||||||
if (uReg != RCX)
|
if (uReg != RCX && !cpu_info.bBMI2)
|
||||||
regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
||||||
|
|
||||||
// Let's figure out the alphaIndex bit offset so we can read the right byte.
|
// Let's figure out the alphaIndex bit offset so we can read the right byte.
|
||||||
|
@ -2067,13 +2080,17 @@ bool SamplerJitCache::Jit_ApplyDXTAlpha(const SamplerID &id) {
|
||||||
|
|
||||||
// Load 16 bits and mask, in case it straddles bytes.
|
// Load 16 bits and mask, in case it straddles bytes.
|
||||||
X64Reg srcReg = regCache_.Find(RegCache::GEN_ARG_TEXPTR);
|
X64Reg srcReg = regCache_.Find(RegCache::GEN_ARG_TEXPTR);
|
||||||
MOVZX(32, 16, alphaIndexReg, MComplex(srcReg, alphaIndexReg, SCALE_1, 8));
|
if (cpu_info.bBMI2) {
|
||||||
// If not, it's in what was bufwReg.
|
SHRX(32, alphaIndexReg, MComplex(srcReg, alphaIndexReg, SCALE_1, 8), uReg);
|
||||||
if (uReg != RCX) {
|
} else {
|
||||||
_assert_(regCache_.Has(RegCache::GEN_SHIFTVAL));
|
MOVZX(32, 16, alphaIndexReg, MComplex(srcReg, alphaIndexReg, SCALE_1, 8));
|
||||||
MOV(32, R(RCX), R(uReg));
|
// If not, it's in what was bufwReg.
|
||||||
|
if (uReg != RCX) {
|
||||||
|
_assert_(regCache_.Has(RegCache::GEN_SHIFTVAL));
|
||||||
|
MOV(32, R(RCX), R(uReg));
|
||||||
|
}
|
||||||
|
SHR(32, R(alphaIndexReg), R(CL));
|
||||||
}
|
}
|
||||||
SHR(32, R(alphaIndexReg), R(CL));
|
|
||||||
AND(32, R(alphaIndexReg), Imm32(7));
|
AND(32, R(alphaIndexReg), Imm32(7));
|
||||||
|
|
||||||
regCache_.Unlock(uReg, RegCache::GEN_ARG_U);
|
regCache_.Unlock(uReg, RegCache::GEN_ARG_U);
|
||||||
|
@ -2190,11 +2207,17 @@ bool SamplerJitCache::Jit_GetTexData(const SamplerID &id, int bitsPerTexel) {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 4: {
|
case 4: {
|
||||||
XOR(32, R(temp2Reg), R(temp2Reg));
|
if (cpu_info.bBMI2_fast)
|
||||||
|
MOV(32, R(temp2Reg), Imm32(0x0F));
|
||||||
|
else
|
||||||
|
XOR(32, R(temp2Reg), R(temp2Reg));
|
||||||
SHR(32, R(uReg), Imm8(1));
|
SHR(32, R(uReg), Imm8(1));
|
||||||
FixupBranch skip = J_CC(CC_NC);
|
FixupBranch skip = J_CC(CC_NC);
|
||||||
// Track whether we shifted a 1 off or not.
|
// Track whether we shifted a 1 off or not.
|
||||||
MOV(32, R(temp2Reg), Imm32(4));
|
if (cpu_info.bBMI2_fast)
|
||||||
|
SHL(32, R(temp2Reg), Imm8(4));
|
||||||
|
else
|
||||||
|
MOV(32, R(temp2Reg), Imm32(4));
|
||||||
SetJumpTarget(skip);
|
SetJumpTarget(skip);
|
||||||
LEA(64, temp1Reg, MRegSum(srcReg, uReg));
|
LEA(64, temp1Reg, MRegSum(srcReg, uReg));
|
||||||
break;
|
break;
|
||||||
|
@ -2222,7 +2245,7 @@ bool SamplerJitCache::Jit_GetTexData(const SamplerID &id, int bitsPerTexel) {
|
||||||
// We can throw bufw away, now.
|
// We can throw bufw away, now.
|
||||||
regCache_.ForceRelease(RegCache::GEN_ARG_BUFW);
|
regCache_.ForceRelease(RegCache::GEN_ARG_BUFW);
|
||||||
|
|
||||||
if (bitsPerTexel == 4) {
|
if (bitsPerTexel == 4 && !cpu_info.bBMI2) {
|
||||||
bool hasRCX = regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
bool hasRCX = regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
||||||
_assert_(hasRCX);
|
_assert_(hasRCX);
|
||||||
}
|
}
|
||||||
|
@ -2236,12 +2259,20 @@ bool SamplerJitCache::Jit_GetTexData(const SamplerID &id, int bitsPerTexel) {
|
||||||
|
|
||||||
case 4: {
|
case 4: {
|
||||||
SHR(32, R(resultReg), Imm8(1));
|
SHR(32, R(resultReg), Imm8(1));
|
||||||
MOV(8, R(resultReg), MRegSum(temp1Reg, resultReg));
|
if (cpu_info.bBMI2_fast) {
|
||||||
// RCX is now free.
|
MOV(8, R(resultReg), MRegSum(temp1Reg, resultReg));
|
||||||
MOV(8, R(RCX), R(temp2Reg));
|
PEXT(32, resultReg, resultReg, R(temp2Reg));
|
||||||
SHR(8, R(resultReg), R(RCX));
|
} else if (cpu_info.bBMI2) {
|
||||||
// Zero out any bits not shifted off.
|
SHRX(32, resultReg, MRegSum(temp1Reg, resultReg), temp2Reg);
|
||||||
AND(32, R(resultReg), Imm8(0x0F));
|
AND(32, R(resultReg), Imm8(0x0F));
|
||||||
|
} else {
|
||||||
|
MOV(8, R(resultReg), MRegSum(temp1Reg, resultReg));
|
||||||
|
// RCX is now free.
|
||||||
|
MOV(8, R(RCX), R(temp2Reg));
|
||||||
|
SHR(8, R(resultReg), R(RCX));
|
||||||
|
// Zero out any bits not shifted off.
|
||||||
|
AND(32, R(resultReg), Imm8(0x0F));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3330,8 +3361,10 @@ bool SamplerJitCache::Jit_TransformClutIndex(const SamplerID &id, int bitsPerInd
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool hasRCX = regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
if (!cpu_info.bBMI2) {
|
||||||
_assert_msg_(hasRCX, "Could not obtain RCX, locked?");
|
bool hasRCX = regCache_.ChangeReg(RCX, RegCache::GEN_SHIFTVAL);
|
||||||
|
_assert_msg_(hasRCX, "Could not obtain RCX, locked?");
|
||||||
|
}
|
||||||
|
|
||||||
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
|
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
|
||||||
X64Reg idReg = GetSamplerID();
|
X64Reg idReg = GetSamplerID();
|
||||||
|
@ -3339,23 +3372,28 @@ bool SamplerJitCache::Jit_TransformClutIndex(const SamplerID &id, int bitsPerInd
|
||||||
UnlockSamplerID(idReg);
|
UnlockSamplerID(idReg);
|
||||||
|
|
||||||
X64Reg resultReg = regCache_.Find(RegCache::GEN_RESULT);
|
X64Reg resultReg = regCache_.Find(RegCache::GEN_RESULT);
|
||||||
|
int shiftedToSoFar = 0;
|
||||||
|
|
||||||
// Shift = (clutformat >> 2) & 0x1F
|
// Shift = (clutformat >> 2) & 0x1F
|
||||||
if (id.hasClutShift) {
|
if (id.hasClutShift) {
|
||||||
_assert_(regCache_.Has(RegCache::GEN_SHIFTVAL));
|
SHR(32, R(temp1Reg), Imm8(2 - shiftedToSoFar));
|
||||||
MOV(32, R(RCX), R(temp1Reg));
|
shiftedToSoFar = 2;
|
||||||
SHR(32, R(RCX), Imm8(2));
|
|
||||||
AND(32, R(RCX), Imm8(0x1F));
|
if (cpu_info.bBMI2) {
|
||||||
SHR(32, R(resultReg), R(RCX));
|
SHRX(32, resultReg, R(resultReg), temp1Reg);
|
||||||
|
} else {
|
||||||
|
_assert_(regCache_.Has(RegCache::GEN_SHIFTVAL));
|
||||||
|
MOV(32, R(RCX), R(temp1Reg));
|
||||||
|
SHR(32, R(resultReg), R(RCX));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mask = (clutformat >> 8) & 0xFF
|
// Mask = (clutformat >> 8) & 0xFF
|
||||||
if (id.hasClutMask) {
|
if (id.hasClutMask) {
|
||||||
X64Reg temp2Reg = regCache_.Alloc(RegCache::GEN_TEMP2);
|
SHR(32, R(temp1Reg), Imm8(8 - shiftedToSoFar));
|
||||||
MOV(32, R(temp2Reg), R(temp1Reg));
|
shiftedToSoFar = 8;
|
||||||
SHR(32, R(temp2Reg), Imm8(8));
|
|
||||||
AND(32, R(resultReg), R(temp2Reg));
|
AND(32, R(resultReg), R(temp1Reg));
|
||||||
regCache_.Release(temp2Reg, RegCache::GEN_TEMP2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// We need to wrap any entries beyond the first 1024 bytes.
|
// We need to wrap any entries beyond the first 1024 bytes.
|
||||||
|
@ -3369,7 +3407,7 @@ bool SamplerJitCache::Jit_TransformClutIndex(const SamplerID &id, int bitsPerInd
|
||||||
|
|
||||||
// Offset = (clutformat >> 12) & 0x01F0
|
// Offset = (clutformat >> 12) & 0x01F0
|
||||||
if (id.hasClutOffset) {
|
if (id.hasClutOffset) {
|
||||||
SHR(32, R(temp1Reg), Imm8(16));
|
SHR(32, R(temp1Reg), Imm8(16 - shiftedToSoFar));
|
||||||
SHL(32, R(temp1Reg), Imm8(4));
|
SHL(32, R(temp1Reg), Imm8(4));
|
||||||
OR(32, R(resultReg), R(temp1Reg));
|
OR(32, R(resultReg), R(temp1Reg));
|
||||||
AND(32, R(resultReg), Imm32(offsetMask));
|
AND(32, R(resultReg), Imm32(offsetMask));
|
||||||
|
|
Loading…
Add table
Reference in a new issue