Merge pull request #17975 from unknownbrackets/x86-jit-ir

More x86 IR JIT
Henrik Rydgård 2023-08-25 09:41:38 +02:00 committed by GitHub
commit 308e983a99
9 changed files with 565 additions and 58 deletions


@@ -367,7 +367,7 @@ IRNativeReg IRNativeRegCacheBase::FindFreeReg(MIPSLoc type, MIPSMap flags) const
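// A register temp-locked at the current IR index (via ReserveAndLockXGPR) is
// skipped here, so e.g. EDX can stay reserved for a multiply's high result.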
for (int i = 0; i < allocCount; i++) {
IRNativeReg nreg = IRNativeReg(allocOrder[i] - base);
if (nr[nreg].mipsReg == IRREG_INVALID) {
if (nr[nreg].mipsReg == IRREG_INVALID && nr[nreg].tempLockIRIndex < irIndex_) {
return nreg;
}
}
@@ -698,6 +698,10 @@ void IRNativeRegCacheBase::ApplyMapping(const Mapping *mapping, int count) {
}
}
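// NOINIT includes the DIRTY bit, so this must check equality, not just overlap.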
auto isNoinit = [](MIPSMap f) {
return (f & MIPSMap::NOINIT) == MIPSMap::NOINIT;
};
auto mapRegs = [&](int i) {
MIPSLoc type = MIPSLoc::MEM;
switch (mapping[i].type) {
@@ -714,24 +718,39 @@ void IRNativeRegCacheBase::ApplyMapping(const Mapping *mapping, int count) {
return;
}
MIPSMap flags = mapping[i].flags;
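// If another mapping also wants this register's current value, merge away NOINIT
// below so the contents aren't discarded.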
for (int j = 0; j < count; ++j) {
if (mapping[j].type == mapping[i].type && mapping[j].reg == mapping[i].reg && i != j) {
_assert_msg_(mapping[j].lanes == mapping[i].lanes, "Lane aliasing not supported yet");
if (!isNoinit(mapping[j].flags) && isNoinit(flags)) {
flags = (flags & MIPSMap::BACKEND_MASK) | MIPSMap::DIRTY;
}
}
}
if (config_.mapFPUSIMD || mapping[i].type == 'G') {
MapNativeReg(type, mapping[i].reg, mapping[i].lanes, mapping[i].flags);
MapNativeReg(type, mapping[i].reg, mapping[i].lanes, flags);
return;
}
for (int j = 0; j < mapping[i].lanes; ++j)
MapNativeReg(type, mapping[i].reg + j, 1, mapping[i].flags);
MapNativeReg(type, mapping[i].reg + j, 1, flags);
};
auto mapFilteredRegs = [&](auto pred) {
for (int i = 0; i < count; ++i) {
if (pred(mapping[i].flags))
mapRegs(i);
}
};
// Do two passes: first any without NOINIT, then NOINIT.
for (int i = 0; i < count; ++i) {
if ((mapping[i].flags & MIPSMap::NOINIT) != MIPSMap::NOINIT)
mapRegs(i);
}
for (int i = 0; i < count; ++i) {
if ((mapping[i].flags & MIPSMap::NOINIT) == MIPSMap::NOINIT)
mapRegs(i);
}
// Do two passes: with backend special flags, and without.
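// Constrained mappings go first so their pinned registers (e.g. ECX for shifts,
// EDX for high data) aren't handed out to unconstrained ones.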
mapFilteredRegs([](MIPSMap flags) {
return (flags & MIPSMap::BACKEND_MASK) != MIPSMap::INIT;
});
mapFilteredRegs([](MIPSMap flags) {
return (flags & MIPSMap::BACKEND_MASK) == MIPSMap::INIT;
});
}
void IRNativeRegCacheBase::CleanupMapping(const Mapping *mapping, int count) {


@@ -79,6 +79,8 @@ enum class MIPSMap : uint8_t {
INIT = 0,
DIRTY = 1,
NOINIT = 2 | DIRTY,
BACKEND_MASK = 0xF0,
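// High nibble is reserved for backend-specific flags (see X64Map in the x86 backend.)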
};
static inline MIPSMap operator |(const MIPSMap &lhs, const MIPSMap &rhs) {
return MIPSMap((uint8_t)lhs | (uint8_t)rhs);


@@ -177,12 +177,22 @@ void X64JitBackend::CompIR_Bits(IRInst inst) {
void X64JitBackend::CompIR_Compare(IRInst inst) {
CONDITIONAL_DISABLE;
auto setCC = [&](const OpArg &arg, CCFlags cc) {
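// SETcc writes only the low 8 bits. If dest has a low subregister and doesn't
// alias a source, zero it first (the XOR must precede CMP, since it clobbers
// flags); otherwise set a scratch byte and zero-extend into dest.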
if (regs_.HasLowSubregister(regs_.RX(inst.dest)) && inst.dest != inst.src1 && inst.dest != inst.src2) {
XOR(32, regs_.R(inst.dest), regs_.R(inst.dest));
CMP(32, regs_.R(inst.src1), arg);
SETcc(cc, regs_.R(inst.dest));
} else {
CMP(32, regs_.R(inst.src1), arg);
SETcc(cc, R(SCRATCH1));
MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));
}
};
switch (inst.op) {
case IROp::Slt:
regs_.Map(inst);
CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));
SETcc(CC_L, R(SCRATCH1));
MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));
setCC(regs_.R(inst.src2), CC_L);
break;
case IROp::SltConst:
@@ -194,17 +204,13 @@ void X64JitBackend::CompIR_Compare(IRInst inst) {
SHR(32, regs_.R(inst.dest), Imm8(31));
} else {
regs_.Map(inst);
CMP(32, regs_.R(inst.src1), Imm32(inst.constant));
SETcc(CC_L, R(SCRATCH1));
MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));
setCC(Imm32(inst.constant), CC_L);
}
break;
case IROp::SltU:
regs_.Map(inst);
CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));
SETcc(CC_B, R(SCRATCH1));
MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));
setCC(regs_.R(inst.src2), CC_B);
break;
case IROp::SltUConst:
@@ -212,9 +218,7 @@ void X64JitBackend::CompIR_Compare(IRInst inst) {
regs_.SetGPRImm(inst.dest, 0);
} else {
regs_.Map(inst);
CMP(32, regs_.R(inst.src1), Imm32(inst.constant));
SETcc(CC_B, R(SCRATCH1));
MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));
setCC(Imm32(inst.constant), CC_B);
}
break;
@@ -229,10 +233,53 @@ void X64JitBackend::CompIR_CondAssign(IRInst inst) {
switch (inst.op) {
case IROp::MovZ:
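// If dest == src2, the conditional move would be a no-op (same for MovNZ below.)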
if (inst.dest != inst.src2) {
regs_.Map(inst);
CMP(32, regs_.R(inst.src1), Imm32(0));
CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_Z);
}
break;
case IROp::MovNZ:
if (inst.dest != inst.src2) {
regs_.Map(inst);
CMP(32, regs_.R(inst.src1), Imm32(0));
CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_NZ);
}
break;
case IROp::Max:
regs_.Map(inst);
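// The compare order and CMOV condition flip depending on which source, if any,
// already sits in dest (Min below mirrors this.)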
if (inst.src1 == inst.src2) {
MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
} else if (inst.dest == inst.src1) {
CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));
CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_L);
} else if (inst.dest == inst.src2) {
CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));
CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src1), CC_G);
} else {
MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
CMP(32, regs_.R(inst.dest), regs_.R(inst.src2));
CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_L);
}
break;
case IROp::Min:
CompIR_Generic(inst);
regs_.Map(inst);
if (inst.src1 == inst.src2) {
MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
} else if (inst.dest == inst.src1) {
CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));
CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_G);
} else if (inst.dest == inst.src2) {
CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));
CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src1), CC_L);
} else {
MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
CMP(32, regs_.R(inst.dest), regs_.R(inst.src2));
CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_G);
}
break;
default:
@@ -261,10 +308,54 @@ void X64JitBackend::CompIR_HiLo(IRInst inst) {
switch (inst.op) {
case IROp::MtLo:
#if PPSSPP_ARCH(AMD64)
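// On 64-bit, LO and HI share one 64-bit register (IRREG_LO mapped with 2 lanes),
// so only the low 32 bits are replaced here.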
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
// First, clear the bits we're replacing.
MOV(64, R(SCRATCH1), Imm64(0xFFFFFFFF00000000ULL));
AND(64, regs_.R(IRREG_LO), R(SCRATCH1));
// Now clear the high bits and merge.
MOVZX(64, 32, regs_.RX(inst.src1), regs_.R(inst.src1));
OR(64, regs_.R(IRREG_LO), regs_.R(inst.src1));
#else
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY } });
MOV(32, regs_.R(IRREG_LO), regs_.R(inst.src1));
#endif
break;
case IROp::MtHi:
#if PPSSPP_ARCH(AMD64)
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
// First, clear the bits we're replacing.
MOVZX(64, 32, regs_.RX(IRREG_LO), regs_.R(IRREG_LO));
// Then move the new bits into place.
MOV(32, R(SCRATCH1), regs_.R(inst.src1));
SHL(64, R(SCRATCH1), Imm8(32));
OR(64, regs_.R(IRREG_LO), R(SCRATCH1));
#else
regs_.MapWithExtra(inst, { { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
MOV(32, regs_.R(IRREG_HI), regs_.R(inst.src1));
#endif
break;
case IROp::MfLo:
#if PPSSPP_ARCH(AMD64)
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::INIT } });
MOV(32, regs_.R(inst.dest), regs_.R(IRREG_LO));
#else
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::INIT } });
MOV(32, regs_.R(inst.dest), regs_.R(IRREG_LO));
#endif
break;
case IROp::MfHi:
CompIR_Generic(inst);
#if PPSSPP_ARCH(AMD64)
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::INIT } });
MOV(64, regs_.R(inst.dest), regs_.R(IRREG_LO));
SHR(64, regs_.R(inst.dest), Imm8(32));
#else
regs_.MapWithExtra(inst, { { 'G', IRREG_HI, 1, MIPSMap::INIT } });
MOV(32, regs_.R(inst.dest), regs_.R(IRREG_HI));
#endif
break;
default:
@@ -342,12 +433,111 @@ void X64JitBackend::CompIR_Mult(IRInst inst) {
switch (inst.op) {
case IROp::Mult:
#if PPSSPP_ARCH(AMD64)
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });
MOVSX(64, 32, regs_.RX(IRREG_LO), regs_.R(inst.src1));
MOVSX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));
IMUL(64, regs_.RX(IRREG_LO), regs_.R(inst.src2));
#else
// Force a spill (before spill locks.)
regs_.MapGPR(IRREG_HI, MIPSMap::NOINIT | X64Map::HIGH_DATA);
// We keep it here so it stays locked.
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::NOINIT }, { 'G', IRREG_HI, 1, MIPSMap::NOINIT | X64Map::HIGH_DATA } });
MOV(32, R(EAX), regs_.R(inst.src1));
IMUL(32, regs_.R(inst.src2));
MOV(32, regs_.R(IRREG_LO), R(EAX));
// IRREG_HI was mapped to EDX.
#endif
break;
case IROp::MultU:
#if PPSSPP_ARCH(AMD64)
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });
MOVZX(64, 32, regs_.RX(IRREG_LO), regs_.R(inst.src1));
MOVZX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));
IMUL(64, regs_.RX(IRREG_LO), regs_.R(inst.src2));
#else
// Force a spill (before spill locks.)
regs_.MapGPR(IRREG_HI, MIPSMap::NOINIT | X64Map::HIGH_DATA);
// We keep it here so it stays locked.
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::NOINIT }, { 'G', IRREG_HI, 1, MIPSMap::NOINIT | X64Map::HIGH_DATA } });
MOV(32, R(EAX), regs_.R(inst.src1));
MUL(32, regs_.R(inst.src2));
MOV(32, regs_.R(IRREG_LO), R(EAX));
// IRREG_HI was mapped to EDX.
#endif
break;
case IROp::Madd:
#if PPSSPP_ARCH(AMD64)
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
MOVSX(64, 32, SCRATCH1, regs_.R(inst.src1));
MOVSX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));
IMUL(64, SCRATCH1, regs_.R(inst.src2));
ADD(64, regs_.R(IRREG_LO), R(SCRATCH1));
#else
// For ones that modify LO/HI, we can't have anything else in EDX.
regs_.ReserveAndLockXGPR(EDX);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
MOV(32, R(EAX), regs_.R(inst.src1));
IMUL(32, regs_.R(inst.src2));
ADD(32, regs_.R(IRREG_LO), R(EAX));
ADC(32, regs_.R(IRREG_HI), R(EDX));
#endif
break;
case IROp::MaddU:
#if PPSSPP_ARCH(AMD64)
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
MOVZX(64, 32, SCRATCH1, regs_.R(inst.src1));
MOVZX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));
IMUL(64, SCRATCH1, regs_.R(inst.src2));
ADD(64, regs_.R(IRREG_LO), R(SCRATCH1));
#else
// For ones that modify LO/HI, we can't have anything else in EDX.
regs_.ReserveAndLockXGPR(EDX);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
MOV(32, R(EAX), regs_.R(inst.src1));
MUL(32, regs_.R(inst.src2));
ADD(32, regs_.R(IRREG_LO), R(EAX));
ADC(32, regs_.R(IRREG_HI), R(EDX));
#endif
break;
case IROp::Msub:
#if PPSSPP_ARCH(AMD64)
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
MOVSX(64, 32, SCRATCH1, regs_.R(inst.src1));
MOVSX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));
IMUL(64, SCRATCH1, regs_.R(inst.src2));
SUB(64, regs_.R(IRREG_LO), R(SCRATCH1));
#else
// For ones that modify LO/HI, we can't have anything else in EDX.
regs_.ReserveAndLockXGPR(EDX);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
MOV(32, R(EAX), regs_.R(inst.src1));
IMUL(32, regs_.R(inst.src2));
SUB(32, regs_.R(IRREG_LO), R(EAX));
SBB(32, regs_.R(IRREG_HI), R(EDX));
#endif
break;
case IROp::MsubU:
CompIR_Generic(inst);
#if PPSSPP_ARCH(AMD64)
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
MOVZX(64, 32, SCRATCH1, regs_.R(inst.src1));
MOVZX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));
IMUL(64, SCRATCH1, regs_.R(inst.src2));
SUB(64, regs_.R(IRREG_LO), R(SCRATCH1));
#else
// For ones that modify LO/HI, we can't have anything else in EDX.
regs_.ReserveAndLockXGPR(EDX);
regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
MOV(32, R(EAX), regs_.R(inst.src1));
MUL(32, regs_.R(inst.src2));
SUB(32, regs_.R(IRREG_LO), R(EAX));
SBB(32, regs_.R(IRREG_HI), R(EDX));
#endif
break;
default:
@@ -361,10 +551,74 @@ void X64JitBackend::CompIR_Shift(IRInst inst) {
switch (inst.op) {
case IROp::Shl:
if (cpu_info.bBMI2) {
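// BMI2's SHLX takes the shift count from any register, no ECX pinning needed.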
regs_.Map(inst);
SHLX(32, regs_.RX(inst.dest), regs_.R(inst.src1), regs_.RX(inst.src2));
} else {
regs_.MapWithFlags(inst, X64Map::NONE, X64Map::NONE, X64Map::SHIFT);
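// Legacy shifts take their count in CL, so X64Map::SHIFT pins src2 to ECX.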
if (inst.dest == inst.src1) {
SHL(32, regs_.R(inst.dest), regs_.R(inst.src2));
} else if (inst.dest == inst.src2) {
MOV(32, R(SCRATCH1), regs_.R(inst.src1));
SHL(32, R(SCRATCH1), regs_.R(inst.src2));
MOV(32, regs_.R(inst.dest), R(SCRATCH1));
} else {
MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
SHL(32, regs_.R(inst.dest), regs_.R(inst.src2));
}
}
break;
case IROp::Shr:
if (cpu_info.bBMI2) {
regs_.Map(inst);
SHRX(32, regs_.RX(inst.dest), regs_.R(inst.src1), regs_.RX(inst.src2));
} else {
regs_.MapWithFlags(inst, X64Map::NONE, X64Map::NONE, X64Map::SHIFT);
if (inst.dest == inst.src1) {
SHR(32, regs_.R(inst.dest), regs_.R(inst.src2));
} else if (inst.dest == inst.src2) {
MOV(32, R(SCRATCH1), regs_.R(inst.src1));
SHR(32, R(SCRATCH1), regs_.R(inst.src2));
MOV(32, regs_.R(inst.dest), R(SCRATCH1));
} else {
MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
SHR(32, regs_.R(inst.dest), regs_.R(inst.src2));
}
}
break;
case IROp::Sar:
if (cpu_info.bBMI2) {
regs_.Map(inst);
SARX(32, regs_.RX(inst.dest), regs_.R(inst.src1), regs_.RX(inst.src2));
} else {
regs_.MapWithFlags(inst, X64Map::NONE, X64Map::NONE, X64Map::SHIFT);
if (inst.dest == inst.src1) {
SAR(32, regs_.R(inst.dest), regs_.R(inst.src2));
} else if (inst.dest == inst.src2) {
MOV(32, R(SCRATCH1), regs_.R(inst.src1));
SAR(32, R(SCRATCH1), regs_.R(inst.src2));
MOV(32, regs_.R(inst.dest), R(SCRATCH1));
} else {
MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
SAR(32, regs_.R(inst.dest), regs_.R(inst.src2));
}
}
break;
case IROp::Ror:
CompIR_Generic(inst);
regs_.MapWithFlags(inst, X64Map::NONE, X64Map::NONE, X64Map::SHIFT);
if (inst.dest == inst.src1) {
ROR(32, regs_.R(inst.dest), regs_.R(inst.src2));
} else if (inst.dest == inst.src2) {
MOV(32, R(SCRATCH1), regs_.R(inst.src1));
ROR(32, R(SCRATCH1), regs_.R(inst.src2));
MOV(32, regs_.R(inst.dest), R(SCRATCH1));
} else {
MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
ROR(32, regs_.R(inst.dest), regs_.R(inst.src2));
}
break;
case IROp::ShlImm:
@@ -427,6 +681,9 @@ void X64JitBackend::CompIR_Shift(IRInst inst) {
regs_.Map(inst);
MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
}
} else if (cpu_info.bBMI2) {
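// BMI2's RORX rotates by an immediate into a separate dest without touching flags.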
regs_.Map(inst);
RORX(32, regs_.RX(inst.dest), regs_.R(inst.src1), inst.src2 & 31);
} else {
regs_.Map(inst);
if (inst.dest != inst.src1)


@@ -54,7 +54,8 @@ void X64JitBackend::CompIR_Exit(IRInst inst) {
break;
case IROp::ExitToPC:
CompIR_Generic(inst);
FlushAll();
JMP(dispatcherCheckCoreState_, true);
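// Uses the dispatcher entry that re-checks coreState, since ExitToPC follows
// things like syscalls.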
break;
default:


@@ -40,10 +40,11 @@ namespace MIPSComp {
using namespace Gen;
using namespace X64IRJitConstants;
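// static gives this table internal linkage; the vector compiler defines its own
// simdConstants, and two external definitions would collide at link time.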
struct SimdConstants {
static struct SimdConstants {
alignas(16) const u32 reverseQNAN[4] = { 0x803FFFFF, 0x803FFFFF, 0x803FFFFF, 0x803FFFFF };
alignas(16) const u32 noSignMask[4] = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF };
alignas(16) const u32 positiveInfinity[4] = { 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000 };
alignas(16) const u32 signBitAll[4] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
} simdConstants;
void X64JitBackend::CompIR_FArith(IRInst inst) {
@@ -121,9 +122,48 @@ void X64JitBackend::CompIR_FArith(IRInst inst) {
}
case IROp::FDiv:
if (inst.dest == inst.src1) {
regs_.Map(inst);
DIVSS(regs_.FX(inst.dest), regs_.F(inst.src2));
} else if (cpu_info.bAVX) {
regs_.Map(inst);
VDIVSS(regs_.FX(inst.dest), regs_.FX(inst.src1), regs_.F(inst.src2));
} else if (inst.dest == inst.src2) {
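// dest aliases src2, which the MOVAPS below would clobber, so stash src2 in a temp.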
X64Reg tempReg = regs_.MapWithFPRTemp(inst);
MOVAPS(tempReg, regs_.F(inst.src2));
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
DIVSS(regs_.FX(inst.dest), R(tempReg));
} else {
regs_.Map(inst);
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
DIVSS(regs_.FX(inst.dest), regs_.F(inst.src2));
}
break;
case IROp::FSqrt:
regs_.Map(inst);
SQRTSS(regs_.FX(inst.dest), regs_.F(inst.src1));
break;
case IROp::FNeg:
CompIR_Generic(inst);
regs_.Map(inst);
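// The constant table may be outside RIP-relative range, in which case its
// address goes through SCRATCH1 instead.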
if (cpu_info.bAVX) {
if (RipAccessible(&simdConstants.signBitAll)) {
VXORPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), M(&simdConstants.signBitAll)); // rip accessible
} else {
MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.signBitAll));
VXORPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), MatR(SCRATCH1));
}
} else {
if (inst.dest != inst.src1)
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
if (RipAccessible(&simdConstants.signBitAll)) {
XORPS(regs_.FX(inst.dest), M(&simdConstants.signBitAll)); // rip accessible
} else {
MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.signBitAll));
XORPS(regs_.FX(inst.dest), MatR(SCRATCH1));
}
}
break;
default:
@@ -144,6 +184,26 @@ void X64JitBackend::CompIR_FAssign(IRInst inst) {
break;
case IROp::FAbs:
regs_.Map(inst);
if (cpu_info.bAVX) {
if (RipAccessible(&simdConstants.noSignMask)) {
VANDPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), M(&simdConstants.noSignMask)); // rip accessible
} else {
MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.noSignMask));
VANDPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), MatR(SCRATCH1));
}
} else {
if (inst.dest != inst.src1)
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
if (RipAccessible(&simdConstants.noSignMask)) {
ANDPS(regs_.FX(inst.dest), M(&simdConstants.noSignMask)); // rip accessible
} else {
MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.noSignMask));
ANDPS(regs_.FX(inst.dest), MatR(SCRATCH1));
}
}
break;
case IROp::FSign:
CompIR_Generic(inst);
break;
@@ -159,6 +219,18 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) {
constexpr IRReg IRREG_VFPU_CC = IRREG_VFPU_CTRL_BASE + VFPU_CTRL_CC;
auto ccToFpcond = [&](IRReg lhs, IRReg rhs, CCFlags cc) {
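// Zero IRREG_FPCOND before the compare (XOR clobbers flags), then SETcc its low
// byte directly; without a low subregister, go through SCRATCH1 and zero-extend.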
if (regs_.HasLowSubregister(regs_.RX(IRREG_FPCOND))) {
XOR(32, regs_.R(IRREG_FPCOND), regs_.R(IRREG_FPCOND));
UCOMISS(regs_.FX(lhs), regs_.F(rhs));
SETcc(cc, regs_.R(IRREG_FPCOND));
} else {
UCOMISS(regs_.FX(lhs), regs_.F(rhs));
SETcc(cc, R(SCRATCH1));
MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
}
};
switch (inst.op) {
case IROp::FCmp:
switch (inst.dest) {
@@ -168,15 +240,14 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) {
case IRFpCompareMode::EitherUnordered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
UCOMISS(regs_.FX(inst.src1), regs_.F(inst.src2));
// PF = UNORDERED.
SETcc(CC_P, R(SCRATCH1));
MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
ccToFpcond(inst.src1, inst.src2, CC_P);
break;
case IRFpCompareMode::EqualOrdered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
// Clear the upper bits of SCRATCH1 so we can AND later.
// We don't have a single flag we can check, unfortunately.
XOR(32, R(SCRATCH1), R(SCRATCH1));
UCOMISS(regs_.FX(inst.src1), regs_.F(inst.src2));
// E/ZF = EQUAL or UNORDERED (not exactly what we want.)
@@ -196,42 +267,32 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) {
case IRFpCompareMode::EqualUnordered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
UCOMISS(regs_.FX(inst.src1), regs_.F(inst.src2));
// E/ZF = EQUAL or UNORDERED.
SETcc(CC_E, R(SCRATCH1));
MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
ccToFpcond(inst.src1, inst.src2, CC_E);
break;
case IRFpCompareMode::LessEqualOrdered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
UCOMISS(regs_.FX(inst.src2), regs_.F(inst.src1));
// AE/!CF = GREATER or EQUAL (src2/src1 reversed.)
SETcc(CC_AE, R(SCRATCH1));
MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
ccToFpcond(inst.src2, inst.src1, CC_AE);
break;
case IRFpCompareMode::LessEqualUnordered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
UCOMISS(regs_.FX(inst.src1), regs_.F(inst.src2));
// BE/CF||ZF = LESS THAN or EQUAL or UNORDERED.
SETcc(CC_BE, R(SCRATCH1));
MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
ccToFpcond(inst.src1, inst.src2, CC_BE);
break;
case IRFpCompareMode::LessOrdered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
UCOMISS(regs_.FX(inst.src2), regs_.F(inst.src1));
// A/!CF&&!ZF = GREATER (src2/src1 reversed.)
SETcc(CC_A, R(SCRATCH1));
MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
ccToFpcond(inst.src2, inst.src1, CC_A);
break;
case IRFpCompareMode::LessUnordered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
UCOMISS(regs_.FX(inst.src1), regs_.F(inst.src2));
// B/CF = LESS THAN or UNORDERED.
SETcc(CC_B, R(SCRATCH1));
MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
ccToFpcond(inst.src1, inst.src2, CC_B);
break;
}
break;
@@ -429,7 +490,8 @@ void X64JitBackend::CompIR_FCondAssign(IRInst inst) {
if (cpu_info.bAVX) {
VMINSS(regs_.FX(inst.dest), regs_.FX(inst.src1), regs_.F(inst.src2));
} else {
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
if (inst.dest != inst.src1)
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
MINSS(regs_.FX(inst.dest), regs_.F(inst.src2));
}
SetJumpTarget(finishNAN);
@@ -465,7 +527,8 @@ void X64JitBackend::CompIR_FCondAssign(IRInst inst) {
if (cpu_info.bAVX) {
VMAXSS(regs_.FX(inst.dest), regs_.FX(inst.src1), regs_.F(inst.src2));
} else {
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
if (inst.dest != inst.src1)
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
MAXSS(regs_.FX(inst.dest), regs_.F(inst.src2));
}
SetJumpTarget(finishNAN);


@@ -65,8 +65,13 @@ void X64JitBackend::CompIR_Basic(IRInst inst) {
break;
case IROp::SetPC:
regs_.Map(inst);
MovToPC(regs_.RX(inst.src1));
break;
case IROp::SetPCConst:
CompIR_Generic(inst);
MOV(32, R(SCRATCH1), Imm32(inst.constant));
MovToPC(SCRATCH1);
break;
default:
@@ -95,6 +100,29 @@ void X64JitBackend::CompIR_System(IRInst inst) {
switch (inst.op) {
case IROp::Syscall:
FlushAll();
SaveStaticRegisters();
#ifdef USE_PROFILER
// When profiling, we can't skip CallSyscall, since it times syscalls.
ABI_CallFunctionC((const u8 *)&CallSyscall, inst.constant);
#else
// Skip the CallSyscall where possible.
{
MIPSOpcode op(inst.constant);
void *quickFunc = GetQuickSyscallFunc(op);
if (quickFunc) {
ABI_CallFunctionP((const u8 *)quickFunc, (void *)GetSyscallFuncPointer(op));
} else {
ABI_CallFunctionC((const u8 *)&CallSyscall, inst.constant);
}
}
#endif
LoadStaticRegisters();
// This is always followed by an ExitToPC, where we check coreState.
break;
case IROp::CallReplacement:
case IROp::Break:
CompIR_Generic(inst);
@@ -111,9 +139,17 @@ void X64JitBackend::CompIR_Transfer(IRInst inst) {
switch (inst.op) {
case IROp::SetCtrlVFPU:
regs_.SetGPRImm(IRREG_VFPU_CTRL_BASE + inst.dest, (int32_t)inst.constant);
break;
case IROp::SetCtrlVFPUReg:
regs_.Map(inst);
MOV(32, regs_.R(IRREG_VFPU_CTRL_BASE + inst.dest), regs_.R(inst.src1));
break;
case IROp::SetCtrlVFPUFReg:
CompIR_Generic(inst);
regs_.Map(inst);
MOVD_xmm(regs_.R(IRREG_VFPU_CTRL_BASE + inst.dest), regs_.FX(inst.src1));
break;
case IROp::FpCondFromReg:
@@ -128,10 +164,14 @@ void X64JitBackend::CompIR_Transfer(IRInst inst) {
case IROp::FpCtrlFromReg:
case IROp::FpCtrlToReg:
case IROp::VfpuCtrlToReg:
CompIR_Generic(inst);
break;
case IROp::VfpuCtrlToReg:
regs_.Map(inst);
MOV(32, regs_.R(inst.dest), regs_.R(IRREG_VFPU_CTRL_BASE + inst.src1));
break;
case IROp::FMovFromGPR:
if (regs_.IsGPRImm(inst.src1) && regs_.GetGPRImm(inst.src1) == 0) {
regs_.MapFPR(inst.dest, MIPSMap::NOINIT);


@@ -39,6 +39,11 @@ namespace MIPSComp {
using namespace Gen;
using namespace X64IRJitConstants;
static struct SimdConstants {
alignas(16) const u32 noSignMask[4] = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF };
alignas(16) const u32 signBitAll[4] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
} simdConstants;
alignas(16) static const float vec4InitValues[8][4] = {
{ 0.0f, 0.0f, 0.0f, 0.0f },
{ 1.0f, 1.0f, 1.0f, 1.0f },
@@ -143,8 +148,45 @@ void X64JitBackend::CompIR_VecArith(IRInst inst) {
break;
case IROp::Vec4Neg:
regs_.Map(inst);
if (cpu_info.bAVX) {
if (RipAccessible(&simdConstants.signBitAll)) {
VXORPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), M(&simdConstants.signBitAll)); // rip accessible
} else {
MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.signBitAll));
VXORPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), MatR(SCRATCH1));
}
} else {
if (inst.dest != inst.src1)
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
if (RipAccessible(&simdConstants.signBitAll)) {
XORPS(regs_.FX(inst.dest), M(&simdConstants.signBitAll)); // rip accessible
} else {
MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.signBitAll));
XORPS(regs_.FX(inst.dest), MatR(SCRATCH1));
}
}
break;
case IROp::Vec4Abs:
CompIR_Generic(inst);
regs_.Map(inst);
if (cpu_info.bAVX) {
if (RipAccessible(&simdConstants.noSignMask)) {
VANDPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), M(&simdConstants.noSignMask)); // rip accessible
} else {
MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.noSignMask));
VANDPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), MatR(SCRATCH1));
}
} else {
if (inst.dest != inst.src1)
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
if (RipAccessible(&simdConstants.noSignMask)) {
ANDPS(regs_.FX(inst.dest), M(&simdConstants.noSignMask)); // rip accessible
} else {
MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.noSignMask));
ANDPS(regs_.FX(inst.dest), MatR(SCRATCH1));
}
}
break;
default:
@@ -224,7 +266,23 @@ void X64JitBackend::CompIR_VecHoriz(IRInst inst) {
switch (inst.op) {
case IROp::Vec4Dot:
CompIR_Generic(inst);
// TODO: Handle "aliasing" of sizes. In theory it should be fine if not dirty...
if (Overlap(inst.dest, 1, inst.src1, 4) || Overlap(inst.dest, 1, inst.src2, 4))
DISABLE;
regs_.Map(inst);
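// DPPS with mask 0xF1: multiply all four lanes, write the sum to lane 0 only.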
if (cpu_info.bSSE4_1 && inst.dest == inst.src1) {
DPPS(regs_.FX(inst.dest), regs_.F(inst.src2), 0xF1);
} else if (cpu_info.bSSE4_1 && inst.dest == inst.src2) {
DPPS(regs_.FX(inst.dest), regs_.F(inst.src1), 0xF1);
} else if (cpu_info.bAVX) {
VDPPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), regs_.F(inst.src2), 0xF1);
} else if (cpu_info.bSSE4_1) {
MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
DPPS(regs_.FX(inst.dest), regs_.F(inst.src2), 0xF1);
} else {
CompIR_Generic(inst);
}
break;
default:


@@ -63,8 +63,20 @@ const int *X64IRRegCache::GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &c
#endif
};
if ((flags & X64Map::MASK) == X64Map::SHIFT) {
// It's a single option for shifts.
static const int shiftReg[] = { ECX };
count = 1;
return shiftReg;
}
if ((flags & X64Map::MASK) == X64Map::HIGH_DATA) {
// It's a single option for high data (EDX.)
static const int highDataReg[] = { EDX };
count = 1;
return highDataReg;
}
#if PPSSPP_ARCH(X86)
if ((flags & X64Map::LOW_SUBREG) == X64Map::LOW_SUBREG) {
if ((flags & X64Map::MASK) == X64Map::LOW_SUBREG) {
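// Several X64Map values share this nibble now, so compare the whole masked field.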
static const int lowSubRegAllocationOrder[] = {
EDX, EBX, ECX,
};
@@ -132,7 +144,19 @@ X64Reg X64IRRegCache::TryMapTempImm(IRReg r, X64Map flags) {
_dbg_assert_(IsValidGPR(r));
auto canUseReg = [flags](X64Reg r) {
return (flags & X64Map::LOW_SUBREG) != X64Map::LOW_SUBREG || HasLowSubregister(r);
switch (flags & X64Map::MASK) {
case X64Map::NONE:
return true;
case X64Map::LOW_SUBREG:
return HasLowSubregister(r);
case X64Map::SHIFT:
return r == RCX;
case X64Map::HIGH_DATA:
return r == RDX;
default:
_assert_msg_(false, "Unexpected flags");
}
return false;
};
// If already mapped, no need for a temporary.
@@ -163,6 +187,13 @@ X64Reg X64IRRegCache::GetAndLockTempR() {
return reg;
}
void X64IRRegCache::ReserveAndLockXGPR(Gen::X64Reg r) {
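// Evict whatever occupies r, then temp-lock it for the current IR index so
// FindFreeReg won't hand it out during this instruction.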
IRNativeReg nreg = GPRToNativeReg(r);
if (nr[nreg].mipsReg != IRREG_INVALID)
FlushNativeReg(nreg);
nr[r].tempLockIRIndex = irIndex_;
}
X64Reg X64IRRegCache::MapWithFPRTemp(IRInst &inst) {
return FromNativeReg(MapWithTemp(inst, MIPSLoc::FREG));
}
@@ -175,6 +206,35 @@ void X64IRRegCache::MapWithFlags(IRInst inst, X64Map destFlags, X64Map src1Flags
mapping[1].flags = mapping[1].flags | src1Flags;
mapping[2].flags = mapping[2].flags | src2Flags;
auto flushReg = [&](IRNativeReg nreg) {
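// A register mapped as NOINIT is about to be overwritten anyway, so discard it
// rather than spilling to memory.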
for (int i = 0; i < 3; ++i) {
if (mapping[i].reg == nr[nreg].mipsReg && (mapping[i].flags & MIPSMap::NOINIT) == MIPSMap::NOINIT) {
DiscardNativeReg(nreg);
return;
}
}
FlushNativeReg(nreg);
};
// If there are any special rules, we might need to spill.
for (int i = 0; i < 3; ++i) {
switch (mapping[i].flags & X64Map::MASK) {
case X64Map::SHIFT:
if (nr[RCX].mipsReg != mapping[i].reg)
flushReg(RCX);
break;
case X64Map::HIGH_DATA:
if (nr[RDX].mipsReg != mapping[i].reg)
flushReg(RDX);
break;
default:
break;
}
}
ApplyMapping(mapping, 3);
CleanupMapping(mapping, 3);
}


@@ -43,7 +43,13 @@ static constexpr auto pcOffset = offsetof(MIPSState, pc) - 128;
enum class X64Map : uint8_t {
NONE = 0,
// On 32-bit: EAX, EBX, ECX, EDX
LOW_SUBREG = 0x10,
// EDX/RDX
HIGH_DATA = 0x20,
// ECX/RCX
SHIFT = 0x30,
MASK = 0xF0,
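// These occupy MIPSMap::BACKEND_MASK's nibble; MASK extracts the x86-specific part.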
};
static inline MIPSMap operator |(const MIPSMap &lhs, const X64Map &rhs) {
return MIPSMap((uint8_t)lhs | (uint8_t)rhs);
@@ -82,6 +88,7 @@ public:
void FlushBeforeCall();
Gen::X64Reg GetAndLockTempR();
void ReserveAndLockXGPR(Gen::X64Reg r);
Gen::OpArg R(IRReg preg);
Gen::OpArg RPtr(IRReg preg);