Mirror of https://github.com/hrydgard/ppsspp.git (synced 2025-04-02 11:01:50 -04:00)

Merge pull request #17975 from unknownbrackets/x86-jit-ir: More x86 IR JIT

Commit 308e983a99: 9 changed files with 565 additions and 58 deletions
@@ -367,7 +367,7 @@ IRNativeReg IRNativeRegCacheBase::FindFreeReg(MIPSLoc type, MIPSMap flags) const
 	for (int i = 0; i < allocCount; i++) {
 		IRNativeReg nreg = IRNativeReg(allocOrder[i] - base);

-		if (nr[nreg].mipsReg == IRREG_INVALID) {
+		if (nr[nreg].mipsReg == IRREG_INVALID && nr[nreg].tempLockIRIndex < irIndex_) {
 			return nreg;
 		}
 	}
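The added tempLockIRIndex condition keeps registers that were reserved as temporaries (see ReserveAndLockXGPR later in this diff) out of the free pool until compilation moves past the reserving IR instruction. A minimal standalone sketch of the check, with assumed field types (illustration, not the project's actual declarations):

struct NativeRegEntry {
	int mipsReg = -1;          // stands in for IRREG_INVALID when unassigned
	int tempLockIRIndex = -1;  // IR index at which this reg was last reserved
};

// Free only if unassigned AND any temporary lock has expired.
bool IsFreeAt(const NativeRegEntry &e, int irIndex) {
	return e.mipsReg == -1 && e.tempLockIRIndex < irIndex;
}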
@@ -698,6 +698,10 @@ void IRNativeRegCacheBase::ApplyMapping(const Mapping *mapping, int count) {
 		}
 	}

+	auto isNoinit = [](MIPSMap f) {
+		return (f & MIPSMap::NOINIT) == MIPSMap::NOINIT;
+	};
+
 	auto mapRegs = [&](int i) {
 		MIPSLoc type = MIPSLoc::MEM;
 		switch (mapping[i].type) {

@@ -714,24 +718,39 @@ void IRNativeRegCacheBase::ApplyMapping(const Mapping *mapping, int count) {
 			return;
 		}

+		MIPSMap flags = mapping[i].flags;
+		for (int j = 0; j < count; ++j) {
+			if (mapping[j].type == mapping[i].type && mapping[j].reg == mapping[i].reg && i != j) {
+				_assert_msg_(mapping[j].lanes == mapping[i].lanes, "Lane aliasing not supported yet");
+
+				if (!isNoinit(mapping[j].flags) && isNoinit(flags)) {
+					flags = (flags & MIPSMap::BACKEND_MASK) | MIPSMap::DIRTY;
+				}
+			}
+		}
+
 		if (config_.mapFPUSIMD || mapping[i].type == 'G') {
-			MapNativeReg(type, mapping[i].reg, mapping[i].lanes, mapping[i].flags);
+			MapNativeReg(type, mapping[i].reg, mapping[i].lanes, flags);
 			return;
 		}

 		for (int j = 0; j < mapping[i].lanes; ++j)
-			MapNativeReg(type, mapping[i].reg + j, 1, mapping[i].flags);
+			MapNativeReg(type, mapping[i].reg + j, 1, flags);
 	};
+	auto mapFilteredRegs = [&](auto pred) {
+		for (int i = 0; i < count; ++i) {
+			if (pred(mapping[i].flags))
+				mapRegs(i);
+		}
+	};

-	// Do two passes: first any without NOINIT, then NOINIT.
-	for (int i = 0; i < count; ++i) {
-		if ((mapping[i].flags & MIPSMap::NOINIT) != MIPSMap::NOINIT)
-			mapRegs(i);
-	}
-	for (int i = 0; i < count; ++i) {
-		if ((mapping[i].flags & MIPSMap::NOINIT) == MIPSMap::NOINIT)
-			mapRegs(i);
-	}
+	// Do two passes: with backend special flags, and without.
+	mapFilteredRegs([](MIPSMap flags) {
+		return (flags & MIPSMap::BACKEND_MASK) != MIPSMap::INIT;
+	});
+	mapFilteredRegs([](MIPSMap flags) {
+		return (flags & MIPSMap::BACKEND_MASK) == MIPSMap::INIT;
+	});
 }

 void IRNativeRegCacheBase::CleanupMapping(const Mapping *mapping, int count) {
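The new flags merge handles one instruction naming the same register in two operand slots with different flags: mapping a NOINIT (skip the load) slot would discard the value a plain INIT slot still reads. A hypothetical aliasing case (values assumed for illustration):

// Suppose dest and src1 are both IR reg 4 in one instruction:
//   mapping[0] = { 'G', 4, 1, MIPSMap::NOINIT };  // dest: load would be skipped
//   mapping[1] = { 'G', 4, 1, MIPSMap::INIT };    // src1: value must be loaded
// Honoring NOINIT on slot 0 would clobber what slot 1 reads, so the merge
// downgrades it, keeping backend placement bits but forcing a load + writeback:
//   flags = (flags & MIPSMap::BACKEND_MASK) | MIPSMap::DIRTY;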
@@ -79,6 +79,8 @@ enum class MIPSMap : uint8_t {
 	INIT = 0,
 	DIRTY = 1,
 	NOINIT = 2 | DIRTY,
+
+	BACKEND_MASK = 0xF0,
 };
 static inline MIPSMap operator |(const MIPSMap &lhs, const MIPSMap &rhs) {
 	return MIPSMap((uint8_t)lhs | (uint8_t)rhs);
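For reference, the low bits encode the generic mapping state while the new BACKEND_MASK nibble is left to backend-specific flags (the X64Map values later in this diff). A short summary of the semantics, as used throughout the register cache:

// INIT   (0): load the guest value into the host register before use.
// DIRTY  (1): the host register will be modified; write it back eventually.
// NOINIT (2 | DIRTY): the value will be fully overwritten, so skip the load;
//                     it still implies DIRTY, hence the writeback.
// BACKEND_MASK (0xF0): high nibble reserved for backend placement flags.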
@@ -177,12 +177,22 @@ void X64JitBackend::CompIR_Bits(IRInst inst) {
 void X64JitBackend::CompIR_Compare(IRInst inst) {
 	CONDITIONAL_DISABLE;

+	auto setCC = [&](const OpArg &arg, CCFlags cc) {
+		if (regs_.HasLowSubregister(regs_.RX(inst.dest)) && inst.dest != inst.src1 && inst.dest != inst.src2) {
+			XOR(32, regs_.R(inst.dest), regs_.R(inst.dest));
+			CMP(32, regs_.R(inst.src1), arg);
+			SETcc(cc, regs_.R(inst.dest));
+		} else {
+			CMP(32, regs_.R(inst.src1), arg);
+			SETcc(cc, R(SCRATCH1));
+			MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));
+		}
+	};
+
 	switch (inst.op) {
 	case IROp::Slt:
 		regs_.Map(inst);
-		CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));
-		SETcc(CC_L, R(SCRATCH1));
-		MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));
+		setCC(regs_.R(inst.src2), CC_L);
 		break;

 	case IROp::SltConst:
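SETcc only writes an 8-bit register, so building a 32-bit 0/1 result needs either a pre-zeroed destination or a zero-extend afterwards. The helper picks the cheaper XOR form when the destination has a byte-addressable subregister and doesn't alias a source; note the zeroing XOR must come before CMP because XOR clobbers EFLAGS. A comment sketch of the two emitted shapes:

// Fast path (dest has a low byte, is not src1/src2):
//   xor   dest, dest      ; zero first: xor would destroy the CMP flags
//   cmp   src1, arg
//   setcc dest_low8       ; upper 24 bits are already zero
//
// Fallback (dest aliases a source, or lacks a byte subreg on 32-bit x86):
//   cmp   src1, arg
//   setcc scratch8
//   movzx dest, scratch8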
@@ -194,17 +204,13 @@ void X64JitBackend::CompIR_Compare(IRInst inst) {
 			SHR(32, regs_.R(inst.dest), Imm8(31));
 		} else {
 			regs_.Map(inst);
-			CMP(32, regs_.R(inst.src1), Imm32(inst.constant));
-			SETcc(CC_L, R(SCRATCH1));
-			MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));
+			setCC(Imm32(inst.constant), CC_L);
 		}
 		break;

 	case IROp::SltU:
 		regs_.Map(inst);
-		CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));
-		SETcc(CC_B, R(SCRATCH1));
-		MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));
+		setCC(regs_.R(inst.src2), CC_B);
 		break;

 	case IROp::SltUConst:
@@ -212,9 +218,7 @@ void X64JitBackend::CompIR_Compare(IRInst inst) {
 			regs_.SetGPRImm(inst.dest, 0);
 		} else {
 			regs_.Map(inst);
-			CMP(32, regs_.R(inst.src1), Imm32(inst.constant));
-			SETcc(CC_B, R(SCRATCH1));
-			MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));
+			setCC(Imm32(inst.constant), CC_B);
 		}
 		break;
@@ -229,10 +233,53 @@ void X64JitBackend::CompIR_CondAssign(IRInst inst) {

 	switch (inst.op) {
 	case IROp::MovZ:
+		if (inst.dest != inst.src2) {
+			regs_.Map(inst);
+			CMP(32, regs_.R(inst.src1), Imm32(0));
+			CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_Z);
+		}
+		break;
+
 	case IROp::MovNZ:
+		if (inst.dest != inst.src2) {
+			regs_.Map(inst);
+			CMP(32, regs_.R(inst.src1), Imm32(0));
+			CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_NZ);
+		}
+		break;
+
 	case IROp::Max:
+		regs_.Map(inst);
+		if (inst.src1 == inst.src2) {
+			MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
+		} else if (inst.dest == inst.src1) {
+			CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));
+			CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_L);
+		} else if (inst.dest == inst.src2) {
+			CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));
+			CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src1), CC_G);
+		} else {
+			MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
+			CMP(32, regs_.R(inst.dest), regs_.R(inst.src2));
+			CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_L);
+		}
+		break;
+
 	case IROp::Min:
-		CompIR_Generic(inst);
+		regs_.Map(inst);
+		if (inst.src1 == inst.src2) {
+			MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
+		} else if (inst.dest == inst.src1) {
+			CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));
+			CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_G);
+		} else if (inst.dest == inst.src2) {
+			CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));
+			CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src1), CC_L);
+		} else {
+			MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
+			CMP(32, regs_.R(inst.dest), regs_.R(inst.src2));
+			CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_G);
+		}
 		break;

 	default:
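Max and Min use the branchless CMP + CMOVcc idiom: the conditional move replaces dest only when the condition holds, so each aliasing case picks the condition that moves the correct operand in. For the dest == src1 case of Max, as a comment sketch:

// dest already holds src1. After CMP(32, src1, src2):
//   CC_L true  (src1 < src2): CMOV copies src2 into dest, the larger value.
//   CC_L false (src1 >= src2): CMOV does nothing; dest keeps src1, the max.
// Min mirrors this with CC_G, and the dest == src2 cases invert the
// condition because the other operand is the one conditionally moved.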
@@ -261,10 +308,54 @@ void X64JitBackend::CompIR_HiLo(IRInst inst) {

 	switch (inst.op) {
 	case IROp::MtLo:
+#if PPSSPP_ARCH(AMD64)
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
+		// First, clear the bits we're replacing.
+		MOV(64, R(SCRATCH1), Imm64(0xFFFFFFFF00000000ULL));
+		AND(64, regs_.R(IRREG_LO), R(SCRATCH1));
+		// Now clear the high bits and merge.
+		MOVZX(64, 32, regs_.RX(inst.src1), regs_.R(inst.src1));
+		OR(64, regs_.R(IRREG_LO), regs_.R(inst.src1));
+#else
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY } });
+		MOV(32, regs_.R(IRREG_LO), regs_.R(inst.src1));
+#endif
+		break;
+
 	case IROp::MtHi:
+#if PPSSPP_ARCH(AMD64)
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
+		// First, clear the bits we're replacing.
+		MOVZX(64, 32, regs_.RX(IRREG_LO), regs_.R(IRREG_LO));
+		// Then move the new bits into place.
+		MOV(32, R(SCRATCH1), regs_.R(inst.src1));
+		SHL(64, R(SCRATCH1), Imm8(32));
+		OR(64, regs_.R(IRREG_LO), R(SCRATCH1));
+#else
+		regs_.MapWithExtra(inst, { { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
+		MOV(32, regs_.R(IRREG_HI), regs_.R(inst.src1));
+#endif
+		break;
+
 	case IROp::MfLo:
+#if PPSSPP_ARCH(AMD64)
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::INIT } });
+		MOV(32, regs_.R(inst.dest), regs_.R(IRREG_LO));
+#else
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::INIT } });
+		MOV(32, regs_.R(inst.dest), regs_.R(IRREG_LO));
+#endif
+		break;
+
 	case IROp::MfHi:
-		CompIR_Generic(inst);
+#if PPSSPP_ARCH(AMD64)
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::INIT } });
+		MOV(64, regs_.R(inst.dest), regs_.R(IRREG_LO));
+		SHR(64, regs_.R(inst.dest), Imm8(32));
+#else
+		regs_.MapWithExtra(inst, { { 'G', IRREG_HI, 1, MIPSMap::INIT } });
+		MOV(32, regs_.R(inst.dest), regs_.R(IRREG_HI));
+#endif
 		break;

 	default:
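On AMD64 the cache maps IRREG_LO with lanes = 2, packing MIPS LO into the low 32 bits and HI into the high 32 bits of one 64-bit host register; MtLo/MtHi then become masked merges and MfHi a shift. A standalone value-level check of that layout (plain C++, assumed packing):

#include <cassert>
#include <cstdint>

int main() {
	uint64_t lohi = 0;  // one host register holding LO (low 32) and HI (high 32)
	uint32_t lo = 0x12345678, hi = 0x9ABCDEF0;
	lohi = (lohi & 0xFFFFFFFF00000000ULL) | lo;                    // MtLo: merge low half
	lohi = (lohi & 0x00000000FFFFFFFFULL) | (uint64_t(hi) << 32);  // MtHi: merge high half
	assert(uint32_t(lohi) == lo);        // MfLo: plain 32-bit read
	assert(uint32_t(lohi >> 32) == hi);  // MfHi: shift down 32
}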
@@ -342,12 +433,111 @@ void X64JitBackend::CompIR_Mult(IRInst inst) {

 	switch (inst.op) {
 	case IROp::Mult:
+#if PPSSPP_ARCH(AMD64)
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });
+		MOVSX(64, 32, regs_.RX(IRREG_LO), regs_.R(inst.src1));
+		MOVSX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));
+		IMUL(64, regs_.RX(IRREG_LO), regs_.R(inst.src2));
+#else
+		// Force a spill (before spill locks.)
+		regs_.MapGPR(IRREG_HI, MIPSMap::NOINIT | X64Map::HIGH_DATA);
+		// We keep it here so it stays locked.
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::NOINIT }, { 'G', IRREG_HI, 1, MIPSMap::NOINIT | X64Map::HIGH_DATA } });
+		MOV(32, R(EAX), regs_.R(inst.src1));
+		IMUL(32, regs_.R(inst.src2));
+		MOV(32, regs_.R(IRREG_LO), R(EAX));
+		// IRREG_HI was mapped to EDX.
+#endif
+		break;
+
 	case IROp::MultU:
+#if PPSSPP_ARCH(AMD64)
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });
+		MOVZX(64, 32, regs_.RX(IRREG_LO), regs_.R(inst.src1));
+		MOVZX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));
+		IMUL(64, regs_.RX(IRREG_LO), regs_.R(inst.src2));
+#else
+		// Force a spill (before spill locks.)
+		regs_.MapGPR(IRREG_HI, MIPSMap::NOINIT | X64Map::HIGH_DATA);
+		// We keep it here so it stays locked.
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::NOINIT }, { 'G', IRREG_HI, 1, MIPSMap::NOINIT | X64Map::HIGH_DATA } });
+		MOV(32, R(EAX), regs_.R(inst.src1));
+		MUL(32, regs_.R(inst.src2));
+		MOV(32, regs_.R(IRREG_LO), R(EAX));
+		// IRREG_HI was mapped to EDX.
+#endif
+		break;
+
 	case IROp::Madd:
+#if PPSSPP_ARCH(AMD64)
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
+		MOVSX(64, 32, SCRATCH1, regs_.R(inst.src1));
+		MOVSX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));
+		IMUL(64, SCRATCH1, regs_.R(inst.src2));
+		ADD(64, regs_.R(IRREG_LO), R(SCRATCH1));
+#else
+		// For ones that modify LO/HI, we can't have anything else in EDX.
+		regs_.ReserveAndLockXGPR(EDX);
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
+		MOV(32, R(EAX), regs_.R(inst.src1));
+		IMUL(32, regs_.R(inst.src2));
+		ADD(32, regs_.R(IRREG_LO), R(EAX));
+		ADC(32, regs_.R(IRREG_HI), R(EDX));
+#endif
+		break;
+
 	case IROp::MaddU:
+#if PPSSPP_ARCH(AMD64)
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
+		MOVZX(64, 32, SCRATCH1, regs_.R(inst.src1));
+		MOVZX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));
+		IMUL(64, SCRATCH1, regs_.R(inst.src2));
+		ADD(64, regs_.R(IRREG_LO), R(SCRATCH1));
+#else
+		// For ones that modify LO/HI, we can't have anything else in EDX.
+		regs_.ReserveAndLockXGPR(EDX);
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
+		MOV(32, R(EAX), regs_.R(inst.src1));
+		MUL(32, regs_.R(inst.src2));
+		ADD(32, regs_.R(IRREG_LO), R(EAX));
+		ADC(32, regs_.R(IRREG_HI), R(EDX));
+#endif
+		break;
+
 	case IROp::Msub:
+#if PPSSPP_ARCH(AMD64)
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
+		MOVSX(64, 32, SCRATCH1, regs_.R(inst.src1));
+		MOVSX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));
+		IMUL(64, SCRATCH1, regs_.R(inst.src2));
+		SUB(64, regs_.R(IRREG_LO), R(SCRATCH1));
+#else
+		// For ones that modify LO/HI, we can't have anything else in EDX.
+		regs_.ReserveAndLockXGPR(EDX);
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
+		MOV(32, R(EAX), regs_.R(inst.src1));
+		IMUL(32, regs_.R(inst.src2));
+		SUB(32, regs_.R(IRREG_LO), R(EAX));
+		SBB(32, regs_.R(IRREG_HI), R(EDX));
+#endif
+		break;
+
 	case IROp::MsubU:
-		CompIR_Generic(inst);
+#if PPSSPP_ARCH(AMD64)
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
+		MOVZX(64, 32, SCRATCH1, regs_.R(inst.src1));
+		MOVZX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));
+		IMUL(64, SCRATCH1, regs_.R(inst.src2));
+		SUB(64, regs_.R(IRREG_LO), R(SCRATCH1));
+#else
+		// For ones that modify LO/HI, we can't have anything else in EDX.
+		regs_.ReserveAndLockXGPR(EDX);
+		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });
+		MOV(32, R(EAX), regs_.R(inst.src1));
+		MUL(32, regs_.R(inst.src2));
+		SUB(32, regs_.R(IRREG_LO), R(EAX));
+		SBB(32, regs_.R(IRREG_HI), R(EDX));
+#endif
 		break;

 	default:
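The 32-bit paths rely on x86's one-operand multiply, which fixes its outputs: EDX:EAX = EAX * r/m32 (IMUL signed, MUL unsigned). That is why IRREG_HI is pinned to EDX via X64Map::HIGH_DATA, and why the Madd/Msub forms reserve EDX before mapping. Comment sketch of the 64-bit accumulate built from 32-bit halves:

// After MUL/IMUL: EAX = low 32 bits of the product, EDX = high 32 bits.
//   ADD lo, EAX   ; add low halves, sets the carry flag
//   ADC hi, EDX   ; add high halves plus carry: a 64-bit add in two steps
// The Msub forms use the borrow instead:
//   SUB lo, EAX
//   SBB hi, EDX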
@@ -361,10 +551,74 @@ void X64JitBackend::CompIR_Shift(IRInst inst) {

 	switch (inst.op) {
 	case IROp::Shl:
+		if (cpu_info.bBMI2) {
+			regs_.Map(inst);
+			SHLX(32, regs_.RX(inst.dest), regs_.R(inst.src1), regs_.RX(inst.src2));
+		} else {
+			regs_.MapWithFlags(inst, X64Map::NONE, X64Map::NONE, X64Map::SHIFT);
+			if (inst.dest == inst.src1) {
+				SHL(32, regs_.R(inst.dest), regs_.R(inst.src2));
+			} else if (inst.dest == inst.src2) {
+				MOV(32, R(SCRATCH1), regs_.R(inst.src1));
+				SHL(32, R(SCRATCH1), regs_.R(inst.src2));
+				MOV(32, regs_.R(inst.dest), R(SCRATCH1));
+			} else {
+				MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
+				SHL(32, regs_.R(inst.dest), regs_.R(inst.src2));
+			}
+		}
+		break;
+
 	case IROp::Shr:
+		if (cpu_info.bBMI2) {
+			regs_.Map(inst);
+			SHRX(32, regs_.RX(inst.dest), regs_.R(inst.src1), regs_.RX(inst.src2));
+		} else {
+			regs_.MapWithFlags(inst, X64Map::NONE, X64Map::NONE, X64Map::SHIFT);
+			if (inst.dest == inst.src1) {
+				SHR(32, regs_.R(inst.dest), regs_.R(inst.src2));
+			} else if (inst.dest == inst.src2) {
+				MOV(32, R(SCRATCH1), regs_.R(inst.src1));
+				SHR(32, R(SCRATCH1), regs_.R(inst.src2));
+				MOV(32, regs_.R(inst.dest), R(SCRATCH1));
+			} else {
+				MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
+				SHR(32, regs_.R(inst.dest), regs_.R(inst.src2));
+			}
+		}
+		break;
+
 	case IROp::Sar:
+		if (cpu_info.bBMI2) {
+			regs_.Map(inst);
+			SARX(32, regs_.RX(inst.dest), regs_.R(inst.src1), regs_.RX(inst.src2));
+		} else {
+			regs_.MapWithFlags(inst, X64Map::NONE, X64Map::NONE, X64Map::SHIFT);
+			if (inst.dest == inst.src1) {
+				SAR(32, regs_.R(inst.dest), regs_.R(inst.src2));
+			} else if (inst.dest == inst.src2) {
+				MOV(32, R(SCRATCH1), regs_.R(inst.src1));
+				SAR(32, R(SCRATCH1), regs_.R(inst.src2));
+				MOV(32, regs_.R(inst.dest), R(SCRATCH1));
+			} else {
+				MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
+				SAR(32, regs_.R(inst.dest), regs_.R(inst.src2));
+			}
+		}
+		break;
+
 	case IROp::Ror:
-		CompIR_Generic(inst);
+		regs_.MapWithFlags(inst, X64Map::NONE, X64Map::NONE, X64Map::SHIFT);
+		if (inst.dest == inst.src1) {
+			ROR(32, regs_.R(inst.dest), regs_.R(inst.src2));
+		} else if (inst.dest == inst.src2) {
+			MOV(32, R(SCRATCH1), regs_.R(inst.src1));
+			ROR(32, R(SCRATCH1), regs_.R(inst.src2));
+			MOV(32, regs_.R(inst.dest), R(SCRATCH1));
+		} else {
+			MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
+			ROR(32, regs_.R(inst.dest), regs_.R(inst.src2));
+		}
 		break;

 	case IROp::ShlImm:
@@ -427,6 +681,9 @@ void X64JitBackend::CompIR_Shift(IRInst inst) {
 			regs_.Map(inst);
 			MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
 		}
+	} else if (cpu_info.bBMI2) {
+		regs_.Map(inst);
+		RORX(32, regs_.RX(inst.dest), regs_.R(inst.src1), inst.src2 & 31);
 	} else {
 		regs_.Map(inst);
 		if (inst.dest != inst.src1)
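Legacy variable-count shifts (SHL/SHR/SAR/ROR reg, CL) accept the count only in CL, which is what the X64Map::SHIFT mapping flag enforces by pinning the count operand to ECX. BMI2's SHLX/SHRX/SARX and the immediate-count RORX are three-operand, take the count in any register, and leave EFLAGS alone, so those paths can use a plain Map. Comment sketch of the difference:

// Without BMI2 (count forced into CL by X64Map::SHIFT):
//   mov ecx, count
//   shl dest, cl
// With BMI2 (any count register, flags preserved, no extra moves):
//   shlx dest, src1, count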
@@ -54,7 +54,8 @@ void X64JitBackend::CompIR_Exit(IRInst inst) {
 		break;

 	case IROp::ExitToPC:
-		CompIR_Generic(inst);
+		FlushAll();
+		JMP(dispatcherCheckCoreState_, true);
 		break;

 	default:
@@ -40,10 +40,11 @@ namespace MIPSComp {
 using namespace Gen;
 using namespace X64IRJitConstants;

-struct SimdConstants {
+static struct SimdConstants {
 	alignas(16) const u32 reverseQNAN[4] = { 0x803FFFFF, 0x803FFFFF, 0x803FFFFF, 0x803FFFFF };
 	alignas(16) const u32 noSignMask[4] = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF };
 	alignas(16) const u32 positiveInfinity[4] = { 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000 };
+	alignas(16) const u32 signBitAll[4] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
 } simdConstants;

 void X64JitBackend::CompIR_FArith(IRInst inst) {
@@ -121,9 +122,48 @@ void X64JitBackend::CompIR_FArith(IRInst inst) {
 	}

 	case IROp::FDiv:
+		if (inst.dest == inst.src1) {
+			regs_.Map(inst);
+			DIVSS(regs_.FX(inst.dest), regs_.F(inst.src2));
+		} else if (cpu_info.bAVX) {
+			regs_.Map(inst);
+			VDIVSS(regs_.FX(inst.dest), regs_.FX(inst.src1), regs_.F(inst.src2));
+		} else if (inst.dest == inst.src2) {
+			X64Reg tempReg = regs_.MapWithFPRTemp(inst);
+			MOVAPS(tempReg, regs_.F(inst.src2));
+			MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
+			DIVSS(regs_.FX(inst.dest), R(tempReg));
+		} else {
+			regs_.Map(inst);
+			MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
+			DIVSS(regs_.FX(inst.dest), regs_.F(inst.src2));
+		}
+		break;
+
 	case IROp::FSqrt:
+		regs_.Map(inst);
+		SQRTSS(regs_.FX(inst.dest), regs_.F(inst.src1));
+		break;
+
 	case IROp::FNeg:
-		CompIR_Generic(inst);
+		regs_.Map(inst);
+		if (cpu_info.bAVX) {
+			if (RipAccessible(&simdConstants.signBitAll)) {
+				VXORPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), M(&simdConstants.signBitAll));  // rip accessible
+			} else {
+				MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.signBitAll));
+				VXORPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), MatR(SCRATCH1));
+			}
+		} else {
+			if (inst.dest != inst.src1)
+				MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
+			if (RipAccessible(&simdConstants.signBitAll)) {
+				XORPS(regs_.FX(inst.dest), M(&simdConstants.signBitAll));  // rip accessible
+			} else {
+				MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.signBitAll));
+				XORPS(regs_.FX(inst.dest), MatR(SCRATCH1));
+			}
+		}
 		break;

 	default:
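The SIMD constant table exists because SSE has no float negate or abs instruction; both are sign-bit masks, applied via RIP-relative addressing when the constants are in range and through a pointer in SCRATCH1 otherwise. A standalone check of the bit trick (plain C++, not jit code):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
	// XOR with the sign bit negates a float without touching exponent/mantissa,
	// exactly what XORPS against signBitAll does, four lanes at a time.
	float f = 1.5f;
	uint32_t bits;
	std::memcpy(&bits, &f, sizeof(bits));
	bits ^= 0x80000000u;  // one lane of simdConstants.signBitAll
	std::memcpy(&f, &bits, sizeof(f));
	assert(f == -1.5f);
	// Likewise, AND with 0x7FFFFFFF (noSignMask) implements FAbs/Vec4Abs.
}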
@@ -144,6 +184,26 @@ void X64JitBackend::CompIR_FAssign(IRInst inst) {
 		break;

 	case IROp::FAbs:
+		regs_.Map(inst);
+		if (cpu_info.bAVX) {
+			if (RipAccessible(&simdConstants.noSignMask)) {
+				VANDPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), M(&simdConstants.noSignMask));  // rip accessible
+			} else {
+				MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.noSignMask));
+				VANDPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), MatR(SCRATCH1));
+			}
+		} else {
+			if (inst.dest != inst.src1)
+				MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
+			if (RipAccessible(&simdConstants.noSignMask)) {
+				ANDPS(regs_.FX(inst.dest), M(&simdConstants.noSignMask));  // rip accessible
+			} else {
+				MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.noSignMask));
+				ANDPS(regs_.FX(inst.dest), MatR(SCRATCH1));
+			}
+		}
+		break;
+
 	case IROp::FSign:
 		CompIR_Generic(inst);
 		break;
@@ -159,6 +219,18 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) {

 	constexpr IRReg IRREG_VFPU_CC = IRREG_VFPU_CTRL_BASE + VFPU_CTRL_CC;

+	auto ccToFpcond = [&](IRReg lhs, IRReg rhs, CCFlags cc) {
+		if (regs_.HasLowSubregister(regs_.RX(IRREG_FPCOND))) {
+			XOR(32, regs_.R(IRREG_FPCOND), regs_.R(IRREG_FPCOND));
+			UCOMISS(regs_.FX(lhs), regs_.F(rhs));
+			SETcc(cc, regs_.R(IRREG_FPCOND));
+		} else {
+			UCOMISS(regs_.FX(lhs), regs_.F(rhs));
+			SETcc(cc, R(SCRATCH1));
+			MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
+		}
+	};
+
 	switch (inst.op) {
 	case IROp::FCmp:
 		switch (inst.dest) {
@@ -168,15 +240,14 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) {

 		case IRFpCompareMode::EitherUnordered:
 			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
-			UCOMISS(regs_.FX(inst.src1), regs_.F(inst.src2));
 			// PF = UNORDERED.
-			SETcc(CC_P, R(SCRATCH1));
-			MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
+			ccToFpcond(inst.src1, inst.src2, CC_P);
 			break;

 		case IRFpCompareMode::EqualOrdered:
 			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
 			// Clear the upper bits of SCRATCH1 so we can AND later.
 			// We don't have a single flag we can check, unfortunately.
 			XOR(32, R(SCRATCH1), R(SCRATCH1));
 			UCOMISS(regs_.FX(inst.src1), regs_.F(inst.src2));
 			// E/ZF = EQUAL or UNORDERED (not exactly what we want.)
@@ -196,42 +267,32 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) {

 		case IRFpCompareMode::EqualUnordered:
 			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
-			UCOMISS(regs_.FX(inst.src1), regs_.F(inst.src2));
 			// E/ZF = EQUAL or UNORDERED.
-			SETcc(CC_E, R(SCRATCH1));
-			MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
+			ccToFpcond(inst.src1, inst.src2, CC_E);
 			break;

 		case IRFpCompareMode::LessEqualOrdered:
 			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
-			UCOMISS(regs_.FX(inst.src2), regs_.F(inst.src1));
 			// AE/!CF = GREATER or EQUAL (src2/src1 reversed.)
-			SETcc(CC_AE, R(SCRATCH1));
-			MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
+			ccToFpcond(inst.src2, inst.src1, CC_AE);
 			break;

 		case IRFpCompareMode::LessEqualUnordered:
 			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
-			UCOMISS(regs_.FX(inst.src1), regs_.F(inst.src2));
 			// BE/CF||ZF = LESS THAN or EQUAL or UNORDERED.
-			SETcc(CC_BE, R(SCRATCH1));
-			MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
+			ccToFpcond(inst.src1, inst.src2, CC_BE);
 			break;

 		case IRFpCompareMode::LessOrdered:
 			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
-			UCOMISS(regs_.FX(inst.src2), regs_.F(inst.src1));
 			// A/!CF&&!ZF = GREATER (src2/src1 reversed.)
-			SETcc(CC_A, R(SCRATCH1));
-			MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
+			ccToFpcond(inst.src2, inst.src1, CC_A);
 			break;

 		case IRFpCompareMode::LessUnordered:
 			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
-			UCOMISS(regs_.FX(inst.src1), regs_.F(inst.src2));
 			// B/CF = LESS THAN or UNORDERED.
-			SETcc(CC_B, R(SCRATCH1));
-			MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
+			ccToFpcond(inst.src1, inst.src2, CC_B);
 			break;
 		}
 		break;
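All of these reduce to UCOMISS plus one condition code because UCOMISS sets ZF/PF/CF like an unsigned compare, with PF flagging unordered (NaN). That also explains the src2/src1 swaps: only the "above" conditions exclude NaN. Standard SSE flag summary:

// UCOMISS a, b        ZF PF CF
//   unordered (NaN)    1  1  1   -> CC_P catches it; CC_B/CC_BE include it
//   a < b              0  0  1   -> CC_B
//   a == b             1  0  0   -> CC_E (but also set when unordered)
//   a > b              0  0  0   -> CC_A/CC_AE exclude NaN, so the ordered
//                                   less-than modes compare swapped operands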
@@ -429,7 +490,8 @@ void X64JitBackend::CompIR_FCondAssign(IRInst inst) {
 	if (cpu_info.bAVX) {
 		VMINSS(regs_.FX(inst.dest), regs_.FX(inst.src1), regs_.F(inst.src2));
 	} else {
-		MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
+		if (inst.dest != inst.src1)
+			MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
 		MINSS(regs_.FX(inst.dest), regs_.F(inst.src2));
 	}
 	SetJumpTarget(finishNAN);
@@ -465,7 +527,8 @@ void X64JitBackend::CompIR_FCondAssign(IRInst inst) {
 	if (cpu_info.bAVX) {
 		VMAXSS(regs_.FX(inst.dest), regs_.FX(inst.src1), regs_.F(inst.src2));
 	} else {
-		MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
+		if (inst.dest != inst.src1)
+			MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
 		MAXSS(regs_.FX(inst.dest), regs_.F(inst.src2));
 	}
 	SetJumpTarget(finishNAN);
@@ -65,8 +65,13 @@ void X64JitBackend::CompIR_Basic(IRInst inst) {
 		break;

 	case IROp::SetPC:
+		regs_.Map(inst);
+		MovToPC(regs_.RX(inst.src1));
+		break;
+
 	case IROp::SetPCConst:
-		CompIR_Generic(inst);
+		MOV(32, R(SCRATCH1), Imm32(inst.constant));
+		MovToPC(SCRATCH1);
 		break;

 	default:
@@ -95,6 +100,29 @@ void X64JitBackend::CompIR_System(IRInst inst) {

 	switch (inst.op) {
 	case IROp::Syscall:
+		FlushAll();
+		SaveStaticRegisters();
+
+#ifdef USE_PROFILER
+		// When profiling, we can't skip CallSyscall, since it times syscalls.
+		ABI_CallFunctionC((const u8 *)&CallSyscall, inst.constant);
+#else
+		// Skip the CallSyscall where possible.
+		{
+			MIPSOpcode op(inst.constant);
+			void *quickFunc = GetQuickSyscallFunc(op);
+			if (quickFunc) {
+				ABI_CallFunctionP((const u8 *)quickFunc, (void *)GetSyscallFuncPointer(op));
+			} else {
+				ABI_CallFunctionC((const u8 *)&CallSyscall, inst.constant);
+			}
+		}
+#endif
+
+		LoadStaticRegisters();
+		// This is always followed by an ExitToPC, where we check coreState.
+		break;
+
 	case IROp::CallReplacement:
 	case IROp::Break:
 		CompIR_Generic(inst);
@@ -111,9 +139,17 @@ void X64JitBackend::CompIR_Transfer(IRInst inst) {

 	switch (inst.op) {
 	case IROp::SetCtrlVFPU:
+		regs_.SetGPRImm(IRREG_VFPU_CTRL_BASE + inst.dest, (int32_t)inst.constant);
+		break;
+
 	case IROp::SetCtrlVFPUReg:
+		regs_.Map(inst);
+		MOV(32, regs_.R(IRREG_VFPU_CTRL_BASE + inst.dest), regs_.R(inst.src1));
+		break;
+
 	case IROp::SetCtrlVFPUFReg:
-		CompIR_Generic(inst);
+		regs_.Map(inst);
+		MOVD_xmm(regs_.R(IRREG_VFPU_CTRL_BASE + inst.dest), regs_.FX(inst.src1));
 		break;

 	case IROp::FpCondFromReg:
@@ -128,10 +164,14 @@ void X64JitBackend::CompIR_Transfer(IRInst inst) {

 	case IROp::FpCtrlFromReg:
 	case IROp::FpCtrlToReg:
-	case IROp::VfpuCtrlToReg:
 		CompIR_Generic(inst);
 		break;

+	case IROp::VfpuCtrlToReg:
+		regs_.Map(inst);
+		MOV(32, regs_.R(inst.dest), regs_.R(IRREG_VFPU_CTRL_BASE + inst.src1));
+		break;
+
 	case IROp::FMovFromGPR:
 		if (regs_.IsGPRImm(inst.src1) && regs_.GetGPRImm(inst.src1) == 0) {
 			regs_.MapFPR(inst.dest, MIPSMap::NOINIT);
@@ -39,6 +39,11 @@ namespace MIPSComp {
 using namespace Gen;
 using namespace X64IRJitConstants;

+static struct SimdConstants {
+	alignas(16) const u32 noSignMask[4] = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF };
+	alignas(16) const u32 signBitAll[4] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+} simdConstants;
+
 alignas(16) static const float vec4InitValues[8][4] = {
 	{ 0.0f, 0.0f, 0.0f, 0.0f },
 	{ 1.0f, 1.0f, 1.0f, 1.0f },
@@ -143,8 +148,45 @@ void X64JitBackend::CompIR_VecArith(IRInst inst) {
 		break;

 	case IROp::Vec4Neg:
+		regs_.Map(inst);
+		if (cpu_info.bAVX) {
+			if (RipAccessible(&simdConstants.signBitAll)) {
+				VXORPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), M(&simdConstants.signBitAll));  // rip accessible
+			} else {
+				MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.signBitAll));
+				VXORPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), MatR(SCRATCH1));
+			}
+		} else {
+			if (inst.dest != inst.src1)
+				MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
+			if (RipAccessible(&simdConstants.signBitAll)) {
+				XORPS(regs_.FX(inst.dest), M(&simdConstants.signBitAll));  // rip accessible
+			} else {
+				MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.signBitAll));
+				XORPS(regs_.FX(inst.dest), MatR(SCRATCH1));
+			}
+		}
+		break;
+
 	case IROp::Vec4Abs:
-		CompIR_Generic(inst);
+		regs_.Map(inst);
+		if (cpu_info.bAVX) {
+			if (RipAccessible(&simdConstants.noSignMask)) {
+				VANDPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), M(&simdConstants.noSignMask));  // rip accessible
+			} else {
+				MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.noSignMask));
+				VANDPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), MatR(SCRATCH1));
+			}
+		} else {
+			if (inst.dest != inst.src1)
+				MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
+			if (RipAccessible(&simdConstants.noSignMask)) {
+				ANDPS(regs_.FX(inst.dest), M(&simdConstants.noSignMask));  // rip accessible
+			} else {
+				MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.noSignMask));
+				ANDPS(regs_.FX(inst.dest), MatR(SCRATCH1));
+			}
+		}
 		break;

 	default:
@@ -224,7 +266,23 @@ void X64JitBackend::CompIR_VecHoriz(IRInst inst) {

 	switch (inst.op) {
 	case IROp::Vec4Dot:
-		CompIR_Generic(inst);
+		// TODO: Handle "aliasing" of sizes. In theory it should be fine if not dirty...
+		if (Overlap(inst.dest, 1, inst.src1, 4) || Overlap(inst.dest, 1, inst.src2, 4))
+			DISABLE;
+
+		regs_.Map(inst);
+		if (cpu_info.bSSE4_1 && inst.dest == inst.src1) {
+			DPPS(regs_.FX(inst.dest), regs_.F(inst.src2), 0xF1);
+		} else if (cpu_info.bSSE4_1 && inst.dest == inst.src2) {
+			DPPS(regs_.FX(inst.dest), regs_.F(inst.src1), 0xF1);
+		} else if (cpu_info.bAVX) {
+			VDPPS(128, regs_.FX(inst.dest), regs_.FX(inst.src1), regs_.F(inst.src2), 0xF1);
+		} else if (cpu_info.bSSE4_1) {
+			MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
+			DPPS(regs_.FX(inst.dest), regs_.F(inst.src2), 0xF1);
+		} else {
+			CompIR_Generic(inst);
+		}
 		break;

 	default:
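The DPPS immediate selects inputs in its high nibble and the destination write mask in its low nibble, so 0xF1 multiplies all four lanes, sums them, and writes the sum to lane 0 only (unselected lanes are zeroed), which is exactly Vec4Dot's scalar result. And since a dot product is commutative, the dest == src2 case can simply swap operands. Encoding note (standard SSE4.1 semantics):

// DPPS dst, src, 0xF1:
//   imm[7:4] = 1111 -> include lanes x, y, z, w in the products
//   imm[3:0] = 0001 -> write the sum to lane 0; other dest lanes become 0
// dst.x = dst.x*src.x + dst.y*src.y + dst.z*src.z + dst.w*src.w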
@@ -63,8 +63,20 @@ const int *X64IRRegCache::GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &c
 #endif
 	};

+	if ((flags & X64Map::MASK) == X64Map::SHIFT) {
+		// It's a single option for shifts.
+		static const int shiftReg[] = { ECX };
+		count = 1;
+		return shiftReg;
+	}
+	if ((flags & X64Map::MASK) == X64Map::HIGH_DATA) {
+		// It's a single option for shifts.
+		static const int shiftReg[] = { EDX };
+		count = 1;
+		return shiftReg;
+	}
 #if PPSSPP_ARCH(X86)
-	if ((flags & X64Map::LOW_SUBREG) == X64Map::LOW_SUBREG) {
+	if ((flags & X64Map::MASK) == X64Map::LOW_SUBREG) {
 		static const int lowSubRegAllocationOrder[] = {
 			EDX, EBX, ECX,
 		};
@@ -132,7 +144,19 @@ X64Reg X64IRRegCache::TryMapTempImm(IRReg r, X64Map flags) {
 	_dbg_assert_(IsValidGPR(r));

 	auto canUseReg = [flags](X64Reg r) {
-		return (flags & X64Map::LOW_SUBREG) != X64Map::LOW_SUBREG || HasLowSubregister(r);
+		switch (flags & X64Map::MASK) {
+		case X64Map::NONE:
+			return true;
+		case X64Map::LOW_SUBREG:
+			return HasLowSubregister(r);
+		case X64Map::SHIFT:
+			return r == RCX;
+		case X64Map::HIGH_DATA:
+			return r == RDX;
+		default:
+			_assert_msg_(false, "Unexpected flags");
+		}
+		return false;
 	};

 	// If already mapped, no need for a temporary.
@@ -163,6 +187,13 @@ X64Reg X64IRRegCache::GetAndLockTempR() {
 	return reg;
 }

+void X64IRRegCache::ReserveAndLockXGPR(Gen::X64Reg r) {
+	IRNativeReg nreg = GPRToNativeReg(r);
+	if (nr[nreg].mipsReg != -1)
+		FlushNativeReg(nreg);
+	nr[nreg].tempLockIRIndex = irIndex_;
+}
+
 X64Reg X64IRRegCache::MapWithFPRTemp(IRInst &inst) {
 	return FromNativeReg(MapWithTemp(inst, MIPSLoc::FREG));
 }
@@ -175,6 +206,35 @@ void X64IRRegCache::MapWithFlags(IRInst inst, X64Map destFlags, X64Map src1Flags
 	mapping[1].flags = mapping[1].flags | src1Flags;
 	mapping[2].flags = mapping[2].flags | src2Flags;

+	auto flushReg = [&](IRNativeReg nreg) {
+		for (int i = 0; i < 3; ++i) {
+			if (mapping[i].reg == nr[nreg].mipsReg && (mapping[i].flags & MIPSMap::NOINIT) == MIPSMap::NOINIT) {
+				DiscardNativeReg(nreg);
+				return;
+			}
+		}
+
+		FlushNativeReg(nreg);
+	};
+
+	// If there are any special rules, we might need to spill.
+	for (int i = 0; i < 3; ++i) {
+		switch (mapping[i].flags & X64Map::MASK) {
+		case X64Map::SHIFT:
+			if (nr[RCX].mipsReg != mapping[i].reg)
+				flushReg(RCX);
+			break;
+
+		case X64Map::HIGH_DATA:
+			if (nr[RDX].mipsReg != mapping[i].reg)
+				flushReg(RDX);
+			break;
+
+		default:
+			break;
+		}
+	}
+
 	ApplyMapping(mapping, 3);
 	CleanupMapping(mapping, 3);
 }
@@ -43,7 +43,13 @@ static constexpr auto pcOffset = offsetof(MIPSState, pc) - 128;

 enum class X64Map : uint8_t {
 	NONE = 0,
+	// On 32-bit: EAX, EBX, ECX, EDX
+	LOW_SUBREG = 0x10,
+	// EDX/RDX
+	HIGH_DATA = 0x20,
+	// ECX/RCX
+	SHIFT = 0x30,
 	MASK = 0xF0,
 };
 static inline MIPSMap operator |(const MIPSMap &lhs, const X64Map &rhs) {
 	return MIPSMap((uint8_t)lhs | (uint8_t)rhs);
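Since X64Map lives entirely in MIPSMap's BACKEND_MASK nibble, the mixed-type operator| lets call sites pack a placement constraint next to the generic flags, as CompIR_Mult does with MIPSMap::NOINIT | X64Map::HIGH_DATA. A minimal compiling sketch of the pattern (simplified stand-in types, not the project headers):

#include <cstdint>

enum class MIPSMap : uint8_t { INIT = 0, DIRTY = 1, NOINIT = 2 | DIRTY, BACKEND_MASK = 0xF0 };
enum class X64Map : uint8_t { NONE = 0, LOW_SUBREG = 0x10, HIGH_DATA = 0x20, SHIFT = 0x30, MASK = 0xF0 };

static inline MIPSMap operator |(const MIPSMap &lhs, const X64Map &rhs) {
	return MIPSMap((uint8_t)lhs | (uint8_t)rhs);
}
static inline X64Map operator &(const MIPSMap &lhs, const X64Map &rhs) {
	return X64Map((uint8_t)lhs & (uint8_t)rhs);
}

int main() {
	MIPSMap flags = MIPSMap::NOINIT | X64Map::HIGH_DATA;  // one byte, both requests
	// The backend later recovers its constraint with the MASK nibble:
	return (flags & X64Map::MASK) == X64Map::HIGH_DATA ? 0 : 1;
}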
@@ -82,6 +88,7 @@ public:
 	void FlushBeforeCall();

 	Gen::X64Reg GetAndLockTempR();
+	void ReserveAndLockXGPR(Gen::X64Reg r);

 	Gen::OpArg R(IRReg preg);
 	Gen::OpArg RPtr(IRReg preg);