Mirror of https://github.com/hrydgard/ppsspp.git

Merge pull request #17783 from unknownbrackets/riscv-jit
Implement float/vec operations in RISC-V jit

Commit 4aa2b1fcac: 21 changed files with 1088 additions and 142 deletions
@@ -180,7 +180,7 @@ void Arm64RegCache::MapRegTo(ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags) {
     ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false;
     if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) {
         if (mipsReg == MIPS_REG_ZERO) {
-            // If we get a request to load the zero register, at least we won't spend
+            // If we get a request to map the zero register, at least we won't spend
             // time on a memory access...
             emit_->MOVI2R(reg, 0);
@@ -319,6 +319,7 @@ void Arm64RegCacheFPU::FlushR(MIPSReg r) {
         if (mr[r].reg == INVALID_REG) {
             ERROR_LOG(JIT, "FlushR: MipsReg had bad ArmReg");
         }
         FlushArmReg((ARM64Reg)(S0 + mr[r].reg));
+        break;
 
     case ML_MEM:

@@ -329,8 +330,6 @@ void Arm64RegCacheFPU::FlushR(MIPSReg r) {
         //BAD
         break;
     }
-    mr[r].loc = ML_MEM;
-    mr[r].reg = (int)INVALID_REG;
 }
 
 Arm64Gen::ARM64Reg Arm64RegCacheFPU::ARM64RegForFlush(int r) {
@@ -17,8 +17,6 @@
 
 #pragma once
 
-#pragma once
-
 #include "Core/MIPS/MIPS.h"
 #include "Core/MIPS/ARM64/Arm64RegCache.h"
 #include "Core/MIPS/MIPSVFPUUtils.h"

@@ -165,7 +163,6 @@ private:
     MIPSComp::JitOptions *jo_;
 
     int numARMFpuReg_;
-    int qTime_;
 
     enum {
         // On ARM64, each of the 32 registers are full 128-bit. No sharing of components!
@@ -455,6 +455,7 @@ void IRFrontend::Comp_Syscall(MIPSOpcode op) {
 }
 
 void IRFrontend::Comp_Break(MIPSOpcode op) {
+    ir.Write(IROp::SetPCConst, 0, ir.AddConstant(GetCompilerPC()));
     ir.Write(IROp::Break);
     js.compiling = false;
 }
@@ -203,7 +203,8 @@ void IRFrontend::Comp_mxc1(MIPSOpcode op) {
             return;
         }
         if (fs == 31) {
-            DISABLE; // TODO: Add a new op
+            // This needs to insert fpcond.
+            ir.Write(IROp::FpCtrlToReg, rt);
         } else if (fs == 0) {
             ir.Write(IROp::SetConst, rt, ir.AddConstant(MIPSState::FCR0_VALUE));
         } else {

@@ -219,7 +220,10 @@ void IRFrontend::Comp_mxc1(MIPSOpcode op) {
     case 6: //ctc1
         if (fs == 31) {
             // Set rounding mode
-            DISABLE;
+            RestoreRoundingMode();
+            ir.Write(IROp::FpCtrlFromReg, 0, rt);
+            UpdateRoundingMode();
+            ApplyRoundingMode();
         } else {
             Comp_Generic(op);
         }
@@ -112,6 +112,8 @@ static const IRMeta irMeta[] = {
     { IROp::FMovToGPR, "FMovToGPR", "GF" },
     { IROp::ZeroFpCond, "ZeroFpCond", "" },
     { IROp::FpCondToReg, "FpCondToReg", "G" },
+    { IROp::FpCtrlFromReg, "FpCtrlFromReg", "_G" },
+    { IROp::FpCtrlToReg, "FpCtrlToReg", "G" },
     { IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" },
     { IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" },
     { IROp::SetCtrlVFPUReg, "SetCtrlVFPUReg", "TG" },
@@ -135,6 +135,8 @@ enum class IROp : u8 {
     FSatMinus1_1,
 
     FpCondToReg,
+    FpCtrlFromReg,
+    FpCtrlToReg,
     VfpuCtrlToReg,
 
     ZeroFpCond,
@@ -768,9 +768,11 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
         mips->f[inst->dest] = mips->f[inst->src1] / mips->f[inst->src2];
         break;
     case IROp::FMin:
+        // TODO: This doesn't handle VFPU ordering right.
         mips->f[inst->dest] = std::min(mips->f[inst->src1], mips->f[inst->src2]);
         break;
     case IROp::FMax:
+        // TODO: This doesn't handle VFPU ordering right.
         mips->f[inst->dest] = std::max(mips->f[inst->src1], mips->f[inst->src2]);
         break;

@@ -811,6 +813,17 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
     case IROp::FpCondToReg:
         mips->r[inst->dest] = mips->fpcond;
         break;
+    case IROp::FpCtrlFromReg:
+        mips->fcr31 = mips->r[inst->src1] & 0x0181FFFF;
+        // Extract the new fpcond value.
+        // TODO: Is it really helping us to keep it separate?
+        mips->fpcond = (mips->fcr31 >> 23) & 1;
+        break;
+    case IROp::FpCtrlToReg:
+        // Update the fpcond bit first.
+        mips->fcr31 = (mips->fcr31 & ~(1 << 23)) | ((mips->fpcond & 1) << 23);
+        mips->r[inst->dest] = mips->fcr31;
+        break;
     case IROp::VfpuCtrlToReg:
         mips->r[inst->dest] = mips->vfpuCtrl[inst->src1];
         break;
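The interpreter above is the reference behavior the new jit code has to match. As a plain C++ restatement of the fcr31/fpcond bit juggling (a sketch of ours, not code from the PR):

    #include <cstdint>

    // fcr31 bit 23 is the FP condition flag; PPSSPP also caches it separately in fpcond.
    constexpr uint32_t kFpcondBit = 1u << 23;
    constexpr uint32_t kFcr31WritableMask = 0x0181FFFF;  // writable bits, per FpCtrlFromReg above

    uint32_t PackFcr31(uint32_t fcr31, uint32_t fpcond) {
        // FpCtrlToReg: fold the cached fpcond back into bit 23 before reading fcr31.
        return (fcr31 & ~kFpcondBit) | ((fpcond & 1) << 23);
    }

    void UnpackFcr31(uint32_t value, uint32_t &fcr31, uint32_t &fpcond) {
        // FpCtrlFromReg: mask to the writable bits, then re-derive the cached fpcond.
        fcr31 = value & kFcr31WritableMask;
        fpcond = (fcr31 >> 23) & 1;
    }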
@@ -694,6 +694,13 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out, const IROptions &opts
             out.Write(inst);
         }
         break;
+    case IROp::FpCtrlFromReg:
+        gpr.MapDirtyIn(IRREG_FCR31, inst.src1);
+        gpr.MapDirty(IRREG_FPCOND);
+        goto doDefault;
+    case IROp::FpCtrlToReg:
+        gpr.MapDirtyInIn(inst.dest, IRREG_FPCOND, IRREG_FCR31);
+        goto doDefault;
 
     case IROp::Vec4Init:
     case IROp::Vec4Mov:
@@ -75,9 +75,8 @@ void RiscVJit::GenerateFixedCode(const JitOptions &jo) {
         loadStaticRegisters_ = nullptr;
     }
 
-    // TODO: Do we actually need updateRoundingMode_? Hm.
-    //applyRoundingMode_ = AlignCode16();
-    if (false) {
+    applyRoundingMode_ = AlignCode16();
+    {
         // Not sure if RISC-V has any flush to zero capability? Leaving it off for now...
         LWU(SCRATCH2, CTXREG, offsetof(MIPSState, fcr31));

@@ -105,30 +104,6 @@ void RiscVJit::GenerateFixedCode(const JitOptions &jo) {
         RET();
     }
 
-    //updateRoundingMode_ = AlignCode16();
-    if (false) {
-        LWU(SCRATCH2, CTXREG, offsetof(MIPSState, fcr31));
-
-        // Set SCRATCH2 to FZ:RM (FZ is bit 24, and RM are lowest 2 bits.)
-        ANDI(SCRATCH1, SCRATCH2, 1 << 24);
-        ANDI(SCRATCH2, SCRATCH2, 3);
-        SRLI(SCRATCH1, SCRATCH1, 22);
-        OR(SCRATCH2, SCRATCH2, SCRATCH1);
-
-        // Let's update js.currentRoundingFunc with the right convertS0ToSCRATCH1 func.
-        //LI(SCRATCH1, convertS0ToSCRATCH1);
-        if (cpu_info.RiscV_Zba) {
-            SH_ADD(3, SCRATCH1, SCRATCH2, SCRATCH1);
-        } else {
-            SLLI(SCRATCH2, SCRATCH2, 3);
-            ADD(SCRATCH1, SCRATCH1, SCRATCH2);
-        }
-        LD(SCRATCH2, SCRATCH1, 0);
-        //LI(SCRATCH1, &js.currentRoundingFunc);
-        SW(SCRATCH2, SCRATCH1, 0);
-        RET();
-    }
-
     enterDispatcher_ = AlignCode16();
 
     // Start by saving some regs on the stack. There are 12 GPs and 12 FPs we want.

@@ -280,15 +255,6 @@ void RiscVJit::GenerateFixedCode(const JitOptions &jo) {
     SW(SCRATCH2, SCRATCH1, 0);
     J(quitLoop);
 
-    // TODO: Do we need this?
-    static const Round roundModes[8] = { Round::NEAREST_EVEN, Round::TOZERO, Round::UP, Round::DOWN, Round::NEAREST_EVEN, Round::TOZERO, Round::UP, Round::DOWN };
-    for (size_t i = 0; i < ARRAY_SIZE(roundModes); ++i) {
-        //convertS0ToSCRATCH1[i] = AlignCode16();
-
-        //FCVT(FConv::W, FConv::S, SCRATCH1, F0, roundModes[i]);
-        //RET();
-    }
-
     // Leave this at the end, add more stuff above.
     if (enableDisasm) {
         std::vector<std::string> lines = DisassembleRV64(start, GetCodePtr() - start);
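For context on the deleted updateRoundingMode_ block: it packed fcr31's flush-to-zero bit and rounding mode into a 3-bit table index. A sketch of that index math, under our reading of the ANDI/SRLI/OR sequence above (names ours):

    #include <cstdint>

    // fcr31 keeps the rounding mode in bits 0-1 and flush-to-zero (FZ) in bit 24.
    // The removed code built index = FZ:RM to pick one of 8 conversion helpers.
    int RoundingFuncIndex(uint32_t fcr31) {
        uint32_t fz = (fcr31 & (1u << 24)) >> 22;  // bit 24 -> bit 2, like ANDI + SRLI above
        uint32_t rm = fcr31 & 3;                   // low two bits, like ANDI(..., 3)
        return (int)(fz | rm);                     // 0-7, then scaled by 8 for a pointer table
    }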
@@ -55,7 +55,7 @@ void RiscVJit::CompIR_Exit(IRInst inst) {
 
     case IROp::ExitToPC:
         FlushAll();
-        QuickJ(R_RA, dispatcher_);
+        QuickJ(R_RA, dispatcherCheckCoreState_);
         break;
 
     default:

@@ -134,7 +134,8 @@ void RiscVJit::CompIR_ExitIf(IRInst inst) {
 
     case IROp::ExitToConstIfFpTrue:
     case IROp::ExitToConstIfFpFalse:
-        CompIR_Generic(inst);
+        // Note: not used.
+        DISABLE;
         break;
 
     default:
@@ -39,12 +39,67 @@ void RiscVJit::CompIR_FArith(IRInst inst) {
 
     switch (inst.op) {
     case IROp::FAdd:
+        fpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2);
+        FADD(32, fpr.R(inst.dest), fpr.R(inst.src1), fpr.R(inst.src2));
+        break;
+
     case IROp::FSub:
+        fpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2);
+        FSUB(32, fpr.R(inst.dest), fpr.R(inst.src1), fpr.R(inst.src2));
+        break;
+
     case IROp::FMul:
+        fpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2);
+        // TODO: If FMUL consistently produces NAN across chip vendors, we can skip this.
+        // Luckily this does match the RISC-V canonical NAN.
+        if (inst.src1 != inst.src2) {
+            // These will output 0x80/0x01 if infinity, 0x10/0x08 if zero.
+            // We need to check if one is infinity and the other zero.
+
+            // First, try inf * zero.
+            FCLASS(32, SCRATCH1, fpr.R(inst.src1));
+            FCLASS(32, SCRATCH2, fpr.R(inst.src2));
+            ANDI(R_RA, SCRATCH1, 0x81);
+            FixupBranch lhsNotInf = BEQ(R_RA, R_ZERO);
+            ANDI(R_RA, SCRATCH2, 0x18);
+            FixupBranch infZero = BNE(R_RA, R_ZERO);
+
+            // Okay, what about the other order?
+            SetJumpTarget(lhsNotInf);
+            ANDI(R_RA, SCRATCH1, 0x18);
+            FixupBranch lhsNotZero = BEQ(R_RA, R_ZERO);
+            ANDI(R_RA, SCRATCH2, 0x81);
+            FixupBranch zeroInf = BNE(R_RA, R_ZERO);
+
+            // Nope, all good.
+            SetJumpTarget(lhsNotZero);
+            FMUL(32, fpr.R(inst.dest), fpr.R(inst.src1), fpr.R(inst.src2));
+            FixupBranch skip = J();
+
+            SetJumpTarget(infZero);
+            SetJumpTarget(zeroInf);
+            LI(SCRATCH1, 0x7FC00000);
+            FMV(FMv::W, FMv::X, fpr.R(inst.dest), SCRATCH1);
+
+            SetJumpTarget(skip);
+        } else {
+            FMUL(32, fpr.R(inst.dest), fpr.R(inst.src1), fpr.R(inst.src2));
+        }
+        break;
+
     case IROp::FDiv:
+        fpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2);
+        FDIV(32, fpr.R(inst.dest), fpr.R(inst.src1), fpr.R(inst.src2));
+        break;
+
     case IROp::FSqrt:
+        fpr.MapDirtyIn(inst.dest, inst.src1);
+        FSQRT(32, fpr.R(inst.dest), fpr.R(inst.src1));
+        break;
+
     case IROp::FNeg:
-        CompIR_Generic(inst);
+        fpr.MapDirtyIn(inst.dest, inst.src1);
+        FNEG(32, fpr.R(inst.dest), fpr.R(inst.src1));
         break;
 
     default:
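The FMul guard exists because the PSP expects inf * 0 to yield a specific quiet NaN, regardless of what a vendor's FMUL produces. A scalar model of what the emitted FCLASS checks compute (illustrative only; the function name is ours):

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    // FCLASS sets bit 0 for -inf, bit 7 for +inf, bit 3 for -0, bit 4 for +0,
    // so mask 0x81 tests "any infinity" and 0x18 tests "any zero".
    float MipsCompatMul(float a, float b) {
        bool infZero = (std::isinf(a) && b == 0.0f) || (a == 0.0f && std::isinf(b));
        if (infZero) {
            // Force the RISC-V canonical quiet NaN bit pattern, like LI + FMV above.
            uint32_t bits = 0x7FC00000;
            float nan;
            memcpy(&nan, &bits, sizeof(nan));
            return nan;
        }
        return a * b;
    }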
@@ -59,6 +114,7 @@ void RiscVJit::CompIR_FCondAssign(IRInst inst) {
     switch (inst.op) {
     case IROp::FMin:
     case IROp::FMax:
+        // TODO: These are tricky, have to handle order correctly.
         CompIR_Generic(inst);
         break;
@@ -73,11 +129,39 @@ void RiscVJit::CompIR_FAssign(IRInst inst) {
 
     switch (inst.op) {
     case IROp::FMov:
-    case IROp::FAbs:
-    case IROp::FSign:
-        CompIR_Generic(inst);
+        fpr.MapDirtyIn(inst.dest, inst.src1);
+        FMV(32, fpr.R(inst.dest), fpr.R(inst.src1));
+        break;
+
+    case IROp::FAbs:
+        fpr.MapDirtyIn(inst.dest, inst.src1);
+        FABS(32, fpr.R(inst.dest), fpr.R(inst.src1));
         break;
 
+    case IROp::FSign:
+    {
+        fpr.MapDirtyIn(inst.dest, inst.src1);
+        // Check if it's zero: either 0x10/0x08 from FCLASS means zero.
+        FCLASS(32, SCRATCH1, fpr.R(inst.src1));
+        ANDI(SCRATCH1, SCRATCH1, 0x18);
+        SEQZ(SCRATCH1, SCRATCH1);
+        // Okay, it's zero if zero, 1 otherwise. Convert 1 to a constant 1.0.
+        // Probably non-zero is the common case, so we make that the straight line.
+        FixupBranch skipOne = BEQ(SCRATCH1, R_ZERO);
+        LI(SCRATCH1, 1.0f);
+
+        // Now we just need the sign from it.
+        FMV(FMv::X, FMv::W, SCRATCH2, fpr.R(inst.src1));
+        // Use a wall to isolate the sign, and combine.
+        SRAIW(SCRATCH2, SCRATCH2, 31);
+        SLLIW(SCRATCH2, SCRATCH2, 31);
+        OR(SCRATCH1, SCRATCH1, SCRATCH2);
+
+        SetJumpTarget(skipOne);
+        FMV(FMv::W, FMv::X, fpr.R(inst.dest), SCRATCH1);
+        break;
+    }
+
     default:
         INVALIDOP;
         break;
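A scalar model of the FSign sequence above (our illustration; the emitted code does this with FCLASS, SEQZ and the shift "wall"):

    #include <cstdint>
    #include <cstring>

    // Returns 0.0f for +/-0, otherwise 1.0f with the sign bit copied from the input.
    float MipsFSign(float v) {
        uint32_t in;
        memcpy(&in, &v, sizeof(in));
        uint32_t out = 0;                // zero input takes the skipOne path: plain +0.0f
        if (v != 0.0f) {
            out = 0x3F800000;            // 1.0f bit pattern, like LI(SCRATCH1, 1.0f)
            out |= in & 0x80000000u;     // SRAIW/SLLIW "wall" isolates the sign bit
        }
        float f;
        memcpy(&f, &out, sizeof(f));
        return f;
    }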
@@ -135,7 +219,6 @@ void RiscVJit::CompIR_FCompare(IRInst inst) {
     CONDITIONAL_DISABLE;
 
     switch (inst.op) {
-    case IROp::ZeroFpCond:
     case IROp::FCmp:
     case IROp::FCmovVfpuCC:
     case IROp::FCmpVfpuBit:
@@ -154,9 +237,15 @@ void RiscVJit::CompIR_RoundingMode(IRInst inst) {
 
     switch (inst.op) {
     case IROp::RestoreRoundingMode:
+        RestoreRoundingMode();
+        break;
+
     case IROp::ApplyRoundingMode:
+        ApplyRoundingMode();
+        break;
+
     case IROp::UpdateRoundingMode:
-        CompIR_Generic(inst);
+        // We don't need to do anything, instructions allow a "dynamic" rounding mode.
         break;
 
     default:
@@ -53,8 +53,8 @@ void RiscVJit::SetScratch1ToSrc1Address(IRReg src1) {
 #endif
 }
 
-int32_t RiscVJit::AdjustForAddressOffset(RiscVGen::RiscVReg *reg, int32_t constant) {
-    if (constant < -2048 || constant > 2047) {
+int32_t RiscVJit::AdjustForAddressOffset(RiscVGen::RiscVReg *reg, int32_t constant, int32_t range) {
+    if (constant < -2048 || constant + range > 2047) {
         LI(SCRATCH2, constant);
         ADD(SCRATCH1, *reg, SCRATCH2);
         *reg = SCRATCH1;
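The new range parameter exists because RISC-V I-type immediates are 12-bit signed while a vec4 access spans 16 bytes. A small check of the bound logic, as we read it (names ours):

    #include <cstdint>

    // RISC-V I-type immediates cover [-2048, 2047]. A vec4 access emits offsets
    // imm, imm+4, imm+8 and imm+12, so the whole span has to fit.
    constexpr bool FitsInITypeImm(int32_t constant, int32_t range) {
        return constant >= -2048 && constant + range <= 2047;
    }

    // 2040 fits for a scalar load, but a vec4 at 2040 reaches 2052 and must take
    // the LI + ADD base-adjust path instead.
    static_assert(FitsInITypeImm(2040, 0) && !FitsInITypeImm(2040, 12), "example");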
@@ -124,7 +124,8 @@ void RiscVJit::CompIR_LoadShift(IRInst inst) {
     switch (inst.op) {
     case IROp::Load32Left:
     case IROp::Load32Right:
-        CompIR_Generic(inst);
+        // Should not happen if the pass to split is active.
+        DISABLE;
         break;
 
     default:
@@ -136,9 +137,28 @@
 void RiscVJit::CompIR_FLoad(IRInst inst) {
     CONDITIONAL_DISABLE;
 
+    RiscVReg addrReg = INVALID_REG;
+    if (inst.src1 == MIPS_REG_ZERO) {
+        // This will get changed by AdjustForAddressOffset.
+        addrReg = MEMBASEREG;
+#ifdef MASKED_PSP_MEMORY
+        inst.constant &= Memory::MEMVIEW32_MASK;
+#endif
+    } else if (jo.cachePointers || gpr.IsMappedAsPointer(inst.src1)) {
+        addrReg = gpr.MapRegAsPointer(inst.src1);
+    } else {
+        SetScratch1ToSrc1Address(inst.src1);
+        addrReg = SCRATCH1;
+    }
+
+    s32 imm = AdjustForAddressOffset(&addrReg, inst.constant);
+
+    // TODO: Safe memory? Or enough to have crash handler + validate?
+
     switch (inst.op) {
     case IROp::LoadFloat:
-        CompIR_Generic(inst);
+        fpr.MapReg(inst.dest, MIPSMap::NOINIT);
+        FL(32, fpr.R(inst.dest), addrReg, imm);
         break;
 
     default:
@@ -150,9 +170,32 @@
 void RiscVJit::CompIR_VecLoad(IRInst inst) {
     CONDITIONAL_DISABLE;
 
+    RiscVReg addrReg = INVALID_REG;
+    if (inst.src1 == MIPS_REG_ZERO) {
+        // This will get changed by AdjustForAddressOffset.
+        addrReg = MEMBASEREG;
+#ifdef MASKED_PSP_MEMORY
+        inst.constant &= Memory::MEMVIEW32_MASK;
+#endif
+    } else if (jo.cachePointers || gpr.IsMappedAsPointer(inst.src1)) {
+        addrReg = gpr.MapRegAsPointer(inst.src1);
+    } else {
+        SetScratch1ToSrc1Address(inst.src1);
+        addrReg = SCRATCH1;
+    }
+
+    // We need to be able to address the whole 16 bytes, so offset of 12.
+    s32 imm = AdjustForAddressOffset(&addrReg, inst.constant, 12);
+
+    // TODO: Safe memory? Or enough to have crash handler + validate?
+
     switch (inst.op) {
     case IROp::LoadVec4:
-        CompIR_Generic(inst);
+        for (int i = 0; i < 4; ++i) {
+            // Spilling is okay.
+            fpr.MapReg(inst.dest + i, MIPSMap::NOINIT);
+            FL(32, fpr.R(inst.dest + i), addrReg, imm + 4 * i);
+        }
         break;
 
     default:
@@ -212,7 +255,8 @@ void RiscVJit::CompIR_StoreShift(IRInst inst) {
     switch (inst.op) {
     case IROp::Store32Left:
     case IROp::Store32Right:
-        CompIR_Generic(inst);
+        // Should not happen if the pass to split is active.
+        DISABLE;
         break;
 
     default:
@@ -224,9 +268,28 @@
 void RiscVJit::CompIR_FStore(IRInst inst) {
     CONDITIONAL_DISABLE;
 
+    RiscVReg addrReg = INVALID_REG;
+    if (inst.src1 == MIPS_REG_ZERO) {
+        // This will get changed by AdjustForAddressOffset.
+        addrReg = MEMBASEREG;
+#ifdef MASKED_PSP_MEMORY
+        inst.constant &= Memory::MEMVIEW32_MASK;
+#endif
+    } else if (jo.cachePointers || gpr.IsMappedAsPointer(inst.src1)) {
+        addrReg = gpr.MapRegAsPointer(inst.src1);
+    } else {
+        SetScratch1ToSrc1Address(inst.src1);
+        addrReg = SCRATCH1;
+    }
+
+    s32 imm = AdjustForAddressOffset(&addrReg, inst.constant);
+
+    // TODO: Safe memory? Or enough to have crash handler + validate?
+
     switch (inst.op) {
     case IROp::StoreFloat:
-        CompIR_Generic(inst);
+        fpr.MapReg(inst.src3);
+        FS(32, fpr.R(inst.src3), addrReg, imm);
         break;
 
     default:
@@ -238,9 +301,32 @@
 void RiscVJit::CompIR_VecStore(IRInst inst) {
     CONDITIONAL_DISABLE;
 
+    RiscVReg addrReg = INVALID_REG;
+    if (inst.src1 == MIPS_REG_ZERO) {
+        // This will get changed by AdjustForAddressOffset.
+        addrReg = MEMBASEREG;
+#ifdef MASKED_PSP_MEMORY
+        inst.constant &= Memory::MEMVIEW32_MASK;
+#endif
+    } else if (jo.cachePointers || gpr.IsMappedAsPointer(inst.src1)) {
+        addrReg = gpr.MapRegAsPointer(inst.src1);
+    } else {
+        SetScratch1ToSrc1Address(inst.src1);
+        addrReg = SCRATCH1;
+    }
+
+    // We need to be able to address the whole 16 bytes, so offset of 12.
+    s32 imm = AdjustForAddressOffset(&addrReg, inst.constant, 12);
+
+    // TODO: Safe memory? Or enough to have crash handler + validate?
+
     switch (inst.op) {
     case IROp::StoreVec4:
-        CompIR_Generic(inst);
+        for (int i = 0; i < 4; ++i) {
+            // Spilling is okay, though not ideal.
+            fpr.MapReg(inst.src3 + i);
+            FS(32, fpr.R(inst.src3 + i), addrReg, imm + 4 * i);
+        }
         break;
 
     default:
@@ -15,7 +15,12 @@
 // Official git repository and contact information can be found at
 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
 
+#include "Common/Profiler/Profiler.h"
+#include "Core/Core.h"
+#include "Core/HLE/HLE.h"
+#include "Core/HLE/ReplaceTables.h"
+#include "Core/MemMap.h"
 #include "Core/MIPS/MIPSTables.h"
 #include "Core/MIPS/RiscV/RiscVJit.h"
 #include "Core/MIPS/RiscV/RiscVRegCache.h"
@@ -45,7 +50,15 @@ void RiscVJit::CompIR_Basic(IRInst inst) {
         break;
 
     case IROp::SetConstF:
-        CompIR_Generic(inst);
+        fpr.MapReg(inst.dest, MIPSMap::NOINIT);
+        if (inst.constant == 0) {
+            FCVT(FConv::S, FConv::W, fpr.R(inst.dest), R_ZERO);
+        } else {
+            // TODO: In the future, could use FLI if it's approved.
+            // Also, is FCVT faster?
+            LI(SCRATCH1, (int32_t)inst.constant);
+            FMV(FMv::W, FMv::X, fpr.R(inst.dest), SCRATCH1);
+        }
         break;
 
     case IROp::Downcount:
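SetConstF ships the float's raw bit pattern through an integer register (LI, then FMV to the FPU side). In C++ terms the pattern looks like this (a sketch, not PPSSPP code):

    #include <cstdint>
    #include <cstring>

    // Materialize a float constant from its IEEE-754 bits, the way LI + FMV does.
    float ConstFromBits(uint32_t bits) {
        float f;
        memcpy(&f, &bits, sizeof(f));
        return f;
    }

    // 1.0f is 0x3F800000 and -1.0f is 0xBF800000; a zero constant instead uses
    // FCVT from the zero GPR, avoiding the LI entirely.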
@@ -78,13 +91,85 @@ void RiscVJit::CompIR_Transfer(IRInst inst) {
 
     switch (inst.op) {
     case IROp::SetCtrlVFPU:
+        gpr.SetImm(IRREG_VFPU_CTRL_BASE + inst.dest, (int32_t)inst.constant);
+        break;
+
     case IROp::SetCtrlVFPUReg:
+        gpr.MapDirtyIn(IRREG_VFPU_CTRL_BASE + inst.dest, inst.src1);
+        MV(gpr.R(IRREG_VFPU_CTRL_BASE + inst.dest), gpr.R(inst.src1));
+        gpr.MarkDirty(gpr.R(IRREG_VFPU_CTRL_BASE + inst.dest), gpr.IsNormalized32(inst.src1));
+        break;
+
     case IROp::SetCtrlVFPUFReg:
+        gpr.MapReg(IRREG_VFPU_CTRL_BASE + inst.dest, MIPSMap::NOINIT);
+        fpr.MapReg(inst.src1);
+        FMV(FMv::X, FMv::W, gpr.R(IRREG_VFPU_CTRL_BASE + inst.dest), fpr.R(inst.src1));
+        break;
+
     case IROp::FpCondToReg:
+        gpr.MapDirtyIn(inst.dest, IRREG_FPCOND);
+        MV(gpr.R(inst.dest), gpr.R(IRREG_FPCOND));
+        gpr.MarkDirty(gpr.R(inst.dest), gpr.IsNormalized32(IRREG_FPCOND));
+        break;
+
+    case IROp::ZeroFpCond:
+        gpr.SetImm(IRREG_FPCOND, 0);
+        break;
+
+    case IROp::FpCtrlFromReg:
+        gpr.MapDirtyIn(IRREG_FPCOND, inst.src1, MapType::AVOID_LOAD_MARK_NORM32);
+        LI(SCRATCH1, 0x0181FFFF);
+        AND(SCRATCH1, gpr.R(inst.src1), SCRATCH1);
+        // Extract the new fpcond value.
+        if (cpu_info.RiscV_Zbs) {
+            BEXTI(gpr.R(IRREG_FPCOND), SCRATCH1, 23);
+        } else {
+            SRLI(gpr.R(IRREG_FPCOND), SCRATCH1, 23);
+            ANDI(gpr.R(IRREG_FPCOND), gpr.R(IRREG_FPCOND), 1);
+        }
+        SW(SCRATCH1, CTXREG, IRREG_FCR31 * 4);
+        break;
+
+    case IROp::FpCtrlToReg:
+        gpr.MapDirtyIn(inst.dest, IRREG_FPCOND, MapType::AVOID_LOAD_MARK_NORM32);
+        // Load fcr31 and clear the fpcond bit.
+        LW(SCRATCH1, CTXREG, IRREG_FCR31 * 4);
+        if (cpu_info.RiscV_Zbs) {
+            BCLRI(SCRATCH1, SCRATCH1, 23);
+        } else {
+            LI(SCRATCH2, ~(1 << 23));
+            AND(SCRATCH1, SCRATCH1, SCRATCH2);
+        }
+
+        // Now get the correct fpcond bit.
+        ANDI(SCRATCH2, gpr.R(IRREG_FPCOND), 1);
+        SLLI(SCRATCH2, SCRATCH2, 23);
+        OR(gpr.R(inst.dest), SCRATCH1, SCRATCH2);
+
+        // Also update mips->fcr31 while we're here.
+        SW(gpr.R(inst.dest), CTXREG, IRREG_FCR31 * 4);
+        break;
+
     case IROp::VfpuCtrlToReg:
+        gpr.MapDirtyIn(inst.dest, IRREG_VFPU_CTRL_BASE + inst.src1);
+        MV(gpr.R(inst.dest), gpr.R(IRREG_VFPU_CTRL_BASE + inst.src1));
+        gpr.MarkDirty(gpr.R(inst.dest), gpr.IsNormalized32(IRREG_VFPU_CTRL_BASE + inst.src1));
+        break;
+
     case IROp::FMovFromGPR:
+        fpr.MapReg(inst.dest, MIPSMap::NOINIT);
+        if (gpr.IsImm(inst.src1) && gpr.GetImm(inst.src1) == 0) {
+            FCVT(FConv::S, FConv::W, fpr.R(inst.dest), R_ZERO);
+        } else {
+            gpr.MapReg(inst.src1);
+            FMV(FMv::W, FMv::X, fpr.R(inst.dest), gpr.R(inst.src1));
+        }
+        break;
+
     case IROp::FMovToGPR:
-        CompIR_Generic(inst);
+        gpr.MapReg(inst.dest, MIPSMap::NOINIT);
+        fpr.MapReg(inst.src1);
+        FMV(FMv::X, FMv::W, gpr.R(inst.dest), fpr.R(inst.src1));
         break;
 
     default:
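The Zbs instructions above are single-instruction forms of the two-instruction fallbacks next to them. Their C++ equivalents, for reference (ours):

    #include <cstdint>

    // BEXTI rd, rs, 23  computes  (rs >> 23) & 1   (the SRLI + ANDI fallback).
    uint32_t ExtractBit23(uint32_t rs) {
        return (rs >> 23) & 1;
    }

    // BCLRI rd, rs, 23  computes  rs & ~(1 << 23)  (the LI + AND fallback).
    uint32_t ClearBit23(uint32_t rs) {
        return rs & ~(1u << 23);
    }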
@@ -98,10 +183,61 @@ void RiscVJit::CompIR_System(IRInst inst) {
 
     switch (inst.op) {
     case IROp::Interpret:
+        // IR protects us against this being a branching instruction (well, hopefully.)
+        FlushAll();
+        SaveStaticRegisters();
+        LI(X10, (int32_t)inst.constant);
+        QuickCallFunction((const u8 *)MIPSGetInterpretFunc(MIPSOpcode(inst.constant)));
+        LoadStaticRegisters();
+        break;
+
     case IROp::Syscall:
+        FlushAll();
+        SaveStaticRegisters();
+
+#ifdef USE_PROFILER
+        // When profiling, we can't skip CallSyscall, since it times syscalls.
+        LI(X10, (int32_t)inst.constant);
+        QuickCallFunction(&CallSyscall);
+#else
+        // Skip the CallSyscall where possible.
+        {
+            MIPSOpcode op(inst.constant);
+            void *quickFunc = GetQuickSyscallFunc(op);
+            if (quickFunc) {
+                LI(X10, (uintptr_t)GetSyscallFuncPointer(op));
+                QuickCallFunction((const u8 *)quickFunc);
+            } else {
+                LI(X10, (int32_t)inst.constant);
+                QuickCallFunction(&CallSyscall);
+            }
+        }
+#endif
+
+        LoadStaticRegisters();
+        // This is always followed by an ExitToPC, where we check coreState.
+        break;
+
     case IROp::CallReplacement:
+        FlushAll();
+        SaveStaticRegisters();
+        QuickCallFunction(GetReplacementFunc(inst.constant)->replaceFunc);
+        LoadStaticRegisters();
+        SUB(DOWNCOUNTREG, DOWNCOUNTREG, X10);
+        break;
+
     case IROp::Break:
-        CompIR_Generic(inst);
+        FlushAll();
+        // This doesn't naturally have restore/apply around it.
+        RestoreRoundingMode(true);
+        SaveStaticRegisters();
+        MovFromPC(X10);
+        QuickCallFunction(&Core_Break);
+        LoadStaticRegisters();
+        ApplyRoundingMode(true);
+        MovFromPC(SCRATCH1);
+        ADDI(SCRATCH1, SCRATCH1, 4);
+        QuickJ(R_RA, dispatcherPCInSCRATCH1_);
         break;
 
     default:
@@ -39,9 +39,88 @@ void RiscVJit::CompIR_VecAssign(IRInst inst) {
 
     switch (inst.op) {
     case IROp::Vec4Init:
+        for (int i = 0; i < 4; ++i)
+            fpr.SpillLock(inst.dest + i);
+        for (int i = 0; i < 4; ++i)
+            fpr.MapReg(inst.dest + i, MIPSMap::NOINIT);
+        for (int i = 0; i < 4; ++i)
+            fpr.ReleaseSpillLock(inst.dest + i);
+
+        // TODO: Check if FCVT/FMV/FL is better.
+        switch ((Vec4Init)inst.src1) {
+        case Vec4Init::AllZERO:
+            for (int i = 0; i < 4; ++i)
+                FCVT(FConv::S, FConv::W, fpr.R(inst.dest + i), R_ZERO);
+            break;
+
+        case Vec4Init::AllONE:
+            LI(SCRATCH1, 1.0f);
+            FMV(FMv::W, FMv::X, fpr.R(inst.dest), SCRATCH1);
+            for (int i = 1; i < 4; ++i)
+                FMV(32, fpr.R(inst.dest + i), fpr.R(inst.dest));
+            break;
+
+        case Vec4Init::AllMinusONE:
+            LI(SCRATCH1, -1.0f);
+            FMV(FMv::W, FMv::X, fpr.R(inst.dest), SCRATCH1);
+            for (int i = 1; i < 4; ++i)
+                FMV(32, fpr.R(inst.dest + i), fpr.R(inst.dest));
+            break;
+
+        case Vec4Init::Set_1000:
+            LI(SCRATCH1, 1.0f);
+            for (int i = 0; i < 4; ++i) {
+                if (i == 0)
+                    FMV(FMv::W, FMv::X, fpr.R(inst.dest + i), SCRATCH1);
+                else
+                    FCVT(FConv::S, FConv::W, fpr.R(inst.dest + i), R_ZERO);
+            }
+            break;
+
+        case Vec4Init::Set_0100:
+            LI(SCRATCH1, 1.0f);
+            for (int i = 0; i < 4; ++i) {
+                if (i == 1)
+                    FMV(FMv::W, FMv::X, fpr.R(inst.dest + i), SCRATCH1);
+                else
+                    FCVT(FConv::S, FConv::W, fpr.R(inst.dest + i), R_ZERO);
+            }
+            break;
+
+        case Vec4Init::Set_0010:
+            LI(SCRATCH1, 1.0f);
+            for (int i = 0; i < 4; ++i) {
+                if (i == 2)
+                    FMV(FMv::W, FMv::X, fpr.R(inst.dest + i), SCRATCH1);
+                else
+                    FCVT(FConv::S, FConv::W, fpr.R(inst.dest + i), R_ZERO);
+            }
+            break;
+
+        case Vec4Init::Set_0001:
+            LI(SCRATCH1, 1.0f);
+            for (int i = 0; i < 4; ++i) {
+                if (i == 3)
+                    FMV(FMv::W, FMv::X, fpr.R(inst.dest + i), SCRATCH1);
+                else
+                    FCVT(FConv::S, FConv::W, fpr.R(inst.dest + i), R_ZERO);
+            }
+            break;
+        }
+        break;
+
     case IROp::Vec4Shuffle:
+        fpr.Map4DirtyIn(inst.dest, inst.src1);
+        for (int i = 0; i < 4; ++i) {
+            int lane = (inst.src2 >> (i * 2)) & 3;
+            FMV(32, fpr.R(inst.dest + i), fpr.R(inst.src1 + lane));
+        }
+        break;
+
     case IROp::Vec4Mov:
-        CompIR_Generic(inst);
+        fpr.Map4DirtyIn(inst.dest, inst.src1);
+        for (int i = 0; i < 4; ++i)
+            FMV(32, fpr.R(inst.dest + i), fpr.R(inst.src1 + i));
         break;
 
     default:
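Vec4Shuffle decodes four 2-bit lane selectors from inst.src2. A worked illustration (ours):

    // Each destination lane i picks source lane (imm >> (i * 2)) & 3.
    // imm = 0xE4 = 0b11'10'01'00 selects lanes 0,1,2,3: the identity shuffle.
    // imm = 0x00 broadcasts lane 0 into all four destination lanes.
    int ShuffleLane(int imm, int i) {
        return (imm >> (i * 2)) & 3;
    }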
@@ -55,13 +134,48 @@ void RiscVJit::CompIR_VecArith(IRInst inst) {
 
     switch (inst.op) {
     case IROp::Vec4Add:
+        fpr.Map4DirtyInIn(inst.dest, inst.src1, inst.src2);
+        for (int i = 0; i < 4; ++i)
+            FADD(32, fpr.R(inst.dest + i), fpr.R(inst.src1 + i), fpr.R(inst.src2 + i));
+        break;
+
     case IROp::Vec4Sub:
+        fpr.Map4DirtyInIn(inst.dest, inst.src1, inst.src2);
+        for (int i = 0; i < 4; ++i)
+            FSUB(32, fpr.R(inst.dest + i), fpr.R(inst.src1 + i), fpr.R(inst.src2 + i));
+        break;
+
     case IROp::Vec4Mul:
+        fpr.Map4DirtyInIn(inst.dest, inst.src1, inst.src2);
+        for (int i = 0; i < 4; ++i)
+            FMUL(32, fpr.R(inst.dest + i), fpr.R(inst.src1 + i), fpr.R(inst.src2 + i));
+        break;
+
     case IROp::Vec4Div:
+        fpr.Map4DirtyInIn(inst.dest, inst.src1, inst.src2);
+        for (int i = 0; i < 4; ++i)
+            FDIV(32, fpr.R(inst.dest + i), fpr.R(inst.src1 + i), fpr.R(inst.src2 + i));
+        break;
+
     case IROp::Vec4Scale:
+        fpr.SpillLock(inst.src2);
+        fpr.MapReg(inst.src2);
+        fpr.Map4DirtyIn(inst.dest, inst.src1);
+        fpr.ReleaseSpillLock(inst.src2);
+        for (int i = 0; i < 4; ++i)
+            FMUL(32, fpr.R(inst.dest + i), fpr.R(inst.src1 + i), fpr.R(inst.src2));
+        break;
+
     case IROp::Vec4Neg:
+        fpr.Map4DirtyIn(inst.dest, inst.src1);
+        for (int i = 0; i < 4; ++i)
+            FNEG(32, fpr.R(inst.dest + i), fpr.R(inst.src1 + i));
+        break;
+
     case IROp::Vec4Abs:
-        CompIR_Generic(inst);
+        fpr.Map4DirtyIn(inst.dest, inst.src1);
+        for (int i = 0; i < 4; ++i)
+            FABS(32, fpr.R(inst.dest + i), fpr.R(inst.src1 + i));
         break;
 
     default:
@@ -75,7 +189,39 @@ void RiscVJit::CompIR_VecHoriz(IRInst inst) {
 
     switch (inst.op) {
     case IROp::Vec4Dot:
-        CompIR_Generic(inst);
+        // TODO: Maybe some option to call the slow accurate mode?
+        fpr.SpillLock(inst.dest);
+        for (int i = 0; i < 4; ++i) {
+            fpr.SpillLock(inst.src1 + i);
+            fpr.SpillLock(inst.src2 + i);
+        }
+        for (int i = 0; i < 4; ++i) {
+            fpr.MapReg(inst.src1 + i);
+            fpr.MapReg(inst.src2 + i);
+        }
+        fpr.MapReg(inst.dest, MIPSMap::NOINIT);
+        for (int i = 0; i < 4; ++i) {
+            fpr.ReleaseSpillLock(inst.src1 + i);
+            fpr.ReleaseSpillLock(inst.src2 + i);
+        }
+        fpr.ReleaseSpillLock(inst.dest);
+
+        if ((inst.dest < inst.src1 + 4 && inst.dest >= inst.src1) || (inst.dest < inst.src2 + 4 && inst.dest >= inst.src2)) {
+            // This means inst.dest overlaps one of src1 or src2. We have to do that one first.
+            // Technically this may impact -0.0 and such, but dots accurately need to be aligned anyway.
+            for (int i = 0; i < 4; ++i) {
+                if (inst.dest == inst.src1 + i || inst.dest == inst.src2 + i)
+                    FMUL(32, fpr.R(inst.dest), fpr.R(inst.src1 + i), fpr.R(inst.src2 + i));
+            }
+            for (int i = 0; i < 4; ++i) {
+                if (inst.dest != inst.src1 + i && inst.dest != inst.src2 + i)
+                    FMADD(32, fpr.R(inst.dest), fpr.R(inst.src1 + i), fpr.R(inst.src2 + i), fpr.R(inst.dest));
+            }
+        } else {
+            FMUL(32, fpr.R(inst.dest), fpr.R(inst.src1), fpr.R(inst.src2));
+            for (int i = 1; i < 4; ++i)
+                FMADD(32, fpr.R(inst.dest), fpr.R(inst.src1 + i), fpr.R(inst.src2 + i), fpr.R(inst.dest));
+        }
         break;
 
     default:
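The overlap branch in Vec4Dot orders the multiplies so an aliased destination lane is consumed before the accumulator overwrites it. A scalar model (ours, assuming exactly one aliased lane for simplicity):

    // When dest shares a register with lane aliasedLane of a source, forming
    // that lane's product first means the later FMADDs read intact lanes.
    float Vec4DotAliased(const float src1[4], const float src2[4], int aliasedLane) {
        float acc = src1[aliasedLane] * src2[aliasedLane];  // aliased lane first
        for (int i = 0; i < 4; ++i) {
            if (i != aliasedLane)
                acc += src1[i] * src2[i];  // FMADD-style accumulation for the rest
        }
        return acc;
    }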
@@ -26,7 +26,7 @@ namespace MIPSComp {
 using namespace RiscVGen;
 using namespace RiscVJitConstants;
 
-RiscVJit::RiscVJit(MIPSState *mipsState) : IRJit(mipsState), gpr(mipsState, &jo) {
+RiscVJit::RiscVJit(MIPSState *mipsState) : IRJit(mipsState), gpr(mipsState, &jo), fpr(mipsState, &jo) {
     // Automatically disable incompatible options.
     if (((intptr_t)Memory::base & 0x00000000FFFFFFFFUL) != 0) {
         jo.enablePointerify = false;

@@ -40,7 +40,7 @@ RiscVJit::RiscVJit(MIPSState *mipsState) : IRJit(mipsState), gpr(mipsState, &jo)
     memset(blockStartAddrs_, 0, sizeof(blockStartAddrs_[0]) * MAX_ALLOWED_JIT_BLOCKS);
 
     gpr.Init(this);
-    // TODO: fpr
+    fpr.Init(this);
 
     GenerateFixedCode(jo);
 }

@@ -79,7 +79,7 @@ bool RiscVJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u
     blockStartAddrs_[block_num] = GetCodePointer();
 
     gpr.Start();
-    // TODO: fpr.
+    fpr.Start();
 
     for (const IRInst &inst : instructions) {
         CompileIRInst(inst);

@@ -87,9 +87,8 @@ bool RiscVJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u
     if (jo.Disabled(JitDisable::REGALLOC_GPR)) {
         gpr.FlushAll();
     }
-    // TODO
     if (jo.Disabled(JitDisable::REGALLOC_FPR)) {
-        //fpr.FlushAll();
+        fpr.FlushAll();
     }
 
     // Safety check, in case we get a bunch of really large jit ops without a lot of branching.

@@ -107,13 +106,6 @@ bool RiscVJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u
     return true;
 }
 
-static u32 DoIRInst(uint64_t value) {
-    IRInst inst;
-    memcpy(&inst, &value, sizeof(inst));
-
-    return IRInterpret(currentMIPS, &inst, 1);
-}
-
 void RiscVJit::CompileIRInst(IRInst inst) {
     switch (inst.op) {
     case IROp::Nop:
@@ -281,7 +273,6 @@ void RiscVJit::CompileIRInst(IRInst inst) {
         CompIR_FSat(inst);
         break;
 
-    case IROp::ZeroFpCond:
     case IROp::FCmp:
     case IROp::FCmovVfpuCC:
    case IROp::FCmpVfpuBit:

@@ -299,6 +290,9 @@ void RiscVJit::CompileIRInst(IRInst inst) {
     case IROp::SetCtrlVFPUReg:
     case IROp::SetCtrlVFPUFReg:
     case IROp::FpCondToReg:
+    case IROp::ZeroFpCond:
+    case IROp::FpCtrlFromReg:
+    case IROp::FpCtrlToReg:
     case IROp::VfpuCtrlToReg:
     case IROp::FMovFromGPR:
     case IROp::FMovToGPR:
@@ -392,9 +386,15 @@ void RiscVJit::CompileIRInst(IRInst inst) {
     }
 }
 
+static u32 DoIRInst(uint64_t value) {
+    IRInst inst;
+    memcpy(&inst, &value, sizeof(inst));
+
+    return IRInterpret(currentMIPS, &inst, 1);
+}
+
 void RiscVJit::CompIR_Generic(IRInst inst) {
-    // For now, we're gonna do it the slow and ugly way.
-    // Maybe there's a smarter way to fallback?
+    // If we got here, we're going the slow way.
     uint64_t value;
     memcpy(&value, &inst, sizeof(inst));
@@ -403,20 +403,24 @@ void RiscVJit::CompIR_Generic(IRInst inst) {
     SaveStaticRegisters();
     QuickCallFunction(&DoIRInst);
     LoadStaticRegisters();
-    // Result in X10 aka SCRATCH1.
-    _assert_(X10 == SCRATCH1);
-    if (BInRange(dispatcherPCInSCRATCH1_)) {
-        BNE(X10, R_ZERO, dispatcherPCInSCRATCH1_);
-    } else {
-        FixupBranch skip = BEQ(X10, R_ZERO);
-        QuickJ(R_RA, dispatcherPCInSCRATCH1_);
-        SetJumpTarget(skip);
+
+    // We only need to check the return value if it's a potential exit.
+    if ((GetIRMeta(inst.op)->flags & IRFLAG_EXIT) != 0) {
+        // Result in X10 aka SCRATCH1.
+        _assert_(X10 == SCRATCH1);
+        if (BInRange(dispatcherPCInSCRATCH1_)) {
+            BNE(X10, R_ZERO, dispatcherPCInSCRATCH1_);
+        } else {
+            FixupBranch skip = BEQ(X10, R_ZERO);
+            QuickJ(R_RA, dispatcherPCInSCRATCH1_);
+            SetJumpTarget(skip);
+        }
     }
 }
 
 void RiscVJit::FlushAll() {
     gpr.FlushAll();
-    // TODO: fpr.
+    fpr.FlushAll();
 }
 
 bool RiscVJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
@@ -433,6 +437,8 @@ bool RiscVJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
         name = "loadStaticRegisters";
     } else if (ptr == enterDispatcher_) {
         name = "enterDispatcher";
+    } else if (ptr == applyRoundingMode_) {
+        name = "applyRoundingMode";
     } else if (!IsInSpace(ptr)) {
         return false;
     } else {

@@ -492,20 +498,12 @@ void RiscVJit::ClearCache() {
     memset(blockStartAddrs_, 0, sizeof(blockStartAddrs_[0]) * MAX_ALLOWED_JIT_BLOCKS);
 }
 
-void RiscVJit::UpdateFCR31() {
-    IRJit::UpdateFCR31();
-
-    // TODO: Handle rounding modes?
-}
-
 void RiscVJit::RestoreRoundingMode(bool force) {
-    // TODO: Could maybe skip if not hasSetRounding? But that's on IRFrontend...
+    FSRMI(Round::NEAREST_EVEN);
 }
 
 void RiscVJit::ApplyRoundingMode(bool force) {
-    // TODO: Also could maybe sometimes skip?
-    //QuickCallFunction(applyRoundingMode_);
+    QuickCallFunction(applyRoundingMode_);
 }
 
 void RiscVJit::MovFromPC(RiscVReg r) {
@@ -24,6 +24,7 @@
 #include "Core/MIPS/JitCommon/JitState.h"
 #include "Core/MIPS/JitCommon/JitCommon.h"
 #include "Core/MIPS/RiscV/RiscVRegCache.h"
+#include "Core/MIPS/RiscV/RiscVRegCacheFPU.h"
 
 namespace MIPSComp {

@@ -41,7 +42,6 @@ public:
     const u8 *GetCrashHandler() const override;
 
     void ClearCache() override;
-    void UpdateFCR31() override;
 
     // TODO: GetBlockCacheDebugInterface, block linking?

@@ -107,12 +107,13 @@ private:
 
     void SetScratch1ToSrc1Address(IRReg src1);
     // Modifies SCRATCH regs.
-    int32_t AdjustForAddressOffset(RiscVGen::RiscVReg *reg, int32_t constant);
+    int32_t AdjustForAddressOffset(RiscVGen::RiscVReg *reg, int32_t constant, int32_t range = 0);
     void NormalizeSrc1(IRInst inst, RiscVGen::RiscVReg *reg, RiscVGen::RiscVReg tempReg, bool allowOverlap);
     void NormalizeSrc12(IRInst inst, RiscVGen::RiscVReg *lhs, RiscVGen::RiscVReg *rhs, RiscVGen::RiscVReg lhsTempReg, RiscVGen::RiscVReg rhsTempReg, bool allowOverlap);
     RiscVGen::RiscVReg NormalizeR(IRRegIndex rs, IRRegIndex rd, RiscVGen::RiscVReg tempReg);
 
     RiscVRegCache gpr;
+    RiscVRegCacheFPU fpr;
 
     static constexpr int MAX_ALLOWED_JIT_BLOCKS = 262144;

@@ -125,6 +126,7 @@ private:
     const u8 *dispatcher_ = nullptr;
     const u8 *dispatcherNoCheck_ = nullptr;
     const u8 *dispatcherFetch_ = nullptr;
+    const u8 *applyRoundingMode_ = nullptr;
 
     const u8 *saveStaticRegisters_ = nullptr;
     const u8 *loadStaticRegisters_ = nullptr;
@@ -15,15 +15,15 @@
 // Official git repository and contact information can be found at
 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
 
+#ifndef offsetof
+#include <cstddef>
+#endif
+
 #include "Common/CPUDetect.h"
 #include "Core/MIPS/RiscV/RiscVRegCache.h"
 #include "Core/MIPS/JitCommon/JitState.h"
 #include "Core/Reporting.h"
 
-#ifndef offsetof
-#include "stddef.h"
-#endif
-
 using namespace RiscVGen;
 using namespace RiscVJitConstants;
@@ -36,20 +36,14 @@ void RiscVRegCache::Init(RiscVEmitter *emitter) {
 }
 
 void RiscVRegCache::Start() {
-    for (int i = 0; i < NUM_RVREG; i++) {
-        ar[i].mipsReg = IRREG_INVALID;
-        ar[i].isDirty = false;
-        ar[i].pointerified = false;
-        ar[i].tempLocked = false;
-        ar[i].normalized32 = false;
-    }
-    for (int i = 0; i < NUM_MIPSREG; i++) {
-        mr[i].loc = MIPSLoc::MEM;
-        mr[i].reg = INVALID_REG;
-        mr[i].imm = -1;
-        mr[i].spillLock = false;
-        mr[i].isStatic = false;
-    }
+    if (!initialReady_) {
+        SetupInitialRegs();
+        initialReady_ = true;
+    }
+
+    memcpy(ar, arInitial_, sizeof(ar));
+    memcpy(mr, mrInitial_, sizeof(mr));
 
     int numStatics;
     const StaticAllocation *statics = GetStaticAllocations(numStatics);
     for (int i = 0; i < numStatics; i++) {
@@ -61,24 +55,41 @@ void RiscVRegCache::Start() {
         mr[statics[i].mr].isStatic = true;
         mr[statics[i].mr].spillLock = true;
     }
+}
+
+void RiscVRegCache::SetupInitialRegs() {
+    for (int i = 0; i < NUM_RVREG; i++) {
+        arInitial_[i].mipsReg = IRREG_INVALID;
+        arInitial_[i].isDirty = false;
+        arInitial_[i].pointerified = false;
+        arInitial_[i].tempLocked = false;
+        arInitial_[i].normalized32 = false;
+    }
+    for (int i = 0; i < NUM_MIPSREG; i++) {
+        mrInitial_[i].loc = MIPSLoc::MEM;
+        mrInitial_[i].reg = INVALID_REG;
+        mrInitial_[i].imm = -1;
+        mrInitial_[i].spillLock = false;
+        mrInitial_[i].isStatic = false;
+    }
 
     // Treat R_ZERO a bit specially, but it's basically static alloc too.
-    ar[R_ZERO].mipsReg = MIPS_REG_ZERO;
-    ar[R_ZERO].normalized32 = true;
-    mr[MIPS_REG_ZERO].loc = MIPSLoc::RVREG_IMM;
-    mr[MIPS_REG_ZERO].reg = R_ZERO;
-    mr[MIPS_REG_ZERO].imm = 0;
-    mr[MIPS_REG_ZERO].isStatic = true;
+    arInitial_[R_ZERO].mipsReg = MIPS_REG_ZERO;
+    arInitial_[R_ZERO].normalized32 = true;
+    mrInitial_[MIPS_REG_ZERO].loc = MIPSLoc::RVREG_IMM;
+    mrInitial_[MIPS_REG_ZERO].reg = R_ZERO;
+    mrInitial_[MIPS_REG_ZERO].imm = 0;
+    mrInitial_[MIPS_REG_ZERO].isStatic = true;
 }
 
 const RiscVReg *RiscVRegCache::GetMIPSAllocationOrder(int &count) {
     // X8 and X9 are the most ideal for static alloc because they can be used with compression.
     // Otherwise we stick to saved regs - might not be necessary.
     static const RiscVReg allocationOrder[] = {
-        X7, X8, X9, X12, X13, X14, X5, X6, X15, X16, X17, X18, X19, X20, X21, X22, X23, X28, X29, X30, X31,
+        X8, X9, X12, X13, X14, X15, X5, X6, X7, X16, X17, X18, X19, X20, X21, X22, X23, X28, X29, X30, X31,
     };
     static const RiscVReg allocationOrderStaticAlloc[] = {
-        X7, X12, X13, X14, X5, X6, X15, X16, X17, X21, X22, X23, X28, X29, X30, X31,
+        X12, X13, X14, X15, X5, X6, X7, X16, X17, X21, X22, X23, X28, X29, X30, X31,
     };
 
     if (jo_->useStaticAlloc) {
@@ -432,6 +443,7 @@ RiscVReg RiscVRegCache::GetAndLockTempR() {
     RiscVReg reg = AllocateReg();
     if (reg != INVALID_REG) {
         ar[reg].tempLocked = true;
+        pendingUnlock_ = true;
     }
     return reg;
 }
@@ -958,14 +970,6 @@ bool RiscVRegCache::IsImm(IRRegIndex r) const {
     return mr[r].loc == MIPSLoc::IMM || mr[r].loc == MIPSLoc::RVREG_IMM;
 }
 
-bool RiscVRegCache::IsPureImm(IRRegIndex r) const {
-    _dbg_assert_(IsValidReg(r));
-    if (r == MIPS_REG_ZERO)
-        return true;
-    else
-        return mr[r].loc == MIPSLoc::IMM;
-}
-
 u64 RiscVRegCache::GetImm(IRRegIndex r) const {
     _dbg_assert_(IsValidReg(r));
     if (r == MIPS_REG_ZERO)
@@ -1016,9 +1020,13 @@ void RiscVRegCache::SpillLock(IRRegIndex r1, IRRegIndex r2, IRRegIndex r3, IRReg
     if (r2 != IRREG_INVALID) mr[r2].spillLock = true;
     if (r3 != IRREG_INVALID) mr[r3].spillLock = true;
     if (r4 != IRREG_INVALID) mr[r4].spillLock = true;
+    pendingUnlock_ = true;
 }
 
 void RiscVRegCache::ReleaseSpillLocksAndDiscardTemps() {
+    if (!pendingUnlock_)
+        return;
+
     for (int i = 0; i < NUM_MIPSREG; i++) {
         if (!mr[i].isStatic)
             mr[i].spillLock = false;

@@ -1026,6 +1034,8 @@ void RiscVRegCache::ReleaseSpillLocksAndDiscardTemps() {
     for (int i = 0; i < NUM_RVREG; i++) {
         ar[i].tempLocked = false;
     }
+
+    pendingUnlock_ = false;
 }
 
 void RiscVRegCache::ReleaseSpillLock(IRRegIndex r1, IRRegIndex r2, IRRegIndex r3, IRRegIndex r4) {
@@ -68,10 +68,6 @@ enum class MapType {
 
 } // namespace RiscVJitConstants
 
-namespace MIPSAnalyst {
-struct AnalysisResults;
-};
-
 namespace MIPSComp {
 struct JitOptions;
 }

@@ -116,10 +112,7 @@ public:
 
     void SetImm(IRRegIndex reg, u64 immVal);
     bool IsImm(IRRegIndex reg) const;
-    bool IsPureImm(IRRegIndex reg) const;
     u64 GetImm(IRRegIndex reg) const;
-    // Optimally set a register to an imm value (possibly using another register.)
-    void SetRegImm(RiscVGen::RiscVReg reg, u64 imm);
 
     // May fail and return INVALID_REG if it needs flushing.
     RiscVGen::RiscVReg TryMapTempImm(IRRegIndex);

@@ -144,7 +137,6 @@ public:
     void MapDirtyInIn(IRRegIndex rd, IRRegIndex rs, IRRegIndex rt, RiscVJitConstants::MapType type = RiscVJitConstants::MapType::AVOID_LOAD);
     void MapDirtyDirtyIn(IRRegIndex rd1, IRRegIndex rd2, IRRegIndex rs, RiscVJitConstants::MapType type = RiscVJitConstants::MapType::AVOID_LOAD);
     void MapDirtyDirtyInIn(IRRegIndex rd1, IRRegIndex rd2, IRRegIndex rs, IRRegIndex rt, RiscVJitConstants::MapType type = RiscVJitConstants::MapType::AVOID_LOAD);
-    void FlushRiscVReg(RiscVGen::RiscVReg r);
     void FlushBeforeCall();
     void FlushAll();
     void FlushR(IRRegIndex r);

@@ -171,12 +163,16 @@ private:
     RiscVGen::RiscVReg AllocateReg();
    RiscVGen::RiscVReg FindBestToSpill(bool unusedOnly, bool *clobbered);
     RiscVGen::RiscVReg RiscVRegForFlush(IRRegIndex r);
+    void FlushRiscVReg(RiscVGen::RiscVReg r);
+    void SetRegImm(RiscVGen::RiscVReg reg, u64 imm);
     void AddMemBase(RiscVGen::RiscVReg reg);
     int GetMipsRegOffset(IRRegIndex r);
 
     bool IsValidReg(IRRegIndex r) const;
     bool IsValidRegNoZero(IRRegIndex r) const;
 
+    void SetupInitialRegs();
+
     MIPSState *mips_;
     RiscVGen::RiscVEmitter *emit_ = nullptr;
     MIPSComp::JitOptions *jo_;

@@ -188,4 +184,9 @@ private:
 
     RegStatusRiscV ar[NUM_RVREG]{};
     RegStatusMIPS mr[NUM_MIPSREG]{};
 
+    bool initialReady_ = false;
+    bool pendingUnlock_ = false;
+    RegStatusRiscV arInitial_[NUM_RVREG];
+    RegStatusMIPS mrInitial_[NUM_MIPSREG];
 };
@@ -14,3 +14,401 @@
 
 // Official git repository and contact information can be found at
 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
 
+#ifndef offsetof
+#include <cstddef>
+#endif
+
+#include "Common/CPUDetect.h"
+#include "Core/MIPS/RiscV/RiscVRegCacheFPU.h"
+#include "Core/MIPS/JitCommon/JitState.h"
+#include "Core/Reporting.h"
+
+using namespace RiscVGen;
+using namespace RiscVJitConstants;
+
+RiscVRegCacheFPU::RiscVRegCacheFPU(MIPSState *mipsState, MIPSComp::JitOptions *jo)
+    : mips_(mipsState), jo_(jo) {}
+
+void RiscVRegCacheFPU::Init(RiscVEmitter *emitter) {
+    emit_ = emitter;
+}
+
+void RiscVRegCacheFPU::Start() {
+    if (!initialReady_) {
+        SetupInitialRegs();
+        initialReady_ = true;
+    }
+
+    memcpy(ar, arInitial_, sizeof(ar));
+    memcpy(mr, mrInitial_, sizeof(mr));
+    pendingFlush_ = false;
+}
+
+void RiscVRegCacheFPU::SetupInitialRegs() {
+    for (int i = 0; i < NUM_RVFPUREG; i++) {
+        arInitial_[i].mipsReg = IRREG_INVALID;
+        arInitial_[i].isDirty = false;
+    }
+    for (int i = 0; i < NUM_MIPSFPUREG; i++) {
+        mrInitial_[i].loc = MIPSLoc::MEM;
+        mrInitial_[i].reg = (int)INVALID_REG;
+        mrInitial_[i].spillLock = false;
+    }
+}
+
+const RiscVReg *RiscVRegCacheFPU::GetMIPSAllocationOrder(int &count) {
+    // F8 through F15 are used for compression, so they are great.
+    // TODO: Maybe we could remove some saved regs since we rarely need that many? Or maybe worth it?
+    static const RiscVReg allocationOrder[] = {
+        F8, F9, F10, F11, F12, F13, F14, F15,
+        F0, F1, F2, F3, F4, F5, F6, F7,
+        F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31,
+    };
+
+    count = ARRAY_SIZE(allocationOrder);
+    return allocationOrder;
+}
+
+bool RiscVRegCacheFPU::IsInRAM(IRRegIndex reg) {
+    _dbg_assert_(IsValidReg(reg));
+    return mr[reg].loc == MIPSLoc::MEM;
+}
+
+bool RiscVRegCacheFPU::IsMapped(IRRegIndex mipsReg) {
+    _dbg_assert_(IsValidReg(mipsReg));
+    return mr[mipsReg].loc == MIPSLoc::RVREG;
+}
+
+RiscVReg RiscVRegCacheFPU::MapReg(IRRegIndex mipsReg, MIPSMap mapFlags) {
+    _dbg_assert_(IsValidReg(mipsReg));
+    _dbg_assert_(mr[mipsReg].loc == MIPSLoc::MEM || mr[mipsReg].loc == MIPSLoc::RVREG);
+
+    pendingFlush_ = true;
+
+    // Let's see if it's already mapped. If so we just need to update the dirty flag.
+    // We don't need to check for NOINIT because we assume that anyone who maps
+    // with that flag immediately writes a "known" value to the register.
+    if (mr[mipsReg].loc == MIPSLoc::RVREG) {
+        _assert_msg_(ar[mr[mipsReg].reg].mipsReg == mipsReg, "FPU mapping out of sync, IR=%i", mipsReg);
+        if ((mapFlags & MIPSMap::DIRTY) == MIPSMap::DIRTY) {
+            ar[mr[mipsReg].reg].isDirty = true;
+        }
+        return (RiscVReg)(mr[mipsReg].reg + F0);
+    }
+
+    // Okay, not mapped, so we need to allocate an RV register.
+    RiscVReg reg = AllocateReg();
+    if (reg != INVALID_REG) {
+        // That means it's free. Grab it, and load the value into it (if requested).
+        ar[reg - F0].isDirty = (mapFlags & MIPSMap::DIRTY) == MIPSMap::DIRTY;
+        if ((mapFlags & MIPSMap::NOINIT) != MIPSMap::NOINIT) {
+            if (mr[mipsReg].loc == MIPSLoc::MEM) {
+                emit_->FL(32, reg, CTXREG, GetMipsRegOffset(mipsReg));
+            }
+        }
+        ar[reg - F0].mipsReg = mipsReg;
+        mr[mipsReg].loc = MIPSLoc::RVREG;
+        mr[mipsReg].reg = reg - F0;
+        return reg;
+    }
+
+    return reg;
+}
+
+RiscVReg RiscVRegCacheFPU::AllocateReg() {
+    int allocCount = 0;
+    const RiscVReg *allocOrder = GetMIPSAllocationOrder(allocCount);
+
+allocate:
+    for (int i = 0; i < allocCount; i++) {
+        RiscVReg reg = allocOrder[i];
+
+        if (ar[reg - F0].mipsReg == IRREG_INVALID) {
+            return reg;
+        }
+    }
+
+    // Still nothing. Let's spill a reg and goto 10.
+    // TODO: Use age or something to choose which register to spill?
+    // TODO: Spill dirty regs first? or opposite?
+    bool clobbered;
+    RiscVReg bestToSpill = FindBestToSpill(true, &clobbered);
+    if (bestToSpill == INVALID_REG) {
+        bestToSpill = FindBestToSpill(false, &clobbered);
+    }
+
+    if (bestToSpill != INVALID_REG) {
+        if (clobbered) {
+            DiscardR(ar[bestToSpill - F0].mipsReg);
+        } else {
+            FlushRiscVReg(bestToSpill);
+        }
+        // Now one must be free.
+        goto allocate;
+    }
+
+    // Uh oh, we have all of them spilllocked....
+    ERROR_LOG_REPORT(JIT, "Out of spillable registers near PC %08x", mips_->pc);
+    _assert_(bestToSpill != INVALID_REG);
+    return INVALID_REG;
+}
+
+RiscVReg RiscVRegCacheFPU::FindBestToSpill(bool unusedOnly, bool *clobbered) {
+    int allocCount = 0;
+    const RiscVReg *allocOrder = GetMIPSAllocationOrder(allocCount);
+
+    static const int UNUSED_LOOKAHEAD_OPS = 30;
+
+    *clobbered = false;
+    for (int i = 0; i < allocCount; i++) {
+        RiscVReg reg = allocOrder[i];
+        if (ar[reg - F0].mipsReg != IRREG_INVALID && mr[ar[reg - F0].mipsReg].spillLock)
+            continue;
+
+        // TODO: Look for clobbering in the IRInst array with index?
+
+        // Not awesome. A used reg. Let's try to avoid spilling.
+        // TODO: Actually check if we'd be spilling.
+        if (unusedOnly) {
+            continue;
+        }
+
+        return reg;
+    }
+
+    return INVALID_REG;
+}
+
+void RiscVRegCacheFPU::MapInIn(IRRegIndex rd, IRRegIndex rs) {
+    SpillLock(rd, rs);
+    MapReg(rd);
+    MapReg(rs);
+    ReleaseSpillLock(rd);
+    ReleaseSpillLock(rs);
+}
+
+void RiscVRegCacheFPU::MapDirtyIn(IRRegIndex rd, IRRegIndex rs, bool avoidLoad) {
+    SpillLock(rd, rs);
+    bool load = !avoidLoad || rd == rs;
+    MapReg(rd, load ? MIPSMap::DIRTY : MIPSMap::NOINIT);
+    MapReg(rs);
+    ReleaseSpillLock(rd);
+    ReleaseSpillLock(rs);
+}
+
+void RiscVRegCacheFPU::MapDirtyInIn(IRRegIndex rd, IRRegIndex rs, IRRegIndex rt, bool avoidLoad) {
+    SpillLock(rd, rs, rt);
+    bool load = !avoidLoad || (rd == rs || rd == rt);
+    MapReg(rd, load ? MIPSMap::DIRTY : MIPSMap::NOINIT);
+    MapReg(rt);
+    MapReg(rs);
+    ReleaseSpillLock(rd);
+    ReleaseSpillLock(rs);
+    ReleaseSpillLock(rt);
+}
+
+void RiscVRegCacheFPU::Map4DirtyIn(IRRegIndex rdbase, IRRegIndex rsbase, bool avoidLoad) {
+    for (int i = 0; i < 4; ++i)
+        SpillLock(rdbase + i, rsbase + i);
+    bool load = !avoidLoad || (rdbase < rsbase + 4 && rdbase + 4 > rsbase);
+    for (int i = 0; i < 4; ++i)
+        MapReg(rdbase + i, load ? MIPSMap::DIRTY : MIPSMap::NOINIT);
+    for (int i = 0; i < 4; ++i)
+        MapReg(rsbase + i);
+    for (int i = 0; i < 4; ++i)
+        ReleaseSpillLock(rdbase + i, rsbase + i);
+}
+
+void RiscVRegCacheFPU::Map4DirtyInIn(IRRegIndex rdbase, IRRegIndex rsbase, IRRegIndex rtbase, bool avoidLoad) {
+    for (int i = 0; i < 4; ++i)
+        SpillLock(rdbase + i, rsbase + i, rtbase + i);
+    bool load = !avoidLoad || (rdbase < rsbase + 4 && rdbase + 4 > rsbase) || (rdbase < rtbase + 4 && rdbase + 4 > rtbase);
+    for (int i = 0; i < 4; ++i)
+        MapReg(rdbase + i, load ? MIPSMap::DIRTY : MIPSMap::NOINIT);
+    for (int i = 0; i < 4; ++i)
+        MapReg(rsbase + i);
+    for (int i = 0; i < 4; ++i)
+        MapReg(rtbase + i);
+    for (int i = 0; i < 4; ++i)
+        ReleaseSpillLock(rdbase + i, rsbase + i, rtbase + i);
+}
+
+void RiscVRegCacheFPU::FlushRiscVReg(RiscVReg r) {
+    _dbg_assert_(r >= F0 && r <= F31);
+    int reg = r - F0;
+    if (ar[reg].mipsReg == IRREG_INVALID) {
+        // Nothing to do, reg not mapped.
+        return;
+    }
+    if (ar[reg].isDirty && mr[ar[reg].mipsReg].loc == MIPSLoc::RVREG) {
+        emit_->FS(32, r, CTXREG, GetMipsRegOffset(ar[reg].mipsReg));
+    }
+    mr[ar[reg].mipsReg].loc = MIPSLoc::MEM;
+    mr[ar[reg].mipsReg].reg = (int)INVALID_REG;
+    ar[reg].mipsReg = IRREG_INVALID;
+    ar[reg].isDirty = false;
+}
+
+void RiscVRegCacheFPU::FlushR(IRRegIndex r) {
+    _dbg_assert_(IsValidReg(r));
+    RiscVReg reg = RiscVRegForFlush(r);
+    if (reg != INVALID_REG)
+        FlushRiscVReg(reg);
+}
+
+RiscVReg RiscVRegCacheFPU::RiscVRegForFlush(IRRegIndex r) {
+    _dbg_assert_(IsValidReg(r));
+    switch (mr[r].loc) {
+    case MIPSLoc::RVREG:
+        _assert_msg_(mr[r].reg != INVALID_REG, "RiscVRegForFlush: IR %d had bad RiscVReg", r);
+        if (mr[r].reg == INVALID_REG) {
+            return INVALID_REG;
+        }
+        return (RiscVReg)(F0 + mr[r].reg);
+
+    case MIPSLoc::MEM:
+        return INVALID_REG;
+
+    default:
+        _assert_(false);
+        return INVALID_REG;
+    }
+}
+
+void RiscVRegCacheFPU::FlushAll() {
+    if (!pendingFlush_) {
+        // Nothing allocated. FPU regs are not nearly as common as GPR.
+        return;
+    }
+
+    int numRVRegs = 0;
+    const RiscVReg *order = GetMIPSAllocationOrder(numRVRegs);
+
+    for (int i = 0; i < numRVRegs; i++) {
+        int a = order[i] - F0;
+        int m = ar[a].mipsReg;
+
+        if (ar[a].isDirty) {
+            _assert_(m != MIPS_REG_INVALID);
+            emit_->FS(32, order[i], CTXREG, GetMipsRegOffset(m));
+
+            mr[m].loc = MIPSLoc::MEM;
+            mr[m].reg = (int)INVALID_REG;
+            ar[a].mipsReg = IRREG_INVALID;
+            ar[a].isDirty = false;
+        } else {
+            if (m != IRREG_INVALID) {
+                mr[m].loc = MIPSLoc::MEM;
+                mr[m].reg = (int)INVALID_REG;
+            }
+            ar[a].mipsReg = IRREG_INVALID;
+        }
+    }
+
+    pendingFlush_ = false;
+}
+
+void RiscVRegCacheFPU::DiscardR(IRRegIndex r) {
+    _dbg_assert_(IsValidReg(r));
+    switch (mr[r].loc) {
+    case MIPSLoc::RVREG:
+        _assert_(mr[r].reg != INVALID_REG);
+        if (mr[r].reg != INVALID_REG) {
+            // Note that we DO NOT write it back here. That's the whole point of Discard.
+            ar[mr[r].reg].isDirty = false;
+            ar[mr[r].reg].mipsReg = IRREG_INVALID;
+        }
+        break;
+
+    case MIPSLoc::MEM:
+        // Already there, nothing to do.
+        break;
+
+    default:
+        _assert_(false);
+        break;
+    }
+    mr[r].loc = MIPSLoc::MEM;
+    mr[r].reg = (int)INVALID_REG;
+    mr[r].spillLock = false;
+}
+
+int RiscVRegCacheFPU::GetMipsRegOffset(IRRegIndex r) {
+    _assert_(IsValidReg(r));
+    // These are offsets within the MIPSState structure.
+    // IR gives us an index that is already 32 after the state index (skipping GPRs.)
+    return (32 + r) * 4;
+}
+
+void RiscVRegCacheFPU::SpillLock(IRRegIndex r1, IRRegIndex r2, IRRegIndex r3, IRRegIndex r4) {
+    _dbg_assert_(IsValidReg(r1));
+    _dbg_assert_(r2 == IRREG_INVALID || IsValidReg(r2));
+    _dbg_assert_(r3 == IRREG_INVALID || IsValidReg(r3));
+    _dbg_assert_(r4 == IRREG_INVALID || IsValidReg(r4));
+    mr[r1].spillLock = true;
+    if (r2 != IRREG_INVALID)
+        mr[r2].spillLock = true;
+    if (r3 != IRREG_INVALID)
+        mr[r3].spillLock = true;
+    if (r4 != IRREG_INVALID)
+        mr[r4].spillLock = true;
+    pendingUnlock_ = true;
+}
+
+void RiscVRegCacheFPU::ReleaseSpillLocksAndDiscardTemps() {
+    if (!pendingUnlock_)
+        return;
+
+    for (int i = 0; i < NUM_MIPSFPUREG; i++) {
+        mr[i].spillLock = false;
+    }
+
+    pendingUnlock_ = false;
+}
+
+void RiscVRegCacheFPU::ReleaseSpillLock(IRRegIndex r1, IRRegIndex r2, IRRegIndex r3, IRRegIndex r4) {
+    _dbg_assert_(IsValidReg(r1));
+    _dbg_assert_(r2 == IRREG_INVALID || IsValidReg(r2));
+    _dbg_assert_(r3 == IRREG_INVALID || IsValidReg(r3));
+    _dbg_assert_(r4 == IRREG_INVALID || IsValidReg(r4));
+    mr[r1].spillLock = false;
+    if (r2 != IRREG_INVALID)
+        mr[r2].spillLock = false;
+    if (r3 != IRREG_INVALID)
+        mr[r3].spillLock = false;
+    if (r4 != IRREG_INVALID)
+        mr[r4].spillLock = false;
+}
+
+RiscVReg RiscVRegCacheFPU::R(IRRegIndex mipsReg) {
+    _dbg_assert_(IsValidReg(mipsReg));
+    _dbg_assert_(mr[mipsReg].loc == MIPSLoc::RVREG);
+    if (mr[mipsReg].loc == MIPSLoc::RVREG) {
+        return (RiscVReg)(mr[mipsReg].reg + F0);
+    } else {
+        ERROR_LOG_REPORT(JIT, "Reg %i not in riscv reg", mipsReg);
+        return INVALID_REG;  // BAAAD
+    }
+}
+
+bool RiscVRegCacheFPU::IsValidReg(IRRegIndex r) const {
+    if (r < 0 || r >= NUM_MIPSFPUREG)
+        return false;
+
+    // See MIPSState for these offsets.
+    int index = r + 32;
+
+    // Allow FPU or VFPU regs here.
+    if (index >= 32 && index < 32 + 32 + 128)
+        return true;
+    // Also allow VFPU temps.
+    if (index >= 224 && index < 224 + 16)
+        return true;
+
+    // Nothing else is allowed for the FPU side cache.
+    return false;
+}
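Map4DirtyIn's load heuristic is an interval-overlap test on the two 4-register windows. Restated in C++ (ours):

    // Two half-open windows [a, a+4) and [b, b+4) overlap iff a < b+4 && a+4 > b.
    bool Vec4WindowsOverlap(int rdbase, int rsbase) {
        return rdbase < rsbase + 4 && rdbase + 4 > rsbase;
    }
    // When they overlap, a NOINIT mapping of rdbase..rdbase+3 could destroy source
    // lanes before they are read, so Map4DirtyIn maps the dest as DIRTY (loaded) instead.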
@@ -16,3 +16,91 @@
 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
 
 #pragma once
 
+#include "Common/RiscVEmitter.h"
+#include "Core/MIPS/MIPS.h"
+#include "Core/MIPS/RiscV/RiscVRegCache.h"
+
+struct FPURegStatusRiscV {
+    int mipsReg;  // if -1, no mipsreg attached.
+    bool isDirty;  // Should the register be written back?
+};
+
+struct FPURegStatusMIPS {
+    // Where is this MIPS register?
+    RiscVJitConstants::MIPSLoc loc;
+    // Index from F0.
+    int reg;
+
+    bool spillLock;  // if true, this register cannot be spilled.
+    // If loc == ML_MEM, it's back in its location in the CPU context struct.
+};
+
+namespace MIPSComp {
+struct JitOptions;
+}
+
+class RiscVRegCacheFPU {
+public:
+    RiscVRegCacheFPU(MIPSState *mipsState, MIPSComp::JitOptions *jo);
+    ~RiscVRegCacheFPU() {}
+
+    void Init(RiscVGen::RiscVEmitter *emitter);
+    // TODO: Maybe pass in IR block and start PC for logging/debugging?
+    void Start();
+
+    // Protect the RISC-V register containing a MIPS register from spilling, to ensure that
+    // it's being kept allocated.
+    void SpillLock(IRRegIndex reg, IRRegIndex reg2 = IRREG_INVALID, IRRegIndex reg3 = IRREG_INVALID, IRRegIndex reg4 = IRREG_INVALID);
+    void ReleaseSpillLock(IRRegIndex reg, IRRegIndex reg2 = IRREG_INVALID, IRRegIndex reg3 = IRREG_INVALID, IRRegIndex reg4 = IRREG_INVALID);
+    void ReleaseSpillLocksAndDiscardTemps();
+
+    // Returns a RISC-V register containing the requested MIPS register.
+    RiscVGen::RiscVReg MapReg(IRRegIndex reg, RiscVJitConstants::MIPSMap mapFlags = RiscVJitConstants::MIPSMap::INIT);
+
+    bool IsMapped(IRRegIndex r);
+    bool IsInRAM(IRRegIndex r);
+
+    void MapInIn(IRRegIndex rd, IRRegIndex rs);
+    void MapDirtyIn(IRRegIndex rd, IRRegIndex rs, bool avoidLoad = true);
+    void MapDirtyInIn(IRRegIndex rd, IRRegIndex rs, IRRegIndex rt, bool avoidLoad = true);
+    void Map4Dirty(IRRegIndex rdbase, bool avoidLoad = true);
+    void Map4DirtyIn(IRRegIndex rdbase, IRRegIndex rsbase, bool avoidLoad = true);
+    void Map4DirtyInIn(IRRegIndex rdbase, IRRegIndex rsbase, IRRegIndex rtbase, bool avoidLoad = true);
+    void FlushAll();
+    void FlushR(IRRegIndex r);
+    void DiscardR(IRRegIndex r);
+
+    RiscVGen::RiscVReg R(int preg); // Returns a cached register
+
+private:
+    const RiscVGen::RiscVReg *GetMIPSAllocationOrder(int &count);
+    RiscVGen::RiscVReg AllocateReg();
+    RiscVGen::RiscVReg FindBestToSpill(bool unusedOnly, bool *clobbered);
+    RiscVGen::RiscVReg RiscVRegForFlush(IRRegIndex r);
+    void FlushRiscVReg(RiscVGen::RiscVReg r);
+    int GetMipsRegOffset(IRRegIndex r);
+
+    bool IsValidReg(IRRegIndex r) const;
+
+    void SetupInitialRegs();
+
+    MIPSState *mips_;
+    RiscVGen::RiscVEmitter *emit_ = nullptr;
+    MIPSComp::JitOptions *jo_;
+
+    enum {
+        // On RiscV, each of the 32 registers are full 128-bit. No sharing of components!
+        NUM_RVFPUREG = 32,
+        NUM_MIPSFPUREG = RiscVJitConstants::TOTAL_MAPPABLE_MIPSREGS - 32,
+    };
+
+    FPURegStatusRiscV ar[NUM_RVFPUREG];
+    FPURegStatusMIPS mr[NUM_MIPSFPUREG];
+
+    bool pendingFlush_ = false;
+    bool pendingUnlock_ = false;
+    bool initialReady_ = false;
+    FPURegStatusRiscV arInitial_[NUM_RVFPUREG];
+    FPURegStatusMIPS mrInitial_[NUM_MIPSFPUREG];
+};
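GetMipsRegOffset in the matching .cpp relies on the IR register layout: the first 32 IR indices are GPRs, and each MIPSState slot is 4 bytes, which is also why NUM_MIPSFPUREG subtracts 32. A tiny illustration (ours):

    // IR register space: [0, 32) GPRs, then FPU/VFPU regs. Each slot is 4 bytes.
    int FpuRegOffsetInMipsState(int r) {
        return (32 + r) * 4;  // f0 -> byte offset 128, f1 -> 132, ...
    }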