Merge pull request #17800 from unknownbrackets/riscv-jit
More RISC-V jit ops
This commit is contained in: b93275bb35
12 changed files with 572 additions and 137 deletions
@@ -85,7 +85,8 @@ void IRJit::Compile(u32 em_address) {
if (block_num != -1) {
IRBlock *b = blocks_.GetBlock(block_num);
// Okay, let's link and finalize the block now.
b->Finalize(block_num);
int cookie = b->GetTargetOffset() < 0 ? block_num : b->GetTargetOffset();
b->Finalize(cookie);
if (b->IsValid()) {
// Success, we're done.
return;

@@ -128,13 +129,13 @@ bool IRJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32
b->SetOriginalSize(mipsBytes);
if (preload) {
// Hash, then only update page stats, don't link yet.
b->UpdateHash();
blocks_.FinalizeBlock(block_num, true);
} else {
// Overwrites the first instruction, and also updates stats.
// TODO: Should we always hash? Then we can reuse blocks.
blocks_.FinalizeBlock(block_num);
b->UpdateHash();
}
if (!CompileTargetBlock(b, block_num, preload))
return false;
// Overwrites the first instruction, and also updates stats.
blocks_.FinalizeBlock(block_num, preload);

return true;
}

@@ -264,7 +265,8 @@ void IRJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {

void IRBlockCache::Clear() {
for (int i = 0; i < (int)blocks_.size(); ++i) {
blocks_[i].Destroy(i);
int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
blocks_[i].Destroy(cookie);
}
blocks_.clear();
byPage_.clear();

@@ -283,7 +285,8 @@ void IRBlockCache::InvalidateICache(u32 address, u32 length) {
for (int i : blocksInPage) {
if (blocks_[i].OverlapsRange(address, length)) {
// Not removing from the page, hopefully doesn't build up with small recompiles.
blocks_[i].Destroy(i);
int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
blocks_[i].Destroy(cookie);
}
}
}

@@ -291,7 +294,8 @@ void IRBlockCache::InvalidateICache(u32 address, u32 length) {

void IRBlockCache::FinalizeBlock(int i, bool preload) {
if (!preload) {
blocks_[i].Finalize(i);
int cookie = blocks_[i].GetTargetOffset() < 0 ? i : blocks_[i].GetTargetOffset();
blocks_[i].Finalize(cookie);
}

u32 startAddr, size;

@@ -331,13 +335,30 @@ int IRBlockCache::FindPreloadBlock(u32 em_address) {
return -1;
}

int IRBlockCache::FindByCookie(int cookie) {
if (blocks_.empty())
return -1;
// TODO: Maybe a flag to determine target offset mode?
if (blocks_[0].GetTargetOffset() < 0)
return cookie;

for (int i = 0; i < GetNumBlocks(); ++i) {
int offset = blocks_[i].GetTargetOffset();
if (offset == cookie)
return i;
}

return -1;
}

std::vector<u32> IRBlockCache::SaveAndClearEmuHackOps() {
std::vector<u32> result;
result.resize(blocks_.size());

for (int number = 0; number < (int)blocks_.size(); ++number) {
IRBlock &b = blocks_[number];
if (b.IsValid() && b.RestoreOriginalFirstOp(number)) {
int cookie = b.GetTargetOffset() < 0 ? number : b.GetTargetOffset();
if (b.IsValid() && b.RestoreOriginalFirstOp(cookie)) {
result[number] = number;
} else {
result[number] = 0;

@@ -357,7 +378,8 @@ void IRBlockCache::RestoreSavedEmuHackOps(std::vector<u32> saved) {
IRBlock &b = blocks_[number];
// Only if we restored it, write it back.
if (b.IsValid() && saved[number] != 0 && b.HasOriginalFirstOp()) {
b.Finalize(number);
int cookie = b.GetTargetOffset() < 0 ? number : b.GetTargetOffset();
b.Finalize(cookie);
}
}
}

@@ -441,8 +463,8 @@ bool IRBlock::HasOriginalFirstOp() const {
return Memory::ReadUnchecked_U32(origAddr_) == origFirstOpcode_.encoding;
}

bool IRBlock::RestoreOriginalFirstOp(int number) {
const u32 emuhack = MIPS_EMUHACK_OPCODE | number;
bool IRBlock::RestoreOriginalFirstOp(int cookie) {
const u32 emuhack = MIPS_EMUHACK_OPCODE | cookie;
if (Memory::ReadUnchecked_U32(origAddr_) == emuhack) {
Memory::Write_Opcode_JIT(origAddr_, origFirstOpcode_);
return true;

@@ -450,19 +472,19 @@ bool IRBlock::RestoreOriginalFirstOp(int number) {
return false;
}

void IRBlock::Finalize(int number) {
void IRBlock::Finalize(int cookie) {
// Check it wasn't invalidated, in case this is after preload.
// TODO: Allow reusing blocks when the code matches hash_ again, instead.
if (origAddr_) {
origFirstOpcode_ = Memory::Read_Opcode_JIT(origAddr_);
MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | number);
MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | cookie);
Memory::Write_Opcode_JIT(origAddr_, opcode);
}
}

void IRBlock::Destroy(int number) {
void IRBlock::Destroy(int cookie) {
if (origAddr_) {
MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | number);
MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | cookie);
if (Memory::ReadUnchecked_U32(origAddr_) == opcode.encoding)
Memory::Write_Opcode_JIT(origAddr_, origFirstOpcode_);

@@ -496,7 +518,7 @@ bool IRBlock::OverlapsRange(u32 addr, u32 size) const {
}

MIPSOpcode IRJit::GetOriginalOp(MIPSOpcode op) {
IRBlock *b = blocks_.GetBlock(op.encoding & 0xFFFFFF);
IRBlock *b = blocks_.GetBlock(blocks_.FindByCookie(op.encoding & 0xFFFFFF));
if (b) {
return b->GetOriginalFirstOp();
}
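The hunks above switch the value embedded in a block's emuhack opcode from the raw block number to a "cookie", which is the block's native target offset when one is available. Roughly, the idea is as follows (a simplified C++ sketch for illustration; the exact MIPS_EMUHACK_OPCODE value and these helper names are assumptions, not code from the patch):

#include <cstdint>

uint32_t MakeEmuhack(uint32_t cookie) {
	// The first MIPS word of a compiled block is replaced with an emuhack
	// opcode carrying a 24-bit payload. Previously that payload was the
	// block number; with this change it can be the block's JIT code offset.
	const uint32_t kEmuhackOpcode = 0x68000000u;  // assumed value for the sketch
	return kEmuhackOpcode | (cookie & 0x00FFFFFFu);
}

uint32_t CookieFromEmuhack(uint32_t op) {
	// GetOriginalOp() now translates this payload back to a block index via
	// IRBlockCache::FindByCookie() instead of treating it as an index directly.
	return op & 0x00FFFFFFu;
}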
@@ -38,15 +38,16 @@ namespace MIPSComp {
// TODO : Use arena allocators. For now let's just malloc.
class IRBlock {
public:
IRBlock() : instr_(nullptr), numInstructions_(0), origAddr_(0), origSize_(0) {}
IRBlock(u32 emAddr) : instr_(nullptr), numInstructions_(0), origAddr_(emAddr), origSize_(0) {}
IRBlock() {}
IRBlock(u32 emAddr) : origAddr_(emAddr) {}
IRBlock(IRBlock &&b) {
instr_ = b.instr_;
numInstructions_ = b.numInstructions_;
hash_ = b.hash_;
origAddr_ = b.origAddr_;
origSize_ = b.origSize_;
origFirstOpcode_ = b.origFirstOpcode_;
hash_ = b.hash_;
targetOffset_ = b.targetOffset_;
numInstructions_ = b.numInstructions_;
b.instr_ = nullptr;
}

@@ -71,6 +72,12 @@ public:
void SetOriginalSize(u32 size) {
origSize_ = size;
}
void SetTargetOffset(int offset) {
targetOffset_ = offset;
}
int GetTargetOffset() const {
return targetOffset_;
}
void UpdateHash() {
hash_ = CalculateHash();
}

@@ -90,12 +97,13 @@ public:
private:
u64 CalculateHash() const;

IRInst *instr_;
u16 numInstructions_;
u32 origAddr_;
u32 origSize_;
IRInst *instr_ = nullptr;
u64 hash_ = 0;
u32 origAddr_ = 0;
u32 origSize_ = 0;
MIPSOpcode origFirstOpcode_ = MIPSOpcode(0x68FFFFFF);
int targetOffset_ = -1;
u16 numInstructions_ = 0;
};

class IRBlockCache : public JitBlockCacheDebugInterface {

@@ -118,6 +126,7 @@ public:
}

int FindPreloadBlock(u32 em_address);
int FindByCookie(int cookie);

std::vector<u32> SaveAndClearEmuHackOps();
void RestoreSavedEmuHackOps(std::vector<u32> saved);

@@ -172,6 +181,7 @@ public:

protected:
virtual bool CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes, bool preload);
virtual bool CompileTargetBlock(IRBlock *block, int block_num, bool preload) { return true; }

JitOptions jo;
@@ -112,7 +112,7 @@ void RiscVJit::GenerateFixedCode(const JitOptions &jo) {
static constexpr RiscVReg regs_to_save[]{ R_RA, X8, X9, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27 };
// TODO: Maybe we shouldn't regalloc all of these? Is it worth it?
static constexpr RiscVReg regs_to_save_fp[]{ F8, F9, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27 };
int saveSize = 8 * (int)(ARRAY_SIZE(regs_to_save) + ARRAY_SIZE(regs_to_save_fp));
int saveSize = (XLEN / 8) * (int)(ARRAY_SIZE(regs_to_save) + ARRAY_SIZE(regs_to_save_fp));
if (saveSize & 0xF)
saveSize += 8;
_assert_msg_((saveSize & 0xF) == 0, "Stack must be kept aligned");

@@ -120,18 +120,18 @@ void RiscVJit::GenerateFixedCode(const JitOptions &jo) {
ADDI(R_SP, R_SP, -saveSize);
for (RiscVReg r : regs_to_save) {
SD(r, R_SP, saveOffset);
saveOffset += 8;
saveOffset += XLEN / 8;
}
for (RiscVReg r : regs_to_save_fp) {
FS(64, r, R_SP, saveOffset);
saveOffset += 8;
saveOffset += XLEN / 8;
}
_assert_(saveOffset <= saveSize);

// Fixed registers, these are always kept when in Jit context.
LI(MEMBASEREG, Memory::base, SCRATCH1);
LI(CTXREG, mips_, SCRATCH1);
LI(JITBASEREG, blockStartAddrs_, SCRATCH1);
LI(JITBASEREG, GetBasePtr(), SCRATCH1);

LoadStaticRegisters();
MovFromPC(SCRATCH1);

@@ -183,35 +183,11 @@ void RiscVJit::GenerateFixedCode(const JitOptions &jo) {
// We're in other words comparing to the top 8 bits of MIPS_EMUHACK_OPCODE by subtracting.
ADDI(SCRATCH2, SCRATCH2, -(MIPS_EMUHACK_OPCODE >> 24));
FixupBranch needsCompile = BNE(SCRATCH2, R_ZERO);
// Use a wall to mask by 0x00FFFFFF and extract the block number.
// Use a wall to mask by 0x00FFFFFF and extract the block jit offset.
SLLI(SCRATCH1, SCRATCH1, XLEN - 24);
// But actually, we want * 8, so skip shifting back just a bit.
_assert_msg_(sizeof(blockStartAddrs_[0]) == 8, "RiscVAsm currently assumes pointers are 64-bit");
SRLI(SCRATCH1, SCRATCH1, XLEN - 24 - 3);
if (enableDebug) {
// Let's do some extra validation of the block number in debug mode for testing.

LI(SCRATCH2, MAX_ALLOWED_JIT_BLOCKS * 8);
FixupBranch highBlockNum = BGEU(SCRATCH1, SCRATCH2);
ADD(SCRATCH1, JITBASEREG, SCRATCH1);
// TODO: Consider replacing the block nums after all, just trying to use IR block cache.
LD(SCRATCH1, SCRATCH1, 0);
LI(SCRATCH2, 2);
FixupBranch invalidBlockNum = BEQ(SCRATCH1, R_ZERO);
JR(SCRATCH1);

SetJumpTarget(highBlockNum);
LI(SCRATCH2, 1);
SetJumpTarget(invalidBlockNum);

MV(X10, SCRATCH2);
QuickCallFunction(&ShowBlockError);
} else {
ADD(SCRATCH1, JITBASEREG, SCRATCH1);
// TODO: Consider replacing the block nums after all, just trying to use IR block cache.
LD(SCRATCH1, SCRATCH1, 0);
JR(SCRATCH1);
}
SRLI(SCRATCH1, SCRATCH1, XLEN - 24);
ADD(SCRATCH1, JITBASEREG, SCRATCH1);
JR(SCRATCH1);
SetJumpTarget(needsCompile);

// No block found, let's jit. We don't need to save static regs, they're all callee saved.

@@ -238,17 +214,16 @@ void RiscVJit::GenerateFixedCode(const JitOptions &jo) {
saveOffset = 0;
for (RiscVReg r : regs_to_save) {
LD(r, R_SP, saveOffset);
saveOffset += 8;
saveOffset += XLEN / 8;
}
for (RiscVReg r : regs_to_save_fp) {
FL(64, r, R_SP, saveOffset);
saveOffset += 8;
saveOffset += XLEN / 8;
}
ADDI(R_SP, R_SP, saveSize);

RET();

// TODO
crashHandler_ = GetCodePtr();
LI(SCRATCH1, &coreState, SCRATCH2);
LI(SCRATCH2, CORE_RUNTIME_ERROR);
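The dispatcher hunk above drops the per-block pointer table: the 24-bit emuhack payload is now a byte offset into the JIT code space, so the emitted code can jump straight to GetBasePtr() + offset instead of loading a pointer from blockStartAddrs_. A rough C++ equivalent of what the SLLI/SRLI "wall" plus the final ADD/JR compute (illustrative only; kEmuhackTopByte and the function name are assumptions):

#include <cstdint>

const uint8_t *DispatchTarget(uint32_t mipsWord, const uint8_t *jitBase) {
	const uint32_t kEmuhackTopByte = 0x68;     // assumed top byte of MIPS_EMUHACK_OPCODE
	if ((mipsWord >> 24) != kEmuhackTopByte)
		return nullptr;                        // not an emuhack: fall through and compile
	// SLLI by XLEN-24 then SRLI by XLEN-24 is just a mask of the low 24 bits.
	uint32_t offset = mipsWord & 0x00FFFFFFu;  // block's offset into the JIT code space
	return jitBase + offset;                   // ADD JITBASEREG, offset; then JR
}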
@@ -254,7 +254,13 @@ void RiscVJit::CompIR_Bits(IRInst inst) {
break;

case IROp::Clz:
CompIR_Generic(inst);
if (cpu_info.RiscV_Zbb) {
gpr.MapDirtyIn(inst.dest, inst.src1, MapType::AVOID_LOAD_MARK_NORM32);
// This even sets to 32 when zero, perfect.
CLZW(gpr.R(inst.dest), gpr.R(inst.src1));
} else {
CompIR_Generic(inst);
}
break;

default:

@@ -640,10 +646,53 @@ void RiscVJit::CompIR_Mult(IRInst inst) {
void RiscVJit::CompIR_Div(IRInst inst) {
CONDITIONAL_DISABLE;

RiscVReg numReg, denomReg;
switch (inst.op) {
case IROp::Div:
gpr.MapDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2, MapType::AVOID_LOAD_MARK_NORM32);
// We have to do this because of the divide by zero and overflow checks below.
NormalizeSrc12(inst, &numReg, &denomReg, SCRATCH1, SCRATCH2, true);
DIVW(gpr.R(IRREG_LO), numReg, denomReg);
REMW(gpr.R(IRREG_HI), numReg, denomReg);

// Now some tweaks for divide by zero and overflow.
{
// Start with divide by zero, remainder is fine.
FixupBranch skipNonZero = BNE(denomReg, R_ZERO);
FixupBranch keepNegOne = BGE(numReg, R_ZERO);
LI(gpr.R(IRREG_LO), 1);
SetJumpTarget(keepNegOne);
SetJumpTarget(skipNonZero);

// For overflow, RISC-V sets LO right, but remainder to zero.
// Cheating a bit by using R_RA as a temp...
LI(R_RA, (int32_t)0x80000000);
FixupBranch notMostNegative = BNE(numReg, R_RA);
LI(R_RA, -1);
FixupBranch notNegativeOne = BNE(denomReg, R_RA);
LI(gpr.R(IRREG_HI), -1);
SetJumpTarget(notNegativeOne);
SetJumpTarget(notMostNegative);
}
break;

case IROp::DivU:
CompIR_Generic(inst);
gpr.MapDirtyDirtyInIn(IRREG_LO, IRREG_HI, inst.src1, inst.src2, MapType::AVOID_LOAD_MARK_NORM32);
// We have to do this because of the divide by zero check below.
NormalizeSrc12(inst, &numReg, &denomReg, SCRATCH1, SCRATCH2, true);
DIVUW(gpr.R(IRREG_LO), numReg, denomReg);
REMUW(gpr.R(IRREG_HI), numReg, denomReg);

// On divide by zero, everything is correct already except the 0xFFFF case.
{
FixupBranch skipNonZero = BNE(denomReg, R_ZERO);
// Luckily, we don't need SCRATCH2/denomReg anymore.
LI(SCRATCH2, 0xFFFF);
FixupBranch keepNegOne = BLTU(SCRATCH2, numReg);
MV(gpr.R(IRREG_LO), SCRATCH2);
SetJumpTarget(keepNegOne);
SetJumpTarget(skipNonZero);
}
break;

default:
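The Div/DivU cases above first emit DIVW/REMW (or DIVUW/REMUW) and then patch the corner cases, because RISC-V and the PSP's MIPS disagree on divide-by-zero and signed-overflow results. My reading of the semantics the fixups aim for, as a hedged C++ sketch (not code from the emulator; names are illustrative):

#include <cstdint>

void PspSignedDiv(int32_t num, int32_t denom, int32_t &lo, int32_t &hi) {
	if (denom == 0) {
		// RISC-V DIVW already yields -1; only the negative-numerator case is
		// patched to 1. REMW already leaves the numerator for HI.
		lo = num >= 0 ? -1 : 1;
		hi = num;
	} else if (num == INT32_MIN && denom == -1) {
		// Overflow: DIVW's quotient is already right, but REMW gives 0, so the
		// emitted fixup forces HI to -1.
		lo = INT32_MIN;
		hi = -1;
	} else {
		lo = num / denom;
		hi = num % denom;
	}
}
// DivU is analogous: on a zero divisor LO stays all-ones, unless the numerator
// fits in 16 bits, in which case the fixup sets LO to 0xFFFF.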
@@ -110,18 +110,68 @@ void RiscVJit::CompIR_FArith(IRInst inst) {

void RiscVJit::CompIR_FCondAssign(IRInst inst) {
CONDITIONAL_DISABLE;

switch (inst.op) {
case IROp::FMin:
case IROp::FMax:
// TODO: These are tricky, have to handle order correctly.
CompIR_Generic(inst);
break;

default:
if (inst.op != IROp::FMin && inst.op != IROp::FMax)
INVALIDOP;
break;
bool maxCondition = inst.op == IROp::FMax;

// FMin and FMax are used by VFPU and handle NAN/INF as just a larger exponent.
fpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2);
FCLASS(32, SCRATCH1, fpr.R(inst.src1));
FCLASS(32, SCRATCH2, fpr.R(inst.src2));

// If either side is a NAN, it needs to participate in the comparison.
OR(SCRATCH1, SCRATCH1, SCRATCH2);
// NAN is either 0x100 or 0x200.
ANDI(SCRATCH1, SCRATCH1, 0x300);
FixupBranch useNormalCond = BEQ(SCRATCH1, R_ZERO);

// Time to use bits... classify won't help because it ignores -NAN.
FMV(FMv::X, FMv::W, SCRATCH1, fpr.R(inst.src1));
FMV(FMv::X, FMv::W, SCRATCH2, fpr.R(inst.src2));

// If both are negative, we flip the comparison (not two's compliment.)
// We cheat and use RA...
AND(R_RA, SCRATCH1, SCRATCH2);
SRLIW(R_RA, R_RA, 31);

if (cpu_info.RiscV_Zbb) {
FixupBranch swapCompare = BNE(R_RA, R_ZERO);
if (maxCondition)
MAX(SCRATCH1, SCRATCH1, SCRATCH2);
else
MIN(SCRATCH1, SCRATCH1, SCRATCH2);
FixupBranch skipSwapCompare = J();
SetJumpTarget(swapCompare);
if (maxCondition)
MIN(SCRATCH1, SCRATCH1, SCRATCH2);
else
MAX(SCRATCH1, SCRATCH1, SCRATCH2);
SetJumpTarget(skipSwapCompare);
} else {
RiscVReg isSrc1LowerReg = gpr.GetAndLockTempR();
gpr.ReleaseSpillLocksAndDiscardTemps();

SLT(isSrc1LowerReg, SCRATCH1, SCRATCH2);
// Flip the flag (to reverse the min/max) based on if both were negative.
XOR(isSrc1LowerReg, isSrc1LowerReg, R_RA);
FixupBranch useSrc1;
if (maxCondition)
useSrc1 = BEQ(isSrc1LowerReg, R_ZERO);
else
useSrc1 = BNE(isSrc1LowerReg, R_ZERO);
MV(SCRATCH1, SCRATCH2);
SetJumpTarget(useSrc1);
}

FMV(FMv::W, FMv::X, fpr.R(inst.dest), SCRATCH1);
FixupBranch finish = J();

SetJumpTarget(useNormalCond);
if (maxCondition)
FMAX(32, fpr.R(inst.dest), fpr.R(inst.src1), fpr.R(inst.src2));
else
FMIN(32, fpr.R(inst.dest), fpr.R(inst.src1), fpr.R(inst.src2));
SetJumpTarget(finish);
}

void RiscVJit::CompIR_FAssign(IRInst inst) {

@@ -220,12 +270,209 @@ void RiscVJit::CompIR_FSat(IRInst inst) {
void RiscVJit::CompIR_FCompare(IRInst inst) {
CONDITIONAL_DISABLE;

constexpr IRRegIndex IRREG_VFPUL_CC = IRREG_VFPU_CTRL_BASE + VFPU_CTRL_CC;

switch (inst.op) {
case IROp::FCmp:
switch (inst.dest) {
case IRFpCompareMode::False:
gpr.SetImm(IRREG_FPCOND, 0);
break;

case IRFpCompareMode::EitherUnordered:
fpr.MapInIn(inst.src1, inst.src2);
gpr.MapReg(IRREG_FPCOND, MIPSMap::NOINIT | MIPSMap::MARK_NORM32);
FCLASS(32, SCRATCH1, fpr.R(inst.src1));
FCLASS(32, SCRATCH2, fpr.R(inst.src2));
OR(SCRATCH1, SCRATCH1, SCRATCH2);
// NAN is 0x100 or 0x200.
ANDI(SCRATCH1, SCRATCH1, 0x300);
SNEZ(gpr.R(IRREG_FPCOND), SCRATCH1);
break;

case IRFpCompareMode::EqualOrdered:
fpr.MapInIn(inst.src1, inst.src2);
gpr.MapReg(IRREG_FPCOND, MIPSMap::NOINIT | MIPSMap::MARK_NORM32);
FEQ(32, gpr.R(IRREG_FPCOND), fpr.R(inst.src1), fpr.R(inst.src2));
break;

case IRFpCompareMode::EqualUnordered:
fpr.MapInIn(inst.src1, inst.src2);
gpr.MapReg(IRREG_FPCOND, MIPSMap::NOINIT | MIPSMap::MARK_NORM32);
FEQ(32, gpr.R(IRREG_FPCOND), fpr.R(inst.src1), fpr.R(inst.src2));

// Now let's just OR in the unordered check.
FCLASS(32, SCRATCH1, fpr.R(inst.src1));
FCLASS(32, SCRATCH2, fpr.R(inst.src2));
OR(SCRATCH1, SCRATCH1, SCRATCH2);
// NAN is 0x100 or 0x200.
ANDI(SCRATCH1, SCRATCH1, 0x300);
SNEZ(SCRATCH1, SCRATCH1);
OR(gpr.R(IRREG_FPCOND), gpr.R(IRREG_FPCOND), SCRATCH1);
break;

case IRFpCompareMode::LessEqualOrdered:
fpr.MapInIn(inst.src1, inst.src2);
gpr.MapReg(IRREG_FPCOND, MIPSMap::NOINIT | MIPSMap::MARK_NORM32);
FLE(32, gpr.R(IRREG_FPCOND), fpr.R(inst.src1), fpr.R(inst.src2));
break;

case IRFpCompareMode::LessEqualUnordered:
fpr.MapInIn(inst.src1, inst.src2);
gpr.MapReg(IRREG_FPCOND, MIPSMap::NOINIT | MIPSMap::MARK_NORM32);
FLT(32, gpr.R(IRREG_FPCOND), fpr.R(inst.src2), fpr.R(inst.src1));
SEQZ(gpr.R(IRREG_FPCOND), gpr.R(IRREG_FPCOND));
break;

case IRFpCompareMode::LessOrdered:
fpr.MapInIn(inst.src1, inst.src2);
gpr.MapReg(IRREG_FPCOND, MIPSMap::NOINIT | MIPSMap::MARK_NORM32);
FLT(32, gpr.R(IRREG_FPCOND), fpr.R(inst.src1), fpr.R(inst.src2));
break;

case IRFpCompareMode::LessUnordered:
fpr.MapInIn(inst.src1, inst.src2);
gpr.MapReg(IRREG_FPCOND, MIPSMap::NOINIT | MIPSMap::MARK_NORM32);
FLE(32, gpr.R(IRREG_FPCOND), fpr.R(inst.src2), fpr.R(inst.src1));
SEQZ(gpr.R(IRREG_FPCOND), gpr.R(IRREG_FPCOND));
break;
}
break;

case IROp::FCmovVfpuCC:
gpr.MapReg(IRREG_VFPUL_CC);
fpr.MapDirtyIn(inst.dest, inst.src1, false);
if ((inst.src2 & 0xF) == 0) {
ANDI(SCRATCH1, gpr.R(IRREG_VFPUL_CC), 1);
} else if (cpu_info.RiscV_Zbs) {
BEXTI(SCRATCH1, gpr.R(IRREG_VFPUL_CC), inst.src2 & 0xF);
} else {
SRLI(SCRATCH1, gpr.R(IRREG_VFPUL_CC), inst.src2 & 0xF);
ANDI(SCRATCH1, SCRATCH1, 1);
}
if ((inst.src2 >> 7) & 1) {
FixupBranch skip = BEQ(SCRATCH1, R_ZERO);
FMV(32, fpr.R(inst.dest), fpr.R(inst.src1));
SetJumpTarget(skip);
} else {
FixupBranch skip = BNE(SCRATCH1, R_ZERO);
FMV(32, fpr.R(inst.dest), fpr.R(inst.src1));
SetJumpTarget(skip);
}
break;

case IROp::FCmpVfpuBit:
gpr.MapReg(IRREG_VFPUL_CC, MIPSMap::DIRTY);

switch (VCondition(inst.dest & 0xF)) {
case VC_EQ:
fpr.MapInIn(inst.src1, inst.src2);
FEQ(32, SCRATCH1, fpr.R(inst.src1), fpr.R(inst.src2));
break;
case VC_NE:
fpr.MapInIn(inst.src1, inst.src2);
// We could almost negate FEQ, except NAN != NAN.
// Anything != NAN is false and NAN != NAN is within that, so we only check one side.
FCLASS(32, SCRATCH2, fpr.R(inst.src2));
// NAN is 0x100 or 0x200.
ANDI(SCRATCH2, SCRATCH2, 0x300);
SNEZ(SCRATCH2, SCRATCH2);

FEQ(32, SCRATCH1, fpr.R(inst.src1), fpr.R(inst.src2));
SEQZ(SCRATCH1, SCRATCH1);
// Just OR in whether that side was a NAN so it's always not equal.
OR(SCRATCH1, SCRATCH1, SCRATCH2);
break;
case VC_LT:
fpr.MapInIn(inst.src1, inst.src2);
FLT(32, SCRATCH1, fpr.R(inst.src1), fpr.R(inst.src2));
break;
case VC_LE:
fpr.MapInIn(inst.src1, inst.src2);
FLE(32, SCRATCH1, fpr.R(inst.src1), fpr.R(inst.src2));
break;
case VC_GT:
fpr.MapInIn(inst.src1, inst.src2);
FLT(32, SCRATCH1, fpr.R(inst.src2), fpr.R(inst.src1));
break;
case VC_GE:
fpr.MapInIn(inst.src1, inst.src2);
FLE(32, SCRATCH1, fpr.R(inst.src2), fpr.R(inst.src1));
break;
case VC_EZ:
case VC_NZ:
fpr.MapReg(inst.src1);
// Zero is either 0x10 or 0x08.
FCLASS(32, SCRATCH1, gpr.R(inst.src1));
ANDI(SCRATCH1, SCRATCH1, 0x18);
if ((inst.dest & 4) == 0)
SNEZ(SCRATCH1, SCRATCH1);
else
SEQZ(SCRATCH1, SCRATCH1);
break;
case VC_EN:
case VC_NN:
fpr.MapReg(inst.src1);
// NAN is either 0x100 or 0x200.
FCLASS(32, SCRATCH1, gpr.R(inst.src1));
ANDI(SCRATCH1, SCRATCH1, 0x300);
if ((inst.dest & 4) == 0)
SNEZ(SCRATCH1, SCRATCH1);
else
SEQZ(SCRATCH1, SCRATCH1);
break;
case VC_EI:
case VC_NI:
fpr.MapReg(inst.src1);
// Infinity is either 0x80 or 0x01.
FCLASS(32, SCRATCH1, gpr.R(inst.src1));
ANDI(SCRATCH1, SCRATCH1, 0x81);
if ((inst.dest & 4) == 0)
SNEZ(SCRATCH1, SCRATCH1);
else
SEQZ(SCRATCH1, SCRATCH1);
break;
case VC_ES:
case VC_NS:
fpr.MapReg(inst.src1);
// Infinity is either 0x80 or 0x01, NAN is either 0x100 or 0x200.
FCLASS(32, SCRATCH1, gpr.R(inst.src1));
ANDI(SCRATCH1, SCRATCH1, 0x381);
if ((inst.dest & 4) == 0)
SNEZ(SCRATCH1, SCRATCH1);
else
SEQZ(SCRATCH1, SCRATCH1);
break;
case VC_TR:
LI(SCRATCH1, 1);
break;
case VC_FL:
LI(SCRATCH1, 0);
break;
}

ANDI(gpr.R(IRREG_VFPUL_CC), gpr.R(IRREG_VFPUL_CC), ~(1 << (inst.dest >> 4)));
if ((inst.dest >> 4) != 0)
SLLI(SCRATCH1, SCRATCH1, inst.dest >> 4);
OR(gpr.R(IRREG_VFPUL_CC), gpr.R(IRREG_VFPUL_CC), SCRATCH1);
break;

case IROp::FCmpVfpuAggregate:
CompIR_Generic(inst);
gpr.MapReg(IRREG_VFPUL_CC, MIPSMap::DIRTY);
ANDI(SCRATCH1, gpr.R(IRREG_VFPUL_CC), inst.dest);
// This is the "any bit", easy.
SNEZ(SCRATCH2, SCRATCH1);
// To compare to inst.dest for "all", let's simply subtract it and compare to zero.
ADDI(SCRATCH1, SCRATCH1, -inst.dest);
SEQZ(SCRATCH1, SCRATCH1);
// Now we combine those together.
SLLI(SCRATCH1, SCRATCH1, 5);
SLLI(SCRATCH2, SCRATCH2, 4);
OR(SCRATCH1, SCRATCH1, SCRATCH2);

// Reject those any/all bits and replace them with our own.
ANDI(gpr.R(IRREG_VFPUL_CC), gpr.R(IRREG_VFPUL_CC), ~0x30);
OR(gpr.R(IRREG_VFPUL_CC), gpr.R(IRREG_VFPUL_CC), SCRATCH1);
break;

default:

@@ -259,13 +506,70 @@ void RiscVJit::CompIR_RoundingMode(IRInst inst) {
void RiscVJit::CompIR_FSpecial(IRInst inst) {
CONDITIONAL_DISABLE;

#ifdef __riscv_float_abi_soft
#error Currently hard float is required.
#endif

auto callFuncF_F = [&](float (*func)(float)){
gpr.FlushBeforeCall();
fpr.FlushBeforeCall();
// It might be in a non-volatile register.
if (fpr.IsMapped(inst.src1)) {
FMV(32, F10, fpr.R(inst.src1));
} else {
int offset = offsetof(MIPSState, f) + inst.src1 * 4;
FL(32, F10, CTXREG, offset);
}
QuickCallFunction(func);

fpr.MapReg(inst.dest, MIPSMap::NOINIT);
// If it's already F10, we're done - MapReg doesn't actually overwrite the reg in that case.
if (fpr.R(inst.dest) != F10) {
FMV(32, fpr.R(inst.dest), F10);
}
};

switch (inst.op) {
case IROp::FSin:
callFuncF_F(&vfpu_sin);
break;

case IROp::FCos:
callFuncF_F(&vfpu_cos);
break;

case IROp::FRSqrt:
fpr.MapDirtyIn(inst.dest, inst.src1);
FSQRT(32, fpr.R(inst.dest), fpr.R(inst.src1));

// Ugh, we can't really avoid a temp here. Probably not worth a permanent one.
LI(SCRATCH1, 1.0f);
{
// TODO: Smarter allocation of a temp reg?
RiscVReg tempReg = fpr.R(inst.dest) == F31 ? F30 : F31;
fpr.FlushRiscVReg(tempReg);
FMV(FMv::W, FMv::X, tempReg, SCRATCH1);
FDIV(32, fpr.R(inst.dest), tempReg, fpr.R(inst.dest));
}
break;

case IROp::FRecip:
fpr.MapDirtyIn(inst.dest, inst.src1);
LI(SCRATCH1, 1.0f);
if (inst.dest != inst.src1) {
// This is the easy case.
FMV(FMv::W, FMv::X, fpr.R(inst.dest), SCRATCH1);
FDIV(32, fpr.R(inst.dest), fpr.R(inst.dest), fpr.R(inst.src1));
} else {
RiscVReg tempReg = fpr.R(inst.dest) == F31 ? F30 : F31;
fpr.FlushRiscVReg(tempReg);
FMV(FMv::W, FMv::X, tempReg, SCRATCH1);
FDIV(32, fpr.R(inst.dest), tempReg, fpr.R(inst.src1));
}
break;

case IROp::FAsin:
CompIR_Generic(inst);
callFuncF_F(&vfpu_asin);
break;

default:
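The FMin/FMax path above falls back to comparing raw float bits whenever a NaN is involved, since the VFPU treats NaN/Inf as values with a merely larger exponent rather than applying IEEE min/max rules. In scalar terms the trick looks roughly like this (a hedged sketch of my reading of the emitted code, not emulator code; the function name is illustrative):

#include <algorithm>
#include <cstdint>
#include <cstring>

float VfpuMaxByBits(float a, float b) {
	int32_t ia, ib;
	memcpy(&ia, &a, sizeof(ia));
	memcpy(&ib, &b, sizeof(ib));
	// Float bit patterns are sign-magnitude, not two's complement, so when
	// both values are negative the integer ordering has to be reversed.
	bool bothNegative = ia < 0 && ib < 0;
	int32_t picked = bothNegative ? std::min(ia, ib) : std::max(ia, ib);
	float out;
	memcpy(&out, &picked, sizeof(out));
	return out;
}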
@@ -20,7 +20,6 @@
#include "Core/HLE/HLE.h"
#include "Core/HLE/ReplaceTables.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/RiscV/RiscVJit.h"
#include "Core/MIPS/RiscV/RiscVRegCache.h"

@@ -101,7 +100,7 @@ void RiscVJit::CompIR_Transfer(IRInst inst) {
break;

case IROp::SetCtrlVFPUFReg:
gpr.MapReg(IRREG_VFPU_CTRL_BASE + inst.dest, MIPSMap::NOINIT);
gpr.MapReg(IRREG_VFPU_CTRL_BASE + inst.dest, MIPSMap::NOINIT | MIPSMap::MARK_NORM32);
fpr.MapReg(inst.src1);
FMV(FMv::X, FMv::W, gpr.R(IRREG_VFPU_CTRL_BASE + inst.dest), fpr.R(inst.src1));
break;

@@ -167,7 +166,7 @@ void RiscVJit::CompIR_Transfer(IRInst inst) {
break;

case IROp::FMovToGPR:
gpr.MapReg(inst.dest, MIPSMap::NOINIT);
gpr.MapReg(inst.dest, MIPSMap::NOINIT | MIPSMap::MARK_NORM32);
fpr.MapReg(inst.src1);
FMV(FMv::X, FMv::W, gpr.R(inst.dest), fpr.R(inst.src1));
break;

@@ -182,15 +181,6 @@ void RiscVJit::CompIR_System(IRInst inst) {
CONDITIONAL_DISABLE;

switch (inst.op) {
case IROp::Interpret:
// IR protects us against this being a branching instruction (well, hopefully.)
FlushAll();
SaveStaticRegisters();
LI(X10, (int32_t)inst.constant);
QuickCallFunction((const u8 *)MIPSGetInterpretFunc(MIPSOpcode(inst.constant)));
LoadStaticRegisters();
break;

case IROp::Syscall:
FlushAll();
SaveStaticRegisters();
@@ -241,10 +241,25 @@ void RiscVJit::CompIR_VecPack(IRInst inst) {
case IROp::Vec4Pack31To8:
case IROp::Vec4Pack32To8:
case IROp::Vec2Pack31To16:
case IROp::Vec2Pack32To16:
CompIR_Generic(inst);
break;

case IROp::Vec2Pack32To16:
fpr.MapDirtyInIn(inst.dest, inst.src1, inst.src1 + 1);
FMV(FMv::X, FMv::W, SCRATCH1, fpr.R(inst.src1));
FMV(FMv::X, FMv::W, SCRATCH2, fpr.R(inst.src1 + 1));
// Keep in mind, this was sign-extended, so we have to zero the upper.
SLLI(SCRATCH1, SCRATCH1, XLEN - 32);
// Now we just set (SCRATCH2 & 0xFFFF0000) | SCRATCH1.
SRLI(SCRATCH1, SCRATCH1, XLEN - 16);
// Use a wall to mask. We can ignore the upper 32 here.
SRLI(SCRATCH2, SCRATCH2, 16);
SLLI(SCRATCH2, SCRATCH2, 16);
OR(SCRATCH1, SCRATCH1, SCRATCH2);
// Okay, to the floating point register.
FMV(FMv::W, FMv::X, fpr.R(inst.dest), SCRATCH1);
break;

default:
INVALIDOP;
break;
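Vec2Pack32To16 above now has a native path: the top 16 bits of each of the two source lanes are packed into one 32-bit result, lane 0 into the low half and lane 1 into the high half. As a scalar C++ sketch (illustrative only; the function name is not from the patch):

#include <cstdint>

uint32_t Pack32To16x2(uint32_t lane0, uint32_t lane1) {
	// The shift "walls" in the emitted code implement exactly this:
	// lane0's top half, cleared of sign-extension, OR'd with lane1's top half kept in place.
	return (lane0 >> 16) | (lane1 & 0xFFFF0000u);
}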
@@ -16,7 +16,9 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "Common/StringUtils.h"
#include "Common/TimeUtil.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/RiscV/RiscVJit.h"
#include "Core/MIPS/RiscV/RiscVRegCache.h"
#include "Common/Profiler/Profiler.h"

@@ -26,19 +28,58 @@ namespace MIPSComp {
using namespace RiscVGen;
using namespace RiscVJitConstants;

static constexpr bool enableDebug = false;

static std::map<uint8_t, int> debugSeenNotCompiledIR;
static std::map<const char *, int> debugSeenNotCompiled;
double lastDebugLog = 0.0;

static void LogDebugNotCompiled() {
if (!enableDebug)
return;

double now = time_now_d();
if (now < lastDebugLog + 1.0)
return;
lastDebugLog = now;

int worstIROp = -1;
int worstIRVal = 0;
for (auto it : debugSeenNotCompiledIR) {
if (it.second > worstIRVal) {
worstIRVal = it.second;
worstIROp = it.first;
}
}
debugSeenNotCompiledIR.clear();

const char *worstName = nullptr;
int worstVal = 0;
for (auto it : debugSeenNotCompiled) {
if (it.second > worstVal) {
worstVal = it.second;
worstName = it.first;
}
}
debugSeenNotCompiled.clear();

if (worstIROp != -1)
WARN_LOG(JIT, "Most not compiled IR op: %s (%d)", GetIRMeta((IROp)worstIROp)->name, worstIRVal);
if (worstName != nullptr)
WARN_LOG(JIT, "Most not compiled op: %s (%d)", worstName, worstVal);
}

RiscVJit::RiscVJit(MIPSState *mipsState) : IRJit(mipsState), gpr(mipsState, &jo), fpr(mipsState, &jo) {
// Automatically disable incompatible options.
if (((intptr_t)Memory::base & 0x00000000FFFFFFFFUL) != 0) {
jo.enablePointerify = false;
}

// Since we store the offset, this is as big as it can be.
// We could shift off one bit to double it, would need to change RiscVAsm.
AllocCodeSpace(1024 * 1024 * 16);
SetAutoCompress(true);

// TODO: Consider replacing block num method form IRJit - this is 2MB.
blockStartAddrs_ = new const u8 *[MAX_ALLOWED_JIT_BLOCKS];
memset(blockStartAddrs_, 0, sizeof(blockStartAddrs_[0]) * MAX_ALLOWED_JIT_BLOCKS);

gpr.Init(this);
fpr.Init(this);

@@ -46,42 +87,35 @@ RiscVJit::RiscVJit(MIPSState *mipsState) : IRJit(mipsState), gpr(mipsState, &jo)
}

RiscVJit::~RiscVJit() {
delete [] blockStartAddrs_;
}

void RiscVJit::RunLoopUntil(u64 globalticks) {
if constexpr (enableDebug) {
LogDebugNotCompiled();
}

PROFILE_THIS_SCOPE("jit");
((void (*)())enterDispatcher_)();
}

bool RiscVJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes, bool preload) {
// Check that we're not full (we allow less blocks than IR itself.)
if (blocks_.GetNumBlocks() >= MAX_ALLOWED_JIT_BLOCKS - 1)
static void NoBlockExits() {
_assert_msg_(false, "Never exited block, invalid IR?");
}

bool RiscVJit::CompileTargetBlock(IRBlock *block, int block_num, bool preload) {
if (GetSpaceLeft() < 0x800)
return false;

if (!IRJit::CompileBlock(em_address, instructions, mipsBytes, preload))
return false;
// Don't worry, the codespace isn't large enough to overflow offsets.
block->SetTargetOffset((int)GetOffset(GetCodePointer()));

// TODO: Block linking, checked entries and such.

int block_num;
if (preload) {
block_num = blocks_.GetBlockNumberFromStartAddress(em_address);
} else {
u32 first_inst = Memory::ReadUnchecked_U32(em_address);
_assert_msg_(MIPS_IS_RUNBLOCK(first_inst), "Should've written an emuhack");

block_num = first_inst & MIPS_EMUHACK_VALUE_MASK;
}

_assert_msg_(block_num >= 0 && block_num < MAX_ALLOWED_JIT_BLOCKS, "Bad block num");
_assert_msg_(blockStartAddrs_[block_num] == nullptr, "Block %d reused before clear", block_num);
blockStartAddrs_[block_num] = GetCodePointer();

gpr.Start();
fpr.Start();

for (const IRInst &inst : instructions) {
for (int i = 0; i < block->GetNumInstructions(); ++i) {
const IRInst &inst = block->GetInstructions()[i];
CompileIRInst(inst);

if (jo.Disabled(JitDisable::REGALLOC_GPR)) {

@@ -97,9 +131,11 @@ bool RiscVJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u
}
}

// Note: a properly constructed block should never get here.
// TODO: Need to do more than just this? Call a func to set an exception?
QuickJ(R_RA, crashHandler_);
// We should've written an exit above. If we didn't, bad things will happen.
if (enableDebug) {
QuickCallFunction(&NoBlockExits);
QuickJ(R_RA, crashHandler_);
}

FlushIcache();

@@ -351,6 +387,9 @@ void RiscVJit::CompileIRInst(IRInst inst) {
break;

case IROp::Interpret:
CompIR_Interpret(inst);
break;

case IROp::Syscall:
case IROp::CallReplacement:
case IROp::Break:

@@ -397,6 +436,9 @@ static u32 DoIRInst(uint64_t value) {
IRInst inst;
memcpy(&inst, &value, sizeof(inst));

if constexpr (enableDebug)
debugSeenNotCompiledIR[(uint8_t)inst.op]++;

return IRInterpret(currentMIPS, &inst, 1);
}

@@ -425,6 +467,26 @@ void RiscVJit::CompIR_Generic(IRInst inst) {
}
}

static void DebugInterpretHit(const char *name) {
if (enableDebug)
debugSeenNotCompiled[name]++;
}

void RiscVJit::CompIR_Interpret(IRInst inst) {
MIPSOpcode op(inst.constant);

// IR protects us against this being a branching instruction (well, hopefully.)
FlushAll();
SaveStaticRegisters();
if (enableDebug) {
LI(X10, MIPSGetName(op));
QuickCallFunction(&DebugInterpretHit);
}
LI(X10, (int32_t)inst.constant);
QuickCallFunction((const u8 *)MIPSGetInterpretFunc(op));
LoadStaticRegisters();
}

void RiscVJit::FlushAll() {
gpr.FlushAll();
fpr.FlushAll();

@@ -449,17 +511,14 @@ bool RiscVJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
} else if (!IsInSpace(ptr)) {
return false;
} else {
uintptr_t uptr = (uintptr_t)ptr;
int offset = (int)GetOffset(ptr);
int block_num = -1;
for (int i = 0; i < MAX_ALLOWED_JIT_BLOCKS; ++i) {
uintptr_t blockptr = (uintptr_t)blockStartAddrs_[i];
// Out of allocated blocks.
if (uptr == 0)
break;

if (uptr >= blockptr)
for (int i = 0; i < blocks_.GetNumBlocks(); ++i) {
const auto &b = blocks_.GetBlock(i);
// We allocate linearly.
if (b->GetTargetOffset() <= offset)
block_num = i;
if (uptr < blockptr)
if (b->GetTargetOffset() > offset)
break;
}

@@ -501,8 +560,6 @@ void RiscVJit::ClearCache() {

ClearCodeSpace(jitStartOffset_);
FlushIcacheSection(region + jitStartOffset_, region + region_size - jitStartOffset_);

memset(blockStartAddrs_, 0, sizeof(blockStartAddrs_[0]) * MAX_ALLOWED_JIT_BLOCKS);
}

void RiscVJit::RestoreRoundingMode(bool force) {
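DescribeCodePtr above now locates a block by its target offset instead of scanning the old blockStartAddrs_ table, relying on blocks being emitted linearly so that offsets increase with the block index. A minimal sketch of that lookup idea (simplified names, not the actual PPSSPP code):

#include <vector>

int FindBlockByNativeOffset(const std::vector<int> &targetOffsets, int offset) {
	int found = -1;
	for (size_t i = 0; i < targetOffsets.size(); ++i) {
		if (targetOffsets[i] <= offset)
			found = (int)i;   // last block starting at or before the pointer
		else
			break;            // offsets are monotonically increasing, so stop here
	}
	return found;
}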
@@ -46,7 +46,7 @@ public:
// TODO: GetBlockCacheDebugInterface, block linking?

protected:
bool CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes, bool preload) override;
bool CompileTargetBlock(IRBlock *block, int block_num, bool preload) override;

void CompileIRInst(IRInst inst);

@@ -87,6 +87,7 @@ private:
void CompIR_FStore(IRInst inst);
void CompIR_Generic(IRInst inst);
void CompIR_HiLo(IRInst inst);
void CompIR_Interpret(IRInst inst);
void CompIR_Load(IRInst inst);
void CompIR_LoadShift(IRInst inst);
void CompIR_Logic(IRInst inst);

@@ -116,8 +117,6 @@ private:
RiscVRegCache gpr;
RiscVRegCacheFPU fpr;

static constexpr int MAX_ALLOWED_JIT_BLOCKS = 262144;

const u8 *enterDispatcher_ = nullptr;

const u8 *outerLoop_ = nullptr;

@@ -135,7 +134,6 @@ private:
const u8 *crashHandler_ = nullptr;

int jitStartOffset_ = 0;
const u8 **blockStartAddrs_ = nullptr;
};

} // namespace MIPSComp
@@ -140,6 +140,7 @@ public:
void FlushBeforeCall();
void FlushAll();
void FlushR(IRRegIndex r);
void FlushRiscVReg(RiscVGen::RiscVReg r);
void DiscardR(IRRegIndex r);

RiscVGen::RiscVReg GetAndLockTempR();

@@ -163,7 +164,6 @@ private:
RiscVGen::RiscVReg AllocateReg();
RiscVGen::RiscVReg FindBestToSpill(bool unusedOnly, bool *clobbered);
RiscVGen::RiscVReg RiscVRegForFlush(IRRegIndex r);
void FlushRiscVReg(RiscVGen::RiscVReg r);
void SetRegImm(RiscVGen::RiscVReg reg, u64 imm);
void AddMemBase(RiscVGen::RiscVReg reg);
int GetMipsRegOffset(IRRegIndex r);
@@ -27,9 +27,6 @@
using namespace RiscVGen;
using namespace RiscVJitConstants;

using namespace RiscVGen;
using namespace RiscVJitConstants;

RiscVRegCacheFPU::RiscVRegCacheFPU(MIPSState *mipsState, MIPSComp::JitOptions *jo)
: mips_(mipsState), jo_(jo) {}

@@ -279,6 +276,24 @@ RiscVReg RiscVRegCacheFPU::RiscVRegForFlush(IRRegIndex r) {
}
}

void RiscVRegCacheFPU::FlushBeforeCall() {
// Note: don't set this false at the end, since we don't flush everything.
if (!pendingFlush_) {
return;
}

// These registers are not preserved by function calls.
for (int i = 0; i <= 7; ++i) {
FlushRiscVReg(RiscVReg(F0 + i));
}
for (int i = 10; i <= 17; ++i) {
FlushRiscVReg(RiscVReg(F0 + i));
}
for (int i = 28; i <= 31; ++i) {
FlushRiscVReg(RiscVReg(F0 + i));
}
}

void RiscVRegCacheFPU::FlushAll() {
if (!pendingFlush_) {
// Nothing allocated. FPU regs are not nearly as common as GPR.
@@ -64,11 +64,12 @@ public:
void MapInIn(IRRegIndex rd, IRRegIndex rs);
void MapDirtyIn(IRRegIndex rd, IRRegIndex rs, bool avoidLoad = true);
void MapDirtyInIn(IRRegIndex rd, IRRegIndex rs, IRRegIndex rt, bool avoidLoad = true);
void Map4Dirty(IRRegIndex rdbase, bool avoidLoad = true);
void Map4DirtyIn(IRRegIndex rdbase, IRRegIndex rsbase, bool avoidLoad = true);
void Map4DirtyInIn(IRRegIndex rdbase, IRRegIndex rsbase, IRRegIndex rtbase, bool avoidLoad = true);
void FlushBeforeCall();
void FlushAll();
void FlushR(IRRegIndex r);
void FlushRiscVReg(RiscVGen::RiscVReg r);
void DiscardR(IRRegIndex r);

RiscVGen::RiscVReg R(int preg); // Returns a cached register

@@ -78,7 +79,6 @@ private:
RiscVGen::RiscVReg AllocateReg();
RiscVGen::RiscVReg FindBestToSpill(bool unusedOnly, bool *clobbered);
RiscVGen::RiscVReg RiscVRegForFlush(IRRegIndex r);
void FlushRiscVReg(RiscVGen::RiscVReg r);
int GetMipsRegOffset(IRRegIndex r);

bool IsValidReg(IRRegIndex r) const;