Merge pull request #7855 from hrydgard/static-alloc

ARM64 jit: Statically allocate a few registers, including SP
This commit is contained in:
Henrik Rydgård 2015-07-11 18:13:47 +02:00
commit 1ba2b1cfae
12 changed files with 358 additions and 95 deletions

View file

@ -46,12 +46,12 @@ bool IsPowerOfTwo(uint64_t x) {
bool IsImmArithmetic(uint64_t input, u32 *val, bool *shift) {
if (input < 4096) {
*val = input;
*shift = false;
if (val) *val = input;
if (shift) *shift = false;
return true;
} else if ((input & 0xFFF000) == input) {
*val = input >> 12;
*shift = true;
if (val) *val = input >> 12;
if (shift) *shift = true;
return true;
}
return false;

View file

@ -1207,6 +1207,8 @@ void RestoreReplacedInstruction(u32 address) {
}
void RestoreReplacedInstructions(u32 startAddr, u32 endAddr) {
if (endAddr == startAddr)
return;
// Need to be in order, or we'll hang.
if (endAddr < startAddr)
std::swap(endAddr, startAddr);

View file

@ -33,6 +33,7 @@ using namespace Arm64Gen;
//static int temp32; // unused?
static const bool enableDebug = false;
static const bool enableDisasm = false;
//static bool enableStatistics = false; //unused?
@ -71,10 +72,10 @@ static const bool enableDebug = false;
extern volatile CoreState coreState;
void ShowPC(u32 sp, void *membase, void *jitbase) {
void ShowPC(u32 downcount, void *membase, void *jitbase) {
static int count = 0;
if (currentMIPS) {
ELOG("ShowPC : %08x Downcount : %08x %d %p %p", currentMIPS->pc, sp, count);
ELOG("ShowPC : %08x Downcount : %08x %d %p %p", currentMIPS->pc, downcount, count, membase, jitbase);
} else {
ELOG("Universe corrupt?");
}
@ -93,8 +94,28 @@ namespace MIPSComp {
using namespace Arm64JitConstants;
void Arm64Jit::GenerateFixedCode() {
void Arm64Jit::GenerateFixedCode(const JitOptions &jo) {
const u8 *start = nullptr;
if (jo.useStaticAlloc) {
saveStaticRegisters = AlignCode16();
STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
gpr.EmitSaveStaticAllocs();
RET();
loadStaticRegisters = AlignCode16();
gpr.EmitLoadStaticAllocs();
LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
RET();
start = saveStaticRegisters;
} else {
saveStaticRegisters = nullptr;
loadStaticRegisters = nullptr;
}
enterCode = AlignCode16();
if (!start)
start = enterCode;
BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
BitSet32 regs_to_save_fp(Arm64Gen::ALL_CALLEE_SAVED_FP);
@ -106,16 +127,16 @@ void Arm64Jit::GenerateFixedCode() {
MOVP2R(CTXREG, mips_);
MOVP2R(JITBASEREG, GetBasePtr());
RestoreDowncount();
LoadStaticRegisters();
MovFromPC(SCRATCH1);
outerLoopPCInSCRATCH1 = GetCodePtr();
MovToPC(SCRATCH1);
outerLoop = GetCodePtr();
SaveDowncount(); // Advance can change the downcount, so must save/restore
SaveStaticRegisters(); // Advance can change the downcount, so must save/restore
RestoreRoundingMode(true);
QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance);
ApplyRoundingMode(true);
RestoreDowncount();
LoadStaticRegisters();
FixupBranch skipToRealDispatch = B(); //skip the sync and compare first time
dispatcherCheckCoreState = GetCodePtr();
@ -164,12 +185,12 @@ void Arm64Jit::GenerateFixedCode() {
BR(SCRATCH1_64);
SetJumpTarget(skipJump);
// No block found, let's jit
SaveDowncount();
// No block found, let's jit. I don't think we actually need to save static regs that are in callee-save regs here but whatever.
SaveStaticRegisters();
RestoreRoundingMode(true);
QuickCallFunction(SCRATCH1_64, (void *)&MIPSComp::JitAt);
ApplyRoundingMode(true);
RestoreDowncount();
LoadStaticRegisters();
B(dispatcherNoCheck); // no point in special casing this
@ -184,20 +205,13 @@ void Arm64Jit::GenerateFixedCode() {
SetJumpTarget(badCoreState);
breakpointBailout = GetCodePtr();
SaveDowncount();
SaveStaticRegisters();
RestoreRoundingMode(true);
fp.ABI_PopRegisters(regs_to_save_fp);
ABI_PopRegisters(regs_to_save);
RET();
if (false) {
std::vector<std::string> lines = DisassembleArm64(enterCode, GetCodePtr() - enterCode);
for (auto s : lines) {
INFO_LOG(JIT, "%s", s.c_str());
}
}
// Generate some integer conversion funcs.
static const RoundingMode roundModes[8] = {ROUND_N, ROUND_P, ROUND_M, ROUND_Z, ROUND_N, ROUND_P, ROUND_M, ROUND_Z,};
@ -214,6 +228,14 @@ void Arm64Jit::GenerateFixedCode() {
RET();
}
// Leave this at the end, add more stuff above.
if (enableDisasm) {
std::vector<std::string> lines = DisassembleArm64(start, GetCodePtr() - start);
for (auto s : lines) {
INFO_LOG(JIT, "%s", s.c_str());
}
}
// Don't forget to zap the instruction cache! This must stay at the end of this function.
FlushIcache();
}

View file

@ -82,10 +82,22 @@ void Arm64Jit::Comp_IType(MIPSOpcode op) {
switch (op >> 26) {
case 8: // same as addiu?
case 9: // R(rt) = R(rs) + simm; break; //addiu
if (simm >= 0) {
CompImmLogic(rs, rt, simm, &ARM64XEmitter::ADD, &ARM64XEmitter::TryADDI2R, &EvalAdd);
} else if (simm < 0) {
CompImmLogic(rs, rt, -simm, &ARM64XEmitter::SUB, &ARM64XEmitter::TrySUBI2R, &EvalSub);
// Special-case for small adjustments of pointerified registers. Commonly for SP but happens for others.
if (rs == rt && gpr.IsMappedAsPointer(rs) && IsImmArithmetic(simm < 0 ? -simm : simm, nullptr, nullptr)) {
ARM64Reg r32 = gpr.R(rs);
gpr.MarkDirty(r32);
ARM64Reg r = EncodeRegTo64(r32);
if (simm > 0) {
ADDI2R(r, r, simm);
} else {
SUBI2R(r, r, -simm);
}
} else {
if (simm >= 0) {
CompImmLogic(rs, rt, simm, &ARM64XEmitter::ADD, &ARM64XEmitter::TryADDI2R, &EvalAdd);
} else if (simm < 0) {
CompImmLogic(rs, rt, -simm, &ARM64XEmitter::SUB, &ARM64XEmitter::TrySUBI2R, &EvalSub);
}
}
break;
@ -377,7 +389,8 @@ void Arm64Jit::CompShiftVar(MIPSOpcode op, Arm64Gen::ShiftType shiftType) {
return;
}
gpr.MapDirtyInIn(rd, rs, rt);
ANDI2R(SCRATCH1, gpr.R(rs), 0x1F, INVALID_REG); // Not sure if ARM64 wraps like this so let's do it for it.
// Not sure if ARM64 wraps like this so let's do it for it. (TODO: According to the ARM ARM, it will indeed mask for us so this is not necessary)
ANDI2R(SCRATCH1, gpr.R(rs), 0x1F, INVALID_REG);
switch (shiftType) {
case ST_LSL: LSLV(gpr.R(rd), gpr.R(rt), SCRATCH1); break;
case ST_LSR: LSRV(gpr.R(rd), gpr.R(rt), SCRATCH1); break;

View file

@ -591,7 +591,7 @@ void Arm64Jit::Comp_Syscall(MIPSOpcode op)
FlushAll();
SaveDowncount();
SaveStaticRegisters();
#ifdef USE_PROFILER
// When profiling, we can't skip CallSyscall, since it times syscalls.
MOVI2R(W0, op.encoding);
@ -608,8 +608,8 @@ void Arm64Jit::Comp_Syscall(MIPSOpcode op)
QuickCallFunction(X1, (void *)&CallSyscall);
}
#endif
LoadStaticRegisters();
ApplyRoundingMode();
RestoreDowncount();
WriteSyscallExit();
js.compiling = false;

View file

@ -1840,6 +1840,8 @@ namespace MIPSComp {
gpr.FlushBeforeCall();
fpr.FlushAll();
// Don't need to SaveStaticRegs here as long as they are all in callee-save regs - this callee won't read them.
bool negSin1 = (imm & 0x10) ? true : false;
fpr.MapRegV(sreg);

View file

@ -68,7 +68,7 @@ Arm64Jit::Arm64Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &js, &jo), f
gpr.SetEmitter(this);
fpr.SetEmitter(this, &fp);
AllocCodeSpace(1024 * 1024 * 16); // 32MB is the absolute max because that's what an ARM branch instruction can reach, backwards and forwards.
GenerateFixedCode();
GenerateFixedCode(jo);
js.startDefaultPrefix = mips_->HasDefaultPrefix();
}
@ -135,7 +135,7 @@ void Arm64Jit::ClearCache() {
ILOG("ARM64Jit: Clearing the cache!");
blocks.Clear();
ClearCodeSpace();
GenerateFixedCode();
GenerateFixedCode(jo);
}
void Arm64Jit::InvalidateCache() {
@ -233,7 +233,8 @@ MIPSOpcode Arm64Jit::GetOffsetInstruction(int offset) {
const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) {
js.cancel = false;
js.blockStart = js.compilerPC = mips_->pc;
js.blockStart = mips_->pc;
js.compilerPC = mips_->pc;
js.lastContinuedPC = 0;
js.initialBlockSize = 0;
js.nextExit = 0;
@ -308,7 +309,7 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) {
char temp[256];
if (logBlocks > 0 && dontLogBlocks == 0) {
ILOG("=============== mips ===============");
ILOG("=============== mips %d ===============", blocks.GetNumBlocks());
for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) {
MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, true);
ILOG("M: %08x %s", cpc, temp);
@ -379,9 +380,11 @@ bool Arm64Jit::ReplaceJalTo(u32 dest) {
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
SaveStaticRegisters();
RestoreRoundingMode();
QuickCallFunction(SCRATCH1_64, (const void *)(entry->replaceFunc));
ApplyRoundingMode();
LoadStaticRegisters();
WriteDownCountR(W0);
}
@ -428,6 +431,7 @@ void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op)
}
} else if (entry->replaceFunc) {
FlushAll();
SaveStaticRegisters();
RestoreRoundingMode();
gpr.SetRegImm(SCRATCH1, GetCompilerPC());
MovToPC(SCRATCH1);
@ -439,9 +443,11 @@ void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op)
if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) {
// Compile the original instruction at this address. We ignore cycles for hooks.
ApplyRoundingMode();
LoadStaticRegisters();
MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true));
} else {
ApplyRoundingMode();
LoadStaticRegisters();
LDR(INDEX_UNSIGNED, W1, CTXREG, MIPS_REG_RA * 4);
WriteDownCountR(W0);
WriteExitDestInR(W1);
@ -456,7 +462,7 @@ void Arm64Jit::Comp_Generic(MIPSOpcode op) {
FlushAll();
MIPSInterpretFunc func = MIPSGetInterpretFunc(op);
if (func) {
SaveDowncount();
SaveStaticRegisters();
// TODO: Perhaps keep the rounding mode for interp?
RestoreRoundingMode();
MOVI2R(SCRATCH1, GetCompilerPC());
@ -464,7 +470,7 @@ void Arm64Jit::Comp_Generic(MIPSOpcode op) {
MOVI2R(W0, op.encoding);
QuickCallFunction(SCRATCH2_64, (void *)func);
ApplyRoundingMode();
RestoreDowncount();
LoadStaticRegisters();
}
const MIPSInfo info = MIPSGetInfo(op);
@ -484,12 +490,21 @@ void Arm64Jit::MovToPC(ARM64Reg r) {
}
// Should not really be necessary except when entering Advance
void Arm64Jit::SaveDowncount() {
STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
void Arm64Jit::SaveStaticRegisters() {
if (jo.useStaticAlloc) {
QuickCallFunction(SCRATCH2_64, saveStaticRegisters);
} else {
// Inline the single operation
STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
}
}
void Arm64Jit::RestoreDowncount() {
LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
void Arm64Jit::LoadStaticRegisters() {
if (jo.useStaticAlloc) {
QuickCallFunction(SCRATCH2_64, loadStaticRegisters);
} else {
LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
}
}
void Arm64Jit::WriteDownCount(int offset) {

View file

@ -177,7 +177,7 @@ public:
void EatPrefix() { js.EatPrefix(); }
private:
void GenerateFixedCode();
void GenerateFixedCode(const JitOptions &jo);
void FlushAll();
void FlushPrefixV();
@ -197,8 +197,8 @@ private:
bool ReplaceJalTo(u32 dest);
void SaveDowncount();
void RestoreDowncount();
void SaveStaticRegisters();
void LoadStaticRegisters();
void WriteExit(u32 destination, int exit_num);
void WriteExitDestInR(Arm64Gen::ARM64Reg Reg);
@ -263,6 +263,9 @@ public:
const u8 *breakpointBailout;
const u8 *saveStaticRegisters;
const u8 *loadStaticRegisters;
// Indexed by FPCR FZ:RN bits for convenience. Uses SCRATCH2.
const u8 *convertS0ToSCRATCH1[8];
};

View file

@ -15,6 +15,7 @@
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "base/logging.h"
#include "Core/MemMap.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
@ -47,17 +48,82 @@ void Arm64RegCache::Start(MIPSAnalyst::AnalysisResults &stats) {
mr[i].reg = INVALID_REG;
mr[i].imm = -1;
mr[i].spillLock = false;
mr[i].isStatic = false;
}
int numStatics;
const StaticAllocation *statics = GetStaticAllocations(numStatics);
for (int i = 0; i < numStatics; i++) {
ar[statics[i].ar].mipsReg = statics[i].mr;
ar[statics[i].ar].pointerified = statics[i].pointerified;
mr[statics[i].mr].loc = ML_ARMREG;
mr[statics[i].mr].reg = statics[i].ar;
mr[statics[i].mr].isStatic = true;
}
}
const ARM64Reg *Arm64RegCache::GetMIPSAllocationOrder(int &count) {
// See register alloc remarks in Arm64Asm.cpp
// TODO: Add static allocation of top MIPS registers like SP
// W19-W22 are most suitable for static allocation. Those that are chosen for static allocation
// should be omitted here and added in GetStaticAllocations.
static const ARM64Reg allocationOrder[] = {
W19, W20, W21, W22, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15,
};
count = sizeof(allocationOrder) / sizeof(const int);
return allocationOrder;
static const ARM64Reg allocationOrderStaticAlloc[] = {
W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15,
};
if (jo_->useStaticAlloc) {
count = ARRAY_SIZE(allocationOrderStaticAlloc);
return allocationOrderStaticAlloc;
} else {
count = ARRAY_SIZE(allocationOrder);
return allocationOrder;
}
}
// Returns the table of MIPS registers that are permanently bound to ARM64 registers and
// stores the number of entries in count.
// When jo_->useStaticAlloc is false the table is empty: count is set to 0 and nullptr is
// returned, so callers must only index the result below count.
const Arm64RegCache::StaticAllocation *Arm64RegCache::GetStaticAllocations(int &count) {
	// Fields are {mips register, arm64 register, pointerified}; an omitted third field
	// is value-initialized to false.
	static const StaticAllocation allocs[] = {
		{MIPS_REG_SP, W19, true},
		{MIPS_REG_V0, W20},
		{MIPS_REG_V1, W22},
		{MIPS_REG_A0, W21},
	};

	if (jo_->useStaticAlloc) {
		count = ARRAY_SIZE(allocs);
		return allocs;
	}

	// An empty table is reported as nullptr: a zero-length array ("none[] = {}") is not
	// valid ISO C++ and only compiles as a compiler extension.
	count = 0;
	return nullptr;
}
void Arm64RegCache::EmitLoadStaticAllocs() {
int count;
const StaticAllocation *allocs = GetStaticAllocations(count);
// TODO: Use LDP when possible.
// This only needs to run once (by Asm) so checks don't need to be fast.
for (int i = 0; i < count; i++) {
int offset = GetMipsRegOffset(allocs[i].mr);
emit_->LDR(INDEX_UNSIGNED, allocs[i].ar, CTXREG, offset);
if (allocs[i].pointerified) {
emit_->MOVK(EncodeRegTo64(allocs[i].ar), ((uint64_t)Memory::base) >> 32, SHIFT_32);
}
}
}
void Arm64RegCache::EmitSaveStaticAllocs() {
int count;
const StaticAllocation *allocs = GetStaticAllocations(count);
// TODO: Use LDP when possible.
// This only needs to run once (by Asm) so checks don't need to be fast.
for (int i = 0; i < count; i++) {
int offset = GetMipsRegOffset(allocs[i].mr);
emit_->STR(INDEX_UNSIGNED, allocs[i].ar, CTXREG, offset);
}
}
void Arm64RegCache::FlushBeforeCall() {
@ -69,22 +135,39 @@ void Arm64RegCache::FlushBeforeCall() {
}
bool Arm64RegCache::IsMapped(MIPSGPReg mipsReg) {
return mr[mipsReg].loc == ML_ARMREG;
return mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM;
}
// Reports whether mipsReg is in the ML_ARMREG state with its ARM64 register flagged as
// pointerified. A register in the ML_ARMREG_IMM state is never reported as a pointer; if
// its ARM64 register is flagged as pointerified anyway, an error is logged. Every other
// location yields false.
bool Arm64RegCache::IsMappedAsPointer(MIPSGPReg mipsReg) {
	const auto &state = mr[mipsReg];
	switch (state.loc) {
	case ML_ARMREG:
		return ar[state.reg].pointerified;
	case ML_ARMREG_IMM:
		if (ar[state.reg].pointerified) {
			ELOG("Really shouldn't be pointerified here");
		}
		return false;
	default:
		return false;
	}
}
// Flags the given ARM64 register as dirty in the register cache's bookkeeping.
// INVALID_REG is rejected with an error log instead of being used as an index into ar[],
// in line with the INVALID_REG guards in SetRegImm and FlushArmReg.
void Arm64RegCache::MarkDirty(ARM64Reg reg) {
	if (reg == INVALID_REG) {
		ELOG("MarkDirty called on invalid register");
		return;
	}
	ar[reg].isDirty = true;
}
// Emits a MOVI2R that loads imm into reg. If reg is INVALID_REG nothing is emitted and an
// error containing the current compiler PC is logged instead.
void Arm64RegCache::SetRegImm(ARM64Reg reg, u64 imm) {
	if (reg != INVALID_REG) {
		// On ARM64, at least Cortex A57, good old MOVT/MOVW (MOVK in 64-bit) is really fast.
		emit_->MOVI2R(reg, imm);
		// Note: the pointerified flag is not reset here (kept disabled):
		// ar[reg].pointerified = false;
	} else {
		ELOG("SetRegImm to invalid register: at %08x", js_->compilerPC);
	}
}
void Arm64RegCache::MapRegTo(ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags) {
if (mr[mipsReg].isStatic) {
ELOG("Cannot MapRegTo static register %d", mipsReg);
return;
}
ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false;
if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) {
if (mipsReg == MIPS_REG_ZERO) {
@ -147,6 +230,8 @@ ARM64Reg Arm64RegCache::FindBestToSpill(bool unusedOnly, bool *clobbered) {
if (ar[reg].mipsReg != MIPS_REG_INVALID && mr[ar[reg].mipsReg].spillLock)
continue;
// As it's in alloc-order, we know it's not static so we don't need to check for that.
// Awesome, a clobbered reg. Let's use it.
if (MIPSAnalyst::IsRegisterClobbered(ar[reg].mipsReg, compilerPC_, UNUSED_LOOKAHEAD_OPS)) {
*clobbered = true;
@ -171,11 +256,41 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) {
ERROR_LOG_REPORT(JIT, "Cannot map HI in Arm64RegCache");
return INVALID_REG;
}
if (mipsReg == MIPS_REG_INVALID) {
ERROR_LOG(JIT, "Cannot map invalid register");
return INVALID_REG;
}
ARM64Reg armReg = mr[mipsReg].reg;
if (mr[mipsReg].isStatic) {
if (armReg == INVALID_REG) {
ERROR_LOG(JIT, "MapReg on statically mapped reg %d failed - armReg got lost", mipsReg);
}
if (mr[mipsReg].loc == ML_IMM) {
// Back into the register, with or without the imm value.
// If noinit, the MAP_DIRTY check below will take care of the rest.
if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) {
SetRegImm(armReg, mr[mipsReg].imm);
mr[mipsReg].loc = ML_ARMREG_IMM;
ar[armReg].pointerified = false;
}
}
// Erasing the imm on dirty (necessary since otherwise we will still think it's ML_ARMREG_IMM and return
// true for IsImm and calculate crazily wrong things). /unknown
if (mapFlags & MAP_DIRTY) {
mr[mipsReg].loc = ML_ARMREG; // As we are dirty, can't keep ARMREG_IMM, we will quickly drift out of sync
ar[armReg].pointerified = false;
ar[armReg].isDirty = true; // Not that it matters
}
return mr[mipsReg].reg;
}
// Let's see if it's already mapped. If so we just need to update the dirty flag.
// We don't need to check for ML_NOINIT because we assume that anyone who maps
// with that flag immediately writes a "known" value to the register.
if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) {
ARM64Reg armReg = mr[mipsReg].reg;
if (ar[armReg].mipsReg != mipsReg) {
ERROR_LOG_REPORT(JIT, "Register mapping out of sync! %i", mipsReg);
}
@ -216,13 +331,12 @@ allocate:
}
if (bestToSpill != INVALID_REG) {
// ERROR_LOG(JIT, "Out of registers at PC %08x - spills register %i.", mips_->pc, bestToSpill);
// TODO: Broken somehow in Dante's Inferno, but most games work. Bad flags in MIPSTables somewhere?
if (clobbered) {
DiscardR(ar[bestToSpill].mipsReg);
} else {
FlushArmReg(bestToSpill);
}
// Now one must be free.
goto allocate;
}
@ -233,18 +347,22 @@ allocate:
Arm64Gen::ARM64Reg Arm64RegCache::MapRegAsPointer(MIPSGPReg reg) {
ARM64Reg retval = INVALID_REG;
if (mr[reg].loc != ML_ARMREG) {
if (mr[reg].loc != ML_ARMREG && mr[reg].loc != ML_ARMREG_IMM) {
retval = MapReg(reg);
} else {
retval = mr[reg].reg;
}
if (mr[reg].loc == ML_ARMREG) {
if (mr[reg].loc == ML_ARMREG || mr[reg].loc == ML_ARMREG_IMM) {
// If there was an imm attached, discard it.
mr[reg].loc = ML_ARMREG;
int a = DecodeReg(mr[reg].reg);
if (!ar[a].pointerified) {
emit_->MOVK(ARM64Reg(X0 + a), ((uint64_t)Memory::base) >> 32, SHIFT_32);
ar[a].pointerified = true;
}
} else {
ERROR_LOG(JIT, "MapRegAsPointer : MapReg failed to allocate a register?");
ELOG("MapRegAsPointer : MapReg failed to allocate a register?");
}
return retval;
}
@ -299,6 +417,10 @@ void Arm64RegCache::MapDirtyDirtyInIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs
}
void Arm64RegCache::FlushArmReg(ARM64Reg r) {
if (r == INVALID_REG) {
ELOG("FlushArmReg called on invalid register %d", r);
return;
}
if (ar[r].mipsReg == MIPS_REG_INVALID) {
// Nothing to do, reg not mapped.
if (ar[r].isDirty) {
@ -306,21 +428,23 @@ void Arm64RegCache::FlushArmReg(ARM64Reg r) {
}
return;
}
if (ar[r].mipsReg != MIPS_REG_INVALID) {
auto &mreg = mr[ar[r].mipsReg];
if (mreg.loc == ML_ARMREG_IMM || ar[r].mipsReg == MIPS_REG_ZERO) {
// We know its immediate value, no need to STR now.
mreg.loc = ML_IMM;
mreg.reg = INVALID_REG;
} else {
// Note: may be a 64-bit reg.
ARM64Reg storeReg = ARM64RegForFlush(ar[r].mipsReg);
if (storeReg != INVALID_REG)
emit_->STR(INDEX_UNSIGNED, storeReg, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
mreg.loc = ML_MEM;
mreg.reg = INVALID_REG;
mreg.imm = 0;
}
if (mr[ar[r].mipsReg].isStatic) {
ELOG("Cannot FlushArmReg a statically mapped register");
return;
}
auto &mreg = mr[ar[r].mipsReg];
if (mreg.loc == ML_ARMREG_IMM || ar[r].mipsReg == MIPS_REG_ZERO) {
// We know its immediate value, no need to STR now.
mreg.loc = ML_IMM;
mreg.reg = INVALID_REG;
} else {
// Note: may be a 64-bit reg.
ARM64Reg storeReg = ARM64RegForFlush(ar[r].mipsReg);
if (storeReg != INVALID_REG)
emit_->STR(INDEX_UNSIGNED, storeReg, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
mreg.loc = ML_MEM;
mreg.reg = INVALID_REG;
mreg.imm = 0;
}
ar[r].isDirty = false;
ar[r].mipsReg = MIPS_REG_INVALID;
@ -328,6 +452,17 @@ void Arm64RegCache::FlushArmReg(ARM64Reg r) {
}
void Arm64RegCache::DiscardR(MIPSGPReg mipsReg) {
if (mr[mipsReg].isStatic) {
// Simply do nothing unless it's an ArmregImm, in which case we just switch it over to armreg, losing the value.
if (mr[mipsReg].loc == ML_ARMREG_IMM || mr[mipsReg].loc == ML_IMM) {
ARM64Reg armReg = mr[mipsReg].reg;
// Ignore the imm value, restore sanity
mr[mipsReg].loc = ML_ARMREG;
ar[armReg].pointerified = false;
ar[armReg].isDirty = false;
}
return;
}
const RegMIPSLoc prevLoc = mr[mipsReg].loc;
if (prevLoc == ML_ARMREG || prevLoc == ML_ARMREG_IMM) {
ARM64Reg armReg = mr[mipsReg].reg;
@ -349,6 +484,9 @@ void Arm64RegCache::DiscardR(MIPSGPReg mipsReg) {
}
ARM64Reg Arm64RegCache::ARM64RegForFlush(MIPSGPReg r) {
if (mr[r].isStatic)
return INVALID_REG; // No flushing needed
switch (mr[r].loc) {
case ML_IMM:
if (r == MIPS_REG_ZERO) {
@ -392,6 +530,11 @@ ARM64Reg Arm64RegCache::ARM64RegForFlush(MIPSGPReg r) {
}
void Arm64RegCache::FlushR(MIPSGPReg r) {
if (mr[r].isStatic) {
ELOG("Cannot flush static reg %d", r);
return;
}
switch (mr[r].loc) {
case ML_IMM:
// IMM is always "dirty".
@ -445,6 +588,7 @@ void Arm64RegCache::FlushAll() {
// Flush it first so we don't get it confused.
FlushR(MIPS_REG_LO);
// Try to flush in pairs when possible.
// 1 because MIPS_REG_ZERO isn't flushable anyway.
// 31 because 30 and 31 are the last possible pair - MIPS_REG_FPCOND, etc. are too far away.
for (int i = 1; i < 31; i++) {
@ -454,10 +598,10 @@ void Arm64RegCache::FlushAll() {
ARM64Reg areg2 = ARM64RegForFlush(mreg2);
// If either one doesn't have a reg yet, try flushing imms to scratch regs.
if (areg1 == INVALID_REG && IsImm(mreg1)) {
if (areg1 == INVALID_REG && IsPureImm(mreg1) && !mr[i].isStatic) {
areg1 = SCRATCH1;
}
if (areg2 == INVALID_REG && IsImm(mreg2)) {
if (areg2 == INVALID_REG && IsPureImm(mreg2) && !mr[i + 1].isStatic) {
areg2 = SCRATCH2;
}
@ -483,12 +627,41 @@ void Arm64RegCache::FlushAll() {
// Final pass to grab any that were left behind.
for (int i = 0; i < NUM_MIPSREG; i++) {
MIPSGPReg mipsReg = MIPSGPReg(i);
if (mr[i].isStatic) {
Arm64Gen::ARM64Reg armReg = mr[i].reg;
if (mr[i].loc == ML_IMM) {
SetRegImm(mr[i].reg, mr[i].imm);
mr[i].loc = ML_ARMREG_IMM;
ar[armReg].pointerified = false;
} else if (mr[i].loc == ML_ARMREG_IMM) {
if (ar[armReg].pointerified) {
ELOG("ML_ARMREG_IMM but pointerified. Wrong.");
ar[armReg].pointerified = false;
}
}
if (i != MIPS_REG_ZERO && mr[i].reg == INVALID_REG) {
ELOG("ARM reg of static %i is invalid", i);
continue;
}
continue;
}
FlushR(mipsReg);
}
int count = 0;
const StaticAllocation *allocs = GetStaticAllocations(count);
for (int i = 0; i < count; i++) {
if (allocs[i].pointerified && !ar[allocs[i].ar].pointerified) {
// Re-pointerify
emit_->MOVK(EncodeRegTo64(allocs[i].ar), ((uint64_t)Memory::base) >> 32, SHIFT_32);
} else {
// If this register got pointerified on the way, mark it as not, so that after save/reload (like in an interpreter fallback), it won't be regarded as such, as it simply won't be.
ar[allocs[i].ar].pointerified = false;
}
}
// Sanity check
for (int i = 0; i < NUM_ARMREG; i++) {
if (ar[i].mipsReg != MIPS_REG_INVALID) {
if (ar[i].mipsReg != MIPS_REG_INVALID && mr[ar[i].mipsReg].isStatic == false) {
ERROR_LOG_REPORT(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
}
}
@ -504,23 +677,43 @@ void Arm64RegCache::SetImm(MIPSGPReg r, u64 immVal) {
// Already have that value, let's keep it in the reg.
return;
}
// Zap existing value if cached in a reg
if (mr[r].reg != INVALID_REG) {
ar[mr[r].reg].mipsReg = MIPS_REG_INVALID;
ar[mr[r].reg].isDirty = false;
if (mr[r].isStatic) {
mr[r].loc = ML_IMM;
mr[r].imm = immVal;
Arm64Gen::ARM64Reg armReg = mr[r].reg;
ar[armReg].pointerified = false;
// We do not change reg to INVALID_REG for obvious reasons..
} else {
// Zap existing value if cached in a reg
if (mr[r].reg != INVALID_REG) {
ar[mr[r].reg].mipsReg = MIPS_REG_INVALID;
ar[mr[r].reg].isDirty = false;
ar[mr[r].reg].pointerified = false;
}
mr[r].loc = ML_IMM;
mr[r].imm = immVal;
mr[r].reg = INVALID_REG;
}
mr[r].loc = ML_IMM;
mr[r].imm = immVal;
mr[r].reg = INVALID_REG;
}
bool Arm64RegCache::IsImm(MIPSGPReg r) const {
if (r == MIPS_REG_ZERO) return true;
return mr[r].loc == ML_IMM || mr[r].loc == ML_ARMREG_IMM;
if (r == MIPS_REG_ZERO)
return true;
else
return mr[r].loc == ML_IMM || mr[r].loc == ML_ARMREG_IMM;
}
// Returns true when r is MIPS_REG_ZERO or when its location is exactly ML_IMM.
// Unlike IsImm, the ML_ARMREG_IMM state does not qualify.
bool Arm64RegCache::IsPureImm(MIPSGPReg r) const {
	return r == MIPS_REG_ZERO || mr[r].loc == ML_IMM;
}
u64 Arm64RegCache::GetImm(MIPSGPReg r) const {
if (r == MIPS_REG_ZERO) return 0;
if (r == MIPS_REG_ZERO)
return 0;
if (mr[r].loc != ML_IMM && mr[r].loc != ML_ARMREG_IMM) {
ERROR_LOG_REPORT(JIT, "Trying to get imm from non-imm register %i", r);
}
@ -554,12 +747,14 @@ void Arm64RegCache::SpillLock(MIPSGPReg r1, MIPSGPReg r2, MIPSGPReg r3, MIPSGPRe
void Arm64RegCache::ReleaseSpillLocks() {
for (int i = 0; i < NUM_MIPSREG; i++) {
mr[i].spillLock = false;
if (!mr[i].isStatic)
mr[i].spillLock = false;
}
}
void Arm64RegCache::ReleaseSpillLock(MIPSGPReg reg) {
mr[reg].spillLock = false;
if (!mr[reg].isStatic)
mr[reg].spillLock = false;
}
ARM64Reg Arm64RegCache::R(MIPSGPReg mipsReg) {

View file

@ -56,19 +56,11 @@ enum {
MAP_NOINIT = 2 | MAP_DIRTY,
};
}
// R1 to R6: mapped MIPS regs
// R8 = flags (maybe we could do better here?)
// R9 = code pointers
// R10 = MIPS context
// R11 = base pointer
// R14 = scratch (actually LR)
} // namespace
typedef int MIPSReg;
struct RegARM {
struct RegARM64 {
MIPSGPReg mipsReg; // if -1, no mipsreg attached.
bool isDirty; // Should the register be written back?
bool pointerified; // Has used movk to move the memory base into the top part of the reg. Note - still usable as 32-bit reg!
@ -81,6 +73,7 @@ struct RegMIPS {
u64 imm;
Arm64Gen::ARM64Reg reg; // reg index
bool spillLock; // if true, this register cannot be spilled.
bool isStatic; // if true, this register will not be written back to ram by the regcache
// If loc == ML_MEM, it's back in its location in the CPU context struct.
};
@ -105,6 +98,7 @@ public:
void SetImm(MIPSGPReg reg, u64 immVal);
bool IsImm(MIPSGPReg reg) const;
bool IsPureImm(MIPSGPReg reg) const;
u64 GetImm(MIPSGPReg reg) const;
// Optimally set a register to an imm value (possibly using another register.)
void SetRegImm(Arm64Gen::ARM64Reg reg, u64 imm);
@ -116,6 +110,7 @@ public:
bool IsMapped(MIPSGPReg reg);
bool IsMappedAsPointer(MIPSGPReg reg);
void MarkDirty(Arm64Gen::ARM64Reg reg);
void MapIn(MIPSGPReg rs);
void MapInIn(MIPSGPReg rd, MIPSGPReg rs);
void MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs, bool avoidLoad = true);
@ -123,9 +118,9 @@ public:
void MapDirtyDirtyIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, bool avoidLoad = true);
void MapDirtyDirtyInIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad = true);
void FlushArmReg(Arm64Gen::ARM64Reg r);
void FlushR(MIPSGPReg r);
void FlushBeforeCall();
void FlushAll();
void FlushR(MIPSGPReg r);
void DiscardR(MIPSGPReg r);
Arm64Gen::ARM64Reg R(MIPSGPReg preg); // Returns a cached register, while checking that it's NOT mapped as a pointer
@ -138,12 +133,22 @@ public:
int GetMipsRegOffset(MIPSGPReg r);
// Call these when leaving/entering the JIT
void EmitLoadStaticAllocs();
void EmitSaveStaticAllocs();
private:
// One fixed binding of a MIPS register to an ARM64 register, as returned by GetStaticAllocations.
struct StaticAllocation {
// MIPS register that is statically allocated.
MIPSGPReg mr;
// ARM64 register the MIPS register is bound to.
Arm64Gen::ARM64Reg ar;
// Whether the ARM64 register should carry the pointerified flag (upper bits holding the memory base).
bool pointerified;
};
const StaticAllocation *GetStaticAllocations(int &count);
const Arm64Gen::ARM64Reg *GetMIPSAllocationOrder(int &count);
void MapRegTo(Arm64Gen::ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags);
Arm64Gen::ARM64Reg FindBestToSpill(bool unusedOnly, bool *clobbered);
Arm64Gen::ARM64Reg ARM64RegForFlush(MIPSGPReg r);
MIPSState *mips_;
Arm64Gen::ARM64XEmitter *emit_;
MIPSComp::JitState *js_;
@ -155,6 +160,6 @@ private:
NUM_MIPSREG = Arm64JitConstants::TOTAL_MAPPABLE_MIPSREGS,
};
RegARM ar[NUM_ARMREG];
RegARM64 ar[NUM_ARMREG];
RegMIPS mr[NUM_MIPSREG];
};

View file

@ -45,5 +45,10 @@ namespace MIPSComp {
continueBranches = false;
continueJumps = false;
continueMaxInstructions = 300;
useStaticAlloc = false;
#ifdef ARM64
useStaticAlloc = true;
#endif
}
}

View file

@ -194,6 +194,7 @@ namespace MIPSComp {
bool downcountInRegister;
// ARM64 only
bool useASIMDVFPU;
bool useStaticAlloc;
// Common
bool enableBlocklink;