From a0bf9347960847c9cd1a5e30cad328bf6b969af9 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 5 Jul 2015 20:50:03 +0200 Subject: [PATCH 01/11] ARM64: Some work on static allocation. Close to working, cube.elf runs 700 blocks but then hangs (?!) --- Core/MIPS/ARM64/Arm64Asm.cpp | 44 +++++--- Core/MIPS/ARM64/Arm64CompBranch.cpp | 4 +- Core/MIPS/ARM64/Arm64Jit.cpp | 16 +-- Core/MIPS/ARM64/Arm64Jit.h | 9 +- Core/MIPS/ARM64/Arm64RegCache.cpp | 160 ++++++++++++++++++++++------ Core/MIPS/ARM64/Arm64RegCache.h | 28 ++--- Core/MIPS/JitCommon/JitState.h | 1 + 7 files changed, 189 insertions(+), 73 deletions(-) diff --git a/Core/MIPS/ARM64/Arm64Asm.cpp b/Core/MIPS/ARM64/Arm64Asm.cpp index bc97d3c7cb..67d1a957e5 100644 --- a/Core/MIPS/ARM64/Arm64Asm.cpp +++ b/Core/MIPS/ARM64/Arm64Asm.cpp @@ -71,10 +71,10 @@ static const bool enableDebug = false; extern volatile CoreState coreState; -void ShowPC(u32 sp, void *membase, void *jitbase) { +void ShowPC(u32 downcount, void *membase, void *jitbase) { static int count = 0; if (currentMIPS) { - ELOG("ShowPC : %08x Downcount : %08x %d %p %p", currentMIPS->pc, sp, count); + ELOG("ShowPC : %08x Downcount : %08x %d %p %p", currentMIPS->pc, downcount, count, membase, jitbase); } else { ELOG("Universe corrupt?"); } @@ -93,7 +93,18 @@ namespace MIPSComp { using namespace Arm64JitConstants; -void Arm64Jit::GenerateFixedCode() { +void Arm64Jit::GenerateFixedCode(const JitOptions &jo) { + saveStaticRegisters = AlignCode16(); + const u8 *start = saveStaticRegisters; + STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + gpr.EmitSaveStaticAllocs(); + RET(); + + loadStaticRegisters = AlignCode16(); + gpr.EmitLoadStaticAllocs(); + LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + RET(); + enterCode = AlignCode16(); BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED); @@ -106,16 +117,16 @@ void Arm64Jit::GenerateFixedCode() { MOVP2R(CTXREG, mips_); MOVP2R(JITBASEREG, GetBasePtr()); - RestoreDowncount(); + LoadStaticRegisters(); MovFromPC(SCRATCH1); outerLoopPCInSCRATCH1 = GetCodePtr(); MovToPC(SCRATCH1); outerLoop = GetCodePtr(); - SaveDowncount(); // Advance can change the downcount, so must save/restore + SaveStaticRegisters(); // Advance can change the downcount, so must save/restore RestoreRoundingMode(true); QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance); ApplyRoundingMode(true); - RestoreDowncount(); + LoadStaticRegisters(); FixupBranch skipToRealDispatch = B(); //skip the sync and compare first time dispatcherCheckCoreState = GetCodePtr(); @@ -165,11 +176,11 @@ void Arm64Jit::GenerateFixedCode() { SetJumpTarget(skipJump); // No block found, let's jit - SaveDowncount(); + SaveStaticRegisters(); RestoreRoundingMode(true); QuickCallFunction(SCRATCH1_64, (void *)&MIPSComp::JitAt); ApplyRoundingMode(true); - RestoreDowncount(); + LoadStaticRegisters(); B(dispatcherNoCheck); // no point in special casing this @@ -184,20 +195,13 @@ void Arm64Jit::GenerateFixedCode() { SetJumpTarget(badCoreState); breakpointBailout = GetCodePtr(); - SaveDowncount(); + SaveStaticRegisters(); RestoreRoundingMode(true); fp.ABI_PopRegisters(regs_to_save_fp); ABI_PopRegisters(regs_to_save); RET(); - - if (false) { - std::vector lines = DisassembleArm64(enterCode, GetCodePtr() - enterCode); - for (auto s : lines) { - INFO_LOG(JIT, "%s", s.c_str()); - } - } // Generate some integer conversion funcs. 
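A note on the pattern this patch establishes: the statically allocated registers live in callee-saved host registers, so a call into C++ cannot clobber them by itself, but the callee (CoreTiming::Advance, JitAt, a syscall handler, the interpreter) reads and writes guest registers through the MIPSState struct in memory. The cached values therefore have to be written back before every call-out and reloaded afterwards in case the callee changed them. The resulting idiom, used at each call site above (SomeHostFunction is a placeholder, not a real callee):

    SaveStaticRegisters();      // sync downcount + every static GPR into MIPSState
    RestoreRoundingMode(true);
    QuickCallFunction(SCRATCH1_64, (const void *)&SomeHostFunction);
    ApplyRoundingMode(true);
    LoadStaticRegisters();      // reload them; the host copies are live again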
static const RoundingMode roundModes[8] = {ROUND_N, ROUND_P, ROUND_M, ROUND_Z, ROUND_N, ROUND_P, ROUND_M, ROUND_Z,}; @@ -214,6 +218,14 @@ void Arm64Jit::GenerateFixedCode() { RET(); } + // Leave this at the end, add more stuff above. + if (true) { + std::vector lines = DisassembleArm64(start, GetCodePtr() - start); + for (auto s : lines) { + INFO_LOG(JIT, "%s", s.c_str()); + } + } + // Don't forget to zap the instruction cache! This must stay at the end of this function. FlushIcache(); } diff --git a/Core/MIPS/ARM64/Arm64CompBranch.cpp b/Core/MIPS/ARM64/Arm64CompBranch.cpp index afae6ec40e..7d943c42b7 100644 --- a/Core/MIPS/ARM64/Arm64CompBranch.cpp +++ b/Core/MIPS/ARM64/Arm64CompBranch.cpp @@ -591,7 +591,7 @@ void Arm64Jit::Comp_Syscall(MIPSOpcode op) FlushAll(); - SaveDowncount(); + SaveStaticRegisters(); #ifdef USE_PROFILER // When profiling, we can't skip CallSyscall, since it times syscalls. MOVI2R(W0, op.encoding); @@ -608,8 +608,8 @@ void Arm64Jit::Comp_Syscall(MIPSOpcode op) QuickCallFunction(X1, (void *)&CallSyscall); } #endif + LoadStaticRegisters(); ApplyRoundingMode(); - RestoreDowncount(); WriteSyscallExit(); js.compiling = false; diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp index c31e48de74..b4cd354692 100644 --- a/Core/MIPS/ARM64/Arm64Jit.cpp +++ b/Core/MIPS/ARM64/Arm64Jit.cpp @@ -68,7 +68,7 @@ Arm64Jit::Arm64Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &js, &jo), f gpr.SetEmitter(this); fpr.SetEmitter(this, &fp); AllocCodeSpace(1024 * 1024 * 16); // 32MB is the absolute max because that's what an ARM branch instruction can reach, backwards and forwards. - GenerateFixedCode(); + GenerateFixedCode(jo); js.startDefaultPrefix = mips_->HasDefaultPrefix(); } @@ -135,7 +135,7 @@ void Arm64Jit::ClearCache() { ILOG("ARM64Jit: Clearing the cache!"); blocks.Clear(); ClearCodeSpace(); - GenerateFixedCode(); + GenerateFixedCode(jo); } void Arm64Jit::InvalidateCache() { @@ -456,7 +456,7 @@ void Arm64Jit::Comp_Generic(MIPSOpcode op) { FlushAll(); MIPSInterpretFunc func = MIPSGetInterpretFunc(op); if (func) { - SaveDowncount(); + SaveStaticRegisters(); // TODO: Perhaps keep the rounding mode for interp? 
RestoreRoundingMode(); MOVI2R(SCRATCH1, GetCompilerPC()); @@ -464,7 +464,7 @@ void Arm64Jit::Comp_Generic(MIPSOpcode op) { MOVI2R(W0, op.encoding); QuickCallFunction(SCRATCH2_64, (void *)func); ApplyRoundingMode(); - RestoreDowncount(); + LoadStaticRegisters(); } const MIPSInfo info = MIPSGetInfo(op); @@ -484,12 +484,12 @@ void Arm64Jit::MovToPC(ARM64Reg r) { } // Should not really be necessary except when entering Advance -void Arm64Jit::SaveDowncount() { - STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); +void Arm64Jit::SaveStaticRegisters() { + QuickCallFunction(SCRATCH2_64, saveStaticRegisters); } -void Arm64Jit::RestoreDowncount() { - LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); +void Arm64Jit::LoadStaticRegisters() { + QuickCallFunction(SCRATCH2_64, loadStaticRegisters); } void Arm64Jit::WriteDownCount(int offset) { diff --git a/Core/MIPS/ARM64/Arm64Jit.h b/Core/MIPS/ARM64/Arm64Jit.h index 56811110b9..f4f2773924 100644 --- a/Core/MIPS/ARM64/Arm64Jit.h +++ b/Core/MIPS/ARM64/Arm64Jit.h @@ -177,7 +177,7 @@ public: void EatPrefix() { js.EatPrefix(); } private: - void GenerateFixedCode(); + void GenerateFixedCode(const JitOptions &jo); void FlushAll(); void FlushPrefixV(); @@ -197,8 +197,8 @@ private: bool ReplaceJalTo(u32 dest); - void SaveDowncount(); - void RestoreDowncount(); + void SaveStaticRegisters(); + void LoadStaticRegisters(); void WriteExit(u32 destination, int exit_num); void WriteExitDestInR(Arm64Gen::ARM64Reg Reg); @@ -263,6 +263,9 @@ public: const u8 *breakpointBailout; + const u8 *saveStaticRegisters; + const u8 *loadStaticRegisters; + // Indexed by FPCR FZ:RN bits for convenience. Uses SCRATCH2. const u8 *convertS0ToSCRATCH1[8]; }; diff --git a/Core/MIPS/ARM64/Arm64RegCache.cpp b/Core/MIPS/ARM64/Arm64RegCache.cpp index 5aa8529da9..37c8f28ab7 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.cpp +++ b/Core/MIPS/ARM64/Arm64RegCache.cpp @@ -15,6 +15,7 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. +#include "base/logging.h" #include "Core/MemMap.h" #include "Core/MIPS/ARM64/Arm64RegCache.h" #include "Core/MIPS/ARM64/Arm64Jit.h" @@ -47,19 +48,62 @@ void Arm64RegCache::Start(MIPSAnalyst::AnalysisResults &stats) { mr[i].reg = INVALID_REG; mr[i].imm = -1; mr[i].spillLock = false; + mr[i].isStatic = false; + } + int numStatics; + const StaticAllocation *statics = GetStaticAllocations(numStatics); + for (int i = 0; i < numStatics; i++) { + ar[statics[i].ar].mipsReg = statics[i].mr; + ar[statics[i].ar].pointerified = false; // TODO: Support static pointerification for SP. + mr[statics[i].mr].loc = ML_ARMREG; + mr[statics[i].mr].reg = statics[i].ar; + mr[statics[i].mr].isStatic = true; } } const ARM64Reg *Arm64RegCache::GetMIPSAllocationOrder(int &count) { // See register alloc remarks in Arm64Asm.cpp - // TODO: Add static allocation of top MIPS registers like SP + + // W19-W22 are most suitable for static allocation. Those that are chosen for static allocation + // should be omitted here and added in GetStaticAllocations. 
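W19-W22 are the natural candidates because AAPCS64 makes X19-X28 callee-saved: a guest register pinned there survives every QuickCallFunction into C++ without a per-call spill, which is the whole point of the exercise. A sketch of the resulting split (the v0 -> W19 entry matches the table added below; the layout comment is illustration, not code from the patch):

    // Callee-saved (survive C++ call-outs):       W19 W20 W21 W22  <- static pool
    // Caller-saved (must be flushed around them): W0..W15          <- dynamic pool
    // Statically pinning v0 in W19 simply removes W19 from the dynamic
    // allocation order that follows.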
+ static const ARM64Reg allocationOrder[] = { - W19, W20, W21, W22, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, + W20, W21, W22, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, }; - count = sizeof(allocationOrder) / sizeof(const int); + count = ARRAY_SIZE(allocationOrder); return allocationOrder; } +const Arm64RegCache::StaticAllocation *Arm64RegCache::GetStaticAllocations(int &count) { + static const StaticAllocation allocs[] = { + {MIPS_REG_V0, W19}, + }; + count = ARRAY_SIZE(allocs); + return allocs; +} + +void Arm64RegCache::EmitLoadStaticAllocs() { + int count; + const StaticAllocation *allocs = GetStaticAllocations(count); + // TODO: Use LDP when possible. + // This only needs to run once (by Asm) so checks don't need to be fast. + for (int i = 0; i < count; i++) { + int offset = GetMipsRegOffset(allocs[i].mr); + emit_->LDR(INDEX_UNSIGNED, allocs[i].ar, CTXREG, offset); + } +} + +void Arm64RegCache::EmitSaveStaticAllocs() { + int count; + const StaticAllocation *allocs = GetStaticAllocations(count); + // TODO: Use LDP when possible. + // This only needs to run once (by Asm) so checks don't need to be fast. + for (int i = 0; i < count; i++) { + int offset = GetMipsRegOffset(allocs[i].mr); + emit_->STR(INDEX_UNSIGNED, allocs[i].ar, CTXREG, offset); + } +} + void Arm64RegCache::FlushBeforeCall() { // These registers are not preserved by function calls. for (int i = 0; i < 19; ++i) { @@ -85,6 +129,10 @@ void Arm64RegCache::SetRegImm(ARM64Reg reg, u64 imm) { } void Arm64RegCache::MapRegTo(ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags) { + if (mr[mipsReg].isStatic) { + ELOG("Cannot MapRegTo static register %d", mipsReg); + return; + } ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false; if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) { if (mipsReg == MIPS_REG_ZERO) { @@ -146,6 +194,8 @@ ARM64Reg Arm64RegCache::FindBestToSpill(bool unusedOnly, bool *clobbered) { ARM64Reg reg = allocOrder[i]; if (ar[reg].mipsReg != MIPS_REG_INVALID && mr[ar[reg].mipsReg].spillLock) continue; + if (mr[ar[reg].mipsReg].isStatic) + continue; // Awesome, a clobbered reg. Let's use it. if (MIPSAnalyst::IsRegisterClobbered(ar[reg].mipsReg, compilerPC_, UNUSED_LOOKAHEAD_OPS)) { @@ -171,6 +221,12 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) { ERROR_LOG_REPORT(JIT, "Cannot map HI in Arm64RegCache"); return INVALID_REG; } + + if (mr[mipsReg].isStatic) { + // Dirty or not doesn't matter as it's a fixed register. + return mr[mipsReg].reg; + } + // Let's see if it's already mapped. If so we just need to update the dirty flag. // We don't need to check for ML_NOINIT because we assume that anyone who maps // with that flag immediately writes a "known" value to the register. @@ -216,13 +272,12 @@ allocate: } if (bestToSpill != INVALID_REG) { - // ERROR_LOG(JIT, "Out of registers at PC %08x - spills register %i.", mips_->pc, bestToSpill); - // TODO: Broken somehow in Dante's Inferno, but most games work. Bad flags in MIPSTables somewhere? if (clobbered) { DiscardR(ar[bestToSpill].mipsReg); } else { FlushArmReg(bestToSpill); } + // Now one must be free. goto allocate; } @@ -299,6 +354,10 @@ void Arm64RegCache::MapDirtyDirtyInIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs } void Arm64RegCache::FlushArmReg(ARM64Reg r) { + if (r == INVALID_REG) { + ELOG("FlushArmReg called on invalid register %d", r); + return; + } if (ar[r].mipsReg == MIPS_REG_INVALID) { // Nothing to do, reg not mapped. 
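On the "TODO: Use LDP when possible" in EmitLoadStaticAllocs above (the save path would analogously use STP): when two statics occupy adjacent 4-byte slots in MIPSState, one load-pair can replace two loads. A hypothetical sketch, assuming the emitter exposes an LDP wrapper in the usual Dolphin-derived form and that the adjacency check holds, neither of which the patch guarantees:

    // Hypothetical pairing inside EmitLoadStaticAllocs:
    int off0 = GetMipsRegOffset(allocs[0].mr);
    if (count >= 2 && GetMipsRegOffset(allocs[1].mr) == off0 + 4) {
        // One LDP fills both static regs from their neighboring context slots.
        emit_->LDP(INDEX_SIGNED, allocs[0].ar, allocs[1].ar, CTXREG, off0);
    }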
if (ar[r].isDirty) { @@ -306,28 +365,39 @@ void Arm64RegCache::FlushArmReg(ARM64Reg r) { } return; } - if (ar[r].mipsReg != MIPS_REG_INVALID) { - auto &mreg = mr[ar[r].mipsReg]; - if (mreg.loc == ML_ARMREG_IMM || ar[r].mipsReg == MIPS_REG_ZERO) { - // We know its immedate value, no need to STR now. - mreg.loc = ML_IMM; - mreg.reg = INVALID_REG; - } else { - // Note: may be a 64-bit reg. - ARM64Reg storeReg = ARM64RegForFlush(ar[r].mipsReg); - if (storeReg != INVALID_REG) - emit_->STR(INDEX_UNSIGNED, storeReg, CTXREG, GetMipsRegOffset(ar[r].mipsReg)); - mreg.loc = ML_MEM; - mreg.reg = INVALID_REG; - mreg.imm = 0; - } + if (mr[ar[r].mipsReg].isStatic) { + ELOG("Cannot FlushArmReg a statically mapped register"); + return; } + + auto &mreg = mr[ar[r].mipsReg]; + if (mreg.loc == ML_ARMREG_IMM || ar[r].mipsReg == MIPS_REG_ZERO) { + // We know its immedate value, no need to STR now. + mreg.loc = ML_IMM; + mreg.reg = INVALID_REG; + } else { + // Note: may be a 64-bit reg. + ARM64Reg storeReg = ARM64RegForFlush(ar[r].mipsReg); + if (storeReg != INVALID_REG) + emit_->STR(INDEX_UNSIGNED, storeReg, CTXREG, GetMipsRegOffset(ar[r].mipsReg)); + mreg.loc = ML_MEM; + mreg.reg = INVALID_REG; + mreg.imm = 0; + } + ar[r].isDirty = false; ar[r].mipsReg = MIPS_REG_INVALID; ar[r].pointerified = false; } void Arm64RegCache::DiscardR(MIPSGPReg mipsReg) { + if (mr[mipsReg].isStatic) { + // Simply do nothing unless it's an ArmregImm, in case we just switch it over to armreg, losing the value. + if (mr[mipsReg].loc == ML_ARMREG_IMM) { + mr[mipsReg].loc = ML_ARMREG; + } + return; + } const RegMIPSLoc prevLoc = mr[mipsReg].loc; if (prevLoc == ML_ARMREG || prevLoc == ML_ARMREG_IMM) { ARM64Reg armReg = mr[mipsReg].reg; @@ -349,6 +419,9 @@ void Arm64RegCache::DiscardR(MIPSGPReg mipsReg) { } ARM64Reg Arm64RegCache::ARM64RegForFlush(MIPSGPReg r) { + if (mr[r].isStatic) + return INVALID_REG; // No flushing needed + switch (mr[r].loc) { case ML_IMM: if (r == MIPS_REG_ZERO) { @@ -392,6 +465,11 @@ ARM64Reg Arm64RegCache::ARM64RegForFlush(MIPSGPReg r) { } void Arm64RegCache::FlushR(MIPSGPReg r) { + if (mr[r].isStatic) { + ELOG("Cannot flush static reg %d", r); + return; + } + switch (mr[r].loc) { case ML_IMM: // IMM is always "dirty". @@ -483,6 +561,14 @@ void Arm64RegCache::FlushAll() { // Final pass to grab any that were left behind. for (int i = 0; i < NUM_MIPSREG; i++) { MIPSGPReg mipsReg = MIPSGPReg(i); + if (mr[i].isStatic) { + if (i != MIPS_REG_ZERO && mr[i].reg == INVALID_REG) { + ELOG("ARM reg of static %i is invalid", i); + continue; + } + ar[mr[i].reg].mipsReg = MIPS_REG_INVALID; // make the sanity check happy + continue; + } FlushR(mipsReg); } @@ -504,19 +590,29 @@ void Arm64RegCache::SetImm(MIPSGPReg r, u64 immVal) { // Already have that value, let's keep it in the reg. 
return; } - // Zap existing value if cached in a reg - if (mr[r].reg != INVALID_REG) { - ar[mr[r].reg].mipsReg = MIPS_REG_INVALID; - ar[mr[r].reg].isDirty = false; + + if (mr[r].isStatic) { + // TODO: Just set to IMM + mr[r].loc = ML_ARMREG_IMM; + mr[r].imm = immVal; + SetRegImm(mr[r].reg, immVal); + } else { + // Zap existing value if cached in a reg + if (mr[r].reg != INVALID_REG) { + ar[mr[r].reg].mipsReg = MIPS_REG_INVALID; + ar[mr[r].reg].isDirty = false; + } + mr[r].loc = ML_IMM; + mr[r].imm = immVal; + mr[r].reg = INVALID_REG; } - mr[r].loc = ML_IMM; - mr[r].imm = immVal; - mr[r].reg = INVALID_REG; } bool Arm64RegCache::IsImm(MIPSGPReg r) const { - if (r == MIPS_REG_ZERO) return true; - return mr[r].loc == ML_IMM || mr[r].loc == ML_ARMREG_IMM; + if (r == MIPS_REG_ZERO) + return true; + else + return mr[r].loc == ML_IMM || mr[r].loc == ML_ARMREG_IMM; } u64 Arm64RegCache::GetImm(MIPSGPReg r) const { @@ -554,12 +650,14 @@ void Arm64RegCache::SpillLock(MIPSGPReg r1, MIPSGPReg r2, MIPSGPReg r3, MIPSGPRe void Arm64RegCache::ReleaseSpillLocks() { for (int i = 0; i < NUM_MIPSREG; i++) { - mr[i].spillLock = false; + if (!mr[i].isStatic) + mr[i].spillLock = false; } } void Arm64RegCache::ReleaseSpillLock(MIPSGPReg reg) { - mr[reg].spillLock = false; + if (!mr[reg].isStatic) + mr[reg].spillLock = false; } ARM64Reg Arm64RegCache::R(MIPSGPReg mipsReg) { diff --git a/Core/MIPS/ARM64/Arm64RegCache.h b/Core/MIPS/ARM64/Arm64RegCache.h index 1e59526788..960f13cd70 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.h +++ b/Core/MIPS/ARM64/Arm64RegCache.h @@ -56,19 +56,11 @@ enum { MAP_NOINIT = 2 | MAP_DIRTY, }; -} - -// R1 to R6: mapped MIPS regs -// R8 = flags (maybe we could do better here?) -// R9 = code pointers -// R10 = MIPS context -// R11 = base pointer -// R14 = scratch (actually LR) - +} // namespace typedef int MIPSReg; -struct RegARM { +struct RegARM64 { MIPSGPReg mipsReg; // if -1, no mipsreg attached. bool isDirty; // Should the register be written back? bool pointerified; // Has used movk to move the memory base into the top part of the reg. Note - still usable as 32-bit reg! @@ -81,6 +73,7 @@ struct RegMIPS { u64 imm; Arm64Gen::ARM64Reg reg; // reg index bool spillLock; // if true, this register cannot be spilled. + bool isStatic; // if true, this register will not be written back to ram by the regcache // If loc == ML_MEM, it's back in its location in the CPU context struct. 
}; @@ -123,7 +116,6 @@ public: void MapDirtyDirtyIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, bool avoidLoad = true); void MapDirtyDirtyInIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad = true); void FlushArmReg(Arm64Gen::ARM64Reg r); - void FlushR(MIPSGPReg r); void FlushBeforeCall(); void FlushAll(); void DiscardR(MIPSGPReg r); @@ -138,12 +130,22 @@ public: int GetMipsRegOffset(MIPSGPReg r); + // Call these when leaving/entering the JIT + void EmitLoadStaticAllocs(); + void EmitSaveStaticAllocs(); + private: + struct StaticAllocation { + MIPSGPReg mr; + Arm64Gen::ARM64Reg ar; + }; + const StaticAllocation *GetStaticAllocations(int &count); const Arm64Gen::ARM64Reg *GetMIPSAllocationOrder(int &count); void MapRegTo(Arm64Gen::ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags); Arm64Gen::ARM64Reg FindBestToSpill(bool unusedOnly, bool *clobbered); Arm64Gen::ARM64Reg ARM64RegForFlush(MIPSGPReg r); - + void FlushR(MIPSGPReg r); + MIPSState *mips_; Arm64Gen::ARM64XEmitter *emit_; MIPSComp::JitState *js_; @@ -155,6 +157,6 @@ private: NUM_MIPSREG = Arm64JitConstants::TOTAL_MAPPABLE_MIPSREGS, }; - RegARM ar[NUM_ARMREG]; + RegARM64 ar[NUM_ARMREG]; RegMIPS mr[NUM_MIPSREG]; }; diff --git a/Core/MIPS/JitCommon/JitState.h b/Core/MIPS/JitCommon/JitState.h index 3559dd56a8..b4e81b18fe 100644 --- a/Core/MIPS/JitCommon/JitState.h +++ b/Core/MIPS/JitCommon/JitState.h @@ -194,6 +194,7 @@ namespace MIPSComp { bool downcountInRegister; // ARM64 only bool useASIMDVFPU; + bool useStaticAlloc; // Common bool enableBlocklink; From 844a3f19a961c4bd0ae6da3cd6e5924288ce76d5 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 6 Jul 2015 21:16:49 +0200 Subject: [PATCH 02/11] Fix bugs and issues in ARM64 static alloc. Thanks unknown for finding the problem. --- Core/MIPS/ARM64/Arm64Asm.cpp | 29 +++++++++++------- Core/MIPS/ARM64/Arm64Jit.cpp | 13 ++++++-- Core/MIPS/ARM64/Arm64RegCache.cpp | 50 +++++++++++++++++++++++-------- Core/MIPS/ARM64/Arm64RegCache.h | 2 +- Core/MIPS/JitCommon/JitState.cpp | 5 ++++ 5 files changed, 73 insertions(+), 26 deletions(-) diff --git a/Core/MIPS/ARM64/Arm64Asm.cpp b/Core/MIPS/ARM64/Arm64Asm.cpp index 67d1a957e5..b9221dd9e7 100644 --- a/Core/MIPS/ARM64/Arm64Asm.cpp +++ b/Core/MIPS/ARM64/Arm64Asm.cpp @@ -94,18 +94,27 @@ namespace MIPSComp { using namespace Arm64JitConstants; void Arm64Jit::GenerateFixedCode(const JitOptions &jo) { - saveStaticRegisters = AlignCode16(); - const u8 *start = saveStaticRegisters; - STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); - gpr.EmitSaveStaticAllocs(); - RET(); + const u8 *start = nullptr; + if (jo.useStaticAlloc) { + saveStaticRegisters = AlignCode16(); + STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + gpr.EmitSaveStaticAllocs(); + RET(); - loadStaticRegisters = AlignCode16(); - gpr.EmitLoadStaticAllocs(); - LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); - RET(); + loadStaticRegisters = AlignCode16(); + gpr.EmitLoadStaticAllocs(); + LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + RET(); + + start = saveStaticRegisters; + } else { + saveStaticRegisters = nullptr; + loadStaticRegisters = nullptr; + } enterCode = AlignCode16(); + if (!start) + start = enterCode; BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED); BitSet32 regs_to_save_fp(Arm64Gen::ALL_CALLEE_SAVED_FP); @@ -219,7 +228,7 @@ void Arm64Jit::GenerateFixedCode(const JitOptions &jo) { } // Leave this at the end, add more stuff above. 
- if (true) { + if (false) { std::vector lines = DisassembleArm64(start, GetCodePtr() - start); for (auto s : lines) { INFO_LOG(JIT, "%s", s.c_str()); diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp index b4cd354692..22110fd5a9 100644 --- a/Core/MIPS/ARM64/Arm64Jit.cpp +++ b/Core/MIPS/ARM64/Arm64Jit.cpp @@ -485,11 +485,20 @@ void Arm64Jit::MovToPC(ARM64Reg r) { // Should not really be necessary except when entering Advance void Arm64Jit::SaveStaticRegisters() { - QuickCallFunction(SCRATCH2_64, saveStaticRegisters); + if (jo.useStaticAlloc) { + QuickCallFunction(SCRATCH2_64, saveStaticRegisters); + } else { + // Inline the single operation + STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + } } void Arm64Jit::LoadStaticRegisters() { - QuickCallFunction(SCRATCH2_64, loadStaticRegisters); + if (jo.useStaticAlloc) { + QuickCallFunction(SCRATCH2_64, loadStaticRegisters); + } else { + LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + } } void Arm64Jit::WriteDownCount(int offset) { diff --git a/Core/MIPS/ARM64/Arm64RegCache.cpp b/Core/MIPS/ARM64/Arm64RegCache.cpp index 37c8f28ab7..1f140d4a1d 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.cpp +++ b/Core/MIPS/ARM64/Arm64RegCache.cpp @@ -66,20 +66,36 @@ const ARM64Reg *Arm64RegCache::GetMIPSAllocationOrder(int &count) { // W19-W22 are most suitable for static allocation. Those that are chosen for static allocation // should be omitted here and added in GetStaticAllocations. - static const ARM64Reg allocationOrder[] = { + W19, W20, W21, W22, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, + }; + static const ARM64Reg allocationOrderStaticAlloc[] = { W20, W21, W22, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, }; - count = ARRAY_SIZE(allocationOrder); - return allocationOrder; + + if (jo_->useStaticAlloc) { + count = ARRAY_SIZE(allocationOrderStaticAlloc); + return allocationOrderStaticAlloc; + } else { + count = ARRAY_SIZE(allocationOrder); + return allocationOrder; + } } const Arm64RegCache::StaticAllocation *Arm64RegCache::GetStaticAllocations(int &count) { + static const StaticAllocation none[] = { + }; static const StaticAllocation allocs[] = { {MIPS_REG_V0, W19}, }; - count = ARRAY_SIZE(allocs); - return allocs; + + if (jo_->useStaticAlloc) { + count = ARRAY_SIZE(allocs); + return allocs; + } else { + count = 0; + return none; + } } void Arm64RegCache::EmitLoadStaticAllocs() { @@ -194,8 +210,8 @@ ARM64Reg Arm64RegCache::FindBestToSpill(bool unusedOnly, bool *clobbered) { ARM64Reg reg = allocOrder[i]; if (ar[reg].mipsReg != MIPS_REG_INVALID && mr[ar[reg].mipsReg].spillLock) continue; - if (mr[ar[reg].mipsReg].isStatic) - continue; + + // As it's in alloc-order, we know it's not static so we don't need to check for that. // Awesome, a clobbered reg. Let's use it. if (MIPSAnalyst::IsRegisterClobbered(ar[reg].mipsReg, compilerPC_, UNUSED_LOOKAHEAD_OPS)) { @@ -222,8 +238,20 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) { return INVALID_REG; } + if (mipsReg == MIPS_REG_INVALID) { + ERROR_LOG(JIT, "Cannot map invalid register"); + return INVALID_REG; + } + + ARM64Reg armReg = mr[mipsReg].reg; + if (mr[mipsReg].isStatic) { - // Dirty or not doesn't matter as it's a fixed register. + // Erasing the imm on dirty (necessary since otherwise we will still think it's ML_ARMREG_IMM and return + // true for IsImm and calculate crazily wrong things). 
/unknown + if (mapFlags & MAP_DIRTY) { + mr[mipsReg].loc = ML_ARMREG; + ar[armReg].pointerified = false; + } return mr[mipsReg].reg; } @@ -231,7 +259,6 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) { // We don't need to check for ML_NOINIT because we assume that anyone who maps // with that flag immediately writes a "known" value to the register. if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) { - ARM64Reg armReg = mr[mipsReg].reg; if (ar[armReg].mipsReg != mipsReg) { ERROR_LOG_REPORT(JIT, "Register mapping out of sync! %i", mipsReg); } @@ -369,7 +396,6 @@ void Arm64RegCache::FlushArmReg(ARM64Reg r) { ELOG("Cannot FlushArmReg a statically mapped register"); return; } - auto &mreg = mr[ar[r].mipsReg]; if (mreg.loc == ML_ARMREG_IMM || ar[r].mipsReg == MIPS_REG_ZERO) { // We know its immedate value, no need to STR now. @@ -384,7 +410,6 @@ void Arm64RegCache::FlushArmReg(ARM64Reg r) { mreg.reg = INVALID_REG; mreg.imm = 0; } - ar[r].isDirty = false; ar[r].mipsReg = MIPS_REG_INVALID; ar[r].pointerified = false; @@ -566,7 +591,6 @@ void Arm64RegCache::FlushAll() { ELOG("ARM reg of static %i is invalid", i); continue; } - ar[mr[i].reg].mipsReg = MIPS_REG_INVALID; // make the sanity check happy continue; } FlushR(mipsReg); @@ -574,7 +598,7 @@ void Arm64RegCache::FlushAll() { // Sanity check for (int i = 0; i < NUM_ARMREG; i++) { - if (ar[i].mipsReg != MIPS_REG_INVALID) { + if (ar[i].mipsReg != MIPS_REG_INVALID && mr[ar[i].mipsReg].isStatic == false) { ERROR_LOG_REPORT(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg); } } diff --git a/Core/MIPS/ARM64/Arm64RegCache.h b/Core/MIPS/ARM64/Arm64RegCache.h index 960f13cd70..ab2ac4c8a3 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.h +++ b/Core/MIPS/ARM64/Arm64RegCache.h @@ -118,6 +118,7 @@ public: void FlushArmReg(Arm64Gen::ARM64Reg r); void FlushBeforeCall(); void FlushAll(); + void FlushR(MIPSGPReg r); void DiscardR(MIPSGPReg r); Arm64Gen::ARM64Reg R(MIPSGPReg preg); // Returns a cached register, while checking that it's NOT mapped as a pointer @@ -144,7 +145,6 @@ private: void MapRegTo(Arm64Gen::ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags); Arm64Gen::ARM64Reg FindBestToSpill(bool unusedOnly, bool *clobbered); Arm64Gen::ARM64Reg ARM64RegForFlush(MIPSGPReg r); - void FlushR(MIPSGPReg r); MIPSState *mips_; Arm64Gen::ARM64XEmitter *emit_; diff --git a/Core/MIPS/JitCommon/JitState.cpp b/Core/MIPS/JitCommon/JitState.cpp index a679df0676..a27ff3e9fb 100644 --- a/Core/MIPS/JitCommon/JitState.cpp +++ b/Core/MIPS/JitCommon/JitState.cpp @@ -45,5 +45,10 @@ namespace MIPSComp { continueBranches = false; continueJumps = false; continueMaxInstructions = 300; + + useStaticAlloc = false; +#ifdef ARM64 + useStaticAlloc = true; +#endif } } From 9af6abd8a10caf40c95dbb1e926b83ba80e084e3 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 6 Jul 2015 21:46:00 +0200 Subject: [PATCH 03/11] ARM64: Support pointerified static allocs, statically allocate SP --- Core/MIPS/ARM64/Arm64Asm.cpp | 3 ++- Core/MIPS/ARM64/Arm64RegCache.cpp | 16 ++++++++++++++-- Core/MIPS/ARM64/Arm64RegCache.h | 1 + 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Core/MIPS/ARM64/Arm64Asm.cpp b/Core/MIPS/ARM64/Arm64Asm.cpp index b9221dd9e7..af8be217c2 100644 --- a/Core/MIPS/ARM64/Arm64Asm.cpp +++ b/Core/MIPS/ARM64/Arm64Asm.cpp @@ -33,6 +33,7 @@ using namespace Arm64Gen; //static int temp32; // unused? static const bool enableDebug = false; +static const bool enableDisasm = false; //static bool enableStatistics = false; //unused? 
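The "pointerified" state that the Arm64RegCache.cpp hunks below extend to statics rests on one trick: assuming the emulated memory base is 4 GB-aligned (which this scheme requires), the low 32 bits of a 64-bit host register hold the guest address while a single MOVK installs the upper half of Memory::base. The X view is then a directly usable host pointer and the W view is still the plain guest value:

    // W20 holds the guest sp, say 0x08fffff0 (W20 matches the SP entry below).
    emit_->MOVK(EncodeRegTo64(W20), ((uint64_t)Memory::base) >> 32, SHIFT_32);
    // Now X20 == Memory::base + 0x08fffff0, a real host pointer, while W20
    // still reads as the guest address - 32-bit arithmetic keeps working.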
@@ -228,7 +229,7 @@ void Arm64Jit::GenerateFixedCode(const JitOptions &jo) { } // Leave this at the end, add more stuff above. - if (false) { + if (enableDisasm) { std::vector lines = DisassembleArm64(start, GetCodePtr() - start); for (auto s : lines) { INFO_LOG(JIT, "%s", s.c_str()); diff --git a/Core/MIPS/ARM64/Arm64RegCache.cpp b/Core/MIPS/ARM64/Arm64RegCache.cpp index 1f140d4a1d..bb77fd867d 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.cpp +++ b/Core/MIPS/ARM64/Arm64RegCache.cpp @@ -54,7 +54,7 @@ void Arm64RegCache::Start(MIPSAnalyst::AnalysisResults &stats) { const StaticAllocation *statics = GetStaticAllocations(numStatics); for (int i = 0; i < numStatics; i++) { ar[statics[i].ar].mipsReg = statics[i].mr; - ar[statics[i].ar].pointerified = false; // TODO: Support static pointerification for SP. + ar[statics[i].ar].pointerified = statics[i].pointerified; mr[statics[i].mr].loc = ML_ARMREG; mr[statics[i].mr].reg = statics[i].ar; mr[statics[i].mr].isStatic = true; @@ -70,7 +70,7 @@ const ARM64Reg *Arm64RegCache::GetMIPSAllocationOrder(int &count) { W19, W20, W21, W22, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, }; static const ARM64Reg allocationOrderStaticAlloc[] = { - W20, W21, W22, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, + W21, W22, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, }; if (jo_->useStaticAlloc) { @@ -87,6 +87,7 @@ const Arm64RegCache::StaticAllocation *Arm64RegCache::GetStaticAllocations(int & }; static const StaticAllocation allocs[] = { {MIPS_REG_V0, W19}, + {MIPS_REG_SP, W20, true}, }; if (jo_->useStaticAlloc) { @@ -106,6 +107,9 @@ void Arm64RegCache::EmitLoadStaticAllocs() { for (int i = 0; i < count; i++) { int offset = GetMipsRegOffset(allocs[i].mr); emit_->LDR(INDEX_UNSIGNED, allocs[i].ar, CTXREG, offset); + if (allocs[i].pointerified) { + emit_->MOVK(EncodeRegTo64(allocs[i].ar), ((uint64_t)Memory::base) >> 32, SHIFT_32); + } } } @@ -596,6 +600,14 @@ void Arm64RegCache::FlushAll() { FlushR(mipsReg); } + int count = 0; + const StaticAllocation *allocs = GetStaticAllocations(count); + for (int i = 0; i < count; i++) { + if (allocs[i].pointerified && !ar[allocs[i].ar].pointerified) { + // Re-pointerify + emit_->MOVK(EncodeRegTo64(allocs[i].ar), ((uint64_t)Memory::base) >> 32, SHIFT_32); + } + } // Sanity check for (int i = 0; i < NUM_ARMREG; i++) { if (ar[i].mipsReg != MIPS_REG_INVALID && mr[ar[i].mipsReg].isStatic == false) { diff --git a/Core/MIPS/ARM64/Arm64RegCache.h b/Core/MIPS/ARM64/Arm64RegCache.h index ab2ac4c8a3..9416d72578 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.h +++ b/Core/MIPS/ARM64/Arm64RegCache.h @@ -139,6 +139,7 @@ private: struct StaticAllocation { MIPSGPReg mr; Arm64Gen::ARM64Reg ar; + bool pointerified; }; const StaticAllocation *GetStaticAllocations(int &count); const Arm64Gen::ARM64Reg *GetMIPSAllocationOrder(int &count); From f42f81a4daa7b0aa80994b4a0a08e072004f6f0b Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 6 Jul 2015 22:17:16 +0200 Subject: [PATCH 04/11] ARM64: Optimize small adjustments of pointerified registers --- Common/Arm64Emitter.cpp | 8 ++++---- Core/MIPS/ARM64/Arm64CompALU.cpp | 20 ++++++++++++++++---- Core/MIPS/ARM64/Arm64RegCache.cpp | 4 ++++ Core/MIPS/ARM64/Arm64RegCache.h | 1 + 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/Common/Arm64Emitter.cpp b/Common/Arm64Emitter.cpp index 669123baf5..0a86be89f1 100644 --- a/Common/Arm64Emitter.cpp +++ b/Common/Arm64Emitter.cpp @@ -46,12 +46,12 @@ bool IsPowerOfTwo(uint64_t x) { bool 
IsImmArithmetic(uint64_t input, u32 *val, bool *shift) { if (input < 4096) { - *val = input; - *shift = false; + if (val) *val = input; + if (shift) *shift = false; return true; } else if ((input & 0xFFF000) == input) { - *val = input >> 12; - *shift = true; + if (val) *val = input >> 12; + if (shift) *shift = true; return true; } return false; diff --git a/Core/MIPS/ARM64/Arm64CompALU.cpp b/Core/MIPS/ARM64/Arm64CompALU.cpp index a0ddf233ab..289b16b400 100644 --- a/Core/MIPS/ARM64/Arm64CompALU.cpp +++ b/Core/MIPS/ARM64/Arm64CompALU.cpp @@ -82,10 +82,22 @@ void Arm64Jit::Comp_IType(MIPSOpcode op) { switch (op >> 26) { case 8: // same as addiu? case 9: // R(rt) = R(rs) + simm; break; //addiu - if (simm >= 0) { - CompImmLogic(rs, rt, simm, &ARM64XEmitter::ADD, &ARM64XEmitter::TryADDI2R, &EvalAdd); - } else if (simm < 0) { - CompImmLogic(rs, rt, -simm, &ARM64XEmitter::SUB, &ARM64XEmitter::TrySUBI2R, &EvalSub); + // Special-case for small adjustments of pointerified registers. Commonly for SP but happens for others. + if (rs == rt && gpr.IsMappedAsPointer(rs) && IsImmArithmetic(simm < 0 ? -simm : simm, nullptr, nullptr)) { + ARM64Reg r32 = gpr.R(rs); + gpr.MarkDirty(r32); + ARM64Reg r = EncodeRegTo64(r32); + if (simm > 0) { + ADDI2R(r, r, simm); + } else { + SUBI2R(r, r, -simm); + } + } else { + if (simm >= 0) { + CompImmLogic(rs, rt, simm, &ARM64XEmitter::ADD, &ARM64XEmitter::TryADDI2R, &EvalAdd); + } else if (simm < 0) { + CompImmLogic(rs, rt, -simm, &ARM64XEmitter::SUB, &ARM64XEmitter::TrySUBI2R, &EvalSub); + } } break; diff --git a/Core/MIPS/ARM64/Arm64RegCache.cpp b/Core/MIPS/ARM64/Arm64RegCache.cpp index bb77fd867d..5dfc094a45 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.cpp +++ b/Core/MIPS/ARM64/Arm64RegCache.cpp @@ -143,6 +143,10 @@ bool Arm64RegCache::IsMappedAsPointer(MIPSGPReg mipsReg) { return false; } +void Arm64RegCache::MarkDirty(ARM64Reg reg) { + ar[reg].isDirty = true; +} + void Arm64RegCache::SetRegImm(ARM64Reg reg, u64 imm) { // On ARM64, at least Cortex A57, good old MOVT/MOVW (MOVK in 64-bit) is really fast. 
emit_->MOVI2R(reg, imm); diff --git a/Core/MIPS/ARM64/Arm64RegCache.h b/Core/MIPS/ARM64/Arm64RegCache.h index 9416d72578..56c0eec18c 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.h +++ b/Core/MIPS/ARM64/Arm64RegCache.h @@ -109,6 +109,7 @@ public: bool IsMapped(MIPSGPReg reg); bool IsMappedAsPointer(MIPSGPReg reg); + void MarkDirty(Arm64Gen::ARM64Reg reg); void MapIn(MIPSGPReg rs); void MapInIn(MIPSGPReg rd, MIPSGPReg rs); void MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs, bool avoidLoad = true); From 4920f3e3c1228e97f1630e69686ab935e587c8bd Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 6 Jul 2015 23:12:23 +0200 Subject: [PATCH 05/11] Work towards handling ML_IMM in static registers but doesn't work yet (enable on line 649) --- Core/MIPS/ARM64/Arm64RegCache.cpp | 36 ++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/Core/MIPS/ARM64/Arm64RegCache.cpp b/Core/MIPS/ARM64/Arm64RegCache.cpp index 5dfc094a45..6515c6e693 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.cpp +++ b/Core/MIPS/ARM64/Arm64RegCache.cpp @@ -133,7 +133,7 @@ void Arm64RegCache::FlushBeforeCall() { } bool Arm64RegCache::IsMapped(MIPSGPReg mipsReg) { - return mr[mipsReg].loc == ML_ARMREG; + return mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM; } bool Arm64RegCache::IsMappedAsPointer(MIPSGPReg mipsReg) { @@ -148,6 +148,10 @@ void Arm64RegCache::MarkDirty(ARM64Reg reg) { } void Arm64RegCache::SetRegImm(ARM64Reg reg, u64 imm) { + if (reg == INVALID_REG) { + ELOG("SetRegImm to invalid register: at %08x", js_->compilerPC); + return; + } // On ARM64, at least Cortex A57, good old MOVT/MOVW (MOVK in 64-bit) is really fast. emit_->MOVI2R(reg, imm); } @@ -254,12 +258,18 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) { ARM64Reg armReg = mr[mipsReg].reg; if (mr[mipsReg].isStatic) { + if (mr[mipsReg].loc == ML_IMM) { + if (!(mapFlags & MAP_NOINIT)) + SetRegImm(armReg, mr[mipsReg].imm); + mr[mipsReg].loc = ML_ARMREG_IMM; + } // Erasing the imm on dirty (necessary since otherwise we will still think it's ML_ARMREG_IMM and return // true for IsImm and calculate crazily wrong things). /unknown if (mapFlags & MAP_DIRTY) { mr[mipsReg].loc = ML_ARMREG; - ar[armReg].pointerified = false; + ar[armReg].isDirty = true; // Not that it matters } + ar[armReg].pointerified = false; return mr[mipsReg].reg; } @@ -426,7 +436,8 @@ void Arm64RegCache::FlushArmReg(ARM64Reg r) { void Arm64RegCache::DiscardR(MIPSGPReg mipsReg) { if (mr[mipsReg].isStatic) { // Simply do nothing unless it's an ArmregImm, in case we just switch it over to armreg, losing the value. 
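The imm handling for a static register amounts to a small state machine, and most of the remaining patches are about getting its transitions right. A standalone sketch (names mirror the cache's RegMIPSLoc values; the behavior shown is where the series ends up, with SetImm going straight to ML_IMM):

    enum Loc { ML_IMM, ML_ARMREG, ML_ARMREG_IMM };

    Loc OnSetImm(Loc)       { return ML_IMM; }     // constant known; host reg is stale
    Loc OnMapForRead(Loc l) { return l == ML_IMM ? ML_ARMREG_IMM : l; }  // materialize
    Loc OnMapDirty(Loc)     { return ML_ARMREG; }  // value will change; imm no longer valid
    Loc OnDiscard(Loc)      { return ML_ARMREG; }  // forget the imm, keep the register

The DiscardR change continuing below is exactly that last transition.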
- if (mr[mipsReg].loc == ML_ARMREG_IMM) { + if (mr[mipsReg].loc == ML_ARMREG_IMM || mr[mipsReg].loc == ML_IMM) { + // Ignore the imm value, restore sanity mr[mipsReg].loc = ML_ARMREG; } return; @@ -595,6 +606,9 @@ void Arm64RegCache::FlushAll() { for (int i = 0; i < NUM_MIPSREG; i++) { MIPSGPReg mipsReg = MIPSGPReg(i); if (mr[i].isStatic) { + if (mr[i].loc == ML_IMM) { + SetRegImm(mr[i].reg, mr[i].imm); + } if (i != MIPS_REG_ZERO && mr[i].reg == INVALID_REG) { ELOG("ARM reg of static %i is invalid", i); continue; @@ -632,10 +646,15 @@ void Arm64RegCache::SetImm(MIPSGPReg r, u64 immVal) { } if (mr[r].isStatic) { - // TODO: Just set to IMM - mr[r].loc = ML_ARMREG_IMM; - mr[r].imm = immVal; - SetRegImm(mr[r].reg, immVal); + if (true) { // Set to false to use ML_IMM + mr[r].loc = ML_ARMREG_IMM; + mr[r].imm = immVal; + SetRegImm(mr[r].reg, immVal); + } else { + mr[r].loc = ML_IMM; + mr[r].imm = immVal; + } + // We do not change reg to INVALID_REG for obvious reasons.. } else { // Zap existing value if cached in a reg if (mr[r].reg != INVALID_REG) { @@ -656,7 +675,8 @@ bool Arm64RegCache::IsImm(MIPSGPReg r) const { } u64 Arm64RegCache::GetImm(MIPSGPReg r) const { - if (r == MIPS_REG_ZERO) return 0; + if (r == MIPS_REG_ZERO) + return 0; if (mr[r].loc != ML_IMM && mr[r].loc != ML_ARMREG_IMM) { ERROR_LOG_REPORT(JIT, "Trying to get imm from non-imm register %i", r); } From 1b8549b26f918c76b19075d7ba4e5379b61ff94a Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Mon, 6 Jul 2015 23:26:40 +0200 Subject: [PATCH 06/11] Couple more regcache fixes --- Core/MIPS/ARM64/Arm64RegCache.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Core/MIPS/ARM64/Arm64RegCache.cpp b/Core/MIPS/ARM64/Arm64RegCache.cpp index 6515c6e693..946b473638 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.cpp +++ b/Core/MIPS/ARM64/Arm64RegCache.cpp @@ -259,17 +259,20 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) { if (mr[mipsReg].isStatic) { if (mr[mipsReg].loc == ML_IMM) { - if (!(mapFlags & MAP_NOINIT)) + if ((mapFlags & MAP_NOINIT) == MAP_NOINIT) { + mr[mipsReg].loc = ML_ARMREG; + } else { SetRegImm(armReg, mr[mipsReg].imm); - mr[mipsReg].loc = ML_ARMREG_IMM; + mr[mipsReg].loc = ML_ARMREG_IMM; + } } // Erasing the imm on dirty (necessary since otherwise we will still think it's ML_ARMREG_IMM and return // true for IsImm and calculate crazily wrong things). 
/unknown if (mapFlags & MAP_DIRTY) { mr[mipsReg].loc = ML_ARMREG; + ar[armReg].pointerified = false; ar[armReg].isDirty = true; // Not that it matters } - ar[armReg].pointerified = false; return mr[mipsReg].reg; } From 568e2abb2b4de043f8e082017ba398f334a5ba87 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Tue, 7 Jul 2015 01:12:01 +0200 Subject: [PATCH 07/11] Save/load static registers around replacement funcs --- Core/MIPS/ARM64/Arm64Jit.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp index 22110fd5a9..25042e0ba2 100644 --- a/Core/MIPS/ARM64/Arm64Jit.cpp +++ b/Core/MIPS/ARM64/Arm64Jit.cpp @@ -308,7 +308,7 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) { char temp[256]; if (logBlocks > 0 && dontLogBlocks == 0) { - ILOG("=============== mips ==============="); + ILOG("=============== mips %d ===============", blocks.GetNumBlocks()); for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) { MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, true); ILOG("M: %08x %s", cpc, temp); @@ -379,9 +379,11 @@ bool Arm64Jit::ReplaceJalTo(u32 dest) { gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8); CompileDelaySlot(DELAYSLOT_NICE); FlushAll(); + SaveStaticRegisters(); RestoreRoundingMode(); QuickCallFunction(SCRATCH1_64, (const void *)(entry->replaceFunc)); ApplyRoundingMode(); + LoadStaticRegisters(); WriteDownCountR(W0); } @@ -428,6 +430,7 @@ void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op) } } else if (entry->replaceFunc) { FlushAll(); + SaveStaticRegisters(); RestoreRoundingMode(); gpr.SetRegImm(SCRATCH1, GetCompilerPC()); MovToPC(SCRATCH1); @@ -439,9 +442,11 @@ void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op) if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) { // Compile the original instruction at this address. We ignore cycles for hooks. ApplyRoundingMode(); + LoadStaticRegisters(); MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true)); } else { ApplyRoundingMode(); + LoadStaticRegisters(); LDR(INDEX_UNSIGNED, W1, CTXREG, MIPS_REG_RA * 4); WriteDownCountR(W0); WriteExitDestInR(W1); From d1bbc1d3c0cefd869a26713d0359bcf6ccafda93 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Tue, 7 Jul 2015 01:12:42 +0200 Subject: [PATCH 08/11] More regcache fixes. ML_IMM works now although there is another stability issue somewhere. --- Core/HLE/ReplaceTables.cpp | 2 ++ Core/MIPS/ARM64/Arm64Asm.cpp | 2 +- Core/MIPS/ARM64/Arm64CompVFPU.cpp | 2 ++ Core/MIPS/ARM64/Arm64RegCache.cpp | 44 ++++++++++++++++++++++++------- Core/MIPS/ARM64/Arm64RegCache.h | 1 + 5 files changed, 41 insertions(+), 10 deletions(-) diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index 4f8e18f968..f06e02c728 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -1207,6 +1207,8 @@ void RestoreReplacedInstruction(u32 address) { } void RestoreReplacedInstructions(u32 startAddr, u32 endAddr) { + if (endAddr == startAddr) + return; // Need to be in order, or we'll hang. if (endAddr < startAddr) std::swap(endAddr, startAddr); diff --git a/Core/MIPS/ARM64/Arm64Asm.cpp b/Core/MIPS/ARM64/Arm64Asm.cpp index af8be217c2..61bdbb4efa 100644 --- a/Core/MIPS/ARM64/Arm64Asm.cpp +++ b/Core/MIPS/ARM64/Arm64Asm.cpp @@ -185,7 +185,7 @@ void Arm64Jit::GenerateFixedCode(const JitOptions &jo) { BR(SCRATCH1_64); SetJumpTarget(skipJump); - // No block found, let's jit + // No block found, let's jit. 
I don't think we actually need to save static regs that are in callee-save regs here but whatever. SaveStaticRegisters(); RestoreRoundingMode(true); QuickCallFunction(SCRATCH1_64, (void *)&MIPSComp::JitAt); diff --git a/Core/MIPS/ARM64/Arm64CompVFPU.cpp b/Core/MIPS/ARM64/Arm64CompVFPU.cpp index 7ec8b631ee..4242fa4397 100644 --- a/Core/MIPS/ARM64/Arm64CompVFPU.cpp +++ b/Core/MIPS/ARM64/Arm64CompVFPU.cpp @@ -1841,6 +1841,8 @@ namespace MIPSComp { gpr.FlushBeforeCall(); fpr.FlushAll(); + // Don't need to SaveStaticRegs here as long as they are all in callee-save regs - this callee won't read them. + bool negSin1 = (imm & 0x10) ? true : false; fpr.MapRegV(sreg); diff --git a/Core/MIPS/ARM64/Arm64RegCache.cpp b/Core/MIPS/ARM64/Arm64RegCache.cpp index 946b473638..0844b3c854 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.cpp +++ b/Core/MIPS/ARM64/Arm64RegCache.cpp @@ -70,7 +70,7 @@ const ARM64Reg *Arm64RegCache::GetMIPSAllocationOrder(int &count) { W19, W20, W21, W22, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, }; static const ARM64Reg allocationOrderStaticAlloc[] = { - W21, W22, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, + W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, }; if (jo_->useStaticAlloc) { @@ -88,6 +88,8 @@ const Arm64RegCache::StaticAllocation *Arm64RegCache::GetStaticAllocations(int & static const StaticAllocation allocs[] = { {MIPS_REG_V0, W19}, {MIPS_REG_SP, W20, true}, + {MIPS_REG_A0, W21}, + {MIPS_REG_V1, W22}, }; if (jo_->useStaticAlloc) { @@ -137,8 +139,12 @@ bool Arm64RegCache::IsMapped(MIPSGPReg mipsReg) { } bool Arm64RegCache::IsMappedAsPointer(MIPSGPReg mipsReg) { - if (IsMapped(mipsReg)) { + if (mr[mipsReg].loc == ML_ARMREG) { return ar[mr[mipsReg].reg].pointerified; + } else if (mr[mipsReg].loc == ML_ARMREG_IMM) { + if (ar[mr[mipsReg].reg].pointerified) { + ELOG("Really shouldn't be pointerified here"); + } } return false; } @@ -154,6 +160,7 @@ void Arm64RegCache::SetRegImm(ARM64Reg reg, u64 imm) { } // On ARM64, at least Cortex A57, good old MOVT/MOVW (MOVK in 64-bit) is really fast. emit_->MOVI2R(reg, imm); + // ar[reg].pointerified = false; } void Arm64RegCache::MapRegTo(ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags) { @@ -259,17 +266,20 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) { if (mr[mipsReg].isStatic) { if (mr[mipsReg].loc == ML_IMM) { + // Back into the register, with or without the imm value. if ((mapFlags & MAP_NOINIT) == MAP_NOINIT) { mr[mipsReg].loc = ML_ARMREG; } else { SetRegImm(armReg, mr[mipsReg].imm); mr[mipsReg].loc = ML_ARMREG_IMM; + ar[armReg].pointerified = false; } } + // Erasing the imm on dirty (necessary since otherwise we will still think it's ML_ARMREG_IMM and return // true for IsImm and calculate crazily wrong things). /unknown if (mapFlags & MAP_DIRTY) { - mr[mipsReg].loc = ML_ARMREG; + mr[mipsReg].loc = ML_ARMREG; // As we are dirty, can't keep ARMREG_IMM, we will quickly drift out of sync ar[armReg].pointerified = false; ar[armReg].isDirty = true; // Not that it matters } @@ -336,18 +346,22 @@ allocate: Arm64Gen::ARM64Reg Arm64RegCache::MapRegAsPointer(MIPSGPReg reg) { ARM64Reg retval = INVALID_REG; - if (mr[reg].loc != ML_ARMREG) { + if (mr[reg].loc != ML_ARMREG && mr[reg].loc != ML_ARMREG_IMM) { retval = MapReg(reg); + } else { + retval = mr[reg].reg; } - if (mr[reg].loc == ML_ARMREG) { + if (mr[reg].loc == ML_ARMREG || mr[reg].loc == ML_ARMREG_IMM) { + // If there was an imm attached, discard it. 
+ mr[reg].loc = ML_ARMREG; int a = DecodeReg(mr[reg].reg); if (!ar[a].pointerified) { emit_->MOVK(ARM64Reg(X0 + a), ((uint64_t)Memory::base) >> 32, SHIFT_32); ar[a].pointerified = true; } } else { - ERROR_LOG(JIT, "MapRegAsPointer : MapReg failed to allocate a register?"); + ELOG("MapRegAsPointer : MapReg failed to allocate a register?"); } return retval; } @@ -442,6 +456,7 @@ void Arm64RegCache::DiscardR(MIPSGPReg mipsReg) { if (mr[mipsReg].loc == ML_ARMREG_IMM || mr[mipsReg].loc == ML_IMM) { // Ignore the imm value, restore sanity mr[mipsReg].loc = ML_ARMREG; + ar[mr[mipsReg].reg].pointerified = false; } return; } @@ -570,6 +585,7 @@ void Arm64RegCache::FlushAll() { // Flush it first so we don't get it confused. FlushR(MIPS_REG_LO); + // Try to flush in pairs when possible. // 1 because MIPS_REG_ZERO isn't flushable anyway. // 31 because 30 and 31 are the last possible pair - MIPS_REG_FPCOND, etc. are too far away. for (int i = 1; i < 31; i++) { @@ -579,10 +595,10 @@ void Arm64RegCache::FlushAll() { ARM64Reg areg2 = ARM64RegForFlush(mreg2); // If either one doesn't have a reg yet, try flushing imms to scratch regs. - if (areg1 == INVALID_REG && IsImm(mreg1)) { + if (areg1 == INVALID_REG && IsPureImm(mreg1) && !mr[i].isStatic) { areg1 = SCRATCH1; } - if (areg2 == INVALID_REG && IsImm(mreg2)) { + if (areg2 == INVALID_REG && IsPureImm(mreg2) && !mr[i].isStatic) { areg2 = SCRATCH2; } @@ -611,6 +627,7 @@ void Arm64RegCache::FlushAll() { if (mr[i].isStatic) { if (mr[i].loc == ML_IMM) { SetRegImm(mr[i].reg, mr[i].imm); + ar[mr[mipsReg].reg].pointerified = false; } if (i != MIPS_REG_ZERO && mr[i].reg == INVALID_REG) { ELOG("ARM reg of static %i is invalid", i); @@ -649,13 +666,15 @@ void Arm64RegCache::SetImm(MIPSGPReg r, u64 immVal) { } if (mr[r].isStatic) { - if (true) { // Set to false to use ML_IMM + if (false) { // Set to false to use ML_IMM mr[r].loc = ML_ARMREG_IMM; mr[r].imm = immVal; SetRegImm(mr[r].reg, immVal); + ar[mr[r].reg].pointerified = false; } else { mr[r].loc = ML_IMM; mr[r].imm = immVal; + ar[mr[r].reg].pointerified = false; } // We do not change reg to INVALID_REG for obvious reasons.. } else { @@ -677,6 +696,13 @@ bool Arm64RegCache::IsImm(MIPSGPReg r) const { return mr[r].loc == ML_IMM || mr[r].loc == ML_ARMREG_IMM; } +bool Arm64RegCache::IsPureImm(MIPSGPReg r) const { + if (r == MIPS_REG_ZERO) + return true; + else + return mr[r].loc == ML_IMM; +} + u64 Arm64RegCache::GetImm(MIPSGPReg r) const { if (r == MIPS_REG_ZERO) return 0; diff --git a/Core/MIPS/ARM64/Arm64RegCache.h b/Core/MIPS/ARM64/Arm64RegCache.h index 56c0eec18c..ff3efe20de 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.h +++ b/Core/MIPS/ARM64/Arm64RegCache.h @@ -98,6 +98,7 @@ public: void SetImm(MIPSGPReg reg, u64 immVal); bool IsImm(MIPSGPReg reg) const; + bool IsPureImm(MIPSGPReg reg) const; u64 GetImm(MIPSGPReg reg) const; // Optimally set a register to an imm value (possibly using another register.) void SetRegImm(Arm64Gen::ARM64Reg reg, u64 imm); From 2a8560e52292392be813084b8e95f0d787ae3a9d Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 8 Jul 2015 10:05:18 +0200 Subject: [PATCH 09/11] ARM64: Another pair of fixes for static alloc. Still crashes in many games... 
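The remaining crash gets pinned down two patches later: a static that the allocation table does not mark pointerified can still become pointerified inside a block via MapRegAsPointer, and FlushAll left that flag set. A save/reload round trip (an interpreter fallback, say) restores only the 32-bit guest value, so the stale flag makes later code treat the register's dead upper half as a host pointer:

    // Failure sequence (sketch, with v0 static in W19 per patch 08's table):
    // 1. A block calls MapRegAsPointer(v0): MOVK installs membase, flag set.
    // 2. FlushAll, then SaveStaticRegisters/LoadStaticRegisters round-trips v0
    //    through its 32-bit MIPSState slot: X19's upper half is now gone.
    // 3. ar[W19].pointerified is still true, so the next pointer use of X19
    //    dereferences garbage.
    // The fix in patch 10: FlushAll explicitly de-pointerifies such statics.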
--- Core/MIPS/ARM64/Arm64RegCache.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Core/MIPS/ARM64/Arm64RegCache.cpp b/Core/MIPS/ARM64/Arm64RegCache.cpp index 0844b3c854..36b9d31ea8 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.cpp +++ b/Core/MIPS/ARM64/Arm64RegCache.cpp @@ -598,7 +598,7 @@ void Arm64RegCache::FlushAll() { if (areg1 == INVALID_REG && IsPureImm(mreg1) && !mr[i].isStatic) { areg1 = SCRATCH1; } - if (areg2 == INVALID_REG && IsPureImm(mreg2) && !mr[i].isStatic) { + if (areg2 == INVALID_REG && IsPureImm(mreg2) && !mr[i + 1].isStatic) { areg2 = SCRATCH2; } @@ -627,6 +627,7 @@ void Arm64RegCache::FlushAll() { if (mr[i].isStatic) { if (mr[i].loc == ML_IMM) { SetRegImm(mr[i].reg, mr[i].imm); + mr[i].loc = ML_ARMREG_IMM; ar[mr[mipsReg].reg].pointerified = false; } if (i != MIPS_REG_ZERO && mr[i].reg == INVALID_REG) { From 444fc0885c8c2edcac5f4b6d99904ffb053972e4 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 8 Jul 2015 11:43:38 +0200 Subject: [PATCH 10/11] Fix the crash bug (need to mark static non-pointer regs as depointerified on flush) --- Core/MIPS/ARM64/Arm64RegCache.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Core/MIPS/ARM64/Arm64RegCache.cpp b/Core/MIPS/ARM64/Arm64RegCache.cpp index 36b9d31ea8..e3f221d9e2 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.cpp +++ b/Core/MIPS/ARM64/Arm64RegCache.cpp @@ -645,6 +645,9 @@ void Arm64RegCache::FlushAll() { if (allocs[i].pointerified && !ar[allocs[i].ar].pointerified) { // Re-pointerify emit_->MOVK(EncodeRegTo64(allocs[i].ar), ((uint64_t)Memory::base) >> 32, SHIFT_32); + } else { + // If this register got pointerified on the way, mark it as not, so that after save/reload (like in an interpreter fallback), it won't be regarded as such, as it simply won't be. + ar[allocs[i].ar].pointerified = false; } } // Sanity check From b73920fcba7b12a44947f735f4dfe564a3a9b39d Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 8 Jul 2015 11:44:17 +0200 Subject: [PATCH 11/11] ARM64: Cleanups in regcache and jit --- Core/MIPS/ARM64/Arm64CompALU.cpp | 3 ++- Core/MIPS/ARM64/Arm64Jit.cpp | 3 ++- Core/MIPS/ARM64/Arm64RegCache.cpp | 42 +++++++++++++++++-------------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/Core/MIPS/ARM64/Arm64CompALU.cpp b/Core/MIPS/ARM64/Arm64CompALU.cpp index 289b16b400..0e891e164d 100644 --- a/Core/MIPS/ARM64/Arm64CompALU.cpp +++ b/Core/MIPS/ARM64/Arm64CompALU.cpp @@ -389,7 +389,8 @@ void Arm64Jit::CompShiftVar(MIPSOpcode op, Arm64Gen::ShiftType shiftType) { return; } gpr.MapDirtyInIn(rd, rs, rt); - ANDI2R(SCRATCH1, gpr.R(rs), 0x1F, INVALID_REG); // Not sure if ARM64 wraps like this so let's do it for it. + // Not sure if ARM64 wraps like this so let's do it for it. 
(TODO: According to the ARM ARM, it will indeed mask for us so this is not necessary) + ANDI2R(SCRATCH1, gpr.R(rs), 0x1F, INVALID_REG); switch (shiftType) { case ST_LSL: LSLV(gpr.R(rd), gpr.R(rt), SCRATCH1); break; case ST_LSR: LSRV(gpr.R(rd), gpr.R(rt), SCRATCH1); break; diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp index 25042e0ba2..880bdda20c 100644 --- a/Core/MIPS/ARM64/Arm64Jit.cpp +++ b/Core/MIPS/ARM64/Arm64Jit.cpp @@ -233,7 +233,8 @@ MIPSOpcode Arm64Jit::GetOffsetInstruction(int offset) { const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) { js.cancel = false; - js.blockStart = js.compilerPC = mips_->pc; + js.blockStart = mips_->pc; + js.compilerPC = mips_->pc; js.lastContinuedPC = 0; js.initialBlockSize = 0; js.nextExit = 0; diff --git a/Core/MIPS/ARM64/Arm64RegCache.cpp b/Core/MIPS/ARM64/Arm64RegCache.cpp index e3f221d9e2..fd888ac5f8 100644 --- a/Core/MIPS/ARM64/Arm64RegCache.cpp +++ b/Core/MIPS/ARM64/Arm64RegCache.cpp @@ -86,10 +86,10 @@ const Arm64RegCache::StaticAllocation *Arm64RegCache::GetStaticAllocations(int & static const StaticAllocation none[] = { }; static const StaticAllocation allocs[] = { - {MIPS_REG_V0, W19}, - {MIPS_REG_SP, W20, true}, - {MIPS_REG_A0, W21}, + {MIPS_REG_SP, W19, true}, + {MIPS_REG_V0, W20}, {MIPS_REG_V1, W22}, + {MIPS_REG_A0, W21}, }; if (jo_->useStaticAlloc) { @@ -265,17 +265,18 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) { ARM64Reg armReg = mr[mipsReg].reg; if (mr[mipsReg].isStatic) { + if (armReg == INVALID_REG) { + ERROR_LOG(JIT, "MapReg on statically mapped reg %d failed - armReg got lost", mipsReg); + } if (mr[mipsReg].loc == ML_IMM) { // Back into the register, with or without the imm value. - if ((mapFlags & MAP_NOINIT) == MAP_NOINIT) { - mr[mipsReg].loc = ML_ARMREG; - } else { + // If noinit, the MAP_DIRTY check below will take care of the rest. + if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) { SetRegImm(armReg, mr[mipsReg].imm); mr[mipsReg].loc = ML_ARMREG_IMM; ar[armReg].pointerified = false; } } - // Erasing the imm on dirty (necessary since otherwise we will still think it's ML_ARMREG_IMM and return // true for IsImm and calculate crazily wrong things). /unknown if (mapFlags & MAP_DIRTY) { @@ -454,9 +455,11 @@ void Arm64RegCache::DiscardR(MIPSGPReg mipsReg) { if (mr[mipsReg].isStatic) { // Simply do nothing unless it's an ArmregImm, in case we just switch it over to armreg, losing the value. if (mr[mipsReg].loc == ML_ARMREG_IMM || mr[mipsReg].loc == ML_IMM) { + ARM64Reg armReg = mr[mipsReg].reg; // Ignore the imm value, restore sanity mr[mipsReg].loc = ML_ARMREG; - ar[mr[mipsReg].reg].pointerified = false; + ar[armReg].pointerified = false; + ar[armReg].isDirty = false; } return; } @@ -625,10 +628,16 @@ void Arm64RegCache::FlushAll() { for (int i = 0; i < NUM_MIPSREG; i++) { MIPSGPReg mipsReg = MIPSGPReg(i); if (mr[i].isStatic) { + Arm64Gen::ARM64Reg armReg = mr[i].reg; if (mr[i].loc == ML_IMM) { SetRegImm(mr[i].reg, mr[i].imm); mr[i].loc = ML_ARMREG_IMM; - ar[mr[mipsReg].reg].pointerified = false; + ar[armReg].pointerified = false; + } else if (mr[i].loc == ML_ARMREG_IMM) { + if (ar[armReg].pointerified) { + ELOG("ML_ARMREG_IMM but pointerified. 
Wrong."); + ar[armReg].pointerified = false; + } } if (i != MIPS_REG_ZERO && mr[i].reg == INVALID_REG) { ELOG("ARM reg of static %i is invalid", i); @@ -670,22 +679,17 @@ void Arm64RegCache::SetImm(MIPSGPReg r, u64 immVal) { } if (mr[r].isStatic) { - if (false) { // Set to false to use ML_IMM - mr[r].loc = ML_ARMREG_IMM; - mr[r].imm = immVal; - SetRegImm(mr[r].reg, immVal); - ar[mr[r].reg].pointerified = false; - } else { - mr[r].loc = ML_IMM; - mr[r].imm = immVal; - ar[mr[r].reg].pointerified = false; - } + mr[r].loc = ML_IMM; + mr[r].imm = immVal; + Arm64Gen::ARM64Reg armReg = mr[r].reg; + ar[armReg].pointerified = false; // We do not change reg to INVALID_REG for obvious reasons.. } else { // Zap existing value if cached in a reg if (mr[r].reg != INVALID_REG) { ar[mr[r].reg].mipsReg = MIPS_REG_INVALID; ar[mr[r].reg].isDirty = false; + ar[mr[r].reg].pointerified = false; } mr[r].loc = ML_IMM; mr[r].imm = immVal;
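Where the series lands (sp static and pointerified in W19, with v0, a0 and v1 pinned in W20, W21 and W22), the pieces compose: the very common guest stack adjustment needs no load, store, or re-pointerification. Roughly what the patch-04 fast path emits for it (a sketch using the jit's own helpers):

    // addiu sp, sp, -16  with sp pinned and pointerified in W19/X19:
    gpr.MarkDirty(gpr.R(MIPS_REG_SP));
    ARM64Reg r = EncodeRegTo64(gpr.R(MIPS_REG_SP));  // X19 under patch 11's table
    SUBI2R(r, r, 16);                                // single SUB x19, x19, #16
    // W19 now holds the new guest sp; X19 is still membase + sp, directly
    // usable as a host pointer by the next memory access.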