Mirror of https://github.com/hrydgard/ppsspp.git (synced 2025-04-02 11:01:50 -04:00)
More JIT work, not quite there yet...
This commit is contained in:
parent f732fbb885
commit e7e58591da
8 changed files with 114 additions and 166 deletions
@@ -31,7 +31,7 @@ using namespace Arm64Gen;
//static int temp32; // unused?
static const bool enableDebug = false;
static const bool enableDebug = true;
//static bool enableStatistics = false; //unused?

@@ -68,8 +68,9 @@ static const bool enableDebug = false;
// saving them when we call out of the JIT. We will perform regular dynamic register allocation in the rest (x0-x15)
// STATIC ALLOCATION ARM64 (these are all callee-save registers):
// x25 : MSR/MRS temporary (to be eliminated later)
// x26 : JIT base reg
// x27 : MIPS state
// x27 : MIPS state (Could eliminate by placing the MIPS state right at the memory base)
// x28 : Memory base pointer.
// x29 : Down counter

@@ -102,7 +103,6 @@ void Arm64Jit::GenerateFixedCode() {
ABI_PushRegisters(regs_to_save);
// Fixed registers, these are always kept when in Jit context.
// R8 is used to hold flags during delay slots. Not always needed.
// R13 cannot be used as it's the stack pointer.

@@ -119,7 +119,7 @@ void Arm64Jit::GenerateFixedCode() {
RestoreDowncount();
MovFromPC(SCRATCH1);
outerLoopPCInR0 = GetCodePtr();
outerLoopPCInSCRATCH1 = GetCodePtr();
MovToPC(SCRATCH1);
outerLoop = GetCodePtr();
SaveDowncount();

@@ -159,8 +159,7 @@ void Arm64Jit::GenerateFixedCode() {
// Debug
if (enableDebug) {
// MOV(SCRATCH1, R13);
// QuickCallFunction(R1, (void *)&ShowPC);
QuickCallFunction(SCRATCH1, (void *)&ShowPC);
}

LDR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, pc));
@@ -168,7 +167,7 @@ void Arm64Jit::GenerateFixedCode() {
ANDI2R(SCRATCH2, SCRATCH1, 0xFF000000); // rotation is to the right, in 2-bit increments.
ANDI2R(SCRATCH1, SCRATCH1, 0x00FFFFFF); // TODO: Replace this and the next op by a bit field extract
LSR(SCRATCH2, SCRATCH2, 24);
CMP(SCRATCH2, MIPS_EMUHACK_OPCODE);
CMP(SCRATCH2, MIPS_EMUHACK_OPCODE>>24);
FixupBranch skipJump = B(CC_NEQ);
// IDEA - we have 26 bits, why not just use offsets from base of code?
// Another idea: Shift the bloc number left by two in the op, this would let us do
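A standalone sketch (not PPSSPP code) of what the emuhack check above computes: the top byte of the opcode word is compared against the emuhack tag and the lower 24 bits carry the block offset. The constant value here is a placeholder for illustration, not the real MIPS_EMUHACK_OPCODE.

#include <cstdint>
#include <cstdio>

static const uint32_t kEmuhackOpcode = 0x68000000;  // assumed placeholder value, not the real constant

int main() {
    uint32_t op = kEmuhackOpcode | 0x001234;         // pretend this was loaded from MIPSState::pc
    uint32_t tag = op >> 24;                         // mirrors ANDI2R(0xFF000000) + LSR #24
    uint32_t blockOffset = op & 0x00FFFFFF;          // mirrors ANDI2R(0x00FFFFFF)
    if (tag == (kEmuhackOpcode >> 24))
        printf("emuhack hit, block offset 0x%06x\n", blockOffset);
    else
        printf("not an emuhack op, fall back to the compiler\n");
    return 0;
}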
@@ -207,9 +206,12 @@ void Arm64Jit::GenerateFixedCode() {
ABI_PopRegisters(regs_to_save);

INFO_LOG(JIT, "THE DISASM ========================");
DisassembleArm64(enterCode, GetCodePtr() - enterCode);
INFO_LOG(JIT, "END OF THE DISASM ========================");
INFO_LOG(JIT, "THE DISASM : %p ========================", enterCode);
std::vector<std::string> lines = DisassembleArm64(enterCode, GetCodePtr() - enterCode);
for (auto s : lines) {
INFO_LOG(JIT, "%s", s.c_str());
}
INFO_LOG(JIT, "END OF THE DISASM : %p ========================", GetCodePtr());

// Don't forget to zap the instruction cache!
FlushIcache();
@@ -124,9 +124,14 @@ void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
// We might be able to flip the condition (EQ/NEQ are easy.)
const bool canFlip = cc == CC_EQ || cc == CC_NEQ;

// TODO ARM64: Optimize for immediates
gpr.MapInIn(rs, rt);
CMP(gpr.R(rs), gpr.R(rt));
// TODO ARM64: Optimize for immediates other than zero
if (rt == 0) {
gpr.MapIn(rs);
CMP(gpr.R(rs), 0);
} else {
gpr.MapInIn(rs, rt);
CMP(gpr.R(rs), gpr.R(rt));
}

Arm64Gen::FixupBranch ptr;
if (!likely) {
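The rt == 0 path above avoids mapping the always-zero MIPS register and compares against an immediate instead. A minimal sketch of that decision, with hypothetical stand-ins for the emitter calls (not the real register cache or emitter):

#include <cstdio>

// Hypothetical stand-ins for the emitter calls used in the hunk above.
static void EmitCmpImm(int hostReg, unsigned imm) { printf("CMP w%d, #%u\n", hostReg, imm); }
static void EmitCmpReg(int hostRegA, int hostRegB) { printf("CMP w%d, w%d\n", hostRegA, hostRegB); }

// mipsRt == 0 is the MIPS $zero register: compare against an immediate instead of
// mapping (and possibly spilling for) a second host register.
static void CompareForBranch(int mappedRs, int mipsRt, int mappedRt) {
    if (mipsRt == 0)
        EmitCmpImm(mappedRs, 0);
    else
        EmitCmpReg(mappedRs, mappedRt);
}

int main() {
    CompareForBranch(19, 0, -1);   // beq $a0, $zero, target  -> CMP w19, #0
    CompareForBranch(19, 5, 20);   // beq $a0, $a1, target    -> CMP w19, w20
    return 0;
}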
@@ -493,7 +498,7 @@ void Arm64Jit::Comp_JumpReg(MIPSOpcode op)
delaySlotIsNice = false;
CONDITIONAL_NICE_DELAYSLOT;

ARM64Reg destReg = X8;
ARM64Reg destReg = X18;
if (IsSyscall(delaySlotOp)) {
gpr.MapReg(rs);
MovToPC(gpr.R(rs)); // For syscall to be able to return.

@@ -502,6 +507,7 @@ void Arm64Jit::Comp_JumpReg(MIPSOpcode op)
CompileDelaySlot(DELAYSLOT_FLUSH);
return; // Syscall wrote exit code.
} else if (delaySlotIsNice) {
INFO_LOG(JIT, "jreg DelaySlotIsNice");
if (andLink)
gpr.SetImm(rd, js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);

@@ -533,7 +539,7 @@ void Arm64Jit::Comp_JumpReg(MIPSOpcode op)
} else {
// Delay slot - this case is very rare, might be able to free up R8.
gpr.MapReg(rs);
MOV(W8, gpr.R(rs));
MOV(X18, gpr.R(rs));
if (andLink)
gpr.SetImm(rd, js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);
@@ -586,15 +592,12 @@ void Arm64Jit::Comp_Syscall(MIPSOpcode op)
SaveDowncount();
// Skip the CallSyscall where possible.
void *quickFunc = GetQuickSyscallFunc(op);
if (quickFunc)
{
gpr.SetRegImm(W0, (u32)(intptr_t)GetSyscallInfo(op));
if (quickFunc) {
MOVI2R(W0, (u32)(intptr_t)GetSyscallInfo(op));
// Already flushed, so X1 is safe.
QuickCallFunction(X1, quickFunc);
}
else
{
gpr.SetRegImm(W0, op.encoding);
} else {
MOVI2R(W0, op.encoding);
QuickCallFunction(X1, (void *)&CallSyscall);
}
ApplyRoundingMode();
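A plain C++ sketch of the control flow Comp_Syscall emits above; the types and the toy lookup are hypothetical, only the quick-path/slow-path split is taken from the diff.

#include <cstdint>
#include <cstdio>

typedef void (*QuickSyscallFunc)(uintptr_t syscallInfo);

static void CallSyscallGeneric(uint32_t op) { printf("CallSyscall(0x%08x)\n", op); }
static void QuickDummyHandler(uintptr_t info) { printf("quick syscall, info=0x%llx\n", (unsigned long long)info); }

// Toy lookup standing in for GetQuickSyscallFunc().
static QuickSyscallFunc LookupQuickFunc(uint32_t op) {
    return (op & 1) ? QuickDummyHandler : nullptr;
}

static void CompSyscallSketch(uint32_t op, uintptr_t syscallInfo) {
    if (QuickSyscallFunc quick = LookupQuickFunc(op)) {
        quick(syscallInfo);          // MOVI2R(W0, info); QuickCallFunction(X1, quickFunc);
    } else {
        CallSyscallGeneric(op);      // MOVI2R(W0, op.encoding); QuickCallFunction(X1, &CallSyscall);
    }
}

int main() {
    CompSyscallSketch(0x0000000d, 0x1234);  // takes the quick path in this toy lookup
    CompSyscallSketch(0x0000000c, 0);       // takes the generic CallSyscall path
    return 0;
}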
@@ -35,13 +35,22 @@
#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "ext/disarm.h"
#include "Core/MIPS/JitCommon/JitCommon.h"

using namespace Arm64JitConstants;

void DisassembleArm64Print(const u8 *data, int size) {
ILOG("ARM64 TODO");
std::vector<std::string> lines = DisassembleArm64(data, size);
for (auto s : lines) {
ILOG("%s", s.c_str());
}
ILOG("+++");
// A format friendly to Online Disassembler which gets endianness wrong
for (size_t i = 0; i < lines.size(); i++) {
uint32_t opcode = ((uint32_t *)data)[i];
ILOG("%08x", swap32(opcode));
}
ILOG("===");
}

namespace MIPSComp
@@ -124,6 +133,7 @@ void Arm64Jit::FlushPrefixV()

void Arm64Jit::ClearCache()
{
ILOG("ARM64Jit: Clearing the cache!");
blocks.Clear();
ClearCodeSpace();
GenerateFixedCode();
@@ -155,15 +165,32 @@ void Arm64Jit::EatInstruction(MIPSOpcode op) {

void Arm64Jit::CompileDelaySlot(int flags)
{
// TODO ARM64
// preserve flag around the delay slot! Maybe this is not always necessary on ARM where
// we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt in to the
// delay slot, we're screwed.
if (flags & DELAYSLOT_SAFE)
MRS(FLAGTEMPREG, FIELD_NZCV); // Save flags register. X18 is preserved through function calls and is not allocated.

js.inDelaySlot = true;
MIPSOpcode op = Memory::Read_Opcode_JIT(js.compilerPC + 4);
MIPSCompileOp(op);
js.inDelaySlot = false;

if (flags & DELAYSLOT_FLUSH)
FlushAll();
if (flags & DELAYSLOT_SAFE)
_MSR(FIELD_NZCV, FLAGTEMPREG); // Restore flags register
}
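The DELAYSLOT_SAFE handling above keeps the branch's NZCV flags alive across whatever gets compiled in the delay slot (the comment mentions X18, while the FLAGTEMPREG constant in the Arm64RegCache.h hunk further down is X25). A minimal sketch of the save/compile/restore pattern with hypothetical helpers and placeholder flag values:

#include <cstdint>
#include <cstdio>

enum { DELAYSLOT_NICE = 0, DELAYSLOT_SAFE = 1, DELAYSLOT_FLUSH = 2 };  // placeholder values

struct FlagModel {
    uint32_t nzcv = 0;
    uint32_t saved = 0;
    void Save()    { saved = nzcv; }   // stands in for MRS FLAGTEMPREG, NZCV
    void Restore() { nzcv = saved; }   // stands in for MSR NZCV, FLAGTEMPREG
};

static void CompileDelaySlotSketch(FlagModel &flags, int slotFlags, void (*compileOp)(FlagModel &)) {
    if (slotFlags & DELAYSLOT_SAFE)
        flags.Save();              // keep the branch condition alive across the slot
    compileOp(flags);              // the delay-slot instruction may clobber NZCV
    if (slotFlags & DELAYSLOT_SAFE)
        flags.Restore();           // put the condition back before the conditional branch
}

int main() {
    FlagModel flags;
    flags.nzcv = 0x40000000;       // pretend a compare just set Z
    CompileDelaySlotSketch(flags, DELAYSLOT_SAFE, [](FlagModel &f) { f.nzcv = 0; });
    printf("NZCV after safe slot: 0x%08x\n", flags.nzcv);
    return 0;
}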
void Arm64Jit::Compile(u32 em_address) {
if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) {
INFO_LOG(JIT, "Space left: %i", GetSpaceLeft());
ClearCache();
}

INFO_LOG(JIT, "In Compile, at %08x!", em_address);

int block_num = blocks.AllocateBlock(em_address);
JitBlock *b = blocks.GetBlock(block_num);
DoJit(em_address, b);
@@ -213,37 +240,31 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b)
js.inDelaySlot = false;
js.PrefixStart();

logBlocks = 1;

// We add a downcount flag check before the block, used when entering from a linked block.
// The last block decremented downcounter, and the flag should still be available.
// Got three variants here of where we position the code, needs detailed benchmarking.

FixupBranch bail;
/*
if (jo.useBackJump) {
// Moves the MOVI2R and B *before* checkedEntry, and just branch backwards there.
// Speedup seems to be zero unfortunately but I guess it may vary from device to device.
// Not intrusive so keeping it around here to experiment with, may help on ARMv6 due to
// large/slow construction of 32-bit immediates?
JumpTarget backJump = GetCodePtr();
gpr.SetRegImm(R0, js.blockStart);
B((const void *)outerLoopPCInR0);
const u8 *backJump = GetCodePtr();
MOVI2R(SCRATCH1, js.blockStart);
B((const void *)outerLoopPCInSCRATCH1);
b->checkedEntry = GetCodePtr();
SetCC(CC_LT);
B(backJump);
SetCC(CC_AL);
B(CC_LT, backJump);
} else if (jo.useForwardJump) {
b->checkedEntry = GetCodePtr();
SetCC(CC_LT);
bail = B();
SetCC(CC_AL);
bail = B(CC_LT);
} else {
b->checkedEntry = GetCodePtr();
SetCC(CC_LT);
gpr.SetRegImm(R0, js.blockStart);
B((const void *)outerLoopPCInR0);
SetCC(CC_AL);
}*/
// TODO ARM64
MOVI2R(SCRATCH1, js.blockStart);
B(CC_LT, (const void *)outerLoopPCInSCRATCH1);
}

b->normalEntry = GetCodePtr();
// TODO: this needs work
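The surviving (non-commented) path above is the downcount check the comment describes: a linked block arrives with the flags from the previous block's downcount subtraction, and CC_LT bails to the dispatcher with the block's start PC in SCRATCH1. A conceptual sketch in plain C++ (hypothetical names and PC value), not the emitted code:

#include <cstdint>
#include <cstdio>

struct BlockSketch {
    uint32_t startPC;
    void (*normalEntry)();                 // stands in for the code emitted after the check
};

static void Dispatcher(uint32_t pc) { printf("bail to dispatcher, pc=0x%08x\n", pc); }

// checkedEntry: only fall into the block body if there is downcount left.
static void CheckedEntry(const BlockSketch &b, int32_t downcount) {
    if (downcount < 0) {                   // B(CC_LT, outerLoopPCInSCRATCH1), PC already in SCRATCH1
        Dispatcher(b.startPC);
        return;
    }
    b.normalEntry();
}

int main() {
    BlockSketch b{0x08804000, []() { printf("running block body\n"); }};
    CheckedEntry(b, 120);
    CheckedEntry(b, -3);
    return 0;
}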
@@ -281,9 +302,9 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b)
}

if (jo.useForwardJump) {
//SetJumpTarget(bail);
//gpr.SetRegImm(R0, js.blockStart);
//B((const void *)outerLoopPCInR0);
SetJumpTarget(bail);
gpr.SetRegImm(SCRATCH1, js.blockStart);
B((const void *)outerLoopPCInSCRATCH1);
}

char temp[256];
@@ -298,7 +319,7 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b)
b->codeSize = GetCodePtr() - b->normalEntry;

if (logBlocks > 0 && dontLogBlocks == 0) {
INFO_LOG(JIT, "=============== ARM ===============");
INFO_LOG(JIT, "=============== ARM (%d instructions -> %d bytes) ===============", js.numInstructions, b->codeSize);
DisassembleArm64Print(b->normalEntry, GetCodePtr() - b->normalEntry);
}
if (logBlocks > 0)

@@ -317,6 +338,7 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b)
blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
b->originalSize = js.initialBlockSize;
}

return b->normalEntry;
}
@@ -359,18 +381,16 @@ void Arm64Jit::Comp_Generic(MIPSOpcode op)
SaveDowncount();
// TODO: Perhaps keep the rounding mode for interp?
RestoreRoundingMode();
// gpr.SetRegImm(SCRATCHREG1, js.compilerPC);
// MovToPC(SCRATCHREG1);
//gpr.SetRegImm(R0, op.encoding);
//QuickCallFunction(R1, (void *)func);
// TODO ARM64
MOVI2R(SCRATCH1, js.compilerPC);
MovToPC(SCRATCH1);
MOVI2R(W0, op.encoding);
QuickCallFunction(SCRATCH2_64, (void *)func);
ApplyRoundingMode();
RestoreDowncount();
}

const MIPSInfo info = MIPSGetInfo(op);
if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0)
{
if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0) {
// If it does eat them, it'll happen in MIPSCompileOp().
if ((info & OUT_EAT_PREFIX) == 0)
js.PrefixUnknown();
@@ -386,28 +406,19 @@ void Arm64Jit::MovToPC(ARM64Reg r) {
}

void Arm64Jit::SaveDowncount() {
if (jo.downcountInRegister)
STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
}

void Arm64Jit::RestoreDowncount() {
if (jo.downcountInRegister)
LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
}

void Arm64Jit::WriteDownCount(int offset) {
// TODO ARM64
}

// Abuses R2
void Arm64Jit::WriteDownCountR(ARM64Reg reg) {
if (jo.downcountInRegister) {
SUBS(DOWNCOUNTREG, DOWNCOUNTREG, reg);
} else {
LDR(INDEX_UNSIGNED, X2, CTXREG, offsetof(MIPSState, downcount));
SUBS(X2, X2, reg);
STR(INDEX_UNSIGNED, X2, CTXREG, offsetof(MIPSState, downcount));
}
SUBS(DOWNCOUNTREG, DOWNCOUNTREG, reg);
}
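SaveDowncount/RestoreDowncount/WriteDownCountR above keep the cycle budget in DOWNCOUNTREG while inside the JIT and spill it to MIPSState::downcount around calls out. A plain C++ model of that bookkeeping (names and the state layout here are simplified stand-ins, not PPSSPP types):

#include <cstdint>
#include <cstdio>

struct MipsStateModel { int32_t downcount; };   // simplified stand-in for MIPSState

struct DowncountModel {
    MipsStateModel *mips;
    int32_t reg;                                // models DOWNCOUNTREG

    void Save()    { mips->downcount = reg; }   // STR DOWNCOUNTREG, [CTXREG, #offsetof(MIPSState, downcount)]
    void Restore() { reg = mips->downcount; }   // LDR DOWNCOUNTREG, [CTXREG, #offsetof(MIPSState, downcount)]
    bool Charge(int32_t cycles) {               // SUBS DOWNCOUNTREG, DOWNCOUNTREG, cycles; the flags tell us when to leave
        reg -= cycles;
        return reg < 0;
    }
};

int main() {
    MipsStateModel mips{1000};
    DowncountModel dc{&mips, 0};
    dc.Restore();                               // entering JIT code
    while (!dc.Charge(300)) {}                  // run blocks until the budget runs out
    dc.Save();                                  // calling out / leaving JIT code
    printf("downcount written back: %d\n", mips.downcount);
    return 0;
}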
void Arm64Jit::RestoreRoundingMode(bool force) {
@@ -441,7 +452,7 @@ void Arm64Jit::WriteExit(u32 destination, int exit_num)
B(blocks.GetBlock(block)->checkedEntry);
b->linkStatus[exit_num] = true;
} else {
gpr.SetRegImm(X0, destination);
MOVI2R(SCRATCH1, destination);
B((const void *)dispatcherPCInSCRATCH1);
}
}
@@ -37,8 +37,7 @@ namespace MIPSComp
struct Arm64JitOptions
{
Arm64JitOptions() {
enableBlocklink = true;
downcountInRegister = true;
enableBlocklink = false;
useBackJump = false;
useForwardJump = false;
cachePointers = true;

@@ -54,7 +53,6 @@ struct Arm64JitOptions
bool useNEONVFPU;
bool enableBlocklink;
bool downcountInRegister;
bool useBackJump;
bool useForwardJump;
bool cachePointers;

@@ -278,7 +276,7 @@ public:
const u8 *enterCode;

const u8 *outerLoop;
const u8 *outerLoopPCInR0;
const u8 *outerLoopPCInSCRATCH1;
const u8 *dispatcherCheckCoreState;
const u8 *dispatcherPCInSCRATCH1;
const u8 *dispatcher;
@@ -50,35 +50,18 @@ void Arm64RegCache::Start(MIPSAnalyst::AnalysisResults &stats) {
}

const ARM64Reg *Arm64RegCache::GetMIPSAllocationOrder(int &count) {
// Note that R0 is reserved as scratch for now.
// R12 is also potentially usable.
// R4-R7 are registers we could use for static allocation or downcount.
// R8 is used to preserve flags in nasty branches.
// R9 and upwards are reserved for jit basics.
// R14 (LR) is used as a scratch reg (overwritten on calls/return.)

// TODO ARM64
if (jo_->downcountInRegister) {
static const ARM64Reg allocationOrder[] = {
X1, X2, X3, X4, X5, X6, X12,
};
count = sizeof(allocationOrder) / sizeof(const int);
return allocationOrder;
} else {
static const ARM64Reg allocationOrder2[] = {
X1, X2, X3, X4, X5, X6, X7, X12,
};
count = sizeof(allocationOrder2) / sizeof(const int);
return allocationOrder2;
}
// See register alloc remarks in Arm64Asm.cpp
// TODO: Add static allocation of top MIPS registers like SP
static const ARM64Reg allocationOrder[] = {
W19, W20, W21, W22, W23, W24, W25, W27, W28, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, W0, W1,
};
count = sizeof(allocationOrder) / sizeof(const int);
return allocationOrder;
}
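Side note, not part of the commit: count = sizeof(allocationOrder) / sizeof(const int) above only yields the element count while ARM64Reg happens to have the same size as int. A sketch of the size-independent idiom, using a hypothetical narrower enum to show the difference:

#include <cstddef>
#include <cstdio>

enum class RegDemo : unsigned char { W19, W20, W21 };    // hypothetical 1-byte register enum

template <typename T, size_t N>
constexpr size_t ArraySize(const T (&)[N]) { return N; } // counts elements regardless of their size

int main() {
    static const RegDemo order[] = { RegDemo::W19, RegDemo::W20, RegDemo::W21 };
    printf("elements: %zu, sizeof-based byte math: %zu\n",
           ArraySize(order), sizeof(order) / sizeof(const int));
    return 0;
}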
void Arm64RegCache::FlushBeforeCall() {
// R4-R11 are preserved. Others need flushing.
FlushArmReg(X1);
FlushArmReg(X2);
FlushArmReg(X3);
FlushArmReg(X12);
// TODO: More optimal
FlushAll();
}

bool Arm64RegCache::IsMapped(MIPSGPReg mipsReg) {
@@ -105,9 +88,13 @@ void Arm64RegCache::MapRegTo(ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags) {
} else {
switch (mr[mipsReg].loc) {
case ML_MEM:
emit_->LDR(INDEX_UNSIGNED, reg, CTXREG, GetMipsRegOffset(mipsReg));
{
int offset = GetMipsRegOffset(mipsReg);
INFO_LOG(JIT, "MapRegTo %d mips: %d offset %d", (int)reg, mipsReg, offset);
emit_->LDR(INDEX_UNSIGNED, reg, CTXREG, offset);
mr[mipsReg].loc = ML_ARMREG;
break;
}
case ML_IMM:
SetRegImm(reg, mr[mipsReg].imm);
ar[reg].isDirty = true; // IMM is always dirty.
@@ -190,20 +177,6 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) {
int allocCount;
const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount);

ARM64Reg desiredReg = INVALID_REG;
// Try to "statically" allocate the first 6 regs after v0.
int desiredOrder = allocCount - (6 - (mipsReg - (int)MIPS_REG_V0));
if (desiredOrder >= 0 && desiredOrder < allocCount)
desiredReg = allocOrder[desiredOrder];

if (desiredReg != INVALID_REG) {
if (ar[desiredReg].mipsReg == MIPS_REG_INVALID) {
// With this placement, we may be able to optimize flush.
MapRegTo(desiredReg, mipsReg, mapFlags);
return desiredReg;
}
}

allocate:
for (int i = 0; i < allocCount; i++) {
ARM64Reg reg = allocOrder[i];
@@ -240,6 +213,10 @@ allocate:
return INVALID_REG;
}

void Arm64RegCache::MapIn(MIPSGPReg rs) {
MapReg(rs);
}

void Arm64RegCache::MapInIn(MIPSGPReg rd, MIPSGPReg rs) {
SpillLock(rd, rs);
MapReg(rd);
@@ -340,7 +317,7 @@ void Arm64RegCache::FlushR(MIPSGPReg r) {
}
if (ar[mr[r].reg].isDirty) {
if (r != MIPS_REG_ZERO) {
emit_->STR(INDEX_UNSIGNED, (ARM64Reg)mr[r].reg, CTXREG, GetMipsRegOffset(r));
emit_->STR(INDEX_UNSIGNED, mr[r].reg, CTXREG, GetMipsRegOffset(r));
}
ar[mr[r].reg].isDirty = false;
}
@@ -360,58 +337,6 @@ void Arm64RegCache::FlushR(MIPSGPReg r) {
mr[r].imm = 0;
}

// Note: if allowFlushImm is set, this also flushes imms while checking the sequence.
int Arm64RegCache::FlushGetSequential(MIPSGPReg startMipsReg, bool allowFlushImm) {
// Only start a sequence on a dirty armreg.
// TODO: Could also start with an imm?
const auto &startMipsInfo = mr[startMipsReg];
if ((startMipsInfo.loc != ML_ARMREG && startMipsInfo.loc != ML_ARMREG_IMM) || startMipsInfo.reg == INVALID_REG || !ar[startMipsInfo.reg].isDirty) {
return 0;
}

int allocCount;
const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount);

int c = 1;
// The sequence needs to have ascending arm regs for STMIA.
int lastArmReg = startMipsInfo.reg;
// Can't use HI/LO, only regs in the main r[] array.
for (int r = (int)startMipsReg + 1; r < 32; ++r) {
if ((mr[r].loc == ML_ARMREG || mr[r].loc == ML_ARMREG_IMM) && mr[r].reg != INVALID_REG) {
if ((int)mr[r].reg > lastArmReg && ar[mr[r].reg].isDirty) {
++c;
lastArmReg = mr[r].reg;
continue;
}
// If we're not allowed to flush imms, don't even consider them.
} else if (allowFlushImm && mr[r].loc == ML_IMM && MIPSGPReg(r) != MIPS_REG_ZERO) {
// Okay, let's search for a free (and later) reg to put this imm into.
bool found = false;
for (int j = 0; j < allocCount; ++j) {
ARM64Reg immReg = allocOrder[j];
if ((int)immReg > lastArmReg && ar[immReg].mipsReg == MIPS_REG_INVALID) {
++c;
lastArmReg = immReg;

// Even if the sequence fails, we'll need it in a reg anyway, might as well be this one.
MapRegTo(immReg, MIPSGPReg(r), 0);
found = true;
break;
}
}
if (found) {
continue;
}
}

// If it didn't hit a continue above, the chain is over.
// There's no way to skip a slot with STMIA.
break;
}

return c;
}
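FlushGetSequential above scans for MIPS registers that map to strictly ascending, dirty host registers, since only such a run can be written back with one multi-register store (the STMIA comment is ARM32 heritage; on ARM64 the equivalent would be STP pairs). A standalone sketch of the scan with a simplified, hypothetical data layout:

#include <cstdio>

struct MipsRegModel { int hostReg; bool inHostReg; };   // simplified stand-in for mr[]
struct HostRegModel { bool dirty; };                    // simplified stand-in for ar[]

static int CountSequential(const MipsRegModel *mr, const HostRegModel *ar, int start, int total) {
    if (!mr[start].inHostReg || !ar[mr[start].hostReg].dirty)
        return 0;
    int count = 1;
    int lastHost = mr[start].hostReg;
    for (int r = start + 1; r < total; ++r) {
        // Host registers must keep ascending; a gap cannot be skipped in one store sequence.
        if (mr[r].inHostReg && mr[r].hostReg > lastHost && ar[mr[r].hostReg].dirty) {
            ++count;
            lastHost = mr[r].hostReg;
        } else {
            break;
        }
    }
    return count;
}

int main() {
    MipsRegModel mr[5] = { {3, true}, {4, true}, {2, true}, {6, true}, {0, false} };
    HostRegModel ar[8] = {};
    ar[2].dirty = ar[3].dirty = ar[4].dirty = ar[6].dirty = true;
    printf("sequential run starting at reg 0: %d\n", CountSequential(mr, ar, 0, 5));  // expect 2
    return 0;
}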
void Arm64RegCache::FlushAll() {
// TODO: Flush in pairs
for (int i = 0; i < NUM_MIPSREG; i++) {
@@ -24,6 +24,7 @@
namespace Arm64JitConstants {

// Bogus mappings, TODO ARM64
const Arm64Gen::ARM64Reg FLAGTEMPREG = Arm64Gen::X25;
const Arm64Gen::ARM64Reg JITBASEREG = Arm64Gen::X26;
const Arm64Gen::ARM64Reg CTXREG = Arm64Gen::X27;
const Arm64Gen::ARM64Reg MEMBASEREG = Arm64Gen::X28;

@@ -113,6 +114,7 @@ public:
bool IsMapped(MIPSGPReg reg);
bool IsMappedAsPointer(MIPSGPReg reg);

void MapIn(MIPSGPReg rs);
void MapInIn(MIPSGPReg rd, MIPSGPReg rs);
void MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs, bool avoidLoad = true);
void MapDirtyInIn(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad = true);

@@ -136,7 +138,6 @@ public:
private:
const Arm64Gen::ARM64Reg *GetMIPSAllocationOrder(int &count);
void MapRegTo(Arm64Gen::ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags);
int FlushGetSequential(MIPSGPReg startMipsReg, bool allowFlushImm);
Arm64Gen::ARM64Reg FindBestToSpill(bool unusedOnly, bool *clobbered);

MIPSState *mips_;

@@ -146,7 +147,7 @@ private:
u32 compilerPC_;

enum {
NUM_ARMREG = 32, // 31 actual registers, plus the zero register.
NUM_ARMREG = 32, // 31 actual registers, plus the zero/sp register which is not mappable.
NUM_MIPSREG = Arm64JitConstants::TOTAL_MAPPABLE_MIPSREGS,
};
@@ -99,6 +99,12 @@ std::vector<std::string> DisassembleArm64(const u8 *data, int size) {
lines.push_back(StringFromFormat("BKPT 1 (x%i)", bkpt_count));
bkpt_count = 0;
}
if (true) {
uint64_t addr = (intptr_t)(data + i);
char buf2[16];
snprintf(buf2, sizeof(buf2), "%04x%08x", addr >> 32, addr & 0xFFFFFFFF);
buf = std::string(buf2) + " " + buf;
}
lines.push_back(buf);
}
}
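Side note on the address prefix added above: addr is 64-bit while "%x" formally expects unsigned int, so the snprintf passes mismatched argument types (it typically still prints the intended halves on LP64 targets). A sketch of a width-matched formulation using <cinttypes>:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
    unsigned char code[16] = {0};
    uint64_t addr = (uintptr_t)(code + 4);
    char buf2[20];
    snprintf(buf2, sizeof(buf2), "%012" PRIx64, addr);   // one 64-bit conversion, zero-padded to 12 digits
    printf("%s  <disassembly text would follow here>\n", buf2);
    return 0;
}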
@@ -237,6 +237,7 @@ void MIPSState::UpdateCore(CPUCore desired) {
PSP_CoreParameter().cpuCore = desired;
switch (PSP_CoreParameter().cpuCore) {
case CPU_JIT:
INFO_LOG(CPU, "Switching to JIT");
if (!MIPSComp::jit) {
#ifdef ARM
MIPSComp::jit = new MIPSComp::ArmJit(this);

@@ -253,6 +254,7 @@ void MIPSState::UpdateCore(CPUCore desired) {
break;

case CPU_INTERPRETER:
INFO_LOG(CPU, "Switching to interpreter");
delete MIPSComp::jit;
MIPSComp::jit = 0;
break;