mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
330 lines
11 KiB
C++
330 lines
11 KiB
C++
// Copyright (c) 2015- PPSSPP Project.
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, version 2.0 or later versions.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
// Official git repository and contact information can be found at
|
|
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
|
|
|
#include "ppsspp_config.h"
|
|
#if PPSSPP_ARCH(ARM64)
|
|
|
|
#include "base/logging.h"
|
|
|
|
#include "Core/MemMap.h"
|
|
#include "Core/MIPS/MIPS.h"
|
|
#include "Core/System.h"
|
|
#include "Core/CoreTiming.h"
|
|
#include "Common/MemoryUtil.h"
|
|
#include "Common/CPUDetect.h"
|
|
#include "Common/Arm64Emitter.h"
|
|
#include "Core/MIPS/ARM64/Arm64Jit.h"
|
|
#include "Core/MIPS/JitCommon/JitCommon.h"
|
|
|
|
using namespace Arm64Gen;
|
|
|
|
//static int temp32; // unused?
|
|
|
|
static const bool enableDebug = false;
|
|
static const bool enableDisasm = false;
|
|
|
|
//static bool enableStatistics = false; //unused?
|
|
|
|
|
|
// ARM64 calling conventions
|
|
// Standard: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
|
|
// Apple: https://developer.apple.com/library/ios/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html
|
|
|
|
// Summary:
|
|
// ===========
|
|
// SP ("x31") is not a GPR so irrelevant.
|
|
// x0-x7: 8 parameter/result registers
|
|
// x8: "Indirect result location register" (points to struct return values? I think we can map this)
|
|
// x9-x15: 7 temporary registers (no need to save)
|
|
// x16: temporary register/procedure call scratch register 1
|
|
// x17: temporary register/procedure call scratch register 2
|
|
// x18: unavailable (reserved for use by the OS or linker or whatever - iOS, for example, uses it)
|
|
// x19-x28: 10 callee-saved registers
|
|
// x29: the frame pointer register
|
|
// x30: link register for procedure calls
|
|
|
|
// So: Scratch registers: x16, x17
|
|
// Mappable registers in priority order:
|
|
// x19, x20, x21, x22, x23, (x24, x25, x26, x27, x28), x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x0, x1,
|
|
// That's a whole lot of registers so we might be able to statically allocate a bunch of common MIPS registers.
|
|
// We should put statically allocated registers in the 7 callee-save regs that are left over after the system regs (x19-x25), so we don't have to bother with
|
|
// saving them when we call out of the JIT. We will perform regular dynamic register allocation in the rest (x0-x15)
|
|
|
|
// STATIC ALLOCATION ARM64 (these are all callee-save registers):
|
|
// x23 : Down counter
|
|
// x24 : PC save on JR with non-nice delay slot (to be eliminated later?)
|
|
// x25 : MSR/MRS temporary (to be eliminated later)
|
|
// x26 : JIT base reg
|
|
// x27 : MIPS state (Could eliminate by placing the MIPS state right at the memory base)
|
|
// x28 : Memory base pointer.
|
|
|
|
extern volatile CoreState coreState;
|
|
|
|
void ShowPC(u32 downcount, void *membase, void *jitbase) {
|
|
static int count = 0;
|
|
if (currentMIPS) {
|
|
ELOG("ShowPC : %08x Downcount : %08x %d %p %p", currentMIPS->pc, downcount, count, membase, jitbase);
|
|
} else {
|
|
ELOG("Universe corrupt?");
|
|
}
|
|
//if (count > 2000)
|
|
// exit(0);
|
|
count++;
|
|
}
|
|
|
|
void DisassembleArm(const u8 *data, int size);
|
|
|
|
// PLAN: no more block numbers - crazy opcodes just contain offset within
|
|
// dynarec buffer
|
|
// At this offset - 4, there is an int specifying the block number.
|
|
|
|
namespace MIPSComp {
|
|
|
|
using namespace Arm64JitConstants;
|
|
|
|
void Arm64Jit::GenerateFixedCode(const JitOptions &jo) {
|
|
const u8 *start = AlignCodePage();
|
|
BeginWrite();
|
|
|
|
if (jo.useStaticAlloc) {
|
|
saveStaticRegisters = AlignCode16();
|
|
STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
|
|
gpr.EmitSaveStaticRegisters();
|
|
RET();
|
|
|
|
loadStaticRegisters = AlignCode16();
|
|
gpr.EmitLoadStaticRegisters();
|
|
LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
|
|
RET();
|
|
|
|
start = saveStaticRegisters;
|
|
} else {
|
|
saveStaticRegisters = nullptr;
|
|
loadStaticRegisters = nullptr;
|
|
}
|
|
|
|
restoreRoundingMode = AlignCode16(); {
|
|
MRS(SCRATCH2_64, FIELD_FPCR);
|
|
// We are not in flush-to-zero mode outside the JIT, so let's turn it off.
|
|
uint32_t mask = ~(4 << 22);
|
|
// Assume we're always in round-to-nearest mode beforehand.
|
|
mask &= ~(3 << 22);
|
|
ANDI2R(SCRATCH2, SCRATCH2, mask);
|
|
_MSR(FIELD_FPCR, SCRATCH2_64);
|
|
RET();
|
|
}
|
|
|
|
applyRoundingMode = AlignCode16(); {
|
|
LDR(INDEX_UNSIGNED, SCRATCH2, CTXREG, offsetof(MIPSState, fcr31));
|
|
TSTI2R(SCRATCH2, 1 << 24);
|
|
ANDI2R(SCRATCH2, SCRATCH2, 3);
|
|
FixupBranch skip1 = B(CC_EQ);
|
|
ADDI2R(SCRATCH2, SCRATCH2, 4);
|
|
SetJumpTarget(skip1);
|
|
|
|
// We can skip if the rounding mode is nearest (0) and flush is not set.
|
|
// (as restoreRoundingMode cleared it out anyway)
|
|
CMPI2R(SCRATCH2, 0);
|
|
FixupBranch skip = B(CC_EQ);
|
|
|
|
// MIPS Rounding Mode: ARM Rounding Mode
|
|
// 0: Round nearest 0
|
|
// 1: Round to zero 3
|
|
// 2: Round up (ceil) 1
|
|
// 3: Round down (floor) 2
|
|
ANDI2R(SCRATCH1, SCRATCH2, 3);
|
|
CMPI2R(SCRATCH1, 1);
|
|
|
|
FixupBranch skipadd = B(CC_NEQ);
|
|
ADDI2R(SCRATCH2, SCRATCH2, 2);
|
|
SetJumpTarget(skipadd);
|
|
FixupBranch skipsub = B(CC_LE);
|
|
SUBI2R(SCRATCH2, SCRATCH2, 1);
|
|
SetJumpTarget(skipsub);
|
|
|
|
// Actually change the system FPCR register
|
|
MRS(SCRATCH1_64, FIELD_FPCR);
|
|
// Clear both flush-to-zero and rounding before re-setting them.
|
|
ANDI2R(SCRATCH1, SCRATCH1, ~((4 | 3) << 22));
|
|
ORR(SCRATCH1, SCRATCH1, SCRATCH2, ArithOption(SCRATCH2, ST_LSL, 22));
|
|
_MSR(FIELD_FPCR, SCRATCH1_64);
|
|
|
|
SetJumpTarget(skip);
|
|
RET();
|
|
}
|
|
|
|
updateRoundingMode = AlignCode16(); {
|
|
LDR(INDEX_UNSIGNED, SCRATCH2, CTXREG, offsetof(MIPSState, fcr31));
|
|
|
|
TSTI2R(SCRATCH2, 1 << 24);
|
|
ANDI2R(SCRATCH2, SCRATCH2, 3);
|
|
FixupBranch skip = B(CC_EQ);
|
|
ADDI2R(SCRATCH2, SCRATCH2, 4);
|
|
SetJumpTarget(skip);
|
|
|
|
PUSH(SCRATCH2);
|
|
// We can only skip if the rounding mode is zero and flush is not set.
|
|
// TODO: This actually seems to compare against 3??
|
|
CMPI2R(SCRATCH2, 0);
|
|
FixupBranch skip2 = B(CC_EQ);
|
|
MOVI2R(SCRATCH2, 1);
|
|
MOVP2R(SCRATCH1_64, &js.hasSetRounding);
|
|
STRB(INDEX_UNSIGNED, SCRATCH2, SCRATCH1_64, 0);
|
|
SetJumpTarget(skip2);
|
|
POP(SCRATCH2);
|
|
|
|
// Let's update js.currentRoundingFunc with the right convertS0ToSCRATCH1 func.
|
|
MOVP2R(SCRATCH1_64, convertS0ToSCRATCH1);
|
|
LSL(SCRATCH2, SCRATCH2, 3);
|
|
LDR(SCRATCH2_64, SCRATCH1_64, SCRATCH2);
|
|
MOVP2R(SCRATCH1_64, &js.currentRoundingFunc);
|
|
STR(INDEX_UNSIGNED, SCRATCH2_64, SCRATCH1_64, 0);
|
|
RET();
|
|
}
|
|
|
|
enterDispatcher = AlignCode16();
|
|
|
|
uint32_t regs_to_save = Arm64Gen::ALL_CALLEE_SAVED;
|
|
uint32_t regs_to_save_fp = Arm64Gen::ALL_CALLEE_SAVED_FP;
|
|
fp.ABI_PushRegisters(regs_to_save, regs_to_save_fp);
|
|
|
|
// Fixed registers, these are always kept when in Jit context.
|
|
MOVP2R(MEMBASEREG, Memory::base);
|
|
MOVP2R(CTXREG, mips_);
|
|
MOVP2R(JITBASEREG, GetBasePtr());
|
|
|
|
LoadStaticRegisters();
|
|
MovFromPC(SCRATCH1);
|
|
outerLoopPCInSCRATCH1 = GetCodePtr();
|
|
MovToPC(SCRATCH1);
|
|
outerLoop = GetCodePtr();
|
|
SaveStaticRegisters(); // Advance can change the downcount, so must save/restore
|
|
RestoreRoundingMode(true);
|
|
QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance);
|
|
ApplyRoundingMode(true);
|
|
LoadStaticRegisters();
|
|
FixupBranch skipToCoreStateCheck = B(); //skip the downcount check
|
|
|
|
dispatcherCheckCoreState = GetCodePtr();
|
|
|
|
// The result of slice decrementation should be in flags if somebody jumped here
|
|
// IMPORTANT - We jump on negative, not carry!!!
|
|
FixupBranch bailCoreState = B(CC_MI);
|
|
|
|
SetJumpTarget(skipToCoreStateCheck);
|
|
|
|
MOVP2R(SCRATCH1_64, &coreState);
|
|
LDR(INDEX_UNSIGNED, SCRATCH1, SCRATCH1_64, 0);
|
|
CMP(SCRATCH1, 0);
|
|
FixupBranch badCoreState = B(CC_NEQ);
|
|
FixupBranch skipToRealDispatch2 = B(); //skip the sync and compare first time
|
|
|
|
dispatcherPCInSCRATCH1 = GetCodePtr();
|
|
// TODO: Do we always need to write PC to RAM here?
|
|
MovToPC(SCRATCH1);
|
|
|
|
// At this point : flags = EQ. Fine for the next check, no need to jump over it.
|
|
dispatcher = GetCodePtr();
|
|
|
|
// The result of slice decrementation should be in flags if somebody jumped here
|
|
// IMPORTANT - We jump on negative, not carry!!!
|
|
FixupBranch bail = B(CC_MI);
|
|
|
|
SetJumpTarget(skipToRealDispatch2);
|
|
|
|
dispatcherNoCheck = GetCodePtr();
|
|
|
|
// Debug
|
|
if (enableDebug) {
|
|
MOV(W0, DOWNCOUNTREG);
|
|
MOV(X1, MEMBASEREG);
|
|
MOV(X2, JITBASEREG);
|
|
QuickCallFunction(SCRATCH1_64, (void *)&ShowPC);
|
|
}
|
|
|
|
LDR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, pc));
|
|
LDR(SCRATCH1, MEMBASEREG, SCRATCH1_64);
|
|
LSR(SCRATCH2, SCRATCH1, 24); // or UBFX(SCRATCH2, SCRATCH1, 24, 8)
|
|
ANDI2R(SCRATCH1, SCRATCH1, 0x00FFFFFF);
|
|
CMP(SCRATCH2, MIPS_EMUHACK_OPCODE >> 24);
|
|
FixupBranch skipJump = B(CC_NEQ);
|
|
ADD(SCRATCH1_64, JITBASEREG, SCRATCH1_64);
|
|
BR(SCRATCH1_64);
|
|
SetJumpTarget(skipJump);
|
|
|
|
// No block found, let's jit. I don't think we actually need to save static regs that are in callee-save regs here but whatever.
|
|
// Also, rounding mode gotta be irrelevant here..
|
|
SaveStaticRegisters();
|
|
RestoreRoundingMode(true);
|
|
QuickCallFunction(SCRATCH1_64, (void *)&MIPSComp::JitAt);
|
|
ApplyRoundingMode(true);
|
|
LoadStaticRegisters();
|
|
|
|
B(dispatcherNoCheck); // no point in special casing this
|
|
|
|
SetJumpTarget(bail);
|
|
SetJumpTarget(bailCoreState);
|
|
|
|
MOVP2R(SCRATCH1_64, &coreState);
|
|
LDR(INDEX_UNSIGNED, SCRATCH1, SCRATCH1_64, 0);
|
|
CMP(SCRATCH1, 0);
|
|
B(CC_EQ, outerLoop);
|
|
|
|
SetJumpTarget(badCoreState);
|
|
breakpointBailout = GetCodePtr();
|
|
|
|
SaveStaticRegisters();
|
|
RestoreRoundingMode(true);
|
|
|
|
fp.ABI_PopRegisters(regs_to_save, regs_to_save_fp);
|
|
|
|
RET();
|
|
|
|
// Generate some integer conversion funcs.
|
|
// MIPS order!
|
|
static const RoundingMode roundModes[8] = { ROUND_N, ROUND_Z, ROUND_P, ROUND_M, ROUND_N, ROUND_Z, ROUND_P, ROUND_M };
|
|
for (size_t i = 0; i < ARRAY_SIZE(roundModes); ++i) {
|
|
convertS0ToSCRATCH1[i] = AlignCode16();
|
|
|
|
fp.FCMP(S0, S0); // Detect NaN
|
|
fp.FCVTS(S0, S0, roundModes[i]);
|
|
FixupBranch skip = B(CC_VC);
|
|
MOVI2R(SCRATCH2, 0x7FFFFFFF);
|
|
fp.FMOV(S0, SCRATCH2);
|
|
SetJumpTarget(skip);
|
|
|
|
RET();
|
|
}
|
|
|
|
// Leave this at the end, add more stuff above.
|
|
if (enableDisasm) {
|
|
std::vector<std::string> lines = DisassembleArm64(start, GetCodePtr() - start);
|
|
for (auto s : lines) {
|
|
INFO_LOG(JIT, "%s", s.c_str());
|
|
}
|
|
}
|
|
|
|
// Don't forget to zap the instruction cache! This must stay at the end of this function.
|
|
FlushIcache();
|
|
// Let's spare the pre-generated code from unprotect-reprotect.
|
|
AlignCodePage();
|
|
EndWrite();
|
|
}
|
|
|
|
} // namespace MIPSComp
|
|
|
|
#endif // PPSSPP_ARCH(ARM64)
|