From 1b756ff8c12308b01311e5b41773bea86294f678 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 3 Sep 2023 11:30:04 -0700 Subject: [PATCH 1/2] arm64jit: Add initial base for IR jit. This works, but very slowly at this point. --- CMakeLists.txt | 11 + Common/Arm64Emitter.cpp | 2 +- Core/Core.vcxproj | 11 + Core/Core.vcxproj.filters | 33 ++ Core/MIPS/ARM64/Arm64IRAsm.cpp | 274 +++++++++++ Core/MIPS/ARM64/Arm64IRCompALU.cpp | 227 +++++++++ Core/MIPS/ARM64/Arm64IRCompBranch.cpp | 83 ++++ Core/MIPS/ARM64/Arm64IRCompFPU.cpp | 218 +++++++++ Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp | 174 +++++++ Core/MIPS/ARM64/Arm64IRCompSystem.cpp | 147 ++++++ Core/MIPS/ARM64/Arm64IRCompVec.cpp | 132 ++++++ Core/MIPS/ARM64/Arm64IRJit.cpp | 372 +++++++++++++++ Core/MIPS/ARM64/Arm64IRJit.h | 153 ++++++ Core/MIPS/ARM64/Arm64IRRegCache.cpp | 577 +++++++++++++++++++++++ Core/MIPS/ARM64/Arm64IRRegCache.h | 108 +++++ Core/MIPS/IR/IRFrontend.cpp | 1 - Core/MIPS/IR/IRNativeCommon.cpp | 4 + Core/MIPS/IR/IRNativeCommon.h | 4 + Core/MIPS/JitCommon/JitCommon.cpp | 3 + Core/MIPS/RiscV/RiscVJit.cpp | 5 +- Core/MIPS/RiscV/RiscVRegCache.cpp | 2 - Core/MIPS/RiscV/RiscVRegCache.h | 2 +- Core/MIPS/x86/X64IRCompALU.cpp | 1 + Core/MIPS/x86/X64IRCompSystem.cpp | 2 +- Core/MIPS/x86/X64IRJit.cpp | 2 - Core/MIPS/x86/X64IRJit.h | 6 +- Core/MIPS/x86/X64IRRegCache.cpp | 12 +- Core/MIPS/x86/X64IRRegCache.h | 4 +- UWP/CoreUWP/CoreUWP.vcxproj | 11 + UWP/CoreUWP/CoreUWP.vcxproj.filters | 33 ++ android/jni/Android.mk | 9 + libretro/Makefile.common | 9 + 32 files changed, 2610 insertions(+), 22 deletions(-) create mode 100644 Core/MIPS/ARM64/Arm64IRAsm.cpp create mode 100644 Core/MIPS/ARM64/Arm64IRCompALU.cpp create mode 100644 Core/MIPS/ARM64/Arm64IRCompBranch.cpp create mode 100644 Core/MIPS/ARM64/Arm64IRCompFPU.cpp create mode 100644 Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp create mode 100644 Core/MIPS/ARM64/Arm64IRCompSystem.cpp create mode 100644 Core/MIPS/ARM64/Arm64IRCompVec.cpp create mode 100644 Core/MIPS/ARM64/Arm64IRJit.cpp create mode 100644 Core/MIPS/ARM64/Arm64IRJit.h create mode 100644 Core/MIPS/ARM64/Arm64IRRegCache.cpp create mode 100644 Core/MIPS/ARM64/Arm64IRRegCache.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d6033eada..199d5f53ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1591,6 +1591,17 @@ list(APPEND CoreExtra Core/MIPS/ARM64/Arm64RegCache.h Core/MIPS/ARM64/Arm64RegCacheFPU.cpp Core/MIPS/ARM64/Arm64RegCacheFPU.h + Core/MIPS/ARM64/Arm64IRAsm.cpp + Core/MIPS/ARM64/Arm64IRCompALU.cpp + Core/MIPS/ARM64/Arm64IRCompBranch.cpp + Core/MIPS/ARM64/Arm64IRCompFPU.cpp + Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp + Core/MIPS/ARM64/Arm64IRCompSystem.cpp + Core/MIPS/ARM64/Arm64IRCompVec.cpp + Core/MIPS/ARM64/Arm64IRJit.cpp + Core/MIPS/ARM64/Arm64IRJit.h + Core/MIPS/ARM64/Arm64IRRegCache.cpp + Core/MIPS/ARM64/Arm64IRRegCache.h GPU/Common/VertexDecoderArm64.cpp Core/Util/DisArm64.cpp ) diff --git a/Common/Arm64Emitter.cpp b/Common/Arm64Emitter.cpp index 91c658d445..2323d289e2 100644 --- a/Common/Arm64Emitter.cpp +++ b/Common/Arm64Emitter.cpp @@ -514,7 +514,7 @@ void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const voi distance >>= 2; - _assert_msg_(distance >= -0x1FFF && distance < 0x1FFF, "%s: Received too large distance: %llx", __FUNCTION__, distance); + _assert_msg_(distance >= -0x2000 && distance <= 0x1FFF, "%s: Received too large distance: %llx", __FUNCTION__, distance); Rt = DecodeReg(Rt); Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | \ diff --git a/Core/Core.vcxproj 
b/Core/Core.vcxproj index bd3fb17ad7..cbba66cf14 100644 --- a/Core/Core.vcxproj +++ b/Core/Core.vcxproj @@ -580,6 +580,15 @@ + + + + + + + + + @@ -1175,6 +1184,8 @@ + + diff --git a/Core/Core.vcxproj.filters b/Core/Core.vcxproj.filters index 15964c072c..1de238c01f 100644 --- a/Core/Core.vcxproj.filters +++ b/Core/Core.vcxproj.filters @@ -1270,6 +1270,33 @@ MIPS\x86 + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + @@ -2037,6 +2064,12 @@ MIPS\x86 + + MIPS\ARM64 + + + MIPS\ARM64 + diff --git a/Core/MIPS/ARM64/Arm64IRAsm.cpp b/Core/MIPS/ARM64/Arm64IRAsm.cpp new file mode 100644 index 0000000000..a7c97293c8 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64IRAsm.cpp @@ -0,0 +1,274 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "ppsspp_config.h" +// In other words, PPSSPP_ARCH(ARM64) || DISASM_ALL. +#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__)) + +#include "Common/Log.h" +#include "Core/CoreTiming.h" +#include "Core/MemMap.h" +#include "Core/MIPS/ARM64/Arm64IRJit.h" +#include "Core/MIPS/ARM64/Arm64IRRegCache.h" +#include "Core/MIPS/JitCommon/JitCommon.h" +#include "Core/MIPS/JitCommon/JitState.h" +#include "Core/System.h" + +namespace MIPSComp { + +using namespace Arm64Gen; +using namespace Arm64IRJitConstants; + +static const bool enableDebug = false; +static const bool enableDisasm = false; + +static void ShowPC(void *membase, void *jitbase) { + static int count = 0; + if (currentMIPS) { + u32 downcount = currentMIPS->downcount; + ERROR_LOG(JIT, "[%08x] ShowPC Downcount : %08x %d %p %p", currentMIPS->pc, downcount, count, membase, jitbase); + } else { + ERROR_LOG(JIT, "Universe corrupt?"); + } + //if (count > 2000) + // exit(0); + count++; +} + +void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { + BeginWrite(GetMemoryProtectPageSize()); + const u8 *start = AlignCodePage(); + + if (jo.useStaticAlloc) { + saveStaticRegisters_ = AlignCode16(); + STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + regs_.EmitSaveStaticRegisters(); + RET(); + + loadStaticRegisters_ = AlignCode16(); + regs_.EmitLoadStaticRegisters(); + LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + RET(); + + start = saveStaticRegisters_; + } else { + saveStaticRegisters_ = nullptr; + loadStaticRegisters_ = nullptr; + } + + restoreRoundingMode_ = AlignCode16(); + { + MRS(SCRATCH2_64, FIELD_FPCR); + // We are not in flush-to-zero mode outside the JIT, so let's turn it off. + uint32_t mask = ~(4 << 22); + // Assume we're always in round-to-nearest mode beforehand. 
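+		// (In FPCR, RMode sits in bits 23:22 and FZ is bit 24, so both fields get cleared here.)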
+		mask &= ~(3 << 22);
+		ANDI2R(SCRATCH2, SCRATCH2, mask);
+		_MSR(FIELD_FPCR, SCRATCH2_64);
+		RET();
+	}
+
+	applyRoundingMode_ = AlignCode16();
+	{
+		LDR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, fcr31));
+		ANDI2R(SCRATCH2, SCRATCH1, 3);
+		FixupBranch skip1 = TBZ(SCRATCH1, 24);
+		ADDI2R(SCRATCH2, SCRATCH2, 4);
+		SetJumpTarget(skip1);
+
+		// We can skip if the rounding mode is nearest (0) and flush is not set.
+		// (as restoreRoundingMode cleared it out anyway)
+		FixupBranch skip = CBZ(SCRATCH2);
+
+		// MIPS Rounding Mode: ARM Rounding Mode
+		// 0: Round nearest    0
+		// 1: Round to zero    3
+		// 2: Round up (ceil)  1
+		// 3: Round down (floor) 2
+		ANDI2R(SCRATCH1, SCRATCH2, 3);
+		CMPI2R(SCRATCH1, 1);
+
+		FixupBranch skipadd = B(CC_NEQ);
+		ADDI2R(SCRATCH2, SCRATCH2, 2);
+		SetJumpTarget(skipadd);
+		FixupBranch skipsub = B(CC_LE);
+		SUBI2R(SCRATCH2, SCRATCH2, 1);
+		SetJumpTarget(skipsub);
+
+		// Actually change the system FPCR register
+		MRS(SCRATCH1_64, FIELD_FPCR);
+		// Clear both flush-to-zero and rounding before re-setting them.
+		ANDI2R(SCRATCH1, SCRATCH1, ~((4 | 3) << 22));
+		ORR(SCRATCH1, SCRATCH1, SCRATCH2, ArithOption(SCRATCH2, ST_LSL, 22));
+		_MSR(FIELD_FPCR, SCRATCH1_64);
+
+		SetJumpTarget(skip);
+		RET();
+	}
+
+	updateRoundingMode_ = AlignCode16();
+	{
+		LDR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, fcr31));
+
+		// Set SCRATCH2 to FZ:RM (FZ is bit 24, and RM are lowest 2 bits.)
+		ANDI2R(SCRATCH2, SCRATCH1, 3);
+		FixupBranch skip = TBZ(SCRATCH1, 24);
+		ADDI2R(SCRATCH2, SCRATCH2, 4);
+		SetJumpTarget(skip);
+
+		// Update currentRoundingFunc_ with the right convertS0ToSCRATCH1_ func.
+		MOVP2R(SCRATCH1_64, convertS0ToSCRATCH1_);
+		LSL(SCRATCH2, SCRATCH2, 3);
+		LDR(SCRATCH2_64, SCRATCH1_64, SCRATCH2);
+		MOVP2R(SCRATCH1_64, &currentRoundingFunc_);
+		STR(INDEX_UNSIGNED, SCRATCH2_64, SCRATCH1_64, 0);
+		RET();
+	}
+
+	hooks_.enterDispatcher = (IRNativeFuncNoArg)AlignCode16();
+
+	uint32_t regs_to_save = Arm64Gen::ALL_CALLEE_SAVED;
+	uint32_t regs_to_save_fp = Arm64Gen::ALL_CALLEE_SAVED_FP;
+	fp_.ABI_PushRegisters(regs_to_save, regs_to_save_fp);
+
+	// Fixed registers, these are always kept when in Jit context.
+	MOVP2R(MEMBASEREG, Memory::base);
+	MOVP2R(CTXREG, mipsState);
+	// Pre-subtract this to save time later.
+	MOVI2R(JITBASEREG, (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE);
+
+	LoadStaticRegisters();
+	MovFromPC(SCRATCH1);
+	outerLoopPCInSCRATCH1_ = GetCodePtr();
+	MovToPC(SCRATCH1);
+	outerLoop_ = GetCodePtr();
+	SaveStaticRegisters();  // Advance can change the downcount, so must save/restore
+	RestoreRoundingMode(true);
+	QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance);
+	ApplyRoundingMode(true);
+	LoadStaticRegisters();
+
+	dispatcherCheckCoreState_ = GetCodePtr();
+
+	MOVP2R(SCRATCH1_64, &coreState);
+	LDR(INDEX_UNSIGNED, SCRATCH1, SCRATCH1_64, 0);
+	FixupBranch badCoreState = CBNZ(SCRATCH1);
+
+	// Check downcount.
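+	// DOWNCOUNTREG is treated as signed: bit 31 set means the timeslice has run out,
+	// so head back to outerLoop_, which calls CoreTiming::Advance and then falls
+	// through to the coreState check again.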
+ TBNZ(DOWNCOUNTREG, 31, outerLoop_); + FixupBranch skipToRealDispatch = B(); + + dispatcherPCInSCRATCH1_ = GetCodePtr(); + MovToPC(SCRATCH1); + + hooks_.dispatcher = GetCodePtr(); + + FixupBranch bail = TBNZ(DOWNCOUNTREG, 31); + SetJumpTarget(skipToRealDispatch); + + dispatcherNoCheck_ = GetCodePtr(); + + // Debug + if (enableDebug) { + MOV(W0, DOWNCOUNTREG); + MOV(X1, MEMBASEREG); + MOV(X2, JITBASEREG); + QuickCallFunction(SCRATCH1_64, &ShowPC); + } + + MovFromPC(SCRATCH1); +#ifdef MASKED_PSP_MEMORY + ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK); +#endif + hooks_.dispatchFetch = GetCodePtr(); + LDR(SCRATCH1, MEMBASEREG, SCRATCH1_64); + LSR(SCRATCH2, SCRATCH1, 24); // or UBFX(SCRATCH2, SCRATCH1, 24, 8) + // We don't mask SCRATCH1 as that's already baked into JITBASEREG. + CMP(SCRATCH2, MIPS_EMUHACK_OPCODE >> 24); + FixupBranch skipJump = B(CC_NEQ); + ADD(SCRATCH1_64, JITBASEREG, SCRATCH1_64); + BR(SCRATCH1_64); + SetJumpTarget(skipJump); + + // No block found, let's jit. We don't need to save static regs, they're all callee saved. + RestoreRoundingMode(true); + QuickCallFunction(SCRATCH1_64, &MIPSComp::JitAt); + ApplyRoundingMode(true); + + // Let's just dispatch again, we'll enter the block since we know it's there. + B(dispatcherNoCheck_); + + SetJumpTarget(bail); + + MOVP2R(SCRATCH1_64, &coreState); + LDR(INDEX_UNSIGNED, SCRATCH1, SCRATCH1_64, 0); + CBZ(SCRATCH1, outerLoop_); + + const uint8_t *quitLoop = GetCodePtr(); + SetJumpTarget(badCoreState); + + SaveStaticRegisters(); + RestoreRoundingMode(true); + + fp_.ABI_PopRegisters(regs_to_save, regs_to_save_fp); + + RET(); + + hooks_.crashHandler = GetCodePtr(); + MOVP2R(SCRATCH1_64, &coreState); + MOVI2R(SCRATCH2, CORE_RUNTIME_ERROR); + STR(INDEX_UNSIGNED, SCRATCH2, SCRATCH1_64, 0); + B(quitLoop); + + // Generate some integer conversion funcs. + // MIPS order! + static const RoundingMode roundModes[8] = { ROUND_N, ROUND_Z, ROUND_P, ROUND_M, ROUND_N, ROUND_Z, ROUND_P, ROUND_M }; + for (size_t i = 0; i < ARRAY_SIZE(roundModes); ++i) { + convertS0ToSCRATCH1_[i] = AlignCode16(); + + fp_.FCMP(S0, S0); // Detect NaN + fp_.FCVTS(S0, S0, roundModes[i]); + FixupBranch skip = B(CC_VC); + MOVI2R(SCRATCH2, 0x7FFFFFFF); + fp_.FMOV(S0, SCRATCH2); + SetJumpTarget(skip); + + RET(); + } + + // Leave this at the end, add more stuff above. + if (enableDisasm) { + std::vector lines = DisassembleArm64(start, (int)(GetCodePtr() - start)); + for (auto s : lines) { + INFO_LOG(JIT, "%s", s.c_str()); + } + } + + // Let's spare the pre-generated code from unprotect-reprotect. + AlignCodePage(); + jitStartOffset_ = (int)(GetCodePtr() - start); + // Don't forget to zap the instruction cache! This must stay at the end of this function. + FlushIcache(); + EndWrite(); + + // Update our current cached rounding mode func, too. + UpdateFCR31(mipsState); +} + +} // namespace MIPSComp + +#endif diff --git a/Core/MIPS/ARM64/Arm64IRCompALU.cpp b/Core/MIPS/ARM64/Arm64IRCompALU.cpp new file mode 100644 index 0000000000..f7cefcc723 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64IRCompALU.cpp @@ -0,0 +1,227 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "ppsspp_config.h" +// In other words, PPSSPP_ARCH(ARM64) || DISASM_ALL. +#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__)) + +#include "Common/CPUDetect.h" +#include "Core/MemMap.h" +#include "Core/MIPS/ARM64/Arm64IRJit.h" +#include "Core/MIPS/ARM64/Arm64IRRegCache.h" + +// This file contains compilation for integer / arithmetic / logic related instructions. +// +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. No flags because that's in IR already. + +// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; } +#define CONDITIONAL_DISABLE {} +#define DISABLE { CompIR_Generic(inst); return; } +#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; } + +namespace MIPSComp { + +using namespace Arm64Gen; +using namespace Arm64IRJitConstants; + +void Arm64JitBackend::CompIR_Arith(IRInst inst) { + CONDITIONAL_DISABLE; + + bool allowPtrMath = inst.constant <= 0x7FFFFFFF; +#ifdef MASKED_PSP_MEMORY + // Since we modify it, we can't safely. + allowPtrMath = false; +#endif + + switch (inst.op) { + case IROp::Add: + case IROp::Sub: + case IROp::AddConst: + case IROp::SubConst: + case IROp::Neg: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_Assign(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Mov: + case IROp::Ext8to32: + case IROp::Ext16to32: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_Bits(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::BSwap32: + case IROp::ReverseBits: + case IROp::BSwap16: + case IROp::Clz: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_Compare(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Slt: + case IROp::SltConst: + case IROp::SltU: + case IROp::SltUConst: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_CondAssign(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::MovZ: + case IROp::MovNZ: + case IROp::Max: + case IROp::Min: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_Div(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Div: + case IROp::DivU: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_HiLo(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::MtLo: + case IROp::MtHi: + case IROp::MfLo: + case IROp::MfHi: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_Logic(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::And: + case IROp::Or: + case IROp::Xor: + case IROp::AndConst: + case IROp::OrConst: + case IROp::XorConst: + case IROp::Not: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_Mult(IRInst inst) { + 
CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Mult: + case IROp::MultU: + case IROp::Madd: + case IROp::MaddU: + case IROp::Msub: + case IROp::MsubU: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_Shift(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Shl: + case IROp::Shr: + case IROp::Sar: + case IROp::Ror: + case IROp::ShlImm: + case IROp::ShrImm: + case IROp::SarImm: + case IROp::RorImm: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +} // namespace MIPSComp + +#endif diff --git a/Core/MIPS/ARM64/Arm64IRCompBranch.cpp b/Core/MIPS/ARM64/Arm64IRCompBranch.cpp new file mode 100644 index 0000000000..a14bc6c325 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64IRCompBranch.cpp @@ -0,0 +1,83 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "ppsspp_config.h" +// In other words, PPSSPP_ARCH(ARM64) || DISASM_ALL. +#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__)) + +#include "Core/MIPS/ARM64/Arm64IRJit.h" +#include "Core/MIPS/ARM64/Arm64IRRegCache.h" + +// This file contains compilation for exits. +// +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. No flags because that's in IR already. + +// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; } +#define CONDITIONAL_DISABLE {} +#define DISABLE { CompIR_Generic(inst); return; } +#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; } + +namespace MIPSComp { + +using namespace Arm64Gen; +using namespace Arm64IRJitConstants; + +void Arm64JitBackend::CompIR_Exit(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::ExitToConst: + case IROp::ExitToReg: + case IROp::ExitToPC: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_ExitIf(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::ExitToConstIfEq: + case IROp::ExitToConstIfNeq: + case IROp::ExitToConstIfGtZ: + case IROp::ExitToConstIfGeZ: + case IROp::ExitToConstIfLtZ: + case IROp::ExitToConstIfLeZ: + CompIR_Generic(inst); + break; + + case IROp::ExitToConstIfFpTrue: + case IROp::ExitToConstIfFpFalse: + // Note: not used. + DISABLE; + break; + + default: + INVALIDOP; + break; + } +} + +} // namespace MIPSComp + +#endif diff --git a/Core/MIPS/ARM64/Arm64IRCompFPU.cpp b/Core/MIPS/ARM64/Arm64IRCompFPU.cpp new file mode 100644 index 0000000000..c3000b5969 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64IRCompFPU.cpp @@ -0,0 +1,218 @@ +// Copyright (c) 2023- PPSSPP Project. 
+ +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "ppsspp_config.h" +// In other words, PPSSPP_ARCH(ARM64) || DISASM_ALL. +#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__)) + +#ifndef offsetof +#include +#endif + +#include "Core/MIPS/ARM64/Arm64IRJit.h" +#include "Core/MIPS/ARM64/Arm64IRRegCache.h" + +// This file contains compilation for floating point related instructions. +// +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. No flags because that's in IR already. + +// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; } +#define CONDITIONAL_DISABLE {} +#define DISABLE { CompIR_Generic(inst); return; } +#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; } + +namespace MIPSComp { + +using namespace Arm64Gen; +using namespace Arm64IRJitConstants; + +void Arm64JitBackend::CompIR_FArith(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FAdd: + case IROp::FSub: + case IROp::FMul: + case IROp::FDiv: + case IROp::FSqrt: + case IROp::FNeg: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_FAssign(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FMov: + case IROp::FAbs: + case IROp::FSign: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_FCompare(IRInst inst) { + CONDITIONAL_DISABLE; + + constexpr IRReg IRREG_VFPU_CC = IRREG_VFPU_CTRL_BASE + VFPU_CTRL_CC; + + switch (inst.op) { + case IROp::FCmp: + switch (inst.dest) { + case IRFpCompareMode::False: + case IRFpCompareMode::EitherUnordered: + case IRFpCompareMode::EqualOrdered: + case IRFpCompareMode::EqualUnordered: + case IRFpCompareMode::LessEqualOrdered: + case IRFpCompareMode::LessEqualUnordered: + case IRFpCompareMode::LessOrdered: + case IRFpCompareMode::LessUnordered: + CompIR_Generic(inst); + break; + } + break; + + case IROp::FCmovVfpuCC: + case IROp::FCmpVfpuBit: + case IROp::FCmpVfpuAggregate: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_FCondAssign(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FMin: + case IROp::FMax: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_FCvt(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FCvtWS: + case IROp::FCvtSW: + case IROp::FCvtScaledWS: + case IROp::FCvtScaledSW: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_FRound(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FRound: + case IROp::FTrunc: + case IROp::FCeil: + 
case IROp::FFloor: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_FSat(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FSat0_1: + case IROp::FSatMinus1_1: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_FSpecial(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::FSin: + case IROp::FCos: + case IROp::FRSqrt: + case IROp::FRecip: + case IROp::FAsin: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_RoundingMode(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::RestoreRoundingMode: + RestoreRoundingMode(); + break; + + case IROp::ApplyRoundingMode: + ApplyRoundingMode(); + break; + + case IROp::UpdateRoundingMode: + UpdateRoundingMode(); + break; + + default: + INVALIDOP; + break; + } +} + +} // namespace MIPSComp + +#endif diff --git a/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp b/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp new file mode 100644 index 0000000000..29167b7dd8 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp @@ -0,0 +1,174 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "ppsspp_config.h" +// In other words, PPSSPP_ARCH(ARM64) || DISASM_ALL. +#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__)) + +#include "Core/MemMap.h" +#include "Core/MIPS/ARM64/Arm64IRJit.h" +#include "Core/MIPS/ARM64/Arm64IRRegCache.h" + +// This file contains compilation for load/store instructions. +// +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. No flags because that's in IR already. 
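+// (CONDITIONAL_DISABLE is a no-op by default; switching to the commented-out version
+// below makes every op in this file fall back to CompIR_Generic, to help narrow down bugs.)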
+ +// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; } +#define CONDITIONAL_DISABLE {} +#define DISABLE { CompIR_Generic(inst); return; } +#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; } + +namespace MIPSComp { + +using namespace Arm64Gen; +using namespace Arm64IRJitConstants; + +void Arm64JitBackend::CompIR_CondStore(IRInst inst) { + CONDITIONAL_DISABLE; + if (inst.op != IROp::Store32Conditional) + INVALIDOP; + + CompIR_Generic(inst); +} + +void Arm64JitBackend::CompIR_FLoad(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::LoadFloat: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_FStore(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::StoreFloat: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_Load(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Load8: + case IROp::Load8Ext: + case IROp::Load16: + case IROp::Load16Ext: + case IROp::Load32: + case IROp::Load32Linked: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_LoadShift(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Load32Left: + case IROp::Load32Right: + // Should not happen if the pass to split is active. + DISABLE; + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_Store(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Store8: + case IROp::Store16: + case IROp::Store32: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_StoreShift(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Store32Left: + case IROp::Store32Right: + // Should not happen if the pass to split is active. + DISABLE; + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_VecLoad(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::LoadVec4: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_VecStore(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::StoreVec4: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +} // namespace MIPSComp + +#endif diff --git a/Core/MIPS/ARM64/Arm64IRCompSystem.cpp b/Core/MIPS/ARM64/Arm64IRCompSystem.cpp new file mode 100644 index 0000000000..be4400f465 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64IRCompSystem.cpp @@ -0,0 +1,147 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "ppsspp_config.h" +// In other words, PPSSPP_ARCH(ARM64) || DISASM_ALL. 
+#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__)) + +#include "Common/Profiler/Profiler.h" +#include "Core/Core.h" +#include "Core/HLE/HLE.h" +#include "Core/HLE/ReplaceTables.h" +#include "Core/MemMap.h" +#include "Core/MIPS/ARM64/Arm64IRJit.h" +#include "Core/MIPS/ARM64/Arm64IRRegCache.h" + +// This file contains compilation for basic PC/downcount accounting, syscalls, debug funcs, etc. +// +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. No flags because that's in IR already. + +// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; } +#define CONDITIONAL_DISABLE {} +#define DISABLE { CompIR_Generic(inst); return; } +#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; } + +namespace MIPSComp { + +using namespace Arm64Gen; +using namespace Arm64IRJitConstants; + +void Arm64JitBackend::CompIR_Basic(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Downcount: + case IROp::SetConst: + case IROp::SetConstF: + case IROp::SetPC: + case IROp::SetPCConst: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_Breakpoint(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Breakpoint: + case IROp::MemoryCheck: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_System(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Syscall: + case IROp::CallReplacement: + case IROp::Break: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_Transfer(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::SetCtrlVFPU: + case IROp::SetCtrlVFPUReg: + case IROp::SetCtrlVFPUFReg: + case IROp::FpCondFromReg: + case IROp::FpCondToReg: + case IROp::FpCtrlFromReg: + case IROp::FpCtrlToReg: + case IROp::VfpuCtrlToReg: + case IROp::FMovFromGPR: + case IROp::FMovToGPR: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_ValidateAddress(IRInst inst) { + CONDITIONAL_DISABLE; + + bool isWrite = inst.src2 & 1; + int alignment = 0; + switch (inst.op) { + case IROp::ValidateAddress8: + alignment = 1; + break; + + case IROp::ValidateAddress16: + alignment = 2; + break; + + case IROp::ValidateAddress32: + alignment = 4; + break; + + case IROp::ValidateAddress128: + alignment = 16; + break; + + default: + INVALIDOP; + break; + } +} + +} // namespace MIPSComp + +#endif diff --git a/Core/MIPS/ARM64/Arm64IRCompVec.cpp b/Core/MIPS/ARM64/Arm64IRCompVec.cpp new file mode 100644 index 0000000000..e4fe133e3d --- /dev/null +++ b/Core/MIPS/ARM64/Arm64IRCompVec.cpp @@ -0,0 +1,132 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "ppsspp_config.h" +// In other words, PPSSPP_ARCH(ARM64) || DISASM_ALL. +#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__)) + +#include +#include "Common/CPUDetect.h" +#include "Core/MemMap.h" +#include "Core/MIPS/ARM64/Arm64IRJit.h" +#include "Core/MIPS/ARM64/Arm64IRRegCache.h" + +// This file contains compilation for vector instructions. +// +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. No flags because that's in IR already. + +// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; } +#define CONDITIONAL_DISABLE {} +#define DISABLE { CompIR_Generic(inst); return; } +#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; } + +namespace MIPSComp { + +using namespace Arm64Gen; +using namespace Arm64IRJitConstants; + +void Arm64JitBackend::CompIR_VecArith(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Vec4Add: + case IROp::Vec4Sub: + case IROp::Vec4Mul: + case IROp::Vec4Div: + case IROp::Vec4Scale: + case IROp::Vec4Neg: + case IROp::Vec4Abs: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_VecAssign(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Vec4Init: + case IROp::Vec4Shuffle: + case IROp::Vec4Blend: + case IROp::Vec4Mov: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_VecClamp(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Vec4ClampToZero: + case IROp::Vec2ClampToZero: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_VecHoriz(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Vec4Dot: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +void Arm64JitBackend::CompIR_VecPack(IRInst inst) { + CONDITIONAL_DISABLE; + + switch (inst.op) { + case IROp::Vec2Unpack16To31: + case IROp::Vec4Pack32To8: + case IROp::Vec2Pack31To16: + case IROp::Vec4Unpack8To32: + case IROp::Vec2Unpack16To32: + case IROp::Vec4DuplicateUpperBitsAndShift1: + case IROp::Vec4Pack31To8: + case IROp::Vec2Pack32To16: + CompIR_Generic(inst); + break; + + default: + INVALIDOP; + break; + } +} + +} // namespace MIPSComp + +#endif diff --git a/Core/MIPS/ARM64/Arm64IRJit.cpp b/Core/MIPS/ARM64/Arm64IRJit.cpp new file mode 100644 index 0000000000..d315afef17 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64IRJit.cpp @@ -0,0 +1,372 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "ppsspp_config.h" +// In other words, PPSSPP_ARCH(ARM64) || DISASM_ALL. +#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__)) + +#include +#include "Core/MemMap.h" +#include "Core/MIPS/MIPSTables.h" +#include "Core/MIPS/ARM64/Arm64IRJit.h" +#include "Core/MIPS/ARM64/Arm64IRRegCache.h" + +namespace MIPSComp { + +using namespace Arm64Gen; +using namespace Arm64IRJitConstants; + +// Invalidations just need at most two MOVs and B. +static constexpr int MIN_BLOCK_NORMAL_LEN = 12; +// As long as we can fit a B, we should be fine. +static constexpr int MIN_BLOCK_EXIT_LEN = 4; + +Arm64JitBackend::Arm64JitBackend(JitOptions &jitopt, IRBlockCache &blocks) + : IRNativeBackend(blocks), jo(jitopt), regs_(&jo), fp_(this) { + // Automatically disable incompatible options. + if (((intptr_t)Memory::base & 0x00000000FFFFFFFFUL) != 0) { + jo.enablePointerify = false; + } +#ifdef MASKED_PSP_MEMORY + jo.enablePointerify = false; +#endif + + // Since we store the offset, this is as big as it can be. + AllocCodeSpace(1024 * 1024 * 16); + + regs_.Init(this, &fp_); +} + +Arm64JitBackend::~Arm64JitBackend() {} + +void Arm64JitBackend::UpdateFCR31(MIPSState *mipsState) { + currentRoundingFunc_ = convertS0ToSCRATCH1_[mipsState->fcr31 & 3]; +} + +static void NoBlockExits() { + _assert_msg_(false, "Never exited block, invalid IR?"); +} + +bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) { + if (GetSpaceLeft() < 0x800) + return false; + + BeginWrite(std::min(GetSpaceLeft(), (size_t)block->GetNumInstructions() * 32)); + + u32 startPC = block->GetOriginalStart(); + bool wroteCheckedOffset = false; + if (jo.enableBlocklink && !jo.useBackJump) { + SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer())); + wroteCheckedOffset = true; + + // Check the sign bit to check if negative. + FixupBranch normalEntry = TBZ(DOWNCOUNTREG, 31); + MOVI2R(SCRATCH1, startPC); + B(outerLoopPCInSCRATCH1_); + SetJumpTarget(normalEntry); + } + + // Don't worry, the codespace isn't large enough to overflow offsets. + const u8 *blockStart = GetCodePointer(); + block->SetTargetOffset((int)GetOffset(blockStart)); + compilingBlockNum_ = block_num; + + regs_.Start(block); + + std::map addresses; + for (int i = 0; i < block->GetNumInstructions(); ++i) { + const IRInst &inst = block->GetInstructions()[i]; + regs_.SetIRIndex(i); + // TODO: This might be a little wasteful when compiling if we're not debugging jit... + addresses[GetCodePtr()] = i; + + CompileIRInst(inst); + + if (jo.Disabled(JitDisable::REGALLOC_GPR) || jo.Disabled(JitDisable::REGALLOC_FPR)) + regs_.FlushAll(jo.Disabled(JitDisable::REGALLOC_GPR), jo.Disabled(JitDisable::REGALLOC_FPR)); + + // Safety check, in case we get a bunch of really large jit ops without a lot of branching. + if (GetSpaceLeft() < 0x800) { + compilingBlockNum_ = -1; + return false; + } + } + + // We should've written an exit above. If we didn't, bad things will happen. + // Only check if debug stats are enabled - needlessly wastes jit space. + if (DebugStatsEnabled()) { + QuickCallFunction(SCRATCH2_64, &NoBlockExits); + B(hooks_.crashHandler); + } + + int len = (int)GetOffset(GetCodePointer()) - block->GetTargetOffset(); + if (len < MIN_BLOCK_NORMAL_LEN) { + // We need at least 10 bytes to invalidate blocks with. 
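+		// (MIN_BLOCK_NORMAL_LEN is 12 here: room for the MOVI2R + B that InvalidateBlock
+		// writes over the start of a block.)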
+ ReserveCodeSpace(MIN_BLOCK_NORMAL_LEN - len); + } + + if (!wroteCheckedOffset) { + // Always record this, even if block link disabled - it's used for size calc. + SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer())); + } + + if (jo.enableBlocklink && jo.useBackJump) { + // Small blocks are common, check if it's < 32KB long. + ptrdiff_t distance = blockStart - GetCodePointer(); + if (distance >= -0x8000 && distance < 0x8000) { + TBZ(DOWNCOUNTREG, 31, blockStart); + } else { + FixupBranch toDispatch = TBNZ(DOWNCOUNTREG, 31); + B(blockStart); + SetJumpTarget(toDispatch); + } + + MOVI2R(SCRATCH1, startPC); + B(outerLoopPCInSCRATCH1_); + } + + if (logBlocks_ > 0) { + --logBlocks_; + + INFO_LOG(JIT, "=============== ARM64 (%08x, %d bytes) ===============", startPC, len); + for (const u8 *p = blockStart; p < GetCodePointer(); ) { + auto it = addresses.find(p); + if (it != addresses.end()) { + const IRInst &inst = block->GetInstructions()[it->second]; + + char temp[512]; + DisassembleIR(temp, sizeof(temp), inst); + INFO_LOG(JIT, "IR: #%d %s", it->second, temp); + } + + auto next = std::next(it); + const u8 *nextp = next == addresses.end() ? GetCodePointer() : next->first; + + auto lines = DisassembleArm64(p, (int)(nextp - p)); + for (const auto &line : lines) + INFO_LOG(JIT, " A: %s", line.c_str()); + p = nextp; + } + } + + EndWrite(); + FlushIcache(); + compilingBlockNum_ = -1; + + return true; +} + +void Arm64JitBackend::WriteConstExit(uint32_t pc) { + int block_num = blocks_.GetBlockNumberFromStartAddress(pc); + const IRNativeBlock *nativeBlock = GetNativeBlock(block_num); + + int exitStart = (int)GetOffset(GetCodePointer()); + if (block_num >= 0 && jo.enableBlocklink && nativeBlock && nativeBlock->checkedOffset != 0) { + B(GetBasePtr() + nativeBlock->checkedOffset); + } else { + MOVI2R(SCRATCH1, pc); + B(dispatcherPCInSCRATCH1_); + } + + if (jo.enableBlocklink) { + // In case of compression or early link, make sure it's large enough. + int len = (int)GetOffset(GetCodePointer()) - exitStart; + if (len < MIN_BLOCK_EXIT_LEN) { + ReserveCodeSpace(MIN_BLOCK_EXIT_LEN - len); + len = MIN_BLOCK_EXIT_LEN; + } + + AddLinkableExit(compilingBlockNum_, pc, exitStart, len); + } +} + +void Arm64JitBackend::OverwriteExit(int srcOffset, int len, int block_num) { + _dbg_assert_(len >= MIN_BLOCK_EXIT_LEN); + + const IRNativeBlock *nativeBlock = GetNativeBlock(block_num); + if (nativeBlock) { + u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + srcOffset; + if (PlatformIsWXExclusive()) { + ProtectMemoryPages(writable, len, MEM_PROT_READ | MEM_PROT_WRITE); + } + + ARM64XEmitter emitter(GetBasePtr() + srcOffset, writable); + emitter.B(GetBasePtr() + nativeBlock->checkedOffset); + int bytesWritten = (int)(emitter.GetWritableCodePtr() - writable); + _dbg_assert_(bytesWritten <= MIN_BLOCK_EXIT_LEN); + if (bytesWritten < len) + emitter.ReserveCodeSpace(len - bytesWritten); + emitter.FlushIcache(); + + if (PlatformIsWXExclusive()) { + ProtectMemoryPages(writable, 16, MEM_PROT_READ | MEM_PROT_EXEC); + } + } +} + +void Arm64JitBackend::CompIR_Generic(IRInst inst) { + // If we got here, we're going the slow way. + uint64_t value; + memcpy(&value, &inst, sizeof(inst)); + + FlushAll(); + SaveStaticRegisters(); + MOVI2R(X0, value); + QuickCallFunction(SCRATCH2_64, &DoIRInst); + LoadStaticRegisters(); + + // We only need to check the return value if it's a potential exit. 
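+	// A non-zero return value from DoIRInst is treated as the next PC and handed to
+	// the dispatcher in SCRATCH1; zero falls through to the next instruction.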
+ if ((GetIRMeta(inst.op)->flags & IRFLAG_EXIT) != 0) { + MOV(SCRATCH1, X0); + + ptrdiff_t distance = dispatcherPCInSCRATCH1_ - GetCodePointer(); + if (distance >= -0x100000 && distance < 0x100000) { + // Convenient, we can do a simple branch if within 1MB. + CBNZ(W0, dispatcherPCInSCRATCH1_); + } else { + // That's a shame, we need a long branch. + FixupBranch keepOnKeepingOn = CBZ(W0); + B(dispatcherPCInSCRATCH1_); + SetJumpTarget(keepOnKeepingOn); + } + } +} + +void Arm64JitBackend::CompIR_Interpret(IRInst inst) { + MIPSOpcode op(inst.constant); + + // IR protects us against this being a branching instruction (well, hopefully.) + FlushAll(); + SaveStaticRegisters(); + if (DebugStatsEnabled()) { + MOVP2R(X0, MIPSGetName(op)); + QuickCallFunction(SCRATCH2_64, &NotifyMIPSInterpret); + } + MOVI2R(X0, inst.constant); + QuickCallFunction(SCRATCH2_64, MIPSGetInterpretFunc(op)); + LoadStaticRegisters(); +} + +void Arm64JitBackend::FlushAll() { + regs_.FlushAll(); +} + +bool Arm64JitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const { + // Used in disassembly viewer and profiling tools. + // Don't use spaces; profilers get confused or truncate them. + if (ptr == dispatcherPCInSCRATCH1_) { + name = "dispatcherPCInSCRATCH1"; + } else if (ptr == outerLoopPCInSCRATCH1_) { + name = "outerLoopPCInSCRATCH1"; + } else if (ptr == dispatcherNoCheck_) { + name = "dispatcherNoCheck"; + } else if (ptr == saveStaticRegisters_) { + name = "saveStaticRegisters"; + } else if (ptr == loadStaticRegisters_) { + name = "loadStaticRegisters"; + } else if (ptr == restoreRoundingMode_) { + name = "restoreRoundingMode"; + } else if (ptr == applyRoundingMode_) { + name = "applyRoundingMode"; + } else if (ptr == updateRoundingMode_) { + name = "updateRoundingMode"; + } else if (ptr == currentRoundingFunc_) { + name = "currentRoundingFunc"; + } else if (ptr >= convertS0ToSCRATCH1_[0] && ptr <= convertS0ToSCRATCH1_[7]) { + name = "convertS0ToSCRATCH1"; + } else if (ptr >= GetBasePtr() && ptr < GetBasePtr() + jitStartOffset_) { + name = "fixedCode"; + } else { + return IRNativeBackend::DescribeCodePtr(ptr, name); + } + return true; +} + +void Arm64JitBackend::ClearAllBlocks() { + ClearCodeSpace(jitStartOffset_); + FlushIcacheSection(region + jitStartOffset_, region + region_size - jitStartOffset_); + EraseAllLinks(-1); +} + +void Arm64JitBackend::InvalidateBlock(IRBlock *block, int block_num) { + int offset = block->GetTargetOffset(); + u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + offset; + + // Overwrite the block with a jump to compile it again. + u32 pc = block->GetOriginalStart(); + if (pc != 0) { + // Hopefully we always have at least 16 bytes, which should be all we need. 
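+		// On W^X platforms the page has to be flipped to read/write before patching,
+		// then back to read/exec once the new jump is in place.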
+ if (PlatformIsWXExclusive()) { + ProtectMemoryPages(writable, MIN_BLOCK_NORMAL_LEN, MEM_PROT_READ | MEM_PROT_WRITE); + } + + ARM64XEmitter emitter(GetBasePtr() + offset, writable); + emitter.MOVI2R(SCRATCH1, pc); + emitter.B(dispatcherPCInSCRATCH1_); + int bytesWritten = (int)(emitter.GetWritableCodePtr() - writable); + if (bytesWritten < MIN_BLOCK_NORMAL_LEN) + emitter.ReserveCodeSpace(MIN_BLOCK_NORMAL_LEN - bytesWritten); + emitter.FlushIcache(); + + if (PlatformIsWXExclusive()) { + ProtectMemoryPages(writable, MIN_BLOCK_NORMAL_LEN, MEM_PROT_READ | MEM_PROT_EXEC); + } + } + + EraseAllLinks(block_num); +} + +void Arm64JitBackend::RestoreRoundingMode(bool force) { + QuickCallFunction(SCRATCH2_64, restoreRoundingMode_); +} + +void Arm64JitBackend::ApplyRoundingMode(bool force) { + QuickCallFunction(SCRATCH2_64, applyRoundingMode_); +} + +void Arm64JitBackend::UpdateRoundingMode(bool force) { + QuickCallFunction(SCRATCH2_64, updateRoundingMode_); +} + +void Arm64JitBackend::MovFromPC(ARM64Reg r) { + LDR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc)); +} + +void Arm64JitBackend::MovToPC(ARM64Reg r) { + STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc)); +} + +void Arm64JitBackend::SaveStaticRegisters() { + if (jo.useStaticAlloc) { + QuickCallFunction(SCRATCH2_64, saveStaticRegisters_); + } else { + // Inline the single operation + STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + } +} + +void Arm64JitBackend::LoadStaticRegisters() { + if (jo.useStaticAlloc) { + QuickCallFunction(SCRATCH2_64, loadStaticRegisters_); + } else { + LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); + } +} + +} // namespace MIPSComp + +#endif diff --git a/Core/MIPS/ARM64/Arm64IRJit.h b/Core/MIPS/ARM64/Arm64IRJit.h new file mode 100644 index 0000000000..fa8bfd89f2 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64IRJit.h @@ -0,0 +1,153 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "ppsspp_config.h" +// In other words, PPSSPP_ARCH(ARM64) || DISASM_ALL. 
+#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__)) + +#include +#include +#include "Common/Arm64Emitter.h" +#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRNativeCommon.h" +#include "Core/MIPS/JitCommon/JitState.h" +#include "Core/MIPS/JitCommon/JitCommon.h" +#include "Core/MIPS/ARM64/Arm64IRRegCache.h" + +namespace MIPSComp { + +class Arm64JitBackend : public Arm64Gen::ARM64CodeBlock, public IRNativeBackend { +public: + Arm64JitBackend(JitOptions &jo, IRBlockCache &blocks); + ~Arm64JitBackend(); + + bool DescribeCodePtr(const u8 *ptr, std::string &name) const override; + + void GenerateFixedCode(MIPSState *mipsState) override; + bool CompileBlock(IRBlock *block, int block_num, bool preload) override; + void ClearAllBlocks() override; + void InvalidateBlock(IRBlock *block, int block_num) override; + + void UpdateFCR31(MIPSState *mipsState) override; + +protected: + const CodeBlockCommon &CodeBlock() const override { + return *this; + } + +private: + void RestoreRoundingMode(bool force = false); + void ApplyRoundingMode(bool force = false); + void UpdateRoundingMode(bool force = false); + void MovFromPC(Arm64Gen::ARM64Reg r); + void MovToPC(Arm64Gen::ARM64Reg r); + + void SaveStaticRegisters(); + void LoadStaticRegisters(); + + // Note: destroys SCRATCH1. + void FlushAll(); + + void WriteConstExit(uint32_t pc); + void OverwriteExit(int srcOffset, int len, int block_num) override; + + void CompIR_Arith(IRInst inst) override; + void CompIR_Assign(IRInst inst) override; + void CompIR_Basic(IRInst inst) override; + void CompIR_Bits(IRInst inst) override; + void CompIR_Breakpoint(IRInst inst) override; + void CompIR_Compare(IRInst inst) override; + void CompIR_CondAssign(IRInst inst) override; + void CompIR_CondStore(IRInst inst) override; + void CompIR_Div(IRInst inst) override; + void CompIR_Exit(IRInst inst) override; + void CompIR_ExitIf(IRInst inst) override; + void CompIR_FArith(IRInst inst) override; + void CompIR_FAssign(IRInst inst) override; + void CompIR_FCompare(IRInst inst) override; + void CompIR_FCondAssign(IRInst inst) override; + void CompIR_FCvt(IRInst inst) override; + void CompIR_FLoad(IRInst inst) override; + void CompIR_FRound(IRInst inst) override; + void CompIR_FSat(IRInst inst) override; + void CompIR_FSpecial(IRInst inst) override; + void CompIR_FStore(IRInst inst) override; + void CompIR_Generic(IRInst inst) override; + void CompIR_HiLo(IRInst inst) override; + void CompIR_Interpret(IRInst inst) override; + void CompIR_Load(IRInst inst) override; + void CompIR_LoadShift(IRInst inst) override; + void CompIR_Logic(IRInst inst) override; + void CompIR_Mult(IRInst inst) override; + void CompIR_RoundingMode(IRInst inst) override; + void CompIR_Shift(IRInst inst) override; + void CompIR_Store(IRInst inst) override; + void CompIR_StoreShift(IRInst inst) override; + void CompIR_System(IRInst inst) override; + void CompIR_Transfer(IRInst inst) override; + void CompIR_VecArith(IRInst inst) override; + void CompIR_VecAssign(IRInst inst) override; + void CompIR_VecClamp(IRInst inst) override; + void CompIR_VecHoriz(IRInst inst) override; + void CompIR_VecLoad(IRInst inst) override; + void CompIR_VecPack(IRInst inst) override; + void CompIR_VecStore(IRInst inst) override; + void CompIR_ValidateAddress(IRInst inst) override; + + JitOptions &jo; + Arm64IRRegCache regs_; + Arm64Gen::ARM64FloatEmitter fp_; + + const u8 *outerLoop_ = nullptr; + const u8 *outerLoopPCInSCRATCH1_ = nullptr; + const u8 *dispatcherCheckCoreState_ = nullptr; + const u8 
*dispatcherPCInSCRATCH1_ = nullptr; + const u8 *dispatcherNoCheck_ = nullptr; + const u8 *restoreRoundingMode_ = nullptr; + const u8 *applyRoundingMode_ = nullptr; + const u8 *updateRoundingMode_ = nullptr; + + const u8 *saveStaticRegisters_ = nullptr; + const u8 *loadStaticRegisters_ = nullptr; + + // Indexed by FPCR FZ:RN bits for convenience. Uses SCRATCH2. + const u8 *convertS0ToSCRATCH1_[8]; + + // Note: mutable state used at runtime. + const u8 *currentRoundingFunc_ = nullptr; + + int jitStartOffset_ = 0; + int compilingBlockNum_ = -1; + int logBlocks_ = 0; +}; + +class Arm64IRJit : public IRNativeJit { +public: + Arm64IRJit(MIPSState *mipsState) + : IRNativeJit(mipsState), arm64Backend_(jo, blocks_) { + Init(arm64Backend_); + } + +private: + Arm64JitBackend arm64Backend_; +}; + +} // namespace MIPSComp + +#endif diff --git a/Core/MIPS/ARM64/Arm64IRRegCache.cpp b/Core/MIPS/ARM64/Arm64IRRegCache.cpp new file mode 100644 index 0000000000..f65e418d53 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64IRRegCache.cpp @@ -0,0 +1,577 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "ppsspp_config.h" +// In other words, PPSSPP_ARCH(ARM64) || DISASM_ALL. +#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__)) + +#ifndef offsetof +#include +#endif + +#include "Common/CPUDetect.h" +#include "Common/LogReporting.h" +#include "Core/MemMap.h" +#include "Core/MIPS/IR/IRInst.h" +#include "Core/MIPS/IR/IRAnalysis.h" +#include "Core/MIPS/ARM64/Arm64IRRegCache.h" +#include "Core/MIPS/JitCommon/JitState.h" + +using namespace Arm64Gen; +using namespace Arm64IRJitConstants; + +Arm64IRRegCache::Arm64IRRegCache(MIPSComp::JitOptions *jo) + : IRNativeRegCacheBase(jo) { + // The S/D/Q regs overlap, so we just use one slot. The numbers don't match ARM64Reg. + config_.totalNativeRegs = NUM_X_REGS + NUM_X_FREGS; + config_.mapFPUSIMD = true; + // XMM regs are used for both FPU and Vec, so we don't need VREGs. + config_.mapUseVRegs = false; +} + +void Arm64IRRegCache::Init(ARM64XEmitter *emitter, ARM64FloatEmitter *fp) { + emit_ = emitter; + fp_ = fp; +} + +const int *Arm64IRRegCache::GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &count, int &base) const { + if (type == MIPSLoc::REG) { + // See register alloc remarks in Arm64Asm.cpp. + base = W0; + + // W19-W23 are most suitable for static allocation. Those that are chosen for static allocation + // should be omitted here and added in GetStaticAllocations. 
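+		// (With useStaticAlloc, W19-W24 are claimed by GetStaticAllocations below for
+		// SP/V0/V1/A0/A1/RA, so the static-alloc order leaves them out.)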
+ static const int allocationOrder[] = { + W19, W20, W21, W22, W23, W24, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, + }; + static const int allocationOrderStaticAlloc[] = { + W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, + }; + + if (jo_->useStaticAlloc) { + count = ARRAY_SIZE(allocationOrderStaticAlloc); + return allocationOrderStaticAlloc; + } + count = ARRAY_SIZE(allocationOrder); + return allocationOrder; + } else if (type == MIPSLoc::FREG) { + base = S0 - NUM_X_REGS; + + // We don't really need four temps, probably. + // We start with S8 for call flushes. + static const int allocationOrder[] = { + // Reserve four full 128-bit temp registers, should be plenty. + S8, S9, S10, S11, // Partially callee-save (bottom 64 bits) + S12, S13, S14, S15, // Partially callee-save (bottom 64 bits) + S16, S17, S18, S19, + S20, S21, S22, S23, + S24, S25, S26, S27, + S28, S29, S30, S31, + S4, S5, S6, S7, + }; + + count = ARRAY_SIZE(allocationOrder); + return allocationOrder; + } else { + _assert_msg_(false, "Allocation order not yet implemented"); + count = 0; + return nullptr; + } +} + +const Arm64IRRegCache::StaticAllocation *Arm64IRRegCache::GetStaticAllocations(int &count) const { + static const StaticAllocation allocs[] = { + { MIPS_REG_SP, W19, MIPSLoc::REG, true }, + { MIPS_REG_V0, W20, MIPSLoc::REG }, + { MIPS_REG_V1, W21, MIPSLoc::REG }, + { MIPS_REG_A0, W22, MIPSLoc::REG }, + { MIPS_REG_A1, W23, MIPSLoc::REG }, + { MIPS_REG_RA, W24, MIPSLoc::REG }, + }; + + if (jo_->useStaticAlloc) { + count = ARRAY_SIZE(allocs); + return allocs; + } + return IRNativeRegCacheBase::GetStaticAllocations(count); +} + +void Arm64IRRegCache::EmitLoadStaticRegisters() { + int count = 0; + const StaticAllocation *allocs = GetStaticAllocations(count); + for (int i = 0; i < count; ++i) { + int offset = GetMipsRegOffset(allocs[i].mr); + if (i + 1 < count && allocs[i].mr == allocs[i + 1].mr - 1) { + _assert_(!allocs[i].pointerified && !allocs[i + 1].pointerified); + emit_->LDP(INDEX_SIGNED, FromNativeReg(allocs[i].nr), FromNativeReg(allocs[i + 1].nr), CTXREG, offset); + ++i; + } else { + emit_->LDR(INDEX_UNSIGNED, FromNativeReg(allocs[i].nr), CTXREG, offset); + if (allocs[i].pointerified && jo_->enablePointerify) { + ARM64Reg r64 = FromNativeReg64(allocs[i].nr); + uint32_t membaseHigh = (uint32_t)((uint64_t)Memory::base >> 32); + emit_->MOVK(r64, membaseHigh & 0xFFFF, SHIFT_32); + if (membaseHigh & 0xFFFF0000) + emit_->MOVK(r64, membaseHigh >> 16, SHIFT_48); + } + } + } +} + +void Arm64IRRegCache::EmitSaveStaticRegisters() { + int count = 0; + const StaticAllocation *allocs = GetStaticAllocations(count); + // This only needs to run once (by Asm) so checks don't need to be fast. + for (int i = 0; i < count; ++i) { + int offset = GetMipsRegOffset(allocs[i].mr); + if (i + 1 < count && allocs[i].mr == allocs[i + 1].mr - 1) { + emit_->STP(INDEX_SIGNED, FromNativeReg(allocs[i].nr), FromNativeReg(allocs[i + 1].nr), CTXREG, offset); + ++i; + } else { + emit_->STR(INDEX_UNSIGNED, FromNativeReg(allocs[i].nr), CTXREG, offset); + } + } +} + +void Arm64IRRegCache::FlushBeforeCall() { + // These registers are not preserved by function calls. + auto isGPRSaved = [&](IRNativeReg nreg) { + ARM64Reg ar = FromNativeReg(nreg); + return ar >= W19 && ar <= W29; + }; + auto isFPRSaved = [&](IRNativeReg nreg) { + ARM64Reg ar = FromNativeReg(nreg); + return ar >= S8 && ar <= S15; + }; + + // Go through by IR index first, to use STP where we can. 
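+	// Two adjacent guest regs that are both dirty, single-lane, and mapped to caller-saved
+	// host regs can be written back with a single STP, as long as the context offset still
+	// fits STP's signed immediate (checked against 252 below).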
+ for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) { + if (mr[i].nReg == -1 || mr[i + 1].nReg == -1 || mr[i].isStatic || mr[i + 1].isStatic) + continue; + // Ignore multilane regs. + if (mr[i].lane != -1 || mr[i + 1].lane != -1) + continue; + if (!nr[mr[i].nReg].isDirty || !nr[mr[i + 1].nReg].isDirty) + continue; + + int offset = GetMipsRegOffset(i); + + // Okay, it's a maybe. Are we flushing both as GPRs? + if (!isGPRSaved(mr[i].nReg) && !isGPRSaved(mr[i + 1].nReg) && offset <= 252) { + // If either is mapped as a pointer, fix it. + if (mr[i].loc == MIPSLoc::REG_AS_PTR) + AdjustNativeRegAsPtr(mr[i].nReg, false); + if (mr[i + 1].loc == MIPSLoc::REG_AS_PTR) + AdjustNativeRegAsPtr(mr[i + 1].nReg, false); + + // That means we should use STP. + emit_->STP(INDEX_SIGNED, FromNativeReg(mr[i].nReg), FromNativeReg(mr[i + 1].nReg), CTXREG, offset); + + DiscardNativeReg(mr[i].nReg); + DiscardNativeReg(mr[i + 1].nReg); + + ++i; + continue; + } + + // Perhaps as FPRs? Note: these must be single lane at this point. + // TODO: Could use STP on quads etc. too, i.e. i & i + 4. + if (!isFPRSaved(mr[i].nReg) && !isFPRSaved(mr[i + 1].nReg) && offset <= 252) { + fp_->STP(32, INDEX_SIGNED, FromNativeReg(mr[i].nReg), FromNativeReg(mr[i + 1].nReg), CTXREG, offset); + + DiscardNativeReg(mr[i].nReg); + DiscardNativeReg(mr[i + 1].nReg); + + ++i; + continue; + } + } + + // Alright, now go through any that didn't get flushed with STP. + for (int i = 0; i < 19; ++i) { + FlushNativeReg(GPRToNativeReg(ARM64Reg(W0 + i))); + } + FlushNativeReg(GPRToNativeReg(W30)); + + for (int i = 0; i < 8; ++i) { + FlushNativeReg(VFPToNativeReg(ARM64Reg(S0 + i))); + } + for (int i = 8; i < 16; ++i) { + // These are preserved but only the low 64 bits. + IRNativeReg nreg = VFPToNativeReg(ARM64Reg(S0 + i)); + if (nr[nreg].mipsReg != IRREG_INVALID && GetFPRLaneCount(nr[nreg].mipsReg - 32) > 2) + FlushNativeReg(nreg); + } + for (int i = 16; i < 32; ++i) { + FlushNativeReg(VFPToNativeReg(ARM64Reg(S0 + i))); + } +} + +ARM64Reg Arm64IRRegCache::TryMapTempImm(IRReg r) { + _dbg_assert_(IsValidGPR(r)); + + // If already mapped, no need for a temporary. + if (IsGPRMapped(r)) { + return R(r); + } + + if (mr[r].loc == MIPSLoc::IMM) { + // Can we just use zero? + if (mr[r].imm == 0) + return WZR; + + // Try our luck - check for an exact match in another xreg. + for (int i = 1; i < TOTAL_MAPPABLE_IRREGS; ++i) { + if (mr[i].loc == MIPSLoc::REG_IMM && mr[i].imm == mr[r].imm) { + // Awesome, let's just use this reg. + return FromNativeReg(mr[i].nReg); + } + } + } + + return INVALID_REG; +} + +ARM64Reg Arm64IRRegCache::GetAndLockTempGPR() { + IRNativeReg reg = AllocateReg(MIPSLoc::REG, MIPSMap::INIT); + if (reg != -1) { + nr[reg].tempLockIRIndex = irIndex_; + } + return FromNativeReg(reg); +} + +ARM64Reg Arm64IRRegCache::GetAndLockTempFPR() { + IRNativeReg reg = AllocateReg(MIPSLoc::FREG, MIPSMap::INIT); + if (reg != -1) { + nr[reg].tempLockIRIndex = irIndex_; + } + return FromNativeReg(reg); +} + +ARM64Reg Arm64IRRegCache::MapWithFPRTemp(const IRInst &inst) { + return FromNativeReg(MapWithTemp(inst, MIPSLoc::FREG)); +} + +ARM64Reg Arm64IRRegCache::MapGPR(IRReg mipsReg, MIPSMap mapFlags) { + _dbg_assert_(IsValidGPR(mipsReg)); + + // Okay, not mapped, so we need to allocate an arm64 register. 
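TryMapTempImm above avoids allocating anything for an immediate when it can: zero always comes from WZR, and any other value can be borrowed from a register that already caches the same constant. A minimal standalone sketch of that lookup, with a toy Slot struct standing in for the mr[] table:

    // Illustration only: pick a source for an immediate without allocating a register.
    #include <cstdint>
    #include <cstdio>

    struct Slot { bool hasImm; uint32_t imm; int hostReg; };  // toy stand-in for mr[]

    // Returns a host register index, 31 for WZR, or -1 if the value must be materialized.
    int FindImmSource(const Slot *slots, int count, uint32_t wanted) {
        if (wanted == 0)
            return 31;  // WZR always reads as zero on AArch64.
        for (int i = 0; i < count; ++i) {
            if (slots[i].hasImm && slots[i].imm == wanted && slots[i].hostReg >= 0)
                return slots[i].hostReg;  // Some register already holds this exact value.
        }
        return -1;  // The caller would MOVI2R into a scratch register instead.
    }

    int main() {
        Slot slots[] = { { true, 0x08800000, 5 }, { false, 0, -1 }, { true, 16, 7 } };
        printf("%d %d %d\n", FindImmSource(slots, 3, 0), FindImmSource(slots, 3, 16),
               FindImmSource(slots, 3, 123));  // prints: 31 7 -1
        return 0;
    }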
+ IRNativeReg nreg = MapNativeReg(MIPSLoc::REG, mipsReg, 1, mapFlags); + return FromNativeReg(nreg); +} + +ARM64Reg Arm64IRRegCache::MapGPR2(IRReg mipsReg, MIPSMap mapFlags) { + _dbg_assert_(IsValidGPR(mipsReg) && IsValidGPR(mipsReg + 1)); + + // Okay, not mapped, so we need to allocate an arm64 register. + IRNativeReg nreg = MapNativeReg(MIPSLoc::REG, mipsReg, 2, mapFlags); + return FromNativeReg64(nreg); +} + +ARM64Reg Arm64IRRegCache::MapGPRAsPointer(IRReg reg) { + return FromNativeReg64(MapNativeRegAsPointer(reg)); +} + +ARM64Reg Arm64IRRegCache::MapFPR(IRReg mipsReg, MIPSMap mapFlags) { + _dbg_assert_(IsValidFPR(mipsReg)); + _dbg_assert_(mr[mipsReg + 32].loc == MIPSLoc::MEM || mr[mipsReg + 32].loc == MIPSLoc::FREG); + + IRNativeReg nreg = MapNativeReg(MIPSLoc::FREG, mipsReg + 32, 1, mapFlags); + if (nreg != -1) + return FromNativeReg(nreg); + return INVALID_REG; +} + +ARM64Reg Arm64IRRegCache::MapVec4(IRReg first, MIPSMap mapFlags) { + _dbg_assert_(IsValidFPR(first)); + _dbg_assert_((first & 3) == 0); + _dbg_assert_(mr[first + 32].loc == MIPSLoc::MEM || mr[first + 32].loc == MIPSLoc::FREG); + + IRNativeReg nreg = MapNativeReg(MIPSLoc::FREG, first + 32, 4, mapFlags); + if (nreg != -1) + return EncodeRegToQuad(FromNativeReg(nreg)); + return INVALID_REG; +} + +void Arm64IRRegCache::AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) { + _assert_(nreg >= 0 && nreg < (IRNativeReg)WZR); + ARM64Reg r = FromNativeReg64(nreg); + if (state) { + if (!jo_->enablePointerify) { +#if defined(MASKED_PSP_MEMORY) + // This destroys the value... + _dbg_assert_(!nr[nreg].isDirty); + emit_->ANDI2R(r, r, Memory::MEMVIEW32_MASK); +#endif + emit_->ADD(r, r, MEMBASEREG); + } else { + uint32_t membaseHigh = (uint32_t)((uint64_t)Memory::base >> 32); + emit_->MOVK(r, membaseHigh & 0xFFFF, SHIFT_32); + if (membaseHigh & 0xFFFF0000) + emit_->MOVK(r, membaseHigh >> 16, SHIFT_48); + } + } else { + if (!jo_->enablePointerify) { +#if defined(MASKED_PSP_MEMORY) + _dbg_assert_(!nr[nreg].isDirty); +#endif + emit_->SUB(r, r, MEMBASEREG); + } else { + // Nothing to do, just ignore the high 32 bits. + } + } +} + +bool Arm64IRRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) { + // No special flags, skip the check for a little speed. 
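AdjustNativeRegAsPtr relies on the fact that, when pointerify is enabled, the high 32 bits of the memory base can simply be written into the top halfwords of the 64-bit register that holds the guest address. The sketch below models that with plain integers; it assumes the base has zero in its low 32 bits, which is the precondition that makes the MOVK sequence equivalent to adding the base.

    // Illustration only: turning a 32-bit guest address into a host pointer by
    // overwriting the top two 16-bit halfwords, as MOVK(..., SHIFT_32/SHIFT_48) does.
    #include <cstdint>
    #include <cstdio>

    uint64_t Pointerify(uint64_t reg, uint64_t membase) {
        uint32_t high = (uint32_t)(membase >> 32);
        // MOVK keeps the rest of the register and replaces one 16-bit field.
        reg = (reg & ~(0xFFFFull << 32)) | ((uint64_t)(high & 0xFFFF) << 32);
        if (high & 0xFFFF0000)
            reg = (reg & ~(0xFFFFull << 48)) | ((uint64_t)(high >> 16) << 48);
        return reg;
    }

    int main() {
        // Assumes the memory base has zero in its low 32 bits, so inserting the high
        // halfwords is the same as adding the base to the guest address.
        uint64_t membase = 0x0000123400000000ull;
        uint64_t guestAddr = 0x08804000;  // lives in the low 32 bits of a W register
        uint64_t ptr = Pointerify(guestAddr, membase);
        printf("%d\n", ptr == membase + guestAddr);  // prints 1
        return 0;
    }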
+ return true; +} + +void Arm64IRRegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) { + ARM64Reg r = FromNativeReg(nreg); + _dbg_assert_(first != MIPS_REG_ZERO); + if (nreg < NUM_X_REGS) { + _assert_(lanes == 1 || (lanes == 2 && first == IRREG_LO)); + if (lanes == 1) + emit_->LDR(INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first)); + else if (lanes == 2) + emit_->LDR(INDEX_UNSIGNED, EncodeRegTo64(r), CTXREG, GetMipsRegOffset(first)); + else + _assert_(false); + } else { + _dbg_assert_(nreg < NUM_X_REGS + NUM_X_FREGS); + _assert_msg_(mr[first].loc == MIPSLoc::FREG, "Cannot load this type: %d", (int)mr[first].loc); + if (lanes == 1) + fp_->LDR(32, INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first)); + else if (lanes == 2) + fp_->LDR(64, INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first)); + else if (lanes == 4) + fp_->LDR(128, INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first)); + else + _assert_(false); + } +} + +void Arm64IRRegCache::StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) { + ARM64Reg r = FromNativeReg(nreg); + _dbg_assert_(first != MIPS_REG_ZERO); + if (nreg < NUM_X_REGS) { + _assert_(lanes == 1 || (lanes == 2 && first == IRREG_LO)); + _assert_(mr[first].loc == MIPSLoc::REG || mr[first].loc == MIPSLoc::REG_IMM); + if (lanes == 1) + emit_->STR(INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first)); + else if (lanes == 2) + emit_->STR(INDEX_UNSIGNED, EncodeRegTo64(r), CTXREG, GetMipsRegOffset(first)); + else + _assert_(false); + } else { + _dbg_assert_(nreg < NUM_X_REGS + NUM_X_FREGS); + _assert_msg_(mr[first].loc == MIPSLoc::FREG, "Cannot store this type: %d", (int)mr[first].loc); + if (lanes == 1) + fp_->STR(32, INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first)); + else if (lanes == 2) + fp_->STR(64, INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first)); + else if (lanes == 4) + fp_->STR(128, INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first)); + else + _assert_(false); + } +} + +void Arm64IRRegCache::SetNativeRegValue(IRNativeReg nreg, uint32_t imm) { + ARM64Reg r = FromNativeReg(nreg); + _dbg_assert_(nreg >= 0 && nreg < (IRNativeReg)WZR); + // On ARM64, MOVZ/MOVK is really fast. + emit_->MOVI2R(r, imm); +} + +void Arm64IRRegCache::StoreRegValue(IRReg mreg, uint32_t imm) { + _assert_(IsValidGPRNoZero(mreg)); + // Try to optimize using a different reg. + ARM64Reg storeReg = INVALID_REG; + if (imm == 0) + storeReg = WZR; + + // Could we get lucky? Check for an exact match in another xreg. + for (int i = 1; i < TOTAL_MAPPABLE_IRREGS; ++i) { + if (mr[i].loc == MIPSLoc::REG_IMM && mr[i].imm == imm) { + // Awesome, let's just store this reg. + storeReg = (ARM64Reg)mr[i].nReg; + break; + } + } + + if (storeReg == INVALID_REG) { + emit_->MOVI2R(SCRATCH1, imm); + storeReg = SCRATCH1; + } + emit_->STR(INDEX_UNSIGNED, storeReg, CTXREG, GetMipsRegOffset(mreg)); +} + +void Arm64IRRegCache::FlushAll(bool gprs, bool fprs) { + // Note: make sure not to change the registers when flushing: + // Branching code may expect the armreg to retain its value. + + // Try to flush in pairs when possible. + for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) { + if (mr[i].loc == MIPSLoc::MEM || mr[i].loc == MIPSLoc::MEM || mr[i].isStatic || mr[i + 1].isStatic) + continue; + // Ignore multilane regs. Could handle with more smartness... 
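The "MOVZ/MOVK is really fast" remark in SetNativeRegValue refers to the fact that any 32-bit constant can be built in at most two instructions. A standalone sketch of that split, leaving out the single-instruction shortcuts an emitter would normally try first:

    // Illustration only: any 32-bit value fits in MOVZ plus at most one MOVK.
    #include <cstdint>
    #include <cstdio>

    int main() {
        uint32_t imm = 0x08804321;
        uint16_t lo = imm & 0xFFFF;   // MOVZ w0, #lo           (zeroes the other bits)
        uint16_t hi = imm >> 16;      // MOVK w0, #hi, LSL #16  (only needed when hi != 0)

        uint32_t rebuilt = lo;
        if (hi != 0)
            rebuilt |= (uint32_t)hi << 16;
        printf("rebuilt %08x, instructions used: %d\n", rebuilt, hi != 0 ? 2 : 1);
        return 0;
    }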
+ if (mr[i].lane != -1 || mr[i + 1].lane != -1) + continue; + if (mr[i].nReg != -1 && !nr[mr[i].nReg].isDirty) + continue; + if (mr[i + 1].nReg != -1 && !nr[mr[i + 1].nReg].isDirty) + continue; + if (mr[i].loc == MIPSLoc::MEM || mr[i + 1].loc == MIPSLoc::MEM) + continue; + + int offset = GetMipsRegOffset(i); + + // If both are imms, let's materialize a single reg and store. + if (mr[i].loc == MIPSLoc::IMM && mr[i + 1].loc == MIPSLoc::IMM) { + if ((i & 1) == 0) { + uint64_t fullImm = ((uint64_t) mr[i + 1].imm << 32) | mr[i].imm; + emit_->MOVI2R(SCRATCH1_64, fullImm); + emit_->STR(INDEX_UNSIGNED, SCRATCH1_64, CTXREG, offset); + DiscardReg(i); + DiscardReg(i + 1); + ++i; + } + continue; + } + + // Okay, two dirty regs in a row, in need of flushing. Both GPRs? + if (IsValidGPR(i) && IsValidGPR(i + 1) && offset <= 252) { + auto setupForFlush = [&](ARM64Reg &ar, IRReg r) { + if (mr[r].loc == MIPSLoc::IMM) { + ar = TryMapTempImm(r); + if (ar == INVALID_REG) { + // Both cannot be imms, so this is safe. + ar = SCRATCH1; + emit_->MOVI2R(ar, mr[r].imm); + } + } else if (mr[r].loc == MIPSLoc::REG_AS_PTR) { + AdjustNativeRegAsPtr(r, false); + ar = FromNativeReg(mr[r].nReg); + } else { + _dbg_assert_(mr[r].loc == MIPSLoc::REG || mr[r].loc == MIPSLoc::REG_IMM); + ar = FromNativeReg(mr[r].nReg); + } + }; + + ARM64Reg armRegs[2]{ INVALID_REG, INVALID_REG }; + setupForFlush(armRegs[0], i); + setupForFlush(armRegs[1], i + 1); + + emit_->STP(INDEX_SIGNED, armRegs[0], armRegs[1], CTXREG, offset); + DiscardReg(i); + DiscardReg(i + 1); + ++i; + continue; + } + + // Perhaps as FPRs? Note: these must be single lane at this point. + // TODO: Could use STP on quads etc. too, i.e. i & i + 4. + if (i >= 32 && IsValidFPR(i - 32) && IsValidFPR(i + 1 - 32) && offset <= 252) { + _dbg_assert_(mr[i].loc == MIPSLoc::FREG && mr[i + 1].loc == MIPSLoc::FREG); + fp_->STP(32, INDEX_SIGNED, FromNativeReg(mr[i].nReg), FromNativeReg(mr[i + 1].nReg), CTXREG, offset); + + DiscardNativeReg(mr[i].nReg); + DiscardNativeReg(mr[i + 1].nReg); + + ++i; + continue; + } + } + + // Flush all the rest that weren't done via STP. 
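The immediate case above shows a small but useful trick: when two neighbouring registers both hold known constants and the first one sits on an 8-byte boundary, the pair can be flushed with a single 64-bit store. A standalone sketch of the merge, assuming the little-endian layout ARM64 uses:

    // Illustration only: two neighbouring 32-bit immediates flushed with one
    // 64-bit store when the first slot is 8-byte aligned.
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
        uint32_t ctx[4] = {};                 // toy stand-in for the MIPS context
        int i = 2;                            // first register index; even, so 8-byte aligned here
        uint32_t immLo = 0x11111111, immHi = 0x22222222;

        if ((i & 1) == 0) {
            uint64_t fullImm = ((uint64_t)immHi << 32) | immLo;   // same merge as the flush loop
            memcpy(&ctx[i], &fullImm, sizeof(fullImm));           // stands in for one 64-bit STR
        }
        printf("%08x %08x\n", ctx[2], ctx[3]);                    // 11111111 22222222
        return 0;
    }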
+ IRNativeRegCacheBase::FlushAll(gprs, fprs); +} + +ARM64Reg Arm64IRRegCache::R(IRReg mipsReg) { + _dbg_assert_(IsValidGPR(mipsReg)); + _dbg_assert_(mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM); + if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) { + return FromNativeReg(mr[mipsReg].nReg); + } else { + ERROR_LOG_REPORT(JIT, "Reg %i not in arm64 reg", mipsReg); + return INVALID_REG; // BAAAD + } +} + +ARM64Reg Arm64IRRegCache::RPtr(IRReg mipsReg) { + _dbg_assert_(IsValidGPR(mipsReg)); + _dbg_assert_(mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM || mr[mipsReg].loc == MIPSLoc::REG_AS_PTR); + if (mr[mipsReg].loc == MIPSLoc::REG_AS_PTR) { + return FromNativeReg64(mr[mipsReg].nReg); + } else if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) { + int r = mr[mipsReg].nReg; + _dbg_assert_(nr[r].pointerified); + if (nr[r].pointerified) { + return FromNativeReg64(mr[mipsReg].nReg); + } else { + ERROR_LOG(JIT, "Tried to use a non-pointer register as a pointer"); + return INVALID_REG; + } + } else { + ERROR_LOG_REPORT(JIT, "Reg %i not in arm64 reg", mipsReg); + return INVALID_REG; // BAAAD + } +} + +ARM64Reg Arm64IRRegCache::F(IRReg mipsReg) { + _dbg_assert_(IsValidFPR(mipsReg)); + _dbg_assert_(mr[mipsReg + 32].loc == MIPSLoc::FREG); + if (mr[mipsReg + 32].loc == MIPSLoc::FREG) { + return FromNativeReg(mr[mipsReg + 32].nReg); + } else { + ERROR_LOG_REPORT(JIT, "Reg %i not in arm64 reg", mipsReg); + return INVALID_REG; // BAAAD + } +} + +ARM64Reg Arm64IRRegCache::FD(IRReg mipsReg) { + return EncodeRegToDouble(F(mipsReg)); +} + +ARM64Reg Arm64IRRegCache::FQ(IRReg mipsReg) { + return EncodeRegToQuad(F(mipsReg)); +} + +IRNativeReg Arm64IRRegCache::GPRToNativeReg(ARM64Reg r) { + _dbg_assert_msg_(r >= 0 && r < 0x40, "Not a GPR?"); + return (IRNativeReg)DecodeReg(r); +} + +IRNativeReg Arm64IRRegCache::VFPToNativeReg(ARM64Reg r) { + _dbg_assert_msg_(r >= 0x40 && r < 0xE0, "Not VFP?"); + return (IRNativeReg)(NUM_X_REGS + (int)DecodeReg(r)); +} + +ARM64Reg Arm64IRRegCache::FromNativeReg(IRNativeReg r) { + if (r >= NUM_X_REGS) + return EncodeRegToSingle((Arm64Gen::ARM64Reg)r); + return (Arm64Gen::ARM64Reg)r; +} + +ARM64Reg Arm64IRRegCache::FromNativeReg64(IRNativeReg r) { + _dbg_assert_msg_(r >= 0 && r < NUM_X_REGS, "Not a GPR?"); + return EncodeRegTo64((Arm64Gen::ARM64Reg)r); +} + +#endif diff --git a/Core/MIPS/ARM64/Arm64IRRegCache.h b/Core/MIPS/ARM64/Arm64IRRegCache.h new file mode 100644 index 0000000000..89e570d643 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64IRRegCache.h @@ -0,0 +1,108 @@ +// Copyright (c) 2023- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "ppsspp_config.h" +// In other words, PPSSPP_ARCH(ARM64) || DISASM_ALL. 
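The conversion helpers at the end of Arm64IRRegCache.cpp fold both register files into one flat index space: general purpose registers occupy indices 0-31 and the floating point file occupies 32-63, matching NUM_X_REGS and NUM_X_FREGS. A toy round trip of that numbering (the enum-encoding details of the real ARM64Reg type are left out):

    // Illustration only: one flat index space covering both register files.
    #include <cassert>

    enum { NUM_X_REGS = 32, NUM_X_FREGS = 32 };

    int  GPRToNative(int gpr) { return gpr; }                 // 0..31
    int  FPRToNative(int fpr) { return NUM_X_REGS + fpr; }    // 32..63
    bool NativeIsFPR(int n)   { return n >= NUM_X_REGS; }
    int  NativeToReg(int n)   { return NativeIsFPR(n) ? n - NUM_X_REGS : n; }

    int main() {
        assert(NativeToReg(GPRToNative(19)) == 19 && !NativeIsFPR(GPRToNative(19)));
        assert(NativeToReg(FPRToNative(8)) == 8 && NativeIsFPR(FPRToNative(8)));
        return 0;
    }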
+#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__)) + +#include "Common/Arm64Emitter.h" +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRRegCache.h" + +namespace Arm64IRJitConstants { + +const Arm64Gen::ARM64Reg DOWNCOUNTREG = Arm64Gen::W25; +// Note: this is actually offset from the base. +const Arm64Gen::ARM64Reg JITBASEREG = Arm64Gen::X26; +const Arm64Gen::ARM64Reg CTXREG = Arm64Gen::X27; +const Arm64Gen::ARM64Reg MEMBASEREG = Arm64Gen::X28; +const Arm64Gen::ARM64Reg SCRATCH1_64 = Arm64Gen::X16; +const Arm64Gen::ARM64Reg SCRATCH2_64 = Arm64Gen::X17; +const Arm64Gen::ARM64Reg SCRATCH1 = Arm64Gen::W16; +const Arm64Gen::ARM64Reg SCRATCH2 = Arm64Gen::W17; +// TODO: How many do we actually need? +const Arm64Gen::ARM64Reg SCRATCHF1 = Arm64Gen::S0; +const Arm64Gen::ARM64Reg SCRATCHF2 = Arm64Gen::S1; +const Arm64Gen::ARM64Reg SCRATCHF3 = Arm64Gen::S2; +const Arm64Gen::ARM64Reg SCRATCHF4 = Arm64Gen::S3; + +} // namespace X64IRJitConstants + +class Arm64IRRegCache : public IRNativeRegCacheBase { +public: + Arm64IRRegCache(MIPSComp::JitOptions *jo); + + void Init(Arm64Gen::ARM64XEmitter *emitter, Arm64Gen::ARM64FloatEmitter *fp); + + // May fail and return INVALID_REG if it needs flushing. + Arm64Gen::ARM64Reg TryMapTempImm(IRReg reg); + + // Returns an arm64 register containing the requested MIPS register. + Arm64Gen::ARM64Reg MapGPR(IRReg reg, MIPSMap mapFlags = MIPSMap::INIT); + Arm64Gen::ARM64Reg MapGPR2(IRReg reg, MIPSMap mapFlags = MIPSMap::INIT); + Arm64Gen::ARM64Reg MapGPRAsPointer(IRReg reg); + Arm64Gen::ARM64Reg MapFPR(IRReg reg, MIPSMap mapFlags = MIPSMap::INIT); + Arm64Gen::ARM64Reg MapVec4(IRReg first, MIPSMap mapFlags = MIPSMap::INIT); + + Arm64Gen::ARM64Reg MapWithFPRTemp(const IRInst &inst); + + void FlushBeforeCall(); + void FlushAll(bool gprs = true, bool fprs = true) override; + + Arm64Gen::ARM64Reg GetAndLockTempGPR(); + Arm64Gen::ARM64Reg GetAndLockTempFPR(); + + Arm64Gen::ARM64Reg R(IRReg preg); // Returns a cached register, while checking that it's NOT mapped as a pointer + Arm64Gen::ARM64Reg RPtr(IRReg preg); // Returns a cached register, if it has been mapped as a pointer + Arm64Gen::ARM64Reg F(IRReg preg); + Arm64Gen::ARM64Reg FD(IRReg preg); + Arm64Gen::ARM64Reg FQ(IRReg preg); + + // These are called once on startup to generate functions, that you should then call. 
+ void EmitLoadStaticRegisters(); + void EmitSaveStaticRegisters(); + +protected: + const StaticAllocation *GetStaticAllocations(int &count) const override; + const int *GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &count, int &base) const override; + void AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) override; + + bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) override; + void LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) override; + void StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) override; + void SetNativeRegValue(IRNativeReg nreg, uint32_t imm) override; + void StoreRegValue(IRReg mreg, uint32_t imm) override; + +private: + IRNativeReg GPRToNativeReg(Arm64Gen::ARM64Reg r); + IRNativeReg VFPToNativeReg(Arm64Gen::ARM64Reg r); + Arm64Gen::ARM64Reg FromNativeReg(IRNativeReg r); + Arm64Gen::ARM64Reg FromNativeReg64(IRNativeReg r); + + Arm64Gen::ARM64XEmitter *emit_ = nullptr; + Arm64Gen::ARM64FloatEmitter *fp_ = nullptr; + + enum { + NUM_X_REGS = 32, + NUM_X_FREGS = 32, + }; +}; + +#endif diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index 7cd47c3d11..272c4fdb5f 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -34,7 +34,6 @@ namespace MIPSComp { IRFrontend::IRFrontend(bool startDefaultPrefix) { js.startDefaultPrefix = startDefaultPrefix; js.hasSetRounding = false; - // js.currentRoundingFunc = convertS0ToSCRATCH1[0]; // The debugger sets this so that "go" on a breakpoint will actually... go. // But if they reset, we can end up hitting it by mistake, since it's based on PC and ticks. diff --git a/Core/MIPS/IR/IRNativeCommon.cpp b/Core/MIPS/IR/IRNativeCommon.cpp index f9a8e00bca..6ce1e0f0d6 100644 --- a/Core/MIPS/IR/IRNativeCommon.cpp +++ b/Core/MIPS/IR/IRNativeCommon.cpp @@ -490,6 +490,10 @@ const u8 *IRNativeJit::GetCrashHandler() const { return backend_->GetNativeHooks().crashHandler; } +void IRNativeJit::UpdateFCR31() { + backend_->UpdateFCR31(mips_); +} + JitBlockCacheDebugInterface *IRNativeJit::GetBlockCacheDebugInterface() { return &debugInterface_; } diff --git a/Core/MIPS/IR/IRNativeCommon.h b/Core/MIPS/IR/IRNativeCommon.h index cef2c593e3..93273f6f0f 100644 --- a/Core/MIPS/IR/IRNativeCommon.h +++ b/Core/MIPS/IR/IRNativeCommon.h @@ -59,6 +59,8 @@ public: virtual void InvalidateBlock(IRBlock *block, int block_num) = 0; void FinalizeBlock(IRBlock *block, int block_num, const JitOptions &jo); + virtual void UpdateFCR31(MIPSState *mipsState) {} + const IRNativeHooks &GetNativeHooks() const { return hooks_; } @@ -168,6 +170,8 @@ public: const u8 *GetDispatcher() const override; const u8 *GetCrashHandler() const override; + void UpdateFCR31() override; + JitBlockCacheDebugInterface *GetBlockCacheDebugInterface() override; protected: diff --git a/Core/MIPS/JitCommon/JitCommon.cpp b/Core/MIPS/JitCommon/JitCommon.cpp index 48feebbbb5..259b500f0b 100644 --- a/Core/MIPS/JitCommon/JitCommon.cpp +++ b/Core/MIPS/JitCommon/JitCommon.cpp @@ -41,6 +41,7 @@ #include "../ARM/ArmJit.h" #elif PPSSPP_ARCH(ARM64) #include "../ARM64/Arm64Jit.h" +#include "../ARM64/Arm64IRJit.h" #elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) #include "../x86/Jit.h" #include "../x86/X64IRJit.h" @@ -106,6 +107,8 @@ namespace MIPSComp { #if PPSSPP_ARCH(ARM) return new MIPSComp::ArmJit(mipsState); #elif PPSSPP_ARCH(ARM64) + if (useIR) + return new MIPSComp::Arm64IRJit(mipsState); return new MIPSComp::Arm64Jit(mipsState); #elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) if (useIR) diff --git 
a/Core/MIPS/RiscV/RiscVJit.cpp b/Core/MIPS/RiscV/RiscVJit.cpp index be97dd4267..023687e47f 100644 --- a/Core/MIPS/RiscV/RiscVJit.cpp +++ b/Core/MIPS/RiscV/RiscVJit.cpp @@ -56,6 +56,8 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) if (GetSpaceLeft() < 0x800) return false; + BeginWrite(std::min(GetSpaceLeft(), (size_t)block->GetNumInstructions() * 32)); + u32 startPC = block->GetOriginalStart(); bool wroteCheckedOffset = false; if (jo.enableBlocklink && !jo.useBackJump) { @@ -151,6 +153,7 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) } } + EndWrite(); FlushIcache(); compilingBlockNum_ = -1; @@ -163,8 +166,6 @@ void RiscVJitBackend::WriteConstExit(uint32_t pc) { int exitStart = (int)GetOffset(GetCodePointer()); if (block_num >= 0 && jo.enableBlocklink && nativeBlock && nativeBlock->checkedOffset != 0) { - // Don't bother recording, we don't ever overwrite to "unlink". - // Instead, we would mark the target block to jump to the dispatcher. QuickJ(SCRATCH1, GetBasePtr() + nativeBlock->checkedOffset); } else { LI(SCRATCH1, pc); diff --git a/Core/MIPS/RiscV/RiscVRegCache.cpp b/Core/MIPS/RiscV/RiscVRegCache.cpp index 3e98406380..1c981eefd4 100644 --- a/Core/MIPS/RiscV/RiscVRegCache.cpp +++ b/Core/MIPS/RiscV/RiscVRegCache.cpp @@ -77,7 +77,6 @@ const int *RiscVRegCache::GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &c } } else if (type == MIPSLoc::FREG) { // F8 through F15 are used for compression, so they are great. - // TODO: Maybe we could remove some saved regs since we rarely need that many? Or maybe worth it? static const int allocationOrder[] = { F8, F9, F10, F11, F12, F13, F14, F15, F0, F1, F2, F3, F4, F5, F6, F7, @@ -312,7 +311,6 @@ void RiscVRegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) { _dbg_assert_(r > X0); _dbg_assert_(first != MIPS_REG_ZERO); if (r <= X31) { - // Multilane not yet supported. _assert_(lanes == 1 || (lanes == 2 && first == IRREG_LO)); if (lanes == 1) emit_->LW(r, CTXREG, GetMipsRegOffset(first)); diff --git a/Core/MIPS/RiscV/RiscVRegCache.h b/Core/MIPS/RiscV/RiscVRegCache.h index a97a9fc169..facfa52195 100644 --- a/Core/MIPS/RiscV/RiscVRegCache.h +++ b/Core/MIPS/RiscV/RiscVRegCache.h @@ -44,7 +44,7 @@ public: void Init(RiscVGen::RiscVEmitter *emitter); // May fail and return INVALID_REG if it needs flushing. - RiscVGen::RiscVReg TryMapTempImm(IRReg); + RiscVGen::RiscVReg TryMapTempImm(IRReg reg); // Returns an RV register containing the requested MIPS register. 
RiscVGen::RiscVReg MapGPR(IRReg reg, MIPSMap mapFlags = MIPSMap::INIT); diff --git a/Core/MIPS/x86/X64IRCompALU.cpp b/Core/MIPS/x86/X64IRCompALU.cpp index e1621b3049..fc8d7c9b14 100644 --- a/Core/MIPS/x86/X64IRCompALU.cpp +++ b/Core/MIPS/x86/X64IRCompALU.cpp @@ -505,6 +505,7 @@ void X64JitBackend::CompIR_Logic(IRInst inst) { AND(32, regs_.R(inst.dest), regs_.R(inst.src2)); } break; + case IROp::Or: regs_.Map(inst); if (inst.dest == inst.src1) { diff --git a/Core/MIPS/x86/X64IRCompSystem.cpp b/Core/MIPS/x86/X64IRCompSystem.cpp index 91d4730773..5febc50ca6 100644 --- a/Core/MIPS/x86/X64IRCompSystem.cpp +++ b/Core/MIPS/x86/X64IRCompSystem.cpp @@ -219,7 +219,7 @@ int ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_t isWrite) { return toss(MemoryExceptionType::ALIGNMENT); } return 0; -}; +} void X64JitBackend::CompIR_ValidateAddress(IRInst inst) { CONDITIONAL_DISABLE; diff --git a/Core/MIPS/x86/X64IRJit.cpp b/Core/MIPS/x86/X64IRJit.cpp index 75b11b4ceb..f70901eba8 100644 --- a/Core/MIPS/x86/X64IRJit.cpp +++ b/Core/MIPS/x86/X64IRJit.cpp @@ -166,8 +166,6 @@ void X64JitBackend::WriteConstExit(uint32_t pc) { int exitStart = (int)GetOffset(GetCodePointer()); if (block_num >= 0 && jo.enableBlocklink && nativeBlock && nativeBlock->checkedOffset != 0) { - // Don't bother recording, we don't ever overwrite to "unlink". - // Instead, we would mark the target block to jump to the dispatcher. JMP(GetBasePtr() + nativeBlock->checkedOffset, true); } else { MOV(32, R(SCRATCH1), Imm32(pc)); diff --git a/Core/MIPS/x86/X64IRJit.h b/Core/MIPS/x86/X64IRJit.h index b63d340485..6a2c09aef5 100644 --- a/Core/MIPS/x86/X64IRJit.h +++ b/Core/MIPS/x86/X64IRJit.h @@ -164,12 +164,12 @@ private: class X64IRJit : public IRNativeJit { public: X64IRJit(MIPSState *mipsState) - : IRNativeJit(mipsState), rvBackend_(jo, blocks_) { - Init(rvBackend_); + : IRNativeJit(mipsState), x64Backend_(jo, blocks_) { + Init(x64Backend_); } private: - X64JitBackend rvBackend_; + X64JitBackend x64Backend_; }; } // namespace MIPSComp diff --git a/Core/MIPS/x86/X64IRRegCache.cpp b/Core/MIPS/x86/X64IRRegCache.cpp index be98e5f244..387a64bdd1 100644 --- a/Core/MIPS/x86/X64IRRegCache.cpp +++ b/Core/MIPS/x86/X64IRRegCache.cpp @@ -262,7 +262,7 @@ void X64IRRegCache::MapWithFlags(IRInst inst, X64Map destFlags, X64Map src1Flags X64Reg X64IRRegCache::MapGPR(IRReg mipsReg, MIPSMap mapFlags) { _dbg_assert_(IsValidGPR(mipsReg)); - // Okay, not mapped, so we need to allocate an RV register. + // Okay, not mapped, so we need to allocate an x64 register. IRNativeReg nreg = MapNativeReg(MIPSLoc::REG, mipsReg, 1, mapFlags); return FromNativeReg(nreg); } @@ -270,7 +270,7 @@ X64Reg X64IRRegCache::MapGPR(IRReg mipsReg, MIPSMap mapFlags) { X64Reg X64IRRegCache::MapGPR2(IRReg mipsReg, MIPSMap mapFlags) { _dbg_assert_(IsValidGPR(mipsReg) && IsValidGPR(mipsReg + 1)); - // Okay, not mapped, so we need to allocate an RV register. + // Okay, not mapped, so we need to allocate an x64 register. IRNativeReg nreg = MapNativeReg(MIPSLoc::REG, mipsReg, 2, mapFlags); return FromNativeReg(nreg); } @@ -326,7 +326,6 @@ void X64IRRegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) { X64Reg r = FromNativeReg(nreg); _dbg_assert_(first != MIPS_REG_ZERO); if (nreg < NUM_X_REGS) { - // Multilane not yet supported. 
_assert_(lanes == 1 || (lanes == 2 && first == IRREG_LO)); if (lanes == 1) emit_->MOV(32, ::R(r), MDisp(CTXREG, -128 + GetMipsRegOffset(first))); @@ -354,7 +353,6 @@ void X64IRRegCache::StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) { X64Reg r = FromNativeReg(nreg); _dbg_assert_(first != MIPS_REG_ZERO); if (nreg < NUM_X_REGS) { - // Multilane not yet supported. _assert_(lanes == 1 || (lanes == 2 && first == IRREG_LO)); _assert_(mr[first].loc == MIPSLoc::REG || mr[first].loc == MIPSLoc::REG_IMM); if (lanes == 1) @@ -434,9 +432,9 @@ X64Reg X64IRRegCache::RXPtr(IRReg mipsReg) { if (mr[mipsReg].loc == MIPSLoc::REG_AS_PTR) { return FromNativeReg(mr[mipsReg].nReg); } else if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) { - int rv = mr[mipsReg].nReg; - _dbg_assert_(nr[rv].pointerified); - if (nr[rv].pointerified) { + int r = mr[mipsReg].nReg; + _dbg_assert_(nr[r].pointerified); + if (nr[r].pointerified) { return FromNativeReg(mr[mipsReg].nReg); } else { ERROR_LOG(JIT, "Tried to use a non-pointer register as a pointer"); diff --git a/Core/MIPS/x86/X64IRRegCache.h b/Core/MIPS/x86/X64IRRegCache.h index 953c92e759..7cc580bb7a 100644 --- a/Core/MIPS/x86/X64IRRegCache.h +++ b/Core/MIPS/x86/X64IRRegCache.h @@ -78,9 +78,9 @@ public: void Init(Gen::XEmitter *emitter); // May fail and return INVALID_REG if it needs flushing. - Gen::X64Reg TryMapTempImm(IRReg, X64IRJitConstants::X64Map flags = X64IRJitConstants::X64Map::NONE); + Gen::X64Reg TryMapTempImm(IRReg reg, X64IRJitConstants::X64Map flags = X64IRJitConstants::X64Map::NONE); - // Returns an RV register containing the requested MIPS register. + // Returns an X64 register containing the requested MIPS register. Gen::X64Reg MapGPR(IRReg reg, MIPSMap mapFlags = MIPSMap::INIT); Gen::X64Reg MapGPR2(IRReg reg, MIPSMap mapFlags = MIPSMap::INIT); Gen::X64Reg MapGPRAsPointer(IRReg reg); diff --git a/UWP/CoreUWP/CoreUWP.vcxproj b/UWP/CoreUWP/CoreUWP.vcxproj index 298756b99f..d74fdbbb8d 100644 --- a/UWP/CoreUWP/CoreUWP.vcxproj +++ b/UWP/CoreUWP/CoreUWP.vcxproj @@ -271,6 +271,8 @@ + + @@ -520,6 +522,15 @@ + + + + + + + + + diff --git a/UWP/CoreUWP/CoreUWP.vcxproj.filters b/UWP/CoreUWP/CoreUWP.vcxproj.filters index 124e0d1284..b464f75271 100644 --- a/UWP/CoreUWP/CoreUWP.vcxproj.filters +++ b/UWP/CoreUWP/CoreUWP.vcxproj.filters @@ -782,6 +782,33 @@ MIPS\ARM64 + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + + + MIPS\ARM64 + HW @@ -1790,6 +1817,12 @@ MIPS\ARM64 + + MIPS\ARM64 + + + MIPS\ARM64 + HW diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 95e98a34fb..338d52baa0 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -343,6 +343,15 @@ ARCH_FILES := \ $(SRC)/Core/MIPS/ARM64/Arm64Jit.cpp \ $(SRC)/Core/MIPS/ARM64/Arm64RegCache.cpp \ $(SRC)/Core/MIPS/ARM64/Arm64RegCacheFPU.cpp \ + $(SRC)/Core/MIPS/ARM64/Arm64IRAsm.cpp \ + $(SRC)/Core/MIPS/ARM64/Arm64IRCompALU.cpp \ + $(SRC)/Core/MIPS/ARM64/Arm64IRCompBranch.cpp \ + $(SRC)/Core/MIPS/ARM64/Arm64IRCompFPU.cpp \ + $(SRC)/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp \ + $(SRC)/Core/MIPS/ARM64/Arm64IRCompSystem.cpp \ + $(SRC)/Core/MIPS/ARM64/Arm64IRCompVec.cpp \ + $(SRC)/Core/MIPS/ARM64/Arm64IRJit.cpp \ + $(SRC)/Core/MIPS/ARM64/Arm64IRRegCache.cpp \ $(SRC)/Core/Util/DisArm64.cpp \ $(SRC)/GPU/Common/VertexDecoderArm64.cpp \ Arm64EmitterTest.cpp diff --git a/libretro/Makefile.common b/libretro/Makefile.common index f7c2c98a29..0168cd21e5 100644 --- 
a/libretro/Makefile.common +++ b/libretro/Makefile.common @@ -757,6 +757,15 @@ ifeq ($(WITH_DYNAREC),1) $(COREDIR)/MIPS/ARM64/Arm64Jit.cpp \ $(COREDIR)/MIPS/ARM64/Arm64RegCache.cpp \ $(COREDIR)/MIPS/ARM64/Arm64RegCacheFPU.cpp \ + $(COREDIR)/MIPS/ARM64/Arm64IRAsm.cpp \ + $(COREDIR)/MIPS/ARM64/Arm64IRCompALU.cpp \ + $(COREDIR)/MIPS/ARM64/Arm64IRCompBranch.cpp \ + $(COREDIR)/MIPS/ARM64/Arm64IRCompFPU.cpp \ + $(COREDIR)/MIPS/ARM64/Arm64IRCompLoadStore.cpp \ + $(COREDIR)/MIPS/ARM64/Arm64IRCompSystem.cpp \ + $(COREDIR)/MIPS/ARM64/Arm64IRCompVec.cpp \ + $(COREDIR)/MIPS/ARM64/Arm64IRJit.cpp \ + $(COREDIR)/MIPS/ARM64/Arm64IRRegCache.cpp \ $(COREDIR)/Util/DisArm64.cpp \ $(GPUCOMMONDIR)/VertexDecoderArm64.cpp From 76072808373114c47c64f3ed7a3a207bee1f60bd Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 3 Sep 2023 12:20:16 -0700 Subject: [PATCH 2/2] arm64jit: Implement just the most basic ops. This improves the slowness a good bit. --- Core/MIPS/ARM64/Arm64IRCompALU.cpp | 27 +++++++++++++++++++++++++++ Core/MIPS/ARM64/Arm64IRCompSystem.cpp | 21 ++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/Core/MIPS/ARM64/Arm64IRCompALU.cpp b/Core/MIPS/ARM64/Arm64IRCompALU.cpp index f7cefcc723..b0b994db36 100644 --- a/Core/MIPS/ARM64/Arm64IRCompALU.cpp +++ b/Core/MIPS/ARM64/Arm64IRCompALU.cpp @@ -51,8 +51,29 @@ void Arm64JitBackend::CompIR_Arith(IRInst inst) { switch (inst.op) { case IROp::Add: case IROp::Sub: + CompIR_Generic(inst); + break; + case IROp::AddConst: + if (regs_.IsGPRMappedAsPointer(inst.dest) && inst.dest == inst.src1 && allowPtrMath) { + regs_.MarkGPRAsPointerDirty(inst.dest); + ADDI2R(regs_.RPtr(inst.dest), regs_.RPtr(inst.src1), (int)inst.constant, SCRATCH1_64); + } else { + regs_.Map(inst); + ADDI2R(regs_.R(inst.dest), regs_.R(inst.src1), inst.constant, SCRATCH1); + } + break; + case IROp::SubConst: + if (regs_.IsGPRMappedAsPointer(inst.dest) && inst.dest == inst.src1 && allowPtrMath) { + regs_.MarkGPRAsPointerDirty(inst.dest); + SUBI2R(regs_.RPtr(inst.dest), regs_.RPtr(inst.src1), (int)inst.constant, SCRATCH1_64); + } else { + regs_.Map(inst); + SUBI2R(regs_.R(inst.dest), regs_.R(inst.src1), inst.constant, SCRATCH1); + } + break; + case IROp::Neg: CompIR_Generic(inst); break; @@ -68,6 +89,12 @@ void Arm64JitBackend::CompIR_Assign(IRInst inst) { switch (inst.op) { case IROp::Mov: + if (inst.dest != inst.src1) { + regs_.Map(inst); + MOV(regs_.R(inst.dest), regs_.R(inst.src1)); + } + break; + case IROp::Ext8to32: case IROp::Ext16to32: CompIR_Generic(inst); diff --git a/Core/MIPS/ARM64/Arm64IRCompSystem.cpp b/Core/MIPS/ARM64/Arm64IRCompSystem.cpp index be4400f465..01ec22bcc5 100644 --- a/Core/MIPS/ARM64/Arm64IRCompSystem.cpp +++ b/Core/MIPS/ARM64/Arm64IRCompSystem.cpp @@ -47,11 +47,30 @@ void Arm64JitBackend::CompIR_Basic(IRInst inst) { switch (inst.op) { case IROp::Downcount: + SUBI2R(DOWNCOUNTREG, DOWNCOUNTREG, (s64)(s32)inst.constant, SCRATCH1); + break; + case IROp::SetConst: + regs_.SetGPRImm(inst.dest, inst.constant); + break; + case IROp::SetConstF: + { + regs_.Map(inst); + float f; + memcpy(&f, &inst.constant, sizeof(f)); + fp_.MOVI2F(regs_.F(inst.dest), f, SCRATCH1); + break; + } + case IROp::SetPC: + regs_.Map(inst); + MovToPC(regs_.R(inst.src1)); + break; + case IROp::SetPCConst: - CompIR_Generic(inst); + MOVI2R(SCRATCH1, inst.constant); + MovToPC(SCRATCH1); break; default:
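In the second patch, SetConstF receives the float constant as a raw 32-bit payload and rebuilds the float with memcpy before handing it to MOVI2F; memcpy is the portable way to do that bit copy without aliasing problems. A minimal standalone version of the round trip:

    // Illustration only: a float constant travels through the IR as its raw bit pattern.
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
        float original = 1.5f;
        uint32_t constant;
        memcpy(&constant, &original, sizeof(constant));   // packed as a raw 32-bit payload

        float f;
        memcpy(&f, &constant, sizeof(f));                 // unpacked the way CompIR_Basic does
        printf("0x%08x -> %g\n", constant, f);            // 0x3fc00000 -> 1.5
        return 0;
    }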