diff --git a/Common/Arm64Emitter.cpp b/Common/Arm64Emitter.cpp index 7efbeba3a5..ae810d2fd3 100644 --- a/Common/Arm64Emitter.cpp +++ b/Common/Arm64Emitter.cpp @@ -3,6 +3,8 @@ // Refer to the license.txt file included. #include +#include +#include #include "Arm64Emitter.h" #include "MathUtil.h" @@ -193,11 +195,11 @@ void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr bool b64Bit = Is64Bit(Rt); s64 distance = (s64)ptr - (s64)m_code; - _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, (int)distance); distance >>= 2; - _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance); Rt = DecodeReg(Rt); Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | \ @@ -209,11 +211,11 @@ void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const voi bool b64Bit = Is64Bit(Rt); s64 distance = (s64)ptr - (s64)m_code; - _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, (int)distance); distance >>= 2; - _assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s: Received too large distance: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance); Rt = DecodeReg(Rt); Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | \ @@ -224,11 +226,11 @@ void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr) { s64 distance = (s64)ptr - s64(m_code); - _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, (int)distance); distance >>= 2; - _assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance); Write32((op << 31) | (0x5 << 26) | (distance & 0x3FFFFFF)); } @@ -566,7 +568,7 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) Not = true; case 0: // CBZ { - _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance); + _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, (int)distance); bool b64Bit = Is64Bit(branch.reg); ARM64Reg reg = DecodeReg(branch.reg); inst = (b64Bit << 31) | (0x1A << 25) | (Not << 24) | ((distance << 5) & 0xFFFFE0) | reg; @@ -580,7 +582,7 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) Not = true; case 3: // TBZ { - _assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance); + _assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, (int)distance); ARM64Reg 
reg = DecodeReg(branch.reg); inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | ((branch.bit & 0x1F) << 19) | (distance << 5) | reg; } @@ -677,7 +679,7 @@ void ARM64XEmitter::B(CCFlags cond, const void* ptr) s64 distance = (s64)ptr - (s64(m_code) + 8); distance >>= 2; - _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance); Write32((0x54 << 24) | (distance << 5) | cond); } @@ -1488,7 +1490,7 @@ void ARM64XEmitter::ADRP(ARM64Reg Rd, s32 imm) // Wrapper around MOVZ+MOVK void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize) { - unsigned parts = Is64Bit(Rd) ? 4 : 2; + unsigned int parts = Is64Bit(Rd) ? 4 : 2; BitSet32 upload_part(0); bool need_movz = false; @@ -1513,7 +1515,7 @@ void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize) // XXX: Support rotating immediates to save instructions if (optimize) { - for (unsigned i = 0; i < parts; ++i) + for (unsigned int i = 0; i < parts; ++i) { if ((imm >> (i * 16)) & 0xFFFF) upload_part[i] = 1; diff --git a/Core/Core.vcxproj b/Core/Core.vcxproj index dfe8680722..c1681d1049 100644 --- a/Core/Core.vcxproj +++ b/Core/Core.vcxproj @@ -286,6 +286,66 @@ + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + true true @@ -540,6 +600,30 @@ + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + true true @@ -632,4 +716,4 @@ - \ No newline at end of file + diff --git a/Core/Core.vcxproj.filters b/Core/Core.vcxproj.filters index 4bbb788e14..0069834c31 100644 --- a/Core/Core.vcxproj.filters +++ b/Core/Core.vcxproj.filters @@ -557,6 +557,16 @@ HLE\Libraries + + + + + + + + + + @@ -1064,6 +1074,10 @@ Core + + + + @@ -1071,4 +1085,4 @@ - \ No newline at end of file + diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index e6c354d190..1f22e7432f 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -1015,6 +1015,8 @@ static int Hook_gakuenheaven_download_frame() { #ifdef ARM #define JITFUNC(f) (&MIPSComp::ArmJit::f) +#elif defined(ARM64) +#define JITFUNC(f) (&MIPSComp::Arm64Jit::f) #elif defined(_M_X64) || defined(_M_IX86) #define JITFUNC(f) (&MIPSComp::Jit::f) #elif defined(MIPS) diff --git a/Core/MIPS/ARM64/Arm64Asm.cpp b/Core/MIPS/ARM64/Arm64Asm.cpp new file mode 100644 index 0000000000..f7642ffedc --- /dev/null +++ b/Core/MIPS/ARM64/Arm64Asm.cpp @@ -0,0 +1,86 @@ +// Copyright (c) 2015- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + + +#include "Core/MemMap.h" +#include "Core/MIPS/MIPS.h" +#include "Core/System.h" +#include "Core/CoreTiming.h" +#include "Common/MemoryUtil.h" +#include "Common/CPUDetect.h" +#include "Common/Arm64Emitter.h" +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64Asm.h" +#include "Core/MIPS/JitCommon/JitCommon.h" + +using namespace Arm64Gen; + +//static int temp32; // unused? + +static const bool enableDebug = false; + +//static bool enableStatistics = false; //unused? + +//The standard ARM calling convention allocates the 16 ARM registers as: + +// r15 is the program counter. +// r14 is the link register. (The BL instruction, used in a subroutine call, stores the return address in this register). +// r13 is the stack pointer. (The Push/Pop instructions in "Thumb" operating mode use this register only). +// r12 is the Intra-Procedure-call scratch register. +// r4 to r11: used to hold local variables. +// r0 to r3: used to hold argument values passed to a subroutine, and also hold results returned from a subroutine. + +// Mappable registers: +// R2, R3, R4, R5, R6, R8, R11 + +// STATIC ALLOCATION ARM: +// R10 : MIPS state +// R11 : Memory base pointer. +// R7 : Down counter +extern volatile CoreState coreState; + +void ShowPC(u32 sp) { + if (currentMIPS) { + ERROR_LOG(JIT, "ShowPC : %08x ArmSP : %08x", currentMIPS->pc, sp); + } else { + ERROR_LOG(JIT, "Universe corrupt?"); + } +} + +void DisassembleArm(const u8 *data, int size); + +// PLAN: no more block numbers - crazy opcodes just contain offset within +// dynarec buffer +// At this offset - 4, there is an int specifying the block number. + +namespace MIPSComp { + +using namespace Arm64JitConstants; + +void Arm64Jit::GenerateFixedCode() +{ + + // Uncomment if you want to see the output... + // INFO_LOG(JIT, "THE DISASM ========================"); + // DisassembleArm(enterCode, GetCodePtr() - enterCode); + // INFO_LOG(JIT, "END OF THE DISASM ========================"); + + // Don't forget to zap the instruction cache! + FlushIcache(); +} + +} // namespace MIPSComp diff --git a/Core/MIPS/ARM64/Arm64Asm.h b/Core/MIPS/ARM64/Arm64Asm.h new file mode 100644 index 0000000000..dd3b2046d8 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64Asm.h @@ -0,0 +1,22 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "Core/MIPS/MIPS.h" + +// Runtime generated assembly routines, like the Dispatcher. diff --git a/Core/MIPS/ARM64/Arm64CompALU.cpp b/Core/MIPS/ARM64/Arm64CompALU.cpp new file mode 100644 index 0000000000..15cb41197e --- /dev/null +++ b/Core/MIPS/ARM64/Arm64CompALU.cpp @@ -0,0 +1,83 @@ +// Copyright (c) 2012- PPSSPP Project. 
+ +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include + +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Common/CPUDetect.h" + +using namespace MIPSAnalyst; + +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. + +//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; } +#define CONDITIONAL_DISABLE ; +#define DISABLE { Comp_Generic(op); return; } + +namespace MIPSComp +{ +using namespace Arm64Gen; +using namespace Arm64JitConstants; + +void Arm64Jit::Comp_IType(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_RType2(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_RType3(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_ShiftType(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_Special3(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_Allegrex(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_Allegrex2(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_MulDivType(MIPSOpcode op) { + DISABLE; +} +} diff --git a/Core/MIPS/ARM64/Arm64CompBranch.cpp b/Core/MIPS/ARM64/Arm64CompBranch.cpp new file mode 100644 index 0000000000..3fd83a68b1 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64CompBranch.cpp @@ -0,0 +1,614 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
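+// Worked example of the branch-target arithmetic used throughout this file
+// (illustration only, not emitted code; the helper name is made up):
+//
+//   static u32 BranchTargetFromImm16(u32 compilerPC, u32 op) {
+//     int offset = (signed short)(op & 0xFFFF) << 2;  // i.e. _IMM16 << 2
+//     return compilerPC + offset + 4;                 // relative to the delay slot
+//   }
+//
+// A branch at 0x08804000 with imm16 = 3 therefore targets 0x08804010, and with
+// imm16 = 0xFFFF (-1) it targets 0x08804000, i.e. the branch instruction itself.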
+ +#include "Core/Reporting.h" +#include "Core/Config.h" +#include "Core/MemMap.h" +#include "Core/HLE/HLE.h" +#include "Core/HLE/HLETables.h" + +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/MIPSAnalyst.h" +#include "Core/MIPS/MIPSTables.h" + +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Core/MIPS/JitCommon/JitBlockCache.h" + +#include "Common/Arm64Emitter.h" + +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +#define LOOPOPTIMIZATION 0 + +// We can disable nice delay slots. +// #define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false; +#define CONDITIONAL_NICE_DELAYSLOT ; + +using namespace MIPSAnalyst; + +namespace MIPSComp +{ + using namespace Arm64Gen; + using namespace Arm64JitConstants; + +void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) +{ + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); + return; + } + int offset = _IMM16 << 2; + MIPSGPReg rt = _RT; + MIPSGPReg rs = _RS; + u32 targetAddr = js.compilerPC + offset + 4; + + bool immBranch = false; + bool immBranchTaken = false; + if (gpr.IsImm(rs) && gpr.IsImm(rt)) { + // The cc flags are opposites: when NOT to take the branch. + bool immBranchNotTaken; + s32 rsImm = (s32)gpr.GetImm(rs); + s32 rtImm = (s32)gpr.GetImm(rt); + + switch (cc) + { + case CC_EQ: immBranchNotTaken = rsImm == rtImm; break; + case CC_NEQ: immBranchNotTaken = rsImm != rtImm; break; + default: immBranchNotTaken = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp()."); + } + immBranch = true; + immBranchTaken = !immBranchNotTaken; + } + + if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) { + if (!immBranchTaken) { + // Skip the delay slot if likely, otherwise it'll be the next instruction. + if (likely) + js.compilerPC += 4; + return; + } + + // Branch taken. Always compile the delay slot, and then go to dest. + CompileDelaySlot(DELAYSLOT_NICE); + AddContinuedBlock(targetAddr); + // Account for the increment in the loop. + js.compilerPC = targetAddr - 4; + // In case the delay slot was a break or something. + js.compiling = true; + return; + } + + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4); + bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); + CONDITIONAL_NICE_DELAYSLOT; + + if (immBranch) { + // Continuing is handled above, this is just static jumping. + if (immBranchTaken || !likely) + CompileDelaySlot(DELAYSLOT_FLUSH); + else + FlushAll(); + + const u32 destAddr = immBranchTaken ? targetAddr : js.compilerPC + 8; + WriteExit(destAddr, js.nextExit++); + } else { + if (!likely && delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_NICE); + + // We might be able to flip the condition (EQ/NEQ are easy.) 
+ const bool canFlip = cc == CC_EQ || cc == CC_NEQ; + + // TODO ARM64: Optimize for immediates + gpr.MapInIn(rs, rt); + CMP(gpr.R(rs), gpr.R(rt)); + + Arm64Gen::FixupBranch ptr; + if (!likely) { + if (!delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + else + FlushAll(); + ptr = B(cc); + } else { + FlushAll(); + ptr = B(cc); + CompileDelaySlot(DELAYSLOT_FLUSH); + } + + // Take the branch + WriteExit(targetAddr, js.nextExit++); + + SetJumpTarget(ptr); + // Not taken + WriteExit(js.compilerPC + 8, js.nextExit++); + } + + js.compiling = false; +} + + +void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool likely) +{ + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); + return; + } + int offset = _IMM16 << 2; + MIPSGPReg rs = _RS; + u32 targetAddr = js.compilerPC + offset + 4; + + bool immBranch = false; + bool immBranchTaken = false; + if (gpr.IsImm(rs)) { + // The cc flags are opposites: when NOT to take the branch. + bool immBranchNotTaken; + s32 imm = (s32)gpr.GetImm(rs); + + switch (cc) + { + case CC_GT: immBranchNotTaken = imm > 0; break; + case CC_GE: immBranchNotTaken = imm >= 0; break; + case CC_LT: immBranchNotTaken = imm < 0; break; + case CC_LE: immBranchNotTaken = imm <= 0; break; + default: immBranchNotTaken = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp()."); + } + immBranch = true; + immBranchTaken = !immBranchNotTaken; + } + + if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) { + if (!immBranchTaken) { + // Skip the delay slot if likely, otherwise it'll be the next instruction. + if (likely) + js.compilerPC += 4; + return; + } + + // Branch taken. Always compile the delay slot, and then go to dest. + CompileDelaySlot(DELAYSLOT_NICE); + if (andLink) + gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8); + + AddContinuedBlock(targetAddr); + // Account for the increment in the loop. + js.compilerPC = targetAddr - 4; + // In case the delay slot was a break or something. + js.compiling = true; + return; + } + + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); + bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); + CONDITIONAL_NICE_DELAYSLOT; + + if (immBranch) { + // Continuing is handled above, this is just static jumping. + if (immBranchTaken && andLink) + gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8); + if (immBranchTaken || !likely) + CompileDelaySlot(DELAYSLOT_FLUSH); + else + FlushAll(); + + const u32 destAddr = immBranchTaken ? targetAddr : js.compilerPC + 8; + WriteExit(destAddr, js.nextExit++); + } else { + if (!likely && delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_NICE); + + gpr.MapReg(rs); + CMP(gpr.R(rs), 0); + + Arm64Gen::FixupBranch ptr; + if (!likely) + { + if (!delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + else + FlushAll(); + ptr = B(cc); + } + else + { + FlushAll(); + ptr = B(cc); + CompileDelaySlot(DELAYSLOT_FLUSH); + } + + // Take the branch + if (andLink) + { + gpr.SetRegImm(SCRATCHREG1, js.compilerPC + 8); + STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, MIPS_REG_RA * 4); + } + + WriteExit(targetAddr, js.nextExit++); + + SetJumpTarget(ptr); + // Not taken + WriteExit(js.compilerPC + 8, js.nextExit++); + } + js.compiling = false; +} + + +void Arm64Jit::Comp_RelBranch(MIPSOpcode op) +{ + // The CC flags here should be opposite of the actual branch becuase they skip the branching action. 
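+ // For example, beq (case 4 below) goes through BranchRSRTComp(op, CC_NEQ, false):
+ // the emitted CMP rs, rt followed by B(NE) jumps over the taken-path WriteExit to
+ // the not-taken one, so the MIPS branch is taken exactly when the ARM branch is not.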
+ switch (op >> 26) + { + case 4: BranchRSRTComp(op, CC_NEQ, false); break;//beq + case 5: BranchRSRTComp(op, CC_EQ, false); break;//bne + + case 6: BranchRSZeroComp(op, CC_GT, false, false); break;//blez + case 7: BranchRSZeroComp(op, CC_LE, false, false); break;//bgtz + + case 20: BranchRSRTComp(op, CC_NEQ, true); break;//beql + case 21: BranchRSRTComp(op, CC_EQ, true); break;//bnel + + case 22: BranchRSZeroComp(op, CC_GT, false, true); break;//blezl + case 23: BranchRSZeroComp(op, CC_LE, false, true); break;//bgtzl + + default: + _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); + break; + } +} + +void Arm64Jit::Comp_RelBranchRI(MIPSOpcode op) +{ + switch ((op >> 16) & 0x1F) + { + case 0: BranchRSZeroComp(op, CC_GE, false, false); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltz + case 1: BranchRSZeroComp(op, CC_LT, false, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez + case 2: BranchRSZeroComp(op, CC_GE, false, true); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 8; break;//bltzl + case 3: BranchRSZeroComp(op, CC_LT, false, true); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl + case 16: BranchRSZeroComp(op, CC_GE, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltzal + case 17: BranchRSZeroComp(op, CC_LT, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgezal + case 18: BranchRSZeroComp(op, CC_GE, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else SkipLikely(); break;//bltzall + case 19: BranchRSZeroComp(op, CC_LT, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else SkipLikely(); break;//bgezall + default: + _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); + break; + } +} + +// If likely is set, discard the branch slot if NOT taken. 
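+// The FPU compare flag lives in the MIPS_REG_FPCOND pseudo-register; BranchFPFlag
+// tests bit 0 of it with TSTI2R and, as with the integer branches above, cc is the
+// condition under which the branch is NOT taken (bc1f passes CC_NEQ, bc1t CC_EQ).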
+void Arm64Jit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely) { + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); + return; + } + int offset = _IMM16 << 2; + u32 targetAddr = js.compilerPC + offset + 4; + + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); + bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp); + CONDITIONAL_NICE_DELAYSLOT; + if (!likely && delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_NICE); + + gpr.MapReg(MIPS_REG_FPCOND); + TSTI2R(gpr.R(MIPS_REG_FPCOND), 1, W0); + + Arm64Gen::FixupBranch ptr; + if (!likely) { + if (!delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + else + FlushAll(); + ptr = B(cc); + } else { + FlushAll(); + ptr = B(cc); + CompileDelaySlot(DELAYSLOT_FLUSH); + } + + // Take the branch + WriteExit(targetAddr, js.nextExit++); + + SetJumpTarget(ptr); + // Not taken + WriteExit(js.compilerPC + 8, js.nextExit++); + js.compiling = false; +} + +void Arm64Jit::Comp_FPUBranch(MIPSOpcode op) { + switch((op >> 16) & 0x1f) { + case 0: BranchFPFlag(op, CC_NEQ, false); break; // bc1f + case 1: BranchFPFlag(op, CC_EQ, false); break; // bc1t + case 2: BranchFPFlag(op, CC_NEQ, true); break; // bc1fl + case 3: BranchFPFlag(op, CC_EQ, true); break; // bc1tl + default: + _dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted"); + break; + } +} + +// If likely is set, discard the branch slot if NOT taken. +void Arm64Jit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely) { + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); + return; + } + int offset = _IMM16 << 2; + u32 targetAddr = js.compilerPC + offset + 4; + + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); + + // Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle) + // The behavior is undefined - the CPU may take the second branch even if the first one passes. + // However, it does consistently try each branch, which these games seem to expect. + bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp); + bool delaySlotIsNice = !delaySlotIsBranch && IsDelaySlotNiceVFPU(op, delaySlotOp); + CONDITIONAL_NICE_DELAYSLOT; + if (!likely && delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_NICE); + if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1) + ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", js.compilerPC); + + int imm3 = (op >> 18) & 7; + + gpr.MapReg(MIPS_REG_VFPUCC); + TSTI2R(gpr.R(MIPS_REG_VFPUCC), 1 << imm3, W0); + + Arm64Gen::FixupBranch ptr; + js.inDelaySlot = true; + if (!likely) + { + if (!delaySlotIsNice && !delaySlotIsBranch) + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + else + FlushAll(); + ptr = B(cc); + } + else + { + FlushAll(); + ptr = B(cc); + if (!delaySlotIsBranch) + CompileDelaySlot(DELAYSLOT_FLUSH); + } + js.inDelaySlot = false; + + // Take the branch + WriteExit(targetAddr, js.nextExit++); + + SetJumpTarget(ptr); + // Not taken + u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 
4 : 8); + WriteExit(notTakenTarget, js.nextExit++); + js.compiling = false; +} + +void Arm64Jit::Comp_VBranch(MIPSOpcode op) +{ + switch ((op >> 16) & 3) + { + case 0: BranchVFPUFlag(op, CC_NEQ, false); break; // bvf + case 1: BranchVFPUFlag(op, CC_EQ, false); break; // bvt + case 2: BranchVFPUFlag(op, CC_NEQ, true); break; // bvfl + case 3: BranchVFPUFlag(op, CC_EQ, true); break; // bvtl + } +} + +void Arm64Jit::Comp_Jump(MIPSOpcode op) { + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); + return; + } + u32 off = _IMM26 << 2; + u32 targetAddr = (js.compilerPC & 0xF0000000) | off; + + // Might be a stubbed address or something? + if (!Memory::IsValidAddress(targetAddr)) { + if (js.nextExit == 0) { + ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr); + } else { + js.compiling = false; + } + // TODO: Mark this block dirty or something? May be indication it will be changed by imports. + return; + } + + switch (op >> 26) { + case 2: //j + CompileDelaySlot(DELAYSLOT_NICE); + if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) { + AddContinuedBlock(targetAddr); + // Account for the increment in the loop. + js.compilerPC = targetAddr - 4; + // In case the delay slot was a break or something. + js.compiling = true; + return; + } + FlushAll(); + WriteExit(targetAddr, js.nextExit++); + break; + + case 3: //jal + if (ReplaceJalTo(targetAddr)) + return; + + gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8); + CompileDelaySlot(DELAYSLOT_NICE); + if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) { + AddContinuedBlock(targetAddr); + // Account for the increment in the loop. + js.compilerPC = targetAddr - 4; + // In case the delay slot was a break or something. + js.compiling = true; + return; + } + FlushAll(); + WriteExit(targetAddr, js.nextExit++); + break; + + default: + _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); + break; + } + js.compiling = false; +} + +void Arm64Jit::Comp_JumpReg(MIPSOpcode op) +{ + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); + return; + } + MIPSGPReg rs = _RS; + MIPSGPReg rd = _RD; + bool andLink = (op & 0x3f) == 9; + + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); + bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); + if (andLink && rs == rd) + delaySlotIsNice = false; + CONDITIONAL_NICE_DELAYSLOT; + + ARM64Reg destReg = X8; + if (IsSyscall(delaySlotOp)) { + gpr.MapReg(rs); + MovToPC(gpr.R(rs)); // For syscall to be able to return. + if (andLink) + gpr.SetImm(rd, js.compilerPC + 8); + CompileDelaySlot(DELAYSLOT_FLUSH); + return; // Syscall wrote exit code. + } else if (delaySlotIsNice) { + if (andLink) + gpr.SetImm(rd, js.compilerPC + 8); + CompileDelaySlot(DELAYSLOT_NICE); + + if (!andLink && rs == MIPS_REG_RA && g_Config.bDiscardRegsOnJRRA) { + // According to the MIPS ABI, there are some regs we don't need to preserve. + // Let's discard them so we don't need to write them back. + // NOTE: Not all games follow the MIPS ABI! Tekken 6, for example, will crash + // with this enabled. 
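+ // Concretely, a0-t7 plus t8/t9 are caller-saved argument/temporary registers in
+ // the MIPS ABI, so the loop below just drops them from the register cache rather
+ // than writing their values back to the MIPSState context.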
+ gpr.DiscardR(MIPS_REG_COMPILER_SCRATCH); + for (int i = MIPS_REG_A0; i <= MIPS_REG_T7; i++) + gpr.DiscardR((MIPSGPReg)i); + gpr.DiscardR(MIPS_REG_T8); + gpr.DiscardR(MIPS_REG_T9); + } + + if (jo.continueJumps && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) { + AddContinuedBlock(gpr.GetImm(rs)); + // Account for the increment in the loop. + js.compilerPC = gpr.GetImm(rs) - 4; + // In case the delay slot was a break or something. + js.compiling = true; + return; + } + + gpr.MapReg(rs); + destReg = gpr.R(rs); // Safe because FlushAll doesn't change any regs + FlushAll(); + } else { + // Delay slot - this case is very rare, might be able to free up R8. + gpr.MapReg(rs); + MOV(W8, gpr.R(rs)); + if (andLink) + gpr.SetImm(rd, js.compilerPC + 8); + CompileDelaySlot(DELAYSLOT_NICE); + FlushAll(); + } + + switch (op & 0x3f) + { + case 8: //jr + break; + case 9: //jalr + break; + default: + _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); + break; + } + + WriteExitDestInR(destReg); + js.compiling = false; +} + + +void Arm64Jit::Comp_Syscall(MIPSOpcode op) +{ + if (!g_Config.bSkipDeadbeefFilling) + { + // All of these will be overwritten with DEADBEEF anyway. + gpr.DiscardR(MIPS_REG_COMPILER_SCRATCH); + // We need to keep A0 - T3, which are used for args. + gpr.DiscardR(MIPS_REG_T4); + gpr.DiscardR(MIPS_REG_T5); + gpr.DiscardR(MIPS_REG_T6); + gpr.DiscardR(MIPS_REG_T7); + gpr.DiscardR(MIPS_REG_T8); + gpr.DiscardR(MIPS_REG_T9); + + gpr.DiscardR(MIPS_REG_HI); + gpr.DiscardR(MIPS_REG_LO); + } + + // If we're in a delay slot, this is off by one. + const int offset = js.inDelaySlot ? -1 : 0; + WriteDownCount(offset); + RestoreRoundingMode(); + js.downcountAmount = -offset; + + // TODO: Maybe discard v0, v1, and some temps? Definitely at? + FlushAll(); + + SaveDowncount(); + // Skip the CallSyscall where possible. + void *quickFunc = GetQuickSyscallFunc(op); + if (quickFunc) + { + gpr.SetRegImm(W0, (u32)(intptr_t)GetSyscallInfo(op)); + // Already flushed, so X1 is safe. + QuickCallFunction(X1, quickFunc); + } + else + { + gpr.SetRegImm(W0, op.encoding); + QuickCallFunction(X1, (void *)&CallSyscall); + } + ApplyRoundingMode(); + RestoreDowncount(); + + WriteSyscallExit(); + js.compiling = false; +} + +void Arm64Jit::Comp_Break(MIPSOpcode op) +{ + Comp_Generic(op); + WriteSyscallExit(); + js.compiling = false; +} + +} // namespace Mipscomp diff --git a/Core/MIPS/ARM64/Arm64CompFPU.cpp b/Core/MIPS/ARM64/Arm64CompFPU.cpp new file mode 100644 index 0000000000..a7a0947820 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64CompFPU.cpp @@ -0,0 +1,75 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ +#include "Core/Config.h" +#include "Core/MemMap.h" +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/MIPSTables.h" + +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Common/CPUDetect.h" + +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. + +//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; } +#define CONDITIONAL_DISABLE ; +#define DISABLE { Comp_Generic(op); return; } + +namespace MIPSComp +{ + using namespace Arm64Gen; + using namespace Arm64JitConstants; + +void Arm64Jit::Comp_FPU3op(MIPSOpcode op) +{ + DISABLE; +} + +void Arm64Jit::Comp_FPULS(MIPSOpcode op) +{ + DISABLE; +} + +void Arm64Jit::Comp_FPUComp(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_FPU2op(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_mxc1(MIPSOpcode op) +{ + DISABLE; +} + +} // namespace MIPSComp diff --git a/Core/MIPS/ARM64/Arm64CompLoadStore.cpp b/Core/MIPS/ARM64/Arm64CompLoadStore.cpp new file mode 100644 index 0000000000..5741b8af63 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64CompLoadStore.cpp @@ -0,0 +1,83 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + + +// Optimization ideas: +// +// It's common to see sequences of stores writing or reading to a contiguous set of +// addresses in function prologues/epilogues: +// sw s5, 104(sp) +// sw s4, 100(sp) +// sw s3, 96(sp) +// sw s2, 92(sp) +// sw s1, 88(sp) +// sw s0, 84(sp) +// sw ra, 108(sp) +// mov s4, a0 +// mov s3, a1 +// ... +// Such sequences could easily be detected and turned into nice contiguous +// sequences of ARM stores instead of the current 3 instructions per sw/lw. +// +// Also, if we kept track of the likely register content of a cached register, +// (pointer or data), we could avoid many BIC instructions. 
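+//
+// A hedged sketch of that idea on ARM64 (nothing in this file implements it yet):
+// once two neighbouring values are mapped to host registers, a pair such as
+//   sw s0, 84(sp)
+//   sw s1, 88(sp)
+// could be emitted as a single store-pair instruction,
+//   STP w_s0, w_s1, [base, #offset]
+// halving the number of stores for typical prologues/epilogues.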
+ + +#include "Core/MemMap.h" +#include "Core/Config.h" +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSAnalyst.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" + +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. + +// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } +#define CONDITIONAL_DISABLE ; +#define DISABLE { Comp_Generic(op); return; } + +namespace MIPSComp +{ + using namespace Arm64Gen; + using namespace Arm64JitConstants; + + void Arm64Jit::Comp_ITypeMemLR(MIPSOpcode op, bool load) { + DISABLE; + } + + void Arm64Jit::Comp_ITypeMem(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Cache(MIPSOpcode op) { + DISABLE; + } +} diff --git a/Core/MIPS/ARM64/Arm64CompReplace.cpp b/Core/MIPS/ARM64/Arm64CompReplace.cpp new file mode 100644 index 0000000000..38527cea94 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64CompReplace.cpp @@ -0,0 +1,33 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Common/CPUDetect.h" +#include "Core/MemMap.h" +#include "Core/MIPS/JitCommon/JitCommon.h" +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" + +namespace MIPSComp { + +int Arm64Jit::Replace_fabsf() { + // TODO ARM64 + // fpr.MapDirtyIn(0, 12); + // VABS(fpr.R(0), fpr.R(12)); + return 4; // Number of instructions in the MIPS function +} + +} \ No newline at end of file diff --git a/Core/MIPS/ARM64/Arm64CompVFPU.cpp b/Core/MIPS/ARM64/Arm64CompVFPU.cpp new file mode 100644 index 0000000000..4ab8953240 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64CompVFPU.cpp @@ -0,0 +1,231 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
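+// Most handlers below are still DISABLE stubs; the one piece of real logic here is
+// Comp_VPFX, which captures the raw prefix bits (op & 0xFFFFF) into js.prefixS/T/D
+// and marks them PREFIX_KNOWN_DIRTY. FlushPrefixV in Arm64Jit.cpp later writes any
+// dirty prefix back to the corresponding vfpuCtrl entry in MIPSState.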
+ +#include +#include "math/math_util.h" + +#include "Core/MemMap.h" +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSTables.h" +#include "Core/MIPS/MIPSAnalyst.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Common/CPUDetect.h" +#include "Core/Config.h" +#include "Core/Reporting.h" + +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" + +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. + +// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; } + +#define CONDITIONAL_DISABLE ; +#define DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; } +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +namespace MIPSComp +{ + using namespace Arm64Gen; + using namespace Arm64JitConstants; + + void Arm64Jit::Comp_VPFX(MIPSOpcode op) + { + CONDITIONAL_DISABLE; + int data = op & 0xFFFFF; + int regnum = (op >> 24) & 3; + switch (regnum) { + case 0: // S + js.prefixS = data; + js.prefixSFlag = JitState::PREFIX_KNOWN_DIRTY; + break; + case 1: // T + js.prefixT = data; + js.prefixTFlag = JitState::PREFIX_KNOWN_DIRTY; + break; + case 2: // D + js.prefixD = data; + js.prefixDFlag = JitState::PREFIX_KNOWN_DIRTY; + break; + default: + ERROR_LOG(CPU, "VPFX - bad regnum %i : data=%08x", regnum, data); + break; + } + } + + void Arm64Jit::Comp_SV(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_SVQ(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VVectorInit(MIPSOpcode op) + { + DISABLE; + } + + void Arm64Jit::Comp_VIdt(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VMatrixInit(MIPSOpcode op) + { + DISABLE; + } + + void Arm64Jit::Comp_VHdp(MIPSOpcode op) { + DISABLE; + } + + static const float MEMORY_ALIGNED16(vavg_table[4]) = { 1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f }; + + void Arm64Jit::Comp_Vhoriz(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VDot(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VecDo3(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VV2Op(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vi2f(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vh2f(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vf2i(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Mftv(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vmfvc(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vmtvc(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vmmov(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VScl(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vmmul(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vmscl(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vtfm(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VCrs(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VDet(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vi2x(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vx2i(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VCrossQuat(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vcmp(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vcmov(MIPSOpcode op) { 
+ DISABLE; + } + + void Arm64Jit::Comp_Viim(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vfim(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vcst(MIPSOpcode op) { + DISABLE; + } + + // Very heavily used by FF:CC. Should be replaced by a fast approximation instead of + // calling the math library. + // Apparently this may not work on hardfp. I don't think we have any platforms using this though. + void Arm64Jit::Comp_VRot(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vsgn(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vocp(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_ColorConv(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vbfy(MIPSOpcode op) { + DISABLE; + } +} diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp new file mode 100644 index 0000000000..2bd62769ea --- /dev/null +++ b/Core/MIPS/ARM64/Arm64Jit.cpp @@ -0,0 +1,485 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "base/logging.h" +#include "Common/ChunkFile.h" +#include "Common/CPUDetect.h" + +#include "Core/Reporting.h" +#include "Core/Config.h" +#include "Core/Core.h" +#include "Core/CoreTiming.h" +#include "Core/Debugger/SymbolMap.h" +#include "Core/MemMap.h" + +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/MIPSInt.h" +#include "Core/MIPS/MIPSTables.h" +#include "Core/HLE/ReplaceTables.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h" + +#include "Core/MIPS/ARM64/Arm64Jit.h" + +#include "ext/disarm.h" + +using namespace Arm64JitConstants; + +void DisassembleArm64Print(const u8 *data, int size) { + ILOG("ARM64 TODO"); +} + +namespace MIPSComp +{ +using namespace Arm64Gen; +using namespace Arm64JitConstants; + +Arm64Jit::Arm64Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &js, &jo), fpr(mips, &js, &jo), mips_(mips) { + logBlocks = 0; + dontLogBlocks = 0; + blocks.Init(); + gpr.SetEmitter(this); + fpr.SetEmitter(this); + AllocCodeSpace(1024 * 1024 * 16); // 32MB is the absolute max because that's what an ARM branch instruction can reach, backwards and forwards. + GenerateFixedCode(); + + js.startDefaultPrefix = mips_->HasDefaultPrefix(); +} + +Arm64Jit::~Arm64Jit() { +} + +void Arm64Jit::DoState(PointerWrap &p) +{ + auto s = p.Section("Jit", 1, 2); + if (!s) + return; + + p.Do(js.startDefaultPrefix); + if (s >= 2) { + p.Do(js.hasSetRounding); + js.lastSetRounding = 0; + } else { + js.hasSetRounding = 1; + } +} + +// This is here so the savestate matches between jit and non-jit. 
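+// It opens the same "Jit" section with the same version range and serializes
+// placeholder values in the same order as DoState above, so a state saved with the
+// interpreter core stays loadable under the JIT and vice versa.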
+void Arm64Jit::DoDummyState(PointerWrap &p) +{ + auto s = p.Section("Jit", 1, 2); + if (!s) + return; + + bool dummy = false; + p.Do(dummy); + if (s >= 2) { + dummy = true; + p.Do(dummy); + } +} + +void Arm64Jit::FlushAll() +{ + gpr.FlushAll(); + fpr.FlushAll(); + FlushPrefixV(); +} + +void Arm64Jit::FlushPrefixV() +{ + if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) { + gpr.SetRegImm(SCRATCHREG1, js.prefixS); + STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_SPREFIX])); + js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY); + } + + if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) { + gpr.SetRegImm(SCRATCHREG1, js.prefixT); + STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_TPREFIX])); + js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY); + } + + if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) { + gpr.SetRegImm(SCRATCHREG1, js.prefixD); + STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_DPREFIX])); + js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY); + } +} + +void Arm64Jit::ClearCache() +{ + blocks.Clear(); + ClearCodeSpace(); + GenerateFixedCode(); +} + +void Arm64Jit::InvalidateCache() +{ + blocks.Clear(); +} + +void Arm64Jit::InvalidateCacheAt(u32 em_address, int length) +{ + blocks.InvalidateICache(em_address, length); +} + +void Arm64Jit::EatInstruction(MIPSOpcode op) { + MIPSInfo info = MIPSGetInfo(op); + if (info & DELAYSLOT) { + ERROR_LOG_REPORT_ONCE(ateDelaySlot, JIT, "Ate a branch op."); + } + if (js.inDelaySlot) { + ERROR_LOG_REPORT_ONCE(ateInDelaySlot, JIT, "Ate an instruction inside a delay slot."); + } + + js.numInstructions++; + js.compilerPC += 4; + js.downcountAmount += MIPSGetInstructionCycleEstimate(op); +} + +void Arm64Jit::CompileDelaySlot(int flags) +{ + // TODO ARM64 +} + + +void Arm64Jit::Compile(u32 em_address) { + if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) { + ClearCache(); + } + + int block_num = blocks.AllocateBlock(em_address); + JitBlock *b = blocks.GetBlock(block_num); + DoJit(em_address, b); + blocks.FinalizeBlock(block_num, jo.enableBlocklink); + + bool cleanSlate = false; + + if (js.hasSetRounding && !js.lastSetRounding) { + WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks"); + // Won't loop, since hasSetRounding is only ever set to 1. + js.lastSetRounding = js.hasSetRounding; + cleanSlate = true; + } + + // Drat. The VFPU hit an uneaten prefix at the end of a block. + if (js.startDefaultPrefix && js.MayHavePrefix()) { + WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", js.compilerPC - 4); + js.LogPrefix(); + + // Let's try that one more time. We won't get back here because we toggled the value. + js.startDefaultPrefix = false; + cleanSlate = true; + } + + if (cleanSlate) { + // Our assumptions are all wrong so it's clean-slate time. + ClearCache(); + Compile(em_address); + } +} + +void Arm64Jit::RunLoopUntil(u64 globalticks) +{ + ((void (*)())enterCode)(); +} + +const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) +{ + js.cancel = false; + js.blockStart = js.compilerPC = mips_->pc; + js.lastContinuedPC = 0; + js.initialBlockSize = 0; + js.nextExit = 0; + js.downcountAmount = 0; + js.curBlock = b; + js.compiling = true; + js.inDelaySlot = false; + js.PrefixStart(); + + // We add a downcount flag check before the block, used when entering from a linked block. 
+ // The last block decremented downcounter, and the flag should still be available. + // Got three variants here of where we position the code, needs detailed benchmarking. + + FixupBranch bail; + /* + if (jo.useBackJump) { + // Moves the MOVI2R and B *before* checkedEntry, and just branch backwards there. + // Speedup seems to be zero unfortunately but I guess it may vary from device to device. + // Not intrusive so keeping it around here to experiment with, may help on ARMv6 due to + // large/slow construction of 32-bit immediates? + JumpTarget backJump = GetCodePtr(); + gpr.SetRegImm(R0, js.blockStart); + B((const void *)outerLoopPCInR0); + b->checkedEntry = GetCodePtr(); + SetCC(CC_LT); + B(backJump); + SetCC(CC_AL); + } else if (jo.useForwardJump) { + b->checkedEntry = GetCodePtr(); + SetCC(CC_LT); + bail = B(); + SetCC(CC_AL); + } else { + b->checkedEntry = GetCodePtr(); + SetCC(CC_LT); + gpr.SetRegImm(R0, js.blockStart); + B((const void *)outerLoopPCInR0); + SetCC(CC_AL); + }*/ + // TODO ARM64 + + b->normalEntry = GetCodePtr(); + // TODO: this needs work + MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address); + + gpr.Start(analysis); + fpr.Start(analysis); + + int partialFlushOffset = 0; + + js.numInstructions = 0; + while (js.compiling) + { + gpr.SetCompilerPC(js.compilerPC); // Let it know for log messages + MIPSOpcode inst = Memory::Read_Opcode_JIT(js.compilerPC); + //MIPSInfo info = MIPSGetInfo(inst); + //if (info & IS_VFPU) { + // logBlocks = 1; + //} + + js.downcountAmount += MIPSGetInstructionCycleEstimate(inst); + + MIPSCompileOp(inst); + + js.compilerPC += 4; + js.numInstructions++; + + // Safety check, in case we get a bunch of really large jit ops without a lot of branching. + if (GetSpaceLeft() < 0x800 || js.numInstructions >= JitBlockCache::MAX_BLOCK_INSTRUCTIONS) + { + FlushAll(); + WriteExit(js.compilerPC, js.nextExit++); + js.compiling = false; + } + } + + if (jo.useForwardJump) { + //SetJumpTarget(bail); + //gpr.SetRegImm(R0, js.blockStart); + //B((const void *)outerLoopPCInR0); + } + + char temp[256]; + if (logBlocks > 0 && dontLogBlocks == 0) { + INFO_LOG(JIT, "=============== mips ==============="); + for (u32 cpc = em_address; cpc != js.compilerPC + 4; cpc += 4) { + MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, true); + INFO_LOG(JIT, "M: %08x %s", cpc, temp); + } + } + + b->codeSize = GetCodePtr() - b->normalEntry; + + if (logBlocks > 0 && dontLogBlocks == 0) { + INFO_LOG(JIT, "=============== ARM ==============="); + DisassembleArm64Print(b->normalEntry, GetCodePtr() - b->normalEntry); + } + if (logBlocks > 0) + logBlocks--; + if (dontLogBlocks > 0) + dontLogBlocks--; + + // Don't forget to zap the newly written instructions in the instruction cache! + FlushIcache(); + + if (js.lastContinuedPC == 0) + b->originalSize = js.numInstructions; + else + { + // We continued at least once. Add the last proxy and set the originalSize correctly. + blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr()); + b->originalSize = js.initialBlockSize; + } + return b->normalEntry; +} + +void Arm64Jit::AddContinuedBlock(u32 dest) +{ + // The first block is the root block. When we continue, we create proxy blocks after that. 
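+ // (The ProxyBlock entries appear to tie the continued address range back to the
+ // root block, so that invalidating any address in that range also invalidates this
+ // block; this is an assumption about JitBlockCache, not something established here.)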
+ if (js.lastContinuedPC == 0) + js.initialBlockSize = js.numInstructions; + else + blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr()); + js.lastContinuedPC = dest; +} + +bool Arm64Jit::DescribeCodePtr(const u8 *ptr, std::string &name) +{ + // TODO: Not used by anything yet. + return false; +} + +void Arm64Jit::Comp_RunBlock(MIPSOpcode op) +{ + // This shouldn't be necessary, the dispatcher should catch us before we get here. + ERROR_LOG(JIT, "Comp_RunBlock should never be reached!"); +} + +bool Arm64Jit::ReplaceJalTo(u32 dest) { + return false; +} + +void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op) +{ + // TODO ARM64 +} + +void Arm64Jit::Comp_Generic(MIPSOpcode op) +{ + FlushAll(); + MIPSInterpretFunc func = MIPSGetInterpretFunc(op); + if (func) { + SaveDowncount(); + // TODO: Perhaps keep the rounding mode for interp? + RestoreRoundingMode(); + // gpr.SetRegImm(SCRATCHREG1, js.compilerPC); + // MovToPC(SCRATCHREG1); + //gpr.SetRegImm(R0, op.encoding); + //QuickCallFunction(R1, (void *)func); + // TODO ARM64 + ApplyRoundingMode(); + RestoreDowncount(); + } + + const MIPSInfo info = MIPSGetInfo(op); + if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0) + { + // If it does eat them, it'll happen in MIPSCompileOp(). + if ((info & OUT_EAT_PREFIX) == 0) + js.PrefixUnknown(); + } +} + +void Arm64Jit::MovFromPC(ARM64Reg r) { + LDR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc)); +} + +void Arm64Jit::MovToPC(ARM64Reg r) { + STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc)); +} + +void Arm64Jit::SaveDowncount() { + if (jo.downcountInRegister) + STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); +} + +void Arm64Jit::RestoreDowncount() { + if (jo.downcountInRegister) + LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); +} + +void Arm64Jit::WriteDownCount(int offset) { + // TODO ARM64 +} + +// Abuses R2 +void Arm64Jit::WriteDownCountR(ARM64Reg reg) { + if (jo.downcountInRegister) { + SUBS(DOWNCOUNTREG, DOWNCOUNTREG, reg); + } else { + LDR(INDEX_UNSIGNED, X2, CTXREG, offsetof(MIPSState, downcount)); + SUBS(X2, X2, reg); + STR(INDEX_UNSIGNED, X2, CTXREG, offsetof(MIPSState, downcount)); + } +} + +void Arm64Jit::RestoreRoundingMode(bool force) { + // TODO ARM64 +} + +void Arm64Jit::ApplyRoundingMode(bool force) { + // TODO ARM64 +} + +void Arm64Jit::UpdateRoundingMode() { + // TODO ARM64 +} + +// IDEA - could have a WriteDualExit that takes two destinations and two condition flags, +// and just have conditional that set PC "twice". This only works when we fall back to dispatcher +// though, as we need to have the SUBS flag set in the end. So with block linking in the mix, +// I don't think this gives us that much benefit. +void Arm64Jit::WriteExit(u32 destination, int exit_num) +{ + WriteDownCount(); + //If nobody has taken care of this yet (this can be removed when all branches are done) + JitBlock *b = js.curBlock; + b->exitAddress[exit_num] = destination; + b->exitPtrs[exit_num] = GetWritableCodePtr(); + + // Link opportunity! + int block = blocks.GetBlockNumberFromStartAddress(destination); + if (block >= 0 && jo.enableBlocklink) { + // It exists! Joy of joy! + B(blocks.GetBlock(block)->checkedEntry); + b->linkStatus[exit_num] = true; + } else { + gpr.SetRegImm(X0, destination); + B((const void *)dispatcherPCInR0); + } +} + +void Arm64Jit::WriteExitDestInR(ARM64Reg Reg) +{ + MovToPC(Reg); + WriteDownCount(); + // TODO: shouldn't need an indirect branch here... 
+ B((const void *)dispatcher); +} + +void Arm64Jit::WriteSyscallExit() +{ + WriteDownCount(); + B((const void *)dispatcherCheckCoreState); +} + +void Arm64Jit::Comp_DoNothing(MIPSOpcode op) { } + +#define _RS ((op>>21) & 0x1F) +#define _RT ((op>>16) & 0x1F) +#define _RD ((op>>11) & 0x1F) +#define _FS ((op>>11) & 0x1F) +#define _FT ((op>>16) & 0x1F) +#define _FD ((op>>6) & 0x1F) +#define _POS ((op>>6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) + +//memory regions: +// +// 08-0A +// 48-4A +// 04-05 +// 44-45 +// mov eax, addrreg + // shr eax, 28 +// mov eax, [table+eax] +// mov dreg, [eax+offreg] + +} diff --git a/Core/MIPS/ARM64/Arm64Jit.h b/Core/MIPS/ARM64/Arm64Jit.h new file mode 100644 index 0000000000..fa4313d9d3 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64Jit.h @@ -0,0 +1,291 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "Common/CPUDetect.h" +#include "Common/ArmCommon.h" +#include "Common/Arm64Emitter.h" +#include "Core/MIPS/JitCommon/JitState.h" +#include "Core/MIPS/JitCommon/JitBlockCache.h" +#include "Core/MIPS/ARM64/Arm64Asm.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h" +#include "Core/MIPS/MIPSVFPUUtils.h" + +#ifndef offsetof +#include "stddef.h" +#endif + +namespace MIPSComp +{ + +struct Arm64JitOptions +{ + Arm64JitOptions() { + enableBlocklink = true; + downcountInRegister = true; + useBackJump = false; + useForwardJump = false; + cachePointers = true; + immBranches = false; + continueBranches = false; + continueJumps = false; + continueMaxInstructions = 300; + + useNEONVFPU = false; // true + if (!cpu_info.bNEON) + useNEONVFPU = false; + } + + bool useNEONVFPU; + bool enableBlocklink; + bool downcountInRegister; + bool useBackJump; + bool useForwardJump; + bool cachePointers; + bool immBranches; + bool continueBranches; + bool continueJumps; + int continueMaxInstructions; +}; + +class Arm64Jit : public Arm64Gen::ARM64CodeBlock +{ +public: + Arm64Jit(MIPSState *mips); + virtual ~Arm64Jit(); + + void DoState(PointerWrap &p); + static void DoDummyState(PointerWrap &p); + + // Compiled ops should ignore delay slots + // the compiler will take care of them by itself + // OR NOT + void Comp_Generic(MIPSOpcode op); + + void RunLoopUntil(u64 globalticks); + + void Compile(u32 em_address); // Compiles a block at current MIPS PC + const u8 *DoJit(u32 em_address, JitBlock *b); + + bool DescribeCodePtr(const u8 *ptr, std::string &name); + + void CompileDelaySlot(int flags); + void EatInstruction(MIPSOpcode op); + void AddContinuedBlock(u32 dest); + + void Comp_RunBlock(MIPSOpcode op); + void Comp_ReplacementFunc(MIPSOpcode op); + + // Ops + void Comp_ITypeMem(MIPSOpcode op); + void Comp_Cache(MIPSOpcode op); + + void Comp_RelBranch(MIPSOpcode op); + void Comp_RelBranchRI(MIPSOpcode op); + void 
Comp_FPUBranch(MIPSOpcode op); + void Comp_FPULS(MIPSOpcode op); + void Comp_FPUComp(MIPSOpcode op); + void Comp_Jump(MIPSOpcode op); + void Comp_JumpReg(MIPSOpcode op); + void Comp_Syscall(MIPSOpcode op); + void Comp_Break(MIPSOpcode op); + + void Comp_IType(MIPSOpcode op); + void Comp_RType2(MIPSOpcode op); + void Comp_RType3(MIPSOpcode op); + void Comp_ShiftType(MIPSOpcode op); + void Comp_Allegrex(MIPSOpcode op); + void Comp_Allegrex2(MIPSOpcode op); + void Comp_VBranch(MIPSOpcode op); + void Comp_MulDivType(MIPSOpcode op); + void Comp_Special3(MIPSOpcode op); + + void Comp_FPU3op(MIPSOpcode op); + void Comp_FPU2op(MIPSOpcode op); + void Comp_mxc1(MIPSOpcode op); + + void Comp_DoNothing(MIPSOpcode op); + + void Comp_SV(MIPSOpcode op); + void Comp_SVQ(MIPSOpcode op); + void Comp_VPFX(MIPSOpcode op); + void Comp_VVectorInit(MIPSOpcode op); + void Comp_VMatrixInit(MIPSOpcode op); + void Comp_VDot(MIPSOpcode op); + void Comp_VecDo3(MIPSOpcode op); + void Comp_VV2Op(MIPSOpcode op); + void Comp_Mftv(MIPSOpcode op); + void Comp_Vmfvc(MIPSOpcode op); + void Comp_Vmtvc(MIPSOpcode op); + void Comp_Vmmov(MIPSOpcode op); + void Comp_VScl(MIPSOpcode op); + void Comp_Vmmul(MIPSOpcode op); + void Comp_Vmscl(MIPSOpcode op); + void Comp_Vtfm(MIPSOpcode op); + void Comp_VHdp(MIPSOpcode op); + void Comp_VCrs(MIPSOpcode op); + void Comp_VDet(MIPSOpcode op); + void Comp_Vi2x(MIPSOpcode op); + void Comp_Vx2i(MIPSOpcode op); + void Comp_Vf2i(MIPSOpcode op); + void Comp_Vi2f(MIPSOpcode op); + void Comp_Vh2f(MIPSOpcode op); + void Comp_Vcst(MIPSOpcode op); + void Comp_Vhoriz(MIPSOpcode op); + void Comp_VRot(MIPSOpcode op); + void Comp_VIdt(MIPSOpcode op); + void Comp_Vcmp(MIPSOpcode op); + void Comp_Vcmov(MIPSOpcode op); + void Comp_Viim(MIPSOpcode op); + void Comp_Vfim(MIPSOpcode op); + void Comp_VCrossQuat(MIPSOpcode op); + void Comp_Vsgn(MIPSOpcode op); + void Comp_Vocp(MIPSOpcode op); + void Comp_ColorConv(MIPSOpcode op); + void Comp_Vbfy(MIPSOpcode op); + + // Non-NEON: VPFX + + // NEON implementations of the VFPU ops. 
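+ // These mirror the scalar Comp_* VFPU handlers above one-to-one; which set ends up
+ // dispatched is expected to depend on jo.useNEONVFPU, which Arm64JitOptions currently
+ // forces off.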
+ void CompNEON_SV(MIPSOpcode op); + void CompNEON_SVQ(MIPSOpcode op); + void CompNEON_VVectorInit(MIPSOpcode op); + void CompNEON_VMatrixInit(MIPSOpcode op); + void CompNEON_VDot(MIPSOpcode op); + void CompNEON_VecDo3(MIPSOpcode op); + void CompNEON_VV2Op(MIPSOpcode op); + void CompNEON_Mftv(MIPSOpcode op); + void CompNEON_Vmfvc(MIPSOpcode op); + void CompNEON_Vmtvc(MIPSOpcode op); + void CompNEON_Vmmov(MIPSOpcode op); + void CompNEON_VScl(MIPSOpcode op); + void CompNEON_Vmmul(MIPSOpcode op); + void CompNEON_Vmscl(MIPSOpcode op); + void CompNEON_Vtfm(MIPSOpcode op); + void CompNEON_VHdp(MIPSOpcode op); + void CompNEON_VCrs(MIPSOpcode op); + void CompNEON_VDet(MIPSOpcode op); + void CompNEON_Vi2x(MIPSOpcode op); + void CompNEON_Vx2i(MIPSOpcode op); + void CompNEON_Vf2i(MIPSOpcode op); + void CompNEON_Vi2f(MIPSOpcode op); + void CompNEON_Vh2f(MIPSOpcode op); + void CompNEON_Vcst(MIPSOpcode op); + void CompNEON_Vhoriz(MIPSOpcode op); + void CompNEON_VRot(MIPSOpcode op); + void CompNEON_VIdt(MIPSOpcode op); + void CompNEON_Vcmp(MIPSOpcode op); + void CompNEON_Vcmov(MIPSOpcode op); + void CompNEON_Viim(MIPSOpcode op); + void CompNEON_Vfim(MIPSOpcode op); + void CompNEON_VCrossQuat(MIPSOpcode op); + void CompNEON_Vsgn(MIPSOpcode op); + void CompNEON_Vocp(MIPSOpcode op); + void CompNEON_ColorConv(MIPSOpcode op); + void CompNEON_Vbfy(MIPSOpcode op); + + int Replace_fabsf(); + + JitBlockCache *GetBlockCache() { return &blocks; } + + void ClearCache(); + void InvalidateCache(); + void InvalidateCacheAt(u32 em_address, int length = 4); + + void EatPrefix() { js.EatPrefix(); } + +private: + void GenerateFixedCode(); + void FlushAll(); + void FlushPrefixV(); + + void WriteDownCount(int offset = 0); + void WriteDownCountR(Arm64Gen::ARM64Reg reg); + void RestoreRoundingMode(bool force = false); + void ApplyRoundingMode(bool force = false); + void UpdateRoundingMode(); + void MovFromPC(Arm64Gen::ARM64Reg r); + void MovToPC(Arm64Gen::ARM64Reg r); + + bool ReplaceJalTo(u32 dest); + + void SaveDowncount(); + void RestoreDowncount(); + + void WriteExit(u32 destination, int exit_num); + void WriteExitDestInR(Arm64Gen::ARM64Reg Reg); + void WriteSyscallExit(); + + // Utility compilation functions + void BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely); + void BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely); + void BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool likely); + void BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely); + + // Utilities to reduce duplicated code + void CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, void (ARM64XEmitter::*arith)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg src, Arm64Gen::ARM64Reg op2), bool (ARM64XEmitter::*tryArithI2R)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg src, u32 val), u32 (*eval)(u32 a, u32 b)); + void CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, void (ARM64XEmitter::*arithOp2)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rn), bool (ARM64XEmitter::*tryArithI2R)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg rm, u32 val), u32 (*eval)(u32 a, u32 b), bool symmetric = false); + + void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz); + void ApplyPrefixD(const u8 *vregs, VectorSize sz); + void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixSFlag & JitState::PREFIX_KNOWN); + GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixS, sz); + } + void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixTFlag & JitState::PREFIX_KNOWN); + 
GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixT, sz); + } + void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg); + + // Utils + void SetR0ToEffectiveAddress(MIPSGPReg rs, s16 offset); + void SetCCAndR0ForSafeAddress(MIPSGPReg rs, s16 offset, Arm64Gen::ARM64Reg tempReg, bool reverse = false); + void Comp_ITypeMemLR(MIPSOpcode op, bool load); + + JitBlockCache blocks; + Arm64JitOptions jo; + JitState js; + + Arm64RegCache gpr; + ArmRegCacheFPU fpr; + + MIPSState *mips_; + + int dontLogBlocks; + int logBlocks; + +public: + // Code pointers + const u8 *enterCode; + + const u8 *outerLoop; + const u8 *outerLoopPCInR0; + const u8 *dispatcherCheckCoreState; + const u8 *dispatcherPCInR0; + const u8 *dispatcher; + const u8 *dispatcherNoCheck; + + const u8 *breakpointBailout; +}; + +} // namespace MIPSComp + diff --git a/Core/MIPS/ARM64/Arm64RegCache.cpp b/Core/MIPS/ARM64/Arm64RegCache.cpp new file mode 100644 index 0000000000..43852bd070 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64RegCache.cpp @@ -0,0 +1,503 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Core/MemMap.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/MIPSAnalyst.h" +#include "Core/Reporting.h" +#include "Common/Arm64Emitter.h" + +#ifndef offsetof +#include "stddef.h" +#endif + +using namespace Arm64Gen; +using namespace Arm64JitConstants; + +Arm64RegCache::Arm64RegCache(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo) : mips_(mips), js_(js), jo_(jo) { +} + +void Arm64RegCache::Init(ARM64XEmitter *emitter) { + emit_ = emitter; +} + +void Arm64RegCache::Start(MIPSAnalyst::AnalysisResults &stats) { + for (int i = 0; i < NUM_ARMREG; i++) { + ar[i].mipsReg = MIPS_REG_INVALID; + ar[i].isDirty = false; + } + for (int i = 0; i < NUM_MIPSREG; i++) { + mr[i].loc = ML_MEM; + mr[i].reg = INVALID_REG; + mr[i].imm = -1; + mr[i].spillLock = false; + } +} + +const ARM64Reg *Arm64RegCache::GetMIPSAllocationOrder(int &count) { + // Note that R0 is reserved as scratch for now. + // R12 is also potentially usable. + // R4-R7 are registers we could use for static allocation or downcount. + // R8 is used to preserve flags in nasty branches. + // R9 and upwards are reserved for jit basics. + // R14 (LR) is used as a scratch reg (overwritten on calls/return.) + + // TODO ARM64 + if (jo_->downcountInRegister) { + static const ARM64Reg allocationOrder[] = { + X1, X2, X3, X4, X5, X6, X12, + }; + count = sizeof(allocationOrder) / sizeof(const int); + return allocationOrder; + } else { + static const ARM64Reg allocationOrder2[] = { + X1, X2, X3, X4, X5, X6, X7, X12, + }; + count = sizeof(allocationOrder2) / sizeof(const int); + return allocationOrder2; + } +} + +void Arm64RegCache::FlushBeforeCall() { + // R4-R11 are preserved. 
Others need flushing. + FlushArmReg(X1); + FlushArmReg(X2); + FlushArmReg(X3); + FlushArmReg(X12); +} + +bool Arm64RegCache::IsMapped(MIPSGPReg mipsReg) { + return mr[mipsReg].loc == ML_ARMREG; +} + +void Arm64RegCache::SetRegImm(ARM64Reg reg, u32 imm) { + // On ARM64, at least Cortex A57, good old MOVT/MOVW (MOVK in 64-bit) is really fast. + emit_->MOVI2R(reg, imm); +} + +void Arm64RegCache::MapRegTo(ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags) { + ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false; + if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) { + if (mipsReg == MIPS_REG_ZERO) { + // If we get a request to load the zero register, at least we won't spend + // time on a memory access... + // TODO: EOR? + emit_->MOVI2R(reg, 0); + + // This way, if we SetImm() it, we'll keep it. + mr[mipsReg].loc = ML_ARMREG_IMM; + mr[mipsReg].imm = 0; + } else { + switch (mr[mipsReg].loc) { + case ML_MEM: + emit_->LDR(INDEX_UNSIGNED, reg, CTXREG, GetMipsRegOffset(mipsReg)); + mr[mipsReg].loc = ML_ARMREG; + break; + case ML_IMM: + SetRegImm(reg, mr[mipsReg].imm); + ar[reg].isDirty = true; // IMM is always dirty. + + // If we are mapping dirty, it means we're gonna overwrite. + // So the imm value is no longer valid. + if (mapFlags & MAP_DIRTY) + mr[mipsReg].loc = ML_ARMREG; + else + mr[mipsReg].loc = ML_ARMREG_IMM; + break; + default: + mr[mipsReg].loc = ML_ARMREG; + break; + } + } + } else { + if (mipsReg == MIPS_REG_ZERO) { + // This way, if we SetImm() it, we'll keep it. + mr[mipsReg].loc = ML_ARMREG_IMM; + mr[mipsReg].imm = 0; + } else { + mr[mipsReg].loc = ML_ARMREG; + } + } + ar[reg].mipsReg = mipsReg; + mr[mipsReg].reg = reg; +} + +ARM64Reg Arm64RegCache::FindBestToSpill(bool unusedOnly, bool *clobbered) { + int allocCount; + const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount); + + static const int UNUSED_LOOKAHEAD_OPS = 30; + + *clobbered = false; + for (int i = 0; i < allocCount; i++) { + ARM64Reg reg = allocOrder[i]; + if (ar[reg].mipsReg != MIPS_REG_INVALID && mr[ar[reg].mipsReg].spillLock) + continue; + + // Awesome, a clobbered reg. Let's use it. + if (MIPSAnalyst::IsRegisterClobbered(ar[reg].mipsReg, compilerPC_, UNUSED_LOOKAHEAD_OPS)) { + *clobbered = true; + return reg; + } + + // Not awesome. A used reg. Let's try to avoid spilling. + if (unusedOnly && MIPSAnalyst::IsRegisterUsed(ar[reg].mipsReg, compilerPC_, UNUSED_LOOKAHEAD_OPS)) { + continue; + } + + return reg; + } + + return INVALID_REG; +} + +// TODO: Somewhat smarter spilling - currently simply spills the first available, should do +// round robin or FIFO or something. +ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) { + // Let's see if it's already mapped. If so we just need to update the dirty flag. + // We don't need to check for ML_NOINIT because we assume that anyone who maps + // with that flag immediately writes a "known" value to the register. + if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) { + ARM64Reg armReg = mr[mipsReg].reg; + if (ar[armReg].mipsReg != mipsReg) { + ERROR_LOG_REPORT(JIT, "Register mapping out of sync! %i", mipsReg); + } + if (mapFlags & MAP_DIRTY) { + // Mapping dirty means the old imm value is invalid. + mr[mipsReg].loc = ML_ARMREG; + ar[armReg].isDirty = true; + } + return (ARM64Reg)mr[mipsReg].reg; + } + + // Okay, not mapped, so we need to allocate an ARM register. 
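+ // Allocation below goes through three stages: first try the "static" home register
+ // derived from the MIPS register number, then any free register in allocation order,
+ // and finally spill (or discard, if the analyzer says the value is clobbered anyway).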
+ + int allocCount; + const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount); + + ARM64Reg desiredReg = INVALID_REG; + // Try to "statically" allocate the first 6 regs after v0. + int desiredOrder = allocCount - (6 - (mipsReg - (int)MIPS_REG_V0)); + if (desiredOrder >= 0 && desiredOrder < allocCount) + desiredReg = allocOrder[desiredOrder]; + + if (desiredReg != INVALID_REG) { + if (ar[desiredReg].mipsReg == MIPS_REG_INVALID) { + // With this placement, we may be able to optimize flush. + MapRegTo(desiredReg, mipsReg, mapFlags); + return desiredReg; + } + } + +allocate: + for (int i = 0; i < allocCount; i++) { + ARM64Reg reg = allocOrder[i]; + + if (ar[reg].mipsReg == MIPS_REG_INVALID) { + // That means it's free. Grab it, and load the value into it (if requested). + MapRegTo(reg, mipsReg, mapFlags); + return reg; + } + } + + // Still nothing. Let's spill a reg and goto 10. + // TODO: Use age or something to choose which register to spill? + // TODO: Spill dirty regs first? or opposite? + bool clobbered; + ARM64Reg bestToSpill = FindBestToSpill(true, &clobbered); + if (bestToSpill == INVALID_REG) { + bestToSpill = FindBestToSpill(false, &clobbered); + } + + if (bestToSpill != INVALID_REG) { + // ERROR_LOG(JIT, "Out of registers at PC %08x - spills register %i.", mips_->pc, bestToSpill); + // TODO: Broken somehow in Dante's Inferno, but most games work. Bad flags in MIPSTables somewhere? + if (clobbered) { + DiscardR(ar[bestToSpill].mipsReg); + } else { + FlushArmReg(bestToSpill); + } + goto allocate; + } + + // Uh oh, we have all them spilllocked.... + ERROR_LOG_REPORT(JIT, "Out of spillable registers at PC %08x!!!", mips_->pc); + return INVALID_REG; +} + +void Arm64RegCache::MapInIn(MIPSGPReg rd, MIPSGPReg rs) { + SpillLock(rd, rs); + MapReg(rd); + MapReg(rs); + ReleaseSpillLocks(); +} + +void Arm64RegCache::MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs, bool avoidLoad) { + SpillLock(rd, rs); + bool load = !avoidLoad || rd == rs; + MapReg(rd, load ? MAP_DIRTY : MAP_NOINIT); + MapReg(rs); + ReleaseSpillLocks(); +} + +void Arm64RegCache::MapDirtyInIn(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad) { + SpillLock(rd, rs, rt); + bool load = !avoidLoad || (rd == rs || rd == rt); + MapReg(rd, load ? MAP_DIRTY : MAP_NOINIT); + MapReg(rt); + MapReg(rs); + ReleaseSpillLocks(); +} + +void Arm64RegCache::MapDirtyDirtyIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, bool avoidLoad) { + SpillLock(rd1, rd2, rs); + bool load1 = !avoidLoad || rd1 == rs; + bool load2 = !avoidLoad || rd2 == rs; + MapReg(rd1, load1 ? MAP_DIRTY : MAP_NOINIT); + MapReg(rd2, load2 ? MAP_DIRTY : MAP_NOINIT); + MapReg(rs); + ReleaseSpillLocks(); +} + +void Arm64RegCache::MapDirtyDirtyInIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad) { + SpillLock(rd1, rd2, rs, rt); + bool load1 = !avoidLoad || (rd1 == rs || rd1 == rt); + bool load2 = !avoidLoad || (rd2 == rs || rd2 == rt); + MapReg(rd1, load1 ? MAP_DIRTY : MAP_NOINIT); + MapReg(rd2, load2 ? MAP_DIRTY : MAP_NOINIT); + MapReg(rt); + MapReg(rs); + ReleaseSpillLocks(); +} + +void Arm64RegCache::FlushArmReg(ARM64Reg r) { + if (ar[r].mipsReg == MIPS_REG_INVALID) { + // Nothing to do, reg not mapped. + if (ar[r].isDirty) { + ERROR_LOG_REPORT(JIT, "Dirty but no mipsreg?"); + } + return; + } + if (ar[r].mipsReg != MIPS_REG_INVALID) { + auto &mreg = mr[ar[r].mipsReg]; + if (mreg.loc == ML_ARMREG_IMM) { + // We know its immedate value, no need to STR now. 
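+ // Demoting ML_ARMREG_IMM to ML_IMM keeps the known constant around; if the value is
+ // still needed, FlushR() will store it later through a scratch register instead.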
+ mreg.loc = ML_IMM; + mreg.reg = INVALID_REG; + } else { + if (ar[r].isDirty && mreg.loc == ML_ARMREG) + emit_->STR(INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(ar[r].mipsReg)); + mreg.loc = ML_MEM; + mreg.reg = INVALID_REG; + mreg.imm = 0; + } + } + ar[r].isDirty = false; + ar[r].mipsReg = MIPS_REG_INVALID; +} + +void Arm64RegCache::DiscardR(MIPSGPReg mipsReg) { + const RegMIPSLoc prevLoc = mr[mipsReg].loc; + if (prevLoc == ML_ARMREG || prevLoc == ML_ARMREG_IMM) { + ARM64Reg armReg = mr[mipsReg].reg; + ar[armReg].isDirty = false; + ar[armReg].mipsReg = MIPS_REG_INVALID; + mr[mipsReg].reg = INVALID_REG; + mr[mipsReg].loc = ML_MEM; + mr[mipsReg].imm = 0; + } +} + +void Arm64RegCache::FlushR(MIPSGPReg r) { + switch (mr[r].loc) { + case ML_IMM: + // IMM is always "dirty". + if (r != MIPS_REG_ZERO) { + SetRegImm(SCRATCHREG1, mr[r].imm); + emit_->STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, GetMipsRegOffset(r)); + } + break; + + case ML_ARMREG: + case ML_ARMREG_IMM: + if (mr[r].reg == INVALID_REG) { + ERROR_LOG_REPORT(JIT, "FlushR: MipsReg %d had bad ArmReg", r); + } + if (ar[mr[r].reg].isDirty) { + if (r != MIPS_REG_ZERO) { + emit_->STR(INDEX_UNSIGNED, (ARM64Reg)mr[r].reg, CTXREG, GetMipsRegOffset(r)); + } + ar[mr[r].reg].isDirty = false; + } + ar[mr[r].reg].mipsReg = MIPS_REG_INVALID; + break; + + case ML_MEM: + // Already there, nothing to do. + break; + + default: + ERROR_LOG_REPORT(JIT, "FlushR: MipsReg %d with invalid location %d", r, mr[r].loc); + break; + } + mr[r].loc = ML_MEM; + mr[r].reg = INVALID_REG; + mr[r].imm = 0; +} + +// Note: if allowFlushImm is set, this also flushes imms while checking the sequence. +int Arm64RegCache::FlushGetSequential(MIPSGPReg startMipsReg, bool allowFlushImm) { + // Only start a sequence on a dirty armreg. + // TODO: Could also start with an imm? + const auto &startMipsInfo = mr[startMipsReg]; + if ((startMipsInfo.loc != ML_ARMREG && startMipsInfo.loc != ML_ARMREG_IMM) || startMipsInfo.reg == INVALID_REG || !ar[startMipsInfo.reg].isDirty) { + return 0; + } + + int allocCount; + const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount); + + int c = 1; + // The sequence needs to have ascending arm regs for STMIA. + int lastArmReg = startMipsInfo.reg; + // Can't use HI/LO, only regs in the main r[] array. + for (int r = (int)startMipsReg + 1; r < 32; ++r) { + if ((mr[r].loc == ML_ARMREG || mr[r].loc == ML_ARMREG_IMM) && mr[r].reg != INVALID_REG) { + if ((int)mr[r].reg > lastArmReg && ar[mr[r].reg].isDirty) { + ++c; + lastArmReg = mr[r].reg; + continue; + } + // If we're not allowed to flush imms, don't even consider them. + } else if (allowFlushImm && mr[r].loc == ML_IMM && MIPSGPReg(r) != MIPS_REG_ZERO) { + // Okay, let's search for a free (and later) reg to put this imm into. + bool found = false; + for (int j = 0; j < allocCount; ++j) { + ARM64Reg immReg = allocOrder[j]; + if ((int)immReg > lastArmReg && ar[immReg].mipsReg == MIPS_REG_INVALID) { + ++c; + lastArmReg = immReg; + + // Even if the sequence fails, we'll need it in a reg anyway, might as well be this one. + MapRegTo(immReg, MIPSGPReg(r), 0); + found = true; + break; + } + } + if (found) { + continue; + } + } + + // If it didn't hit a continue above, the chain is over. + // There's no way to skip a slot with STMIA. 
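+ // (ARM64 has no STMIA; the equivalent for such a run would be STP pairs, which is what
+ // the "Flush in pairs" TODO in FlushAll() below alludes to.)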
+ break; + } + + return c; +} + +void Arm64RegCache::FlushAll() { + // TODO: Flush in pairs + for (int i = 0; i < NUM_MIPSREG; i++) { + MIPSGPReg mipsReg = MIPSGPReg(i); + FlushR(mipsReg); + } + + // Sanity check + for (int i = 0; i < NUM_ARMREG; i++) { + if (ar[i].mipsReg != MIPS_REG_INVALID) { + ERROR_LOG_REPORT(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg); + } + } +} + +void Arm64RegCache::SetImm(MIPSGPReg r, u32 immVal) { + if (r == MIPS_REG_ZERO && immVal != 0) + ERROR_LOG(JIT, "Trying to set immediate %08x to r0", immVal); + + if (mr[r].loc == ML_ARMREG_IMM && mr[r].imm == immVal) { + // Already have that value, let's keep it in the reg. + return; + } + // Zap existing value if cached in a reg + if (mr[r].reg != INVALID_REG) { + ar[mr[r].reg].mipsReg = MIPS_REG_INVALID; + ar[mr[r].reg].isDirty = false; + } + mr[r].loc = ML_IMM; + mr[r].imm = immVal; + mr[r].reg = INVALID_REG; +} + +bool Arm64RegCache::IsImm(MIPSGPReg r) const { + if (r == MIPS_REG_ZERO) return true; + return mr[r].loc == ML_IMM || mr[r].loc == ML_ARMREG_IMM; +} + +u32 Arm64RegCache::GetImm(MIPSGPReg r) const { + if (r == MIPS_REG_ZERO) return 0; + if (mr[r].loc != ML_IMM && mr[r].loc != ML_ARMREG_IMM) { + ERROR_LOG_REPORT(JIT, "Trying to get imm from non-imm register %i", r); + } + return mr[r].imm; +} + +int Arm64RegCache::GetMipsRegOffset(MIPSGPReg r) { + if (r < 32) + return r * 4; + switch (r) { + case MIPS_REG_HI: + return offsetof(MIPSState, hi); + case MIPS_REG_LO: + return offsetof(MIPSState, lo); + case MIPS_REG_FPCOND: + return offsetof(MIPSState, fpcond); + case MIPS_REG_VFPUCC: + return offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_CC]); + default: + ERROR_LOG_REPORT(JIT, "bad mips register %i", r); + return 0; // or what? + } +} + +void Arm64RegCache::SpillLock(MIPSGPReg r1, MIPSGPReg r2, MIPSGPReg r3, MIPSGPReg r4) { + mr[r1].spillLock = true; + if (r2 != MIPS_REG_INVALID) mr[r2].spillLock = true; + if (r3 != MIPS_REG_INVALID) mr[r3].spillLock = true; + if (r4 != MIPS_REG_INVALID) mr[r4].spillLock = true; +} + +void Arm64RegCache::ReleaseSpillLocks() { + for (int i = 0; i < NUM_MIPSREG; i++) { + mr[i].spillLock = false; + } +} + +void Arm64RegCache::ReleaseSpillLock(MIPSGPReg reg) { + mr[reg].spillLock = false; +} + +ARM64Reg Arm64RegCache::R(MIPSGPReg mipsReg) { + if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) { + return (ARM64Reg)mr[mipsReg].reg; + } else { + ERROR_LOG_REPORT(JIT, "Reg %i not in arm reg. compilerPC = %08x", mipsReg, compilerPC_); + return INVALID_REG; // BAAAD + } +} diff --git a/Core/MIPS/ARM64/Arm64RegCache.h b/Core/MIPS/ARM64/Arm64RegCache.h new file mode 100644 index 0000000000..1de81828f2 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64RegCache.h @@ -0,0 +1,153 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
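+
+// GPR register cache for the ARM64 MIPS JIT: tracks where each MIPS register currently
+// lives - in memory, in a host ARM register, and/or as a known immediate value.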
+ +#pragma once + +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSAnalyst.h" +#include "Common/Arm64Emitter.h" + +namespace Arm64JitConstants { + +// Bogus mappings, TODO ARM64 +const Arm64Gen::ARM64Reg JITBASEREG = Arm64Gen::W0; +const Arm64Gen::ARM64Reg CTXREG = Arm64Gen::X1; +const Arm64Gen::ARM64Reg MEMBASEREG = Arm64Gen::X2; +const Arm64Gen::ARM64Reg SCRATCHREG1 = Arm64Gen::W3; +const Arm64Gen::ARM64Reg SCRATCHREG2 = Arm64Gen::W4; +const Arm64Gen::ARM64Reg DOWNCOUNTREG = Arm64Gen::W5; + +enum { + TOTAL_MAPPABLE_MIPSREGS = 36, +}; + +enum RegMIPSLoc { + ML_IMM, + ML_ARMREG, + // In an arm reg, but also has a known immediate value. + ML_ARMREG_IMM, + ML_MEM, +}; + +// These collide with something on Blackberry. +#undef MAP_NOINIT +#undef MAP_READ + +// Initing is the default so the flag is reversed. +enum { + MAP_DIRTY = 1, + MAP_NOINIT = 2 | MAP_DIRTY, +}; + +} + +// R1 to R6: mapped MIPS regs +// R8 = flags (maybe we could do better here?) +// R9 = code pointers +// R10 = MIPS context +// R11 = base pointer +// R14 = scratch (actually LR) + + +typedef int MIPSReg; + +struct RegARM { + MIPSGPReg mipsReg; // if -1, no mipsreg attached. + bool isDirty; // Should the register be written back? +}; + +struct RegMIPS { + // Where is this MIPS register? + Arm64JitConstants::RegMIPSLoc loc; + // Data (only one of these is used, depending on loc. Could make a union). + u32 imm; + Arm64Gen::ARM64Reg reg; // reg index + bool spillLock; // if true, this register cannot be spilled. + // If loc == ML_MEM, it's back in its location in the CPU context struct. +}; + +namespace MIPSComp { + struct Arm64JitOptions; + struct JitState; +} + +class Arm64RegCache { +public: + Arm64RegCache(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo); + ~Arm64RegCache() {} + + void Init(Arm64Gen::ARM64XEmitter *emitter); + void Start(MIPSAnalyst::AnalysisResults &stats); + + // Protect the arm register containing a MIPS register from spilling, to ensure that + // it's being kept allocated. + void SpillLock(MIPSGPReg reg, MIPSGPReg reg2 = MIPS_REG_INVALID, MIPSGPReg reg3 = MIPS_REG_INVALID, MIPSGPReg reg4 = MIPS_REG_INVALID); + void ReleaseSpillLock(MIPSGPReg reg); + void ReleaseSpillLocks(); + + void SetImm(MIPSGPReg reg, u32 immVal); + bool IsImm(MIPSGPReg reg) const; + u32 GetImm(MIPSGPReg reg) const; + // Optimally set a register to an imm value (possibly using another register.) + void SetRegImm(Arm64Gen::ARM64Reg reg, u32 imm); + + // Returns an ARM register containing the requested MIPS register. + Arm64Gen::ARM64Reg MapReg(MIPSGPReg reg, int mapFlags = 0); + + bool IsMapped(MIPSGPReg reg); + bool IsMappedAsPointer(MIPSGPReg reg); + + void MapInIn(MIPSGPReg rd, MIPSGPReg rs); + void MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs, bool avoidLoad = true); + void MapDirtyInIn(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad = true); + void MapDirtyDirtyIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, bool avoidLoad = true); + void MapDirtyDirtyInIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad = true); + void FlushArmReg(Arm64Gen::ARM64Reg r); + void FlushR(MIPSGPReg r); + void FlushBeforeCall(); + void FlushAll(); + void DiscardR(MIPSGPReg r); + + Arm64Gen::ARM64Reg R(MIPSGPReg preg); // Returns a cached register, while checking that it's NOT mapped as a pointer + + void SetEmitter(Arm64Gen::ARM64XEmitter *emitter) { emit_ = emitter; } + + // For better log output only. 
+ void SetCompilerPC(u32 compilerPC) { compilerPC_ = compilerPC; } + + int GetMipsRegOffset(MIPSGPReg r); + +private: + const Arm64Gen::ARM64Reg *GetMIPSAllocationOrder(int &count); + void MapRegTo(Arm64Gen::ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags); + int FlushGetSequential(MIPSGPReg startMipsReg, bool allowFlushImm); + Arm64Gen::ARM64Reg FindBestToSpill(bool unusedOnly, bool *clobbered); + + MIPSState *mips_; + Arm64Gen::ARM64XEmitter *emit_; + MIPSComp::JitState *js_; + MIPSComp::Arm64JitOptions *jo_; + u32 compilerPC_; + + enum { + NUM_ARMREG = 32, // 31 actual registers, plus the zero register. + NUM_MIPSREG = Arm64JitConstants::TOTAL_MAPPABLE_MIPSREGS, + }; + + RegARM ar[NUM_ARMREG]; + RegMIPS mr[NUM_MIPSREG]; +}; diff --git a/Core/MIPS/ARM64/Arm64RegCacheFPU.cpp b/Core/MIPS/ARM64/Arm64RegCacheFPU.cpp new file mode 100644 index 0000000000..7c9bdcd1ef --- /dev/null +++ b/Core/MIPS/ARM64/Arm64RegCacheFPU.cpp @@ -0,0 +1,570 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include + +#include "base/logging.h" +#include "Common/CPUDetect.h" +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h" +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/MIPSTables.h" + +using namespace Arm64Gen; +using namespace Arm64JitConstants; + +ArmRegCacheFPU::ArmRegCacheFPU(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo) : mips_(mips), vr(mr + 32), js_(js), jo_(jo), initialReady(false) { + if (cpu_info.bNEON) { + numARMFpuReg_ = 32; + } else { + numARMFpuReg_ = 16; + } +} + +void ArmRegCacheFPU::Start(MIPSAnalyst::AnalysisResults &stats) { + if (!initialReady) { + SetupInitialRegs(); + initialReady = true; + } + + memcpy(ar, arInitial, sizeof(ar)); + memcpy(mr, mrInitial, sizeof(mr)); + pendingFlush = false; +} + +void ArmRegCacheFPU::SetupInitialRegs() { + for (int i = 0; i < numARMFpuReg_; i++) { + arInitial[i].mipsReg = -1; + arInitial[i].isDirty = false; + } + for (int i = 0; i < NUM_MIPSFPUREG; i++) { + mrInitial[i].loc = ML_MEM; + mrInitial[i].reg = INVALID_REG; + mrInitial[i].spillLock = false; + mrInitial[i].tempLock = false; + } + for (int i = 0; i < MAX_ARMQUADS; i++) { + qr[i].isDirty = false; + qr[i].mipsVec = -1; + qr[i].sz = V_Invalid; + qr[i].spillLock = false; + qr[i].isTemp = false; + memset(qr[i].vregs, 0xff, 4); + } +} + +const ARM64Reg *ArmRegCacheFPU::GetMIPSAllocationOrder(int &count) { + // VFP mapping + // VFPU registers and regular FP registers are mapped interchangably on top of the standard + // 16 FPU registers. + + // NEON mapping + // We map FPU and VFPU registers entirely separately. FPU is mapped to 12 of the bottom 16 S registers. + // VFPU is mapped to the upper 48 regs, 32 of which can only be reached through NEON + // (or D16-D31 as doubles, but not relevant). 
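+ // (These notes are carried over from the 32-bit ARM register cache; the S/D/Q split
+ // will need rethinking for AArch64's register file once the FP emitters are in place.)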
+ // Might consider shifting the split in the future, giving more regs to NEON allowing it to map more quads. + + // We should attempt to map scalars to low Q registers and wider things to high registers, + // as the NEON instructions are all 2-vector or 4-vector, they don't do scalar, we want to be + // able to use regular VFP instructions too. + static const ARM64Reg allocationOrder[] = { + // Reserve four temp registers. Useful when building quads until we really figure out + // how to do that best. + S4, S5, S6, S7, // Q1 + S8, S9, S10, S11, // Q2 + S12, S13, S14, S15, // Q3 + S16, S17, S18, S19, // Q4 + S20, S21, S22, S23, // Q5 + S24, S25, S26, S27, // Q6 + S28, S29, S30, S31, // Q7 + // Q8-Q15 free for NEON tricks + }; + + static const ARM64Reg allocationOrderNEONVFPU[] = { + // Reserve four temp registers. Useful when building quads until we really figure out + // how to do that best. + S4, S5, S6, S7, // Q1 + S8, S9, S10, S11, // Q2 + S12, S13, S14, S15, // Q3 + // Q4-Q15 free for VFPU + }; + + // NOTE: It's important that S2/S3 are not allocated with bNEON, even if !useNEONVFPU. + // They are used by a few instructions, like vh2f. + if (jo_->useNEONVFPU) { + count = sizeof(allocationOrderNEONVFPU) / sizeof(const ARM64Reg); + return allocationOrderNEONVFPU; + } else { + count = sizeof(allocationOrder) / sizeof(const ARM64Reg); + return allocationOrder; + } +} + +bool ArmRegCacheFPU::IsMapped(MIPSReg r) { + return mr[r].loc == ML_ARMREG; +} + +ARM64Reg ArmRegCacheFPU::MapReg(MIPSReg mipsReg, int mapFlags) { + // INFO_LOG(JIT, "FPR MapReg: %i flags=%i", mipsReg, mapFlags); + if (jo_->useNEONVFPU && mipsReg >= 32) { + ERROR_LOG(JIT, "Cannot map VFPU registers to ARM VFP registers in NEON mode. PC=%08x", js_->compilerPC); + return S0; + } + + pendingFlush = true; + // Let's see if it's already mapped. If so we just need to update the dirty flag. + // We don't need to check for ML_NOINIT because we assume that anyone who maps + // with that flag immediately writes a "known" value to the register. + if (mr[mipsReg].loc == ML_ARMREG) { + if (ar[mr[mipsReg].reg].mipsReg != mipsReg) { + ERROR_LOG(JIT, "Reg mapping out of sync! MR %i", mipsReg); + } + if (mapFlags & MAP_DIRTY) { + ar[mr[mipsReg].reg].isDirty = true; + } + //INFO_LOG(JIT, "Already mapped %i to %i", mipsReg, mr[mipsReg].reg); + return (ARM64Reg)(mr[mipsReg].reg + S0); + } + + // Okay, not mapped, so we need to allocate an ARM register. + + int allocCount; + const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount); + +allocate: + for (int i = 0; i < allocCount; i++) { + int reg = allocOrder[i] - S0; + + if (ar[reg].mipsReg == -1) { + // That means it's free. Grab it, and load the value into it (if requested). + ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false; + if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) { + if (mr[mipsReg].loc == ML_MEM && mipsReg < TEMP0) { + // emit_->VLDR((ARM64Reg)(reg + S0), CTXREG, GetMipsRegOffset(mipsReg)); + } + } + ar[reg].mipsReg = mipsReg; + mr[mipsReg].loc = ML_ARMREG; + mr[mipsReg].reg = reg; + //INFO_LOG(JIT, "Mapped %i to %i", mipsReg, mr[mipsReg].reg); + return (ARM64Reg)(reg + S0); + } + } + + + // Still nothing. Let's spill a reg and goto 10. + // TODO: Use age or something to choose which register to spill? + // TODO: Spill dirty regs first? or opposite? 
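+ // Unlike the GPR cache there is no clobber analysis here yet: we simply take the first
+ // allocatable register that is neither spill-locked nor temp-locked.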
+ int bestToSpill = -1; + for (int i = 0; i < allocCount; i++) { + int reg = allocOrder[i] - S0; + if (ar[reg].mipsReg != -1 && (mr[ar[reg].mipsReg].spillLock || mr[ar[reg].mipsReg].tempLock)) + continue; + bestToSpill = reg; + break; + } + + if (bestToSpill != -1) { + FlushArmReg((ARM64Reg)(S0 + bestToSpill)); + goto allocate; + } + + // Uh oh, we have all them spilllocked.... + ERROR_LOG(JIT, "Out of spillable registers at PC %08x!!!", js_->compilerPC); + return INVALID_REG; +} + +void ArmRegCacheFPU::MapInIn(MIPSReg rd, MIPSReg rs) { + SpillLock(rd, rs); + MapReg(rd); + MapReg(rs); + ReleaseSpillLock(rd); + ReleaseSpillLock(rs); +} + +void ArmRegCacheFPU::MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad) { + SpillLock(rd, rs); + bool overlap = avoidLoad && rd == rs; + MapReg(rd, overlap ? MAP_DIRTY : MAP_NOINIT); + MapReg(rs); + ReleaseSpillLock(rd); + ReleaseSpillLock(rs); +} + +void ArmRegCacheFPU::MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad) { + SpillLock(rd, rs, rt); + bool overlap = avoidLoad && (rd == rs || rd == rt); + MapReg(rd, overlap ? MAP_DIRTY : MAP_NOINIT); + MapReg(rt); + MapReg(rs); + ReleaseSpillLock(rd); + ReleaseSpillLock(rs); + ReleaseSpillLock(rt); +} + +void ArmRegCacheFPU::SpillLockV(const u8 *v, VectorSize sz) { + for (int i = 0; i < GetNumVectorElements(sz); i++) { + vr[v[i]].spillLock = true; + } +} + +void ArmRegCacheFPU::SpillLockV(int vec, VectorSize sz) { + u8 v[4]; + GetVectorRegs(v, sz, vec); + SpillLockV(v, sz); +} + +void ArmRegCacheFPU::MapRegV(int vreg, int flags) { + MapReg(vreg + 32, flags); +} + +void ArmRegCacheFPU::LoadToRegV(ARM64Reg armReg, int vreg) { + if (vr[vreg].loc == ML_ARMREG) { + // emit_->VMOV(armReg, (ARM64Reg)(S0 + vr[vreg].reg)); + } else { + MapRegV(vreg); + // emit_->VMOV(armReg, V(vreg)); + } +} + +void ArmRegCacheFPU::MapRegsAndSpillLockV(int vec, VectorSize sz, int flags) { + u8 v[4]; + GetVectorRegs(v, sz, vec); + SpillLockV(v, sz); + for (int i = 0; i < GetNumVectorElements(sz); i++) { + MapRegV(v[i], flags); + } +} + +void ArmRegCacheFPU::MapRegsAndSpillLockV(const u8 *v, VectorSize sz, int flags) { + SpillLockV(v, sz); + for (int i = 0; i < GetNumVectorElements(sz); i++) { + MapRegV(v[i], flags); + } +} + +void ArmRegCacheFPU::MapInInV(int vs, int vt) { + SpillLockV(vs); + SpillLockV(vt); + MapRegV(vs); + MapRegV(vt); + ReleaseSpillLockV(vs); + ReleaseSpillLockV(vt); +} + +void ArmRegCacheFPU::MapDirtyInV(int vd, int vs, bool avoidLoad) { + bool overlap = avoidLoad && (vd == vs); + SpillLockV(vd); + SpillLockV(vs); + MapRegV(vd, overlap ? MAP_DIRTY : MAP_NOINIT); + MapRegV(vs); + ReleaseSpillLockV(vd); + ReleaseSpillLockV(vs); +} + +void ArmRegCacheFPU::MapDirtyInInV(int vd, int vs, int vt, bool avoidLoad) { + bool overlap = avoidLoad && ((vd == vs) || (vd == vt)); + SpillLockV(vd); + SpillLockV(vs); + SpillLockV(vt); + MapRegV(vd, overlap ? MAP_DIRTY : MAP_NOINIT); + MapRegV(vs); + MapRegV(vt); + ReleaseSpillLockV(vd); + ReleaseSpillLockV(vs); + ReleaseSpillLockV(vt); +} + +void ArmRegCacheFPU::FlushArmReg(ARM64Reg r) { + if (r >= S0 && r <= S31) { + int reg = r - S0; + if (ar[reg].mipsReg == -1) { + // Nothing to do, reg not mapped. + return; + } + if (ar[reg].mipsReg != -1) { + if (ar[reg].isDirty && mr[ar[reg].mipsReg].loc == ML_ARMREG) + { + //INFO_LOG(JIT, "Flushing ARM reg %i", reg); + // emit_->VSTR(r, CTXREG, GetMipsRegOffset(ar[reg].mipsReg)); + } + // IMMs won't be in an ARM reg. 
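+ // The store itself is still commented out above, so for now only the bookkeeping is
+ // updated here; nothing is actually written back to the context yet.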
+ mr[ar[reg].mipsReg].loc = ML_MEM; + mr[ar[reg].mipsReg].reg = INVALID_REG; + } else { + ERROR_LOG(JIT, "Dirty but no mipsreg?"); + } + ar[reg].isDirty = false; + ar[reg].mipsReg = -1; + } +} + +void ArmRegCacheFPU::FlushV(MIPSReg r) { + FlushR(r + 32); +} + +void ArmRegCacheFPU::FlushR(MIPSReg r) { + switch (mr[r].loc) { + case ML_IMM: + // IMM is always "dirty". + // IMM is not allowed for FP (yet). + ERROR_LOG(JIT, "Imm in FP register?"); + break; + + case ML_ARMREG: + if (mr[r].reg == INVALID_REG) { + ERROR_LOG(JIT, "FlushR: MipsReg had bad ArmReg"); + } + + if (mr[r].reg >= Q0 && mr[r].reg <= Q15) { + // This should happen rarely, but occasionally we need to flush a single stray + // mipsreg that's been part of a quad. + int quad = mr[r].reg - Q0; + if (qr[quad].isDirty) { + WARN_LOG(JIT, "FlushR found quad register %i - PC=%08x", quad, js_->compilerPC); + //emit_->ADDI2R(R0, CTXREG, GetMipsRegOffset(r), R1); + //emit_->VST1_lane(F_32, (ARM64Reg)mr[r].reg, R0, mr[r].lane, true); + } + } else { + if (ar[mr[r].reg].isDirty) { + //INFO_LOG(JIT, "Flushing dirty reg %i", mr[r].reg); + // emit_->VSTR((ARM64Reg)(mr[r].reg + S0), CTXREG, GetMipsRegOffset(r)); + ar[mr[r].reg].isDirty = false; + } + ar[mr[r].reg].mipsReg = -1; + } + break; + + case ML_MEM: + // Already there, nothing to do. + break; + + default: + //BAD + break; + } + mr[r].loc = ML_MEM; + mr[r].reg = (int)INVALID_REG; +} + +int ArmRegCacheFPU::GetNumARMFPURegs() { + if (cpu_info.bNEON) + return 32; + else + return 16; +} + +// Scalar only. Need a similar one for sequential Q vectors. +int ArmRegCacheFPU::FlushGetSequential(int a, int maxArmReg) { + int c = 1; + int lastMipsOffset = GetMipsRegOffset(ar[a].mipsReg); + a++; + while (a < maxArmReg) { + if (!ar[a].isDirty || ar[a].mipsReg == -1) + break; + int mipsOffset = GetMipsRegOffset(ar[a].mipsReg); + if (mipsOffset != lastMipsOffset + 4) { + break; + } + + lastMipsOffset = mipsOffset; + a++; + c++; + } + return c; +} + +void ArmRegCacheFPU::FlushAll() { + if (!pendingFlush) { + // Nothing allocated. FPU regs are not nearly as common as GPR. + return; + } + + // Discard temps! + for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; i++) { + DiscardR(i); + } + + // Loop through the ARM registers, then use GetMipsRegOffset to determine if MIPS registers are + // sequential. This is necessary because we store VFPU registers in a staggered order to get + // columns sequential (most VFPU math in nearly all games is in columns, not rows). + + int numArmRegs; + // We rely on the allocation order being sequential. + const ARM64Reg baseReg = GetMIPSAllocationOrder(numArmRegs)[0]; + + for (int i = 0; i < numArmRegs; i++) { + int a = (baseReg - S0) + i; + int m = ar[a].mipsReg; + + if (ar[a].isDirty) { + if (m == -1) { + ILOG("ARM reg %i is dirty but has no mipsreg", a); + continue; + } + + int c = FlushGetSequential(a, GetNumARMFPURegs()); + if (c == 1) { + // ILOG("Got single register: %i (%i)", a, m); + //emit_->VSTR((ARM64Reg)(a + S0), CTXREG, GetMipsRegOffset(m)); + } else if (c == 2) { + // Probably not worth using VSTMIA for two. + int offset = GetMipsRegOffset(m); + //emit_->VSTR((ARM64Reg)(a + S0), CTXREG, offset); + //emit_->VSTR((ARM64Reg)(a + 1 + S0), CTXREG, offset + 4); + } else { + // ILOG("Got sequence: %i at %i (%i)", c, a, m); + //emit_->ADDI2R(SCRATCHREG1, CTXREG, GetMipsRegOffset(m), SCRATCHREG2); + // ILOG("VSTMIA R0, %i, %i", a, c); + //emit_->VSTMIA(SCRATCHREG1, false, (ARM64Reg)(S0 + a), c); + } + + // Skip past, and mark as non-dirty. 
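+ // Every register in the run goes back to ML_MEM, and the outer loop index then skips
+ // over the whole sequence (i += c - 1 below).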
+ for (int j = 0; j < c; j++) { + int b = a + j; + mr[ar[b].mipsReg].loc = ML_MEM; + mr[ar[b].mipsReg].reg = (int)INVALID_REG; + ar[a + j].mipsReg = -1; + ar[a + j].isDirty = false; + } + i += c - 1; + } else { + if (m != -1) { + mr[m].loc = ML_MEM; + mr[m].reg = (int)INVALID_REG; + } + ar[a].mipsReg = -1; + // already not dirty + } + } + + // Sanity check + for (int i = 0; i < numARMFpuReg_; i++) { + if (ar[i].mipsReg != -1) { + ERROR_LOG(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg); + } + } + pendingFlush = false; +} + +void ArmRegCacheFPU::DiscardR(MIPSReg r) { + switch (mr[r].loc) { + case ML_IMM: + // IMM is always "dirty". + // IMM is not allowed for FP (yet). + ERROR_LOG(JIT, "Imm in FP register?"); + break; + + case ML_ARMREG: + if (mr[r].reg == INVALID_REG) { + ERROR_LOG(JIT, "DiscardR: MipsReg had bad ArmReg"); + } else { + // Note that we DO NOT write it back here. That's the whole point of Discard. + ar[mr[r].reg].isDirty = false; + ar[mr[r].reg].mipsReg = -1; + } + break; + + case ML_MEM: + // Already there, nothing to do. + break; + + default: + //BAD + break; + } + mr[r].loc = ML_MEM; + mr[r].reg = (int)INVALID_REG; + mr[r].tempLock = false; + mr[r].spillLock = false; +} + +bool ArmRegCacheFPU::IsTempX(ARM64Reg r) const { + return ar[r - S0].mipsReg >= TEMP0; +} + +int ArmRegCacheFPU::GetTempR() { + if (jo_->useNEONVFPU) { + ERROR_LOG(JIT, "VFP temps not allowed in NEON mode"); + return 0; + } + pendingFlush = true; + for (int r = TEMP0; r < TEMP0 + NUM_TEMPS; ++r) { + if (mr[r].loc == ML_MEM && !mr[r].tempLock) { + mr[r].tempLock = true; + return r; + } + } + + ERROR_LOG(CPU, "Out of temp regs! Might need to DiscardR() some"); + _assert_msg_(JIT, 0, "Regcache ran out of temp regs, might need to DiscardR() some."); + return -1; +} + +int ArmRegCacheFPU::GetMipsRegOffset(MIPSReg r) { + // These are offsets within the MIPSState structure. First there are the GPRS, then FPRS, then the "VFPURs", then the VFPU ctrls. + if (r < 0 || r > 32 + 128 + NUM_TEMPS) { + ERROR_LOG(JIT, "bad mips register %i, out of range", r); + return 0; // or what? + } + + if (r < 32 || r >= 32 + 128) { + return (32 + r) << 2; + } else { + // r is between 32 and 128 + 32 + return (32 + 32 + voffset[r - 32]) << 2; + } +} + +void ArmRegCacheFPU::SpillLock(MIPSReg r1, MIPSReg r2, MIPSReg r3, MIPSReg r4) { + mr[r1].spillLock = true; + if (r2 != -1) mr[r2].spillLock = true; + if (r3 != -1) mr[r3].spillLock = true; + if (r4 != -1) mr[r4].spillLock = true; +} + +// This is actually pretty slow with all the 160 regs... +void ArmRegCacheFPU::ReleaseSpillLocksAndDiscardTemps() { + for (int i = 0; i < NUM_MIPSFPUREG; i++) { + mr[i].spillLock = false; + } + for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; ++i) { + DiscardR(i); + } + for (int i = 0; i < MAX_ARMQUADS; i++) { + qr[i].spillLock = false; + if (qr[i].isTemp) { + qr[i].isTemp = false; + qr[i].sz = V_Invalid; + } + } +} + +ARM64Reg ArmRegCacheFPU::R(int mipsReg) { + if (mr[mipsReg].loc == ML_ARMREG) { + return (ARM64Reg)(mr[mipsReg].reg + S0); + } else { + if (mipsReg < 32) { + ERROR_LOG(JIT, "FReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg, js_->compilerPC, MIPSDisasmAt(js_->compilerPC)); + } else if (mipsReg < 32 + 128) { + ERROR_LOG(JIT, "VReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC)); + } else { + ERROR_LOG(JIT, "Tempreg %i not in ARM reg. 
compilerPC = %08x : %s", mipsReg - 128 - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC)); + } + return INVALID_REG; // BAAAD + } +} diff --git a/Core/MIPS/ARM64/Arm64RegCacheFPU.h b/Core/MIPS/ARM64/Arm64RegCacheFPU.h new file mode 100644 index 0000000000..e51d268cb5 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64RegCacheFPU.h @@ -0,0 +1,186 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#pragma once + +#include "../MIPS.h" +#include "../MIPSAnalyst.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Core/MIPS/MIPSVFPUUtils.h" +#include "Common/Arm64Emitter.h" + +// These collide with something on Blackberry. +#undef MAP_NOINIT +#undef MAP_READ + +namespace Arm64JitConstants { + +enum { + NUM_TEMPS = 16, + TEMP0 = 32 + 128, + TOTAL_MAPPABLE_MIPSFPUREGS = 32 + 128 + NUM_TEMPS, +}; + +enum { + MAP_READ = 0, + MAP_MTX_TRANSPOSED = 16, + MAP_PREFER_LOW = 16, + MAP_PREFER_HIGH = 32, + + // Force is not yet correctly implemented, if the reg is already mapped it will not move + MAP_FORCE_LOW = 64, // Only map Q0-Q7 (and probably not Q0-Q3 as they are S registers so that leaves Q8-Q15) + MAP_FORCE_HIGH = 128, // Only map Q8-Q15 +}; + +} + +struct FPURegARM64 { + int mipsReg; // if -1, no mipsreg attached. + bool isDirty; // Should the register be written back? +}; + +struct FPURegQuad64 { + int mipsVec; + VectorSize sz; + u8 vregs[4]; + bool isDirty; + bool spillLock; + bool isTemp; +}; + +struct FPURegMIPS { + // Where is this MIPS register? + Arm64JitConstants::RegMIPSLoc loc; + // Data (only one of these is used, depending on loc. Could make a union). + u32 reg; + int lane; + + bool spillLock; // if true, this register cannot be spilled. + bool tempLock; + // If loc == ML_MEM, it's back in its location in the CPU context struct. +}; + +namespace MIPSComp { + struct Arm64JitOptions; + struct JitState; +} + +class ArmRegCacheFPU { +public: + ArmRegCacheFPU(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo); + ~ArmRegCacheFPU() {} + + void Init(Arm64Gen::ARM64XEmitter *emitter); + + void Start(MIPSAnalyst::AnalysisResults &stats); + + // Protect the arm register containing a MIPS register from spilling, to ensure that + // it's being kept allocated. + void SpillLock(MIPSReg reg, MIPSReg reg2 = -1, MIPSReg reg3 = -1, MIPSReg reg4 = -1); + void SpillLockV(MIPSReg r) { SpillLock(r + 32); } + + void ReleaseSpillLocksAndDiscardTemps(); + void ReleaseSpillLock(int mipsreg) { + mr[mipsreg].spillLock = false; + } + void ReleaseSpillLockV(int mipsreg) { + ReleaseSpillLock(mipsreg + 32); + } + + void SetImm(MIPSReg reg, u32 immVal); + bool IsImm(MIPSReg reg) const; + u32 GetImm(MIPSReg reg) const; + + // Returns an ARM register containing the requested MIPS register. 
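+ // Plain FPU registers use indices 0-31; VFPU registers are addressed as index + 32
+ // (see MapRegV()/V() below), and the temporaries start at TEMP0 above that.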
+ Arm64Gen::ARM64Reg MapReg(MIPSReg reg, int mapFlags = 0); + void MapInIn(MIPSReg rd, MIPSReg rs); + void MapDirty(MIPSReg rd); + void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true); + void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true); + bool IsMapped(MIPSReg r); + void FlushArmReg(Arm64Gen::ARM64Reg r); + void FlushR(MIPSReg r); + void DiscardR(MIPSReg r); + Arm64Gen::ARM64Reg R(int preg); // Returns a cached register + + // VFPU register as single ARM VFP registers. Must not be used in the upcoming NEON mode! + void MapRegV(int vreg, int flags = 0); + void LoadToRegV(Arm64Gen::ARM64Reg armReg, int vreg); + void MapInInV(int rt, int rs); + void MapDirtyInV(int rd, int rs, bool avoidLoad = true); + void MapDirtyInInV(int rd, int rs, int rt, bool avoidLoad = true); + + bool IsTempX(Arm64Gen::ARM64Reg r) const; + MIPSReg GetTempV() { return GetTempR() - 32; } + // VFPU registers as single VFP registers. + Arm64Gen::ARM64Reg V(int vreg) { return R(vreg + 32); } + + int FlushGetSequential(int a, int maxArmReg); + void FlushAll(); + + // This one is allowed at any point. + void FlushV(MIPSReg r); + + // NOTE: These require you to release spill locks manually! + void MapRegsAndSpillLockV(int vec, VectorSize vsz, int flags); + void MapRegsAndSpillLockV(const u8 *v, VectorSize vsz, int flags); + + void SpillLockV(const u8 *v, VectorSize vsz); + void SpillLockV(int vec, VectorSize vsz); + + void SetEmitter(Arm64Gen::ARM64XEmitter *emitter) { emit_ = emitter; } + + int GetMipsRegOffset(MIPSReg r); + +private: + MIPSReg GetTempR(); + const Arm64Gen::ARM64Reg *GetMIPSAllocationOrder(int &count); + int GetMipsRegOffsetV(MIPSReg r) { + return GetMipsRegOffset(r + 32); + } + int GetNumARMFPURegs(); + + void SetupInitialRegs(); + + MIPSState *mips_; + Arm64Gen::ARM64XEmitter *emit_; + MIPSComp::JitState *js_; + MIPSComp::Arm64JitOptions *jo_; + + int numARMFpuReg_; + int qTime_; + + enum { + // With NEON, we have 64 S = 32 D = 16 Q registers. Only the first 32 S registers + // are individually mappable though. 
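+ // These constants size the ar[]/mr[]/qr[] arrays below; MAX_ARMQUADS covers the quad
+ // (Q register) bookkeeping touched by FlushR() and ReleaseSpillLocksAndDiscardTemps().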
+ MAX_ARMFPUREG = 32, + MAX_ARMQUADS = 16, + NUM_MIPSFPUREG = Arm64JitConstants::TOTAL_MAPPABLE_MIPSFPUREGS, + }; + + FPURegARM64 ar[MAX_ARMFPUREG]; + FPURegMIPS mr[NUM_MIPSFPUREG]; + FPURegQuad64 qr[MAX_ARMQUADS]; + FPURegMIPS *vr; + + bool pendingFlush; + bool initialReady; + FPURegARM64 arInitial[MAX_ARMFPUREG]; + FPURegMIPS mrInitial[NUM_MIPSFPUREG]; +}; diff --git a/Core/MIPS/JitCommon/JitBlockCache.h b/Core/MIPS/JitCommon/JitBlockCache.h index 40f12569e4..65c6559cd9 100644 --- a/Core/MIPS/JitCommon/JitBlockCache.h +++ b/Core/MIPS/JitCommon/JitBlockCache.h @@ -30,6 +30,10 @@ #include "Common/ArmEmitter.h" namespace ArmGen { class ARMXEmitter; } typedef ArmGen::ARMXCodeBlock NativeCodeBlock; +#elif defined(ARM64) +#include "Common/Arm64Emitter.h" +namespace Arm64Gen { class ARM64XEmitter; } +typedef Arm64Gen::ARM64CodeBlock NativeCodeBlock; #elif defined(_M_IX86) || defined(_M_X64) #include "Common/x64Emitter.h" namespace Gen { class XEmitter; } @@ -44,7 +48,7 @@ namespace FakeGen { class FakeXEmitter; } typedef FakeGen::FakeXCodeBlock NativeCodeBlock; #endif -#if defined(ARM) +#if defined(ARM) || defined(ARM64) const int MAX_JIT_BLOCK_EXITS = 2; #else const int MAX_JIT_BLOCK_EXITS = 8; diff --git a/Core/MIPS/JitCommon/JitCommon.cpp b/Core/MIPS/JitCommon/JitCommon.cpp index eb9643b2b4..b7a7e76aae 100644 --- a/Core/MIPS/JitCommon/JitCommon.cpp +++ b/Core/MIPS/JitCommon/JitCommon.cpp @@ -23,10 +23,13 @@ #include "ext/disarm.h" #include "ext/udis86/udis86.h" +#include "Core/Util/DisArm64.h" namespace MIPSComp { #if defined(ARM) ArmJit *jit; +#elif defined(ARM64) + Arm64Jit *jit; #else Jit *jit; #endif @@ -35,6 +38,7 @@ namespace MIPSComp { } } +#if !defined(ARM64) // We compile this for x86 as well because it may be useful when developing the ARM JIT on a PC. std::vector DisassembleArm2(const u8 *data, int size) { std::vector lines; @@ -75,8 +79,37 @@ std::vector DisassembleArm2(const u8 *data, int size) { } return lines; } +#endif -#ifndef ARM +#if !defined(ARM) +std::vector DisassembleArm64(const u8 *data, int size) { + std::vector lines; + + char temp[256]; + int bkpt_count = 0; + for (int i = 0; i < size; i += 4) { + const u32 *codePtr = (const u32 *)(data + i); + u32 inst = codePtr[0]; + Arm64Dis((u32)(intptr_t)codePtr, inst, temp, sizeof(temp), false); + std::string buf = temp; + if (buf == "BKPT 1") { + bkpt_count++; + } else { + if (bkpt_count) { + lines.push_back(StringFromFormat("BKPT 1 (x%i)", bkpt_count)); + bkpt_count = 0; + } + lines.push_back(buf); + } + } + if (bkpt_count) { + lines.push_back(StringFromFormat("BKPT 1 (x%i)", bkpt_count)); + } + return lines; +} +#endif + +#if !defined(ARM) && !defined(ARM64) const char *ppsspp_resolver(struct ud*, uint64_t addr, diff --git a/Core/MIPS/JitCommon/JitCommon.h b/Core/MIPS/JitCommon/JitCommon.h index 59b687c85a..a3a1c35828 100644 --- a/Core/MIPS/JitCommon/JitCommon.h +++ b/Core/MIPS/JitCommon/JitCommon.h @@ -24,6 +24,7 @@ // TODO: Find a better place for these. 
std::vector DisassembleArm2(const u8 *data, int size); +std::vector DisassembleArm64(const u8 *data, int size); std::vector DisassembleX86(const u8 *data, int size); namespace MIPSComp { diff --git a/Core/MIPS/JitCommon/NativeJit.h b/Core/MIPS/JitCommon/NativeJit.h index 603b5f4713..67cc8f8556 100644 --- a/Core/MIPS/JitCommon/NativeJit.h +++ b/Core/MIPS/JitCommon/NativeJit.h @@ -26,6 +26,9 @@ struct JitBlock; #if defined(ARM) #include "../ARM/ArmJit.h" typedef MIPSComp::ArmJit NativeJit; +#elif defined(ARM64) +#include "../ARM64/Arm64Jit.h" +typedef MIPSComp::Arm64Jit NativeJit; #elif defined(_M_IX86) || defined(_M_X64) #include "../x86/Jit.h" typedef MIPSComp::Jit NativeJit; diff --git a/Core/MIPS/MIPS.cpp b/Core/MIPS/MIPS.cpp index edc6f500fc..347eb052b2 100644 --- a/Core/MIPS/MIPS.cpp +++ b/Core/MIPS/MIPS.cpp @@ -211,6 +211,8 @@ void MIPSState::Init() { if (PSP_CoreParameter().cpuCore == CPU_JIT) { #ifdef ARM MIPSComp::jit = new MIPSComp::ArmJit(this); +#elif defined(ARM64) + MIPSComp::jit = new MIPSComp::Arm64Jit(this); #elif defined(_M_IX86) || defined(_M_X64) MIPSComp::jit = new MIPSComp::Jit(this); #elif defined(MIPS) @@ -218,6 +220,8 @@ void MIPSState::Init() { #else MIPSComp::jit = new MIPSComp::FakeJit(this); #endif + } else { + MIPSComp::jit = nullptr; } } @@ -236,6 +240,8 @@ void MIPSState::UpdateCore(CPUCore desired) { if (!MIPSComp::jit) { #ifdef ARM MIPSComp::jit = new MIPSComp::ArmJit(this); +#elif defined(ARM64) + MIPSComp::jit = new MIPSComp::Arm64Jit(this); #elif defined(_M_IX86) || defined(_M_X64) MIPSComp::jit = new MIPSComp::Jit(this); #elif defined(MIPS) diff --git a/Core/MIPS/MIPSTables.cpp b/Core/MIPS/MIPSTables.cpp index b79b7ee537..07b8bd18dd 100644 --- a/Core/MIPS/MIPSTables.cpp +++ b/Core/MIPS/MIPSTables.cpp @@ -90,6 +90,8 @@ struct MIPSInstruction { #ifdef ARM #define JITFUNC(f) (&ArmJit::f) +#elif defined(ARM64) +#define JITFUNC(f) (&Arm64Jit::f) #elif defined(_M_X64) || defined(_M_IX86) #define JITFUNC(f) (&Jit::f) #elif defined(MIPS) diff --git a/Core/MemMap.cpp b/Core/MemMap.cpp index 6c1455f5f2..3ef061b88b 100644 --- a/Core/MemMap.cpp +++ b/Core/MemMap.cpp @@ -185,7 +185,7 @@ static bool Memory_TryBase(u32 flags) { if (!*view.out_ptr_low) goto bail; } -#ifdef _M_X64 +#if defined(_M_X64) || defined(ARM64) *view.out_ptr = (u8*)g_arena.CreateView( position, view.size, base + view.virtual_address); #else diff --git a/Core/Util/AudioFormatNEON.cpp b/Core/Util/AudioFormatNEON.cpp index c30a3236da..3dd73dee6b 100644 --- a/Core/Util/AudioFormatNEON.cpp +++ b/Core/Util/AudioFormatNEON.cpp @@ -20,7 +20,7 @@ #include "Core/Util/AudioFormat.h" #include "Core/Util/AudioFormatNEON.h" -#ifndef ARM +#if !defined(ARM) && !defined(ARM64) #error Should not be compiled on non-ARM. #endif diff --git a/GPU/Common/TextureDecoderNEON.cpp b/GPU/Common/TextureDecoderNEON.cpp index 92bf04d523..c1fb50a7ee 100644 --- a/GPU/Common/TextureDecoderNEON.cpp +++ b/GPU/Common/TextureDecoderNEON.cpp @@ -18,7 +18,7 @@ #include #include "GPU/Common/TextureDecoder.h" -#ifndef ARM +#if !defined(ARM) && !defined(ARM64) #error Should not be compiled on non-ARM. 
#endif @@ -29,7 +29,7 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) { __builtin_prefetch(checkp, 0, 0); if (((intptr_t)checkp & 0xf) == 0 && (size & 0x3f) == 0) { -#ifdef IOS +#if defined(IOS) || defined(ARM64) uint32x4_t cursor = vdupq_n_u32(0); uint16x8_t cursor2 = vld1q_u16(QuickTexHashInitial); uint16x8_t update = vdupq_n_u16(0x2455U); diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp new file mode 100644 index 0000000000..ee47742a85 --- /dev/null +++ b/GPU/Common/VertexDecoderArm64.cpp @@ -0,0 +1,33 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "base/logging.h" +#include "Common/CPUDetect.h" +#include "Core/Config.h" +#include "Core/Reporting.h" +#include "Common/Arm64Emitter.h" +#include "GPU/GPUState.h" +#include "GPU/Common/VertexDecoderCommon.h" + +JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) { + // TODO ARM64 + return NULL; +} + +bool VertexDecoderJitCache::CompileStep(const VertexDecoder &dec, int step) { + return false; +} diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h index 225d71f78b..426c1ae418 100644 --- a/GPU/Common/VertexDecoderCommon.h +++ b/GPU/Common/VertexDecoderCommon.h @@ -25,6 +25,8 @@ #include "GPU/ge_constants.h" #ifdef ARM #include "Common/ArmEmitter.h" +#elif defined(ARM64) +#include "Common/Arm64Emitter.h" #elif defined(_M_IX86) || defined(_M_X64) #include "Common/x64Emitter.h" #elif defined(MIPS) @@ -446,8 +448,7 @@ struct VertexDecoderOptions { bool expand8BitNormalsToFloat; }; -class VertexDecoder -{ +class VertexDecoder { public: VertexDecoder(); @@ -587,6 +588,8 @@ public: #ifdef ARM class VertexDecoderJitCache : public ArmGen::ARMXCodeBlock { +#elif defined(ARM64) +class VertexDecoderJitCache : public Arm64Gen::ARM64CodeBlock { #elif defined(_M_IX86) || defined(_M_X64) class VertexDecoderJitCache : public Gen::XCodeBlock { #elif defined(MIPS) diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index fd1886b67c..31217b07aa 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -262,6 +262,12 @@ true true + + true + true + true + true + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index 3ee6fe52ba..045ee59d1b 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -350,6 +350,9 @@ DirectX9 + + Common + diff --git a/UI/DevScreens.cpp b/UI/DevScreens.cpp index 4f0674a7c3..534c403f51 100644 --- a/UI/DevScreens.cpp +++ b/UI/DevScreens.cpp @@ -283,6 +283,8 @@ const char *GetCompilerABI() { return "armeabi-v7a"; #elif defined(ARM) return "armeabi"; +#elif defined(ARM64) + return "arm64"; #elif defined(_M_IX86) return "x86"; #elif defined(_M_X64) diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index e05bd84d54..df93a9686a 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ 
@@ -469,6 +469,9 @@ void GameSettingsScreen::CreateViews() {
 	systemSettings->Add(new ItemHeader(s->T("UI Language")));
 	systemSettings->Add(new Choice(dev->T("Language", "Language")))->OnClick.Handle(this, &GameSettingsScreen::OnLanguage);
+	systemSettings->Add(new ItemHeader(s->T("Developer Tools")));
+	systemSettings->Add(new Choice(s->T("Developer Tools")))->OnClick.Handle(this, &GameSettingsScreen::OnDeveloperTools);
+
 	systemSettings->Add(new ItemHeader(s->T("Help the PPSSPP team")));
 	enableReports_ = Reporting::IsEnabled();
 	enableReportsCheckbox_ = new CheckBox(&enableReports_, s->T("Enable Compatibility Server Reports"));
@@ -491,9 +494,6 @@ void GameSettingsScreen::CreateViews() {
 #endif
 	systemSettings->Add(new CheckBox(&g_Config.bSetRoundingMode, s->T("Respect FPU rounding (disable for old GEB saves)")))->OnClick.Handle(this, &GameSettingsScreen::OnJitAffectingSetting);
-	systemSettings->Add(new ItemHeader(s->T("Developer Tools")));
-	systemSettings->Add(new Choice(s->T("Developer Tools")))->OnClick.Handle(this, &GameSettingsScreen::OnDeveloperTools);
-
 	systemSettings->Add(new ItemHeader(s->T("General")));
 
 #ifdef ANDROID
diff --git a/android/jni/Android.mk b/android/jni/Android.mk
index de40bf89e7..377ff68b2c 100644
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@@ -51,7 +51,6 @@ ARCH_FILES := \
   $(SRC)/GPU/Common/VertexDecoderX86.cpp
 endif
 
-# ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
 ifeq ($(findstring armeabi-v7a,$(TARGET_ARCH_ABI)),armeabi-v7a)
 ARCH_FILES := \
   $(SRC)/GPU/Common/TextureDecoderNEON.cpp.neon \
@@ -72,9 +71,30 @@ ARCH_FILES := \
   $(SRC)/Core/MIPS/ARM/ArmRegCache.cpp \
   $(SRC)/Core/MIPS/ARM/ArmRegCacheFPU.cpp \
   $(SRC)/GPU/Common/VertexDecoderArm.cpp \
+  $(SRC)/ext/disarm.cpp \
   ArmEmitterTest.cpp
 endif
 
+ifeq ($(findstring arm64-v8a,$(TARGET_ARCH_ABI)),arm64-v8a)
+ARCH_FILES := \
+  $(SRC)/GPU/Common/TextureDecoderNEON.cpp \
+  $(SRC)/Core/Util/AudioFormatNEON.cpp \
+  $(SRC)/Common/Arm64Emitter.cpp \
+  $(SRC)/Common/ArmCPUDetect.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64CompALU.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64CompBranch.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64CompFPU.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64CompLoadStore.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64CompVFPU.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64CompReplace.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64Asm.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64Jit.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64RegCache.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64RegCacheFPU.cpp \
+  $(SRC)/Core/Util/DisArm64.cpp \
+  $(SRC)/GPU/Common/VertexDecoderArm64.cpp
+endif
+
 ifeq ($(TARGET_ARCH_ABI),armeabi)
 ARCH_FILES := \
   $(SRC)/Common/ArmEmitter.cpp \
@@ -112,7 +132,6 @@ EXEC_AND_LIB_FILES := \
   $(SRC)/Core/MIPS/MIPSDebugInterface.cpp \
   $(SRC)/UI/ui_atlas.cpp \
   $(SRC)/UI/OnScreenDisplay.cpp \
-  $(SRC)/ext/disarm.cpp \
   $(SRC)/ext/libkirk/AES.c \
   $(SRC)/ext/libkirk/amctrl.c \
   $(SRC)/ext/libkirk/SHA1.c \
diff --git a/android/jni/Application.mk b/android/jni/Application.mk
index 7ae0f7e0aa..64df16e077 100644
--- a/android/jni/Application.mk
+++ b/android/jni/Application.mk
@@ -1,6 +1,7 @@
 APP_STL := gnustl_static
 APP_PLATFORM := android-9
-APP_ABI := armeabi-v7a x86
+#APP_ABI := armeabi-v7a x86
 #APP_ABI := armeabi-v7a
+APP_ABI := arm64-v8a
 APP_GNUSTL_CPP_FEATURES :=
-NDK_TOOLCHAIN_VERSION := 4.8
+# NDK_TOOLCHAIN_VERSION := 4.9
diff --git a/android/jni/Locals.mk b/android/jni/Locals.mk
index cbc59e0e6a..f8a940f305 100644
--- a/android/jni/Locals.mk
+++ b/android/jni/Locals.mk
@@ -1,10 +1,10 @@
 # These are definitions for LOCAL_ variables for PPSSPP.
 # They are shared between ppsspp_jni (lib for Android app) and ppsspp_headless.
-LOCAL_CFLAGS := -DUSE_FFMPEG -DUSING_GLES2 -DMOBILE_DEVICE -O3 -fsigned-char -Wall -Wno-multichar -Wno-psabi -Wno-unused-variable -fno-strict-aliasing -D__STDC_CONSTANT_MACROS
+LOCAL_CFLAGS := -DUSE_FFMPEG -DUSING_GLES2 -DMOBILE_DEVICE -O3 -fsigned-char -Wall -Wno-multichar -Wno-psabi -Wno-unused-variable -fno-strict-aliasing -D__STDC_CONSTANT_MACROS -Wno-format
 
 # yes, it's really CPPFLAGS for C++
 # literal-suffix is generated by Android default code and causes noise.
-LOCAL_CPPFLAGS := -fno-exceptions -std=gnu++11 -fno-rtti -Wno-reorder -Wno-literal-suffix
+LOCAL_CPPFLAGS := -fno-exceptions -std=gnu++11 -fno-rtti -Wno-reorder -Wno-literal-suffix -Wno-format
 LOCAL_C_INCLUDES := \
   $(LOCAL_PATH)/../../Common \
   $(LOCAL_PATH)/../.. \
@@ -49,12 +49,12 @@ ifeq ($(TARGET_ARCH_ABI),x86)
 LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_ARCH_32 -D_M_IX86 -fomit-frame-pointer -mtune=atom -mfpmath=sse -mssse3
 endif
 
 ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
-  #LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavformat.a
-  #LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavcodec.a
-  #LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswresample.a
-  #LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswscale.a
-  #LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavutil.a
-  #LOCAL_C_INCLUDES += $(LOCAL_PATH)/../../ffmpeg/android/arm64/include
+  LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavformat.a
+  LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavcodec.a
+  LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswresample.a
+  LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswscale.a
+  LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavutil.a
+  LOCAL_C_INCLUDES += $(LOCAL_PATH)/../../ffmpeg/android/arm64/include
   LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_ARCH_64 -DARM64
 endif
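
Note on the MIPS.cpp hunk above: since MIPSComp::jit is now explicitly reset to nullptr whenever a non-JIT CPU core is selected, any code path that may run under either core has to null-check the pointer before dispatching into the JIT. Below is a minimal standalone sketch of that pattern; it is not part of the diff, and JitInterface, RunCpuLoop and RunInterpreterLoop are illustrative names, not PPSSPP's actual API.

// sketch.cpp - hypothetical caller-side null check for MIPSComp::jit
#include <cstdio>

namespace MIPSComp {
	// Stand-in for the real jit class; RunLoopUntil is just a placeholder here.
	struct JitInterface {
		void RunLoopUntil(unsigned long long globalTicks) { (void)globalTicks; }
	};
	// Mirrors the new explicit "MIPSComp::jit = nullptr" assignment in MIPSState::Init().
	JitInterface *jit = nullptr;
}

static void RunInterpreterLoop(unsigned long long ticks) {
	std::printf("interpreter core for %llu ticks\n", ticks);
}

static void RunCpuLoop(unsigned long long ticks) {
	if (MIPSComp::jit)
		MIPSComp::jit->RunLoopUntil(ticks);  // JIT core is available
	else
		RunInterpreterLoop(ticks);           // jit == nullptr: take the interpreter path
}

int main() {
	RunCpuLoop(1000);
	return 0;
}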