Initial work on ARM64, based on the ARM jit.

Henrik Rydgard 2015-03-05 23:31:03 +01:00
parent f4dfd49a7b
commit b309c83973
36 changed files with 3669 additions and 36 deletions

Common/Arm64Emitter.cpp
View file

@@ -3,6 +3,8 @@
// Refer to the license.txt file included.
#include <limits>
#include <algorithm>
#include <cmath>
#include "Arm64Emitter.h"
#include "MathUtil.h"
@@ -193,11 +195,11 @@ void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr
bool b64Bit = Is64Bit(Rt);
s64 distance = (s64)ptr - (s64)m_code;
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance);
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, (int)distance);
distance >>= 2;
_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance);
_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance);
Rt = DecodeReg(Rt);
Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | \
@@ -209,11 +211,11 @@ void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const voi
bool b64Bit = Is64Bit(Rt);
s64 distance = (s64)ptr - (s64)m_code;
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance);
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, (int)distance);
distance >>= 2;
_assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s: Received too large distance: %lx", __FUNCTION__, distance);
_assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance);
Rt = DecodeReg(Rt);
Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | \
@@ -224,11 +226,11 @@ void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr)
{
s64 distance = (s64)ptr - s64(m_code);
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance);
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, (int)distance);
distance >>= 2;
_assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance);
_assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance);
Write32((op << 31) | (0x5 << 26) | (distance & 0x3FFFFFF));
}
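All three encoders above share one pattern: take the byte distance from the current code pointer, require 4-byte alignment, shift right by two, and check that the result fits the instruction's signed immediate field (26 bits for B/BL, 19 for CBZ/CBNZ and B.cond, 14 for TBZ/TBNZ); the asserts in these hunks use hand-written bounds to the same effect. A minimal standalone sketch of that check, not from the commit:

#include <cstdint>

// True if a byte 'distance' is encodable as a PC-relative branch whose
// word offset occupies an 'immBits'-wide signed field (B: 26, CBZ: 19, TBZ: 14).
static bool BranchDistanceFits(int64_t distance, int immBits) {
    if (distance & 0x3)
        return false;                         // must be a multiple of 4
    int64_t words = distance >> 2;
    int64_t limit = int64_t(1) << (immBits - 1);
    return words >= -limit && words < limit;  // signed two's-complement range
}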
@@ -566,7 +568,7 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch)
Not = true;
case 0: // CBZ
{
_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance);
_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, (int)distance);
bool b64Bit = Is64Bit(branch.reg);
ARM64Reg reg = DecodeReg(branch.reg);
inst = (b64Bit << 31) | (0x1A << 25) | (Not << 24) | ((distance << 5) & 0xFFFFE0) | reg;
@@ -580,7 +582,7 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch)
Not = true;
case 3: // TBZ
{
_assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance);
_assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, (int)distance);
ARM64Reg reg = DecodeReg(branch.reg);
inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | ((branch.bit & 0x1F) << 19) | (distance << 5) | reg;
}
@@ -677,7 +679,7 @@ void ARM64XEmitter::B(CCFlags cond, const void* ptr)
s64 distance = (s64)ptr - (s64(m_code) + 8);
distance >>= 2;
_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance);
_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance);
Write32((0x54 << 24) | (distance << 5) | cond);
}
@@ -1488,7 +1490,7 @@ void ARM64XEmitter::ADRP(ARM64Reg Rd, s32 imm)
// Wrapper around MOVZ+MOVK
void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
{
unsigned parts = Is64Bit(Rd) ? 4 : 2;
unsigned int parts = Is64Bit(Rd) ? 4 : 2;
BitSet32 upload_part(0);
bool need_movz = false;
@@ -1513,7 +1515,7 @@ void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
// XXX: Support rotating immediates to save instructions
if (optimize)
{
for (unsigned i = 0; i < parts; ++i)
for (unsigned int i = 0; i < parts; ++i)
{
if ((imm >> (i * 16)) & 0xFFFF)
upload_part[i] = 1;
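MOVI2R materializes a constant as one MOVZ (which zeroes every other 16-bit chunk) followed by a MOVK per remaining non-zero chunk; the loop above only records which chunks need uploading. A standalone sketch of the same plan, not from the commit:

#include <cstdint>
#include <cstdio>

// Print the MOVZ/MOVK sequence that would materialize 'imm'.
// 'parts' is 4 for a 64-bit destination register, 2 for a 32-bit one.
static void PlanMovImm(uint64_t imm, unsigned parts) {
    bool first = true;
    for (unsigned i = 0; i < parts; ++i) {
        unsigned chunk = unsigned((imm >> (i * 16)) & 0xFFFF);
        if (chunk == 0)
            continue;  // the initial MOVZ already zeroes this chunk
        printf("%s #0x%04x, LSL #%u\n", first ? "MOVZ" : "MOVK", chunk, i * 16);
        first = false;
    }
    if (first)
        printf("MOVZ #0, LSL #0\n");  // imm == 0 still takes one instruction
}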

Core/Core.vcxproj
View file

@@ -286,6 +286,66 @@
<ClCompile Include="Loaders.cpp" />
<ClCompile Include="MemMap.cpp" />
<ClCompile Include="MemmapFunctions.cpp" />
<ClCompile Include="MIPS\ARM64\Arm64Asm.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="MIPS\ARM64\Arm64CompALU.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="MIPS\ARM64\Arm64CompBranch.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="MIPS\ARM64\Arm64CompFPU.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="MIPS\ARM64\Arm64CompLoadStore.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="MIPS\ARM64\Arm64CompReplace.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="MIPS\ARM64\Arm64CompVFPU.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="MIPS\ARM64\Arm64Jit.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="MIPS\ARM64\Arm64RegCache.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="MIPS\ARM64\Arm64RegCacheFPU.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="MIPS\ARM\ArmAsm.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
@@ -540,6 +600,30 @@
<ClInclude Include="Loaders.h" />
<ClInclude Include="MemMap.h" />
<ClInclude Include="MemMapHelpers.h" />
<ClInclude Include="MIPS\ARM64\Arm64Asm.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="MIPS\ARM64\Arm64Jit.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="MIPS\ARM64\Arm64RegCache.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="MIPS\ARM64\Arm64RegCacheFPU.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="MIPS\ARM\ArmAsm.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
@@ -632,4 +716,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Core/Core.vcxproj.filters
View file

@@ -557,6 +557,16 @@
<ClCompile Include="HLE\sceSfmt19937.cpp">
<Filter>HLE\Libraries</Filter>
</ClCompile>
<ClCompile Include="MIPS\ARM64\Arm64CompBranch.cpp" />
<ClCompile Include="MIPS\ARM64\Arm64CompFPU.cpp" />
<ClCompile Include="MIPS\ARM64\Arm64CompLoadStore.cpp" />
<ClCompile Include="MIPS\ARM64\Arm64CompReplace.cpp" />
<ClCompile Include="MIPS\ARM64\Arm64CompVFPU.cpp" />
<ClCompile Include="MIPS\ARM64\Arm64Jit.cpp" />
<ClCompile Include="MIPS\ARM64\Arm64RegCache.cpp" />
<ClCompile Include="MIPS\ARM64\Arm64RegCacheFPU.cpp" />
<ClCompile Include="MIPS\ARM64\Arm64Asm.cpp" />
<ClCompile Include="MIPS\ARM64\Arm64CompALU.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="ELF\ElfReader.h">
@@ -1064,6 +1074,10 @@
<ClInclude Include="MemMapHelpers.h">
<Filter>Core</Filter>
</ClInclude>
<ClInclude Include="MIPS\ARM64\Arm64Jit.h" />
<ClInclude Include="MIPS\ARM64\Arm64RegCache.h" />
<ClInclude Include="MIPS\ARM64\Arm64RegCacheFPU.h" />
<ClInclude Include="MIPS\ARM64\Arm64Asm.h" />
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />
@@ -1071,4 +1085,4 @@
<None Include="..\android\jni\Android.mk" />
<None Include="GameLogNotes.txt" />
</ItemGroup>
</Project>
</Project>

Core/HLE/ReplaceTables.cpp
View file

@@ -1015,6 +1015,8 @@ static int Hook_gakuenheaven_download_frame() {
#ifdef ARM
#define JITFUNC(f) (&MIPSComp::ArmJit::f)
#elif defined(ARM64)
#define JITFUNC(f) (&MIPSComp::Arm64Jit::f)
#elif defined(_M_X64) || defined(_M_IX86)
#define JITFUNC(f) (&MIPSComp::Jit::f)
#elif defined(MIPS)
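JITFUNC resolves one member name against whichever jit class the target architecture compiles in, so every backend has to expose identically named members. The mechanism is ordinary pointer-to-member dispatch; a self-contained illustration with hypothetical names, not the file's actual table layout:

#include <cstdio>

struct Arm64Jit { int Replace_fabsf() { return 4; } };
#define JITFUNC(f) (&Arm64Jit::f)

int main() {
    // A hook table stores the member pointer; the call site supplies the jit.
    int (Arm64Jit::*fn)() = JITFUNC(Replace_fabsf);
    Arm64Jit jit;
    printf("%d\n", (jit.*fn)());  // prints 4
    return 0;
}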

Core/MIPS/ARM64/Arm64Asm.cpp
View file

@@ -0,0 +1,86 @@
// Copyright (c) 2015- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "Core/MemMap.h"
#include "Core/MIPS/MIPS.h"
#include "Core/System.h"
#include "Core/CoreTiming.h"
#include "Common/MemoryUtil.h"
#include "Common/CPUDetect.h"
#include "Common/Arm64Emitter.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/ARM64/Arm64Asm.h"
#include "Core/MIPS/JitCommon/JitCommon.h"
using namespace Arm64Gen;
//static int temp32; // unused?
static const bool enableDebug = false;
//static bool enableStatistics = false; //unused?
//The standard ARM calling convention allocates the 16 ARM registers as:
// r15 is the program counter.
// r14 is the link register. (The BL instruction, used in a subroutine call, stores the return address in this register).
// r13 is the stack pointer. (The Push/Pop instructions in "Thumb" operating mode use this register only).
// r12 is the Intra-Procedure-call scratch register.
// r4 to r11: used to hold local variables.
// r0 to r3: used to hold argument values passed to a subroutine, and also hold results returned from a subroutine.
// Mappable registers:
// R2, R3, R4, R5, R6, R8, R11
// STATIC ALLOCATION ARM:
// R10 : MIPS state
// R11 : Memory base pointer.
// R7 : Down counter
extern volatile CoreState coreState;
void ShowPC(u32 sp) {
if (currentMIPS) {
ERROR_LOG(JIT, "ShowPC : %08x ArmSP : %08x", currentMIPS->pc, sp);
} else {
ERROR_LOG(JIT, "Universe corrupt?");
}
}
void DisassembleArm(const u8 *data, int size);
// PLAN: no more block numbers - crazy opcodes just contain offset within
// dynarec buffer
// At this offset - 4, there is an int specifying the block number.
namespace MIPSComp {
using namespace Arm64JitConstants;
void Arm64Jit::GenerateFixedCode()
{
// Uncomment if you want to see the output...
// INFO_LOG(JIT, "THE DISASM ========================");
// DisassembleArm(enterCode, GetCodePtr() - enterCode);
// INFO_LOG(JIT, "END OF THE DISASM ========================");
// Don't forget to zap the instruction cache!
FlushIcache();
}
} // namespace MIPSComp
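"Don't forget to zap the instruction cache" matters on AArch64 because the data and instruction caches are not kept coherent for freshly written code. A sketch of what FlushIcache() can reduce to under GCC or Clang (the builtin's availability is an assumption, not something this commit shows):

#include <cstdint>

// Flush a newly emitted code range so the CPU fetches the new instructions.
// On AArch64 the builtin performs the DC CVAU / IC IVAU / ISB maintenance
// sequence over the range.
static void FlushIcacheSection(uint8_t *start, uint8_t *end) {
#if defined(__GNUC__) || defined(__clang__)
    __builtin___clear_cache(reinterpret_cast<char *>(start),
                            reinterpret_cast<char *>(end));
#endif
}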

Core/MIPS/ARM64/Arm64Asm.h
View file

@@ -0,0 +1,22 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "Core/MIPS/MIPS.h"
// Runtime generated assembly routines, like the Dispatcher.

Core/MIPS/ARM64/Arm64CompALU.cpp
View file

@@ -0,0 +1,83 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <algorithm>
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"
#include "Common/CPUDetect.h"
using namespace MIPSAnalyst;
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)
#define _POS ((op>> 6) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)
#define _IMM16 (signed short)(op & 0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }
namespace MIPSComp
{
using namespace Arm64Gen;
using namespace Arm64JitConstants;
void Arm64Jit::Comp_IType(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_RType2(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_RType3(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_ShiftType(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Special3(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Allegrex(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Allegrex2(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_MulDivType(MIPSOpcode op) {
DISABLE;
}
}

Core/MIPS/ARM64/Arm64CompBranch.cpp
View file

@@ -0,0 +1,614 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "Core/Reporting.h"
#include "Core/Config.h"
#include "Core/MemMap.h"
#include "Core/HLE/HLE.h"
#include "Core/HLE/HLETables.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"
#include "Core/MIPS/JitCommon/JitBlockCache.h"
#include "Common/Arm64Emitter.h"
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)
#define _POS ((op>> 6) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)
#define _IMM16 (signed short)(op & 0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)
#define LOOPOPTIMIZATION 0
// We can disable nice delay slots.
// #define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false;
#define CONDITIONAL_NICE_DELAYSLOT ;
using namespace MIPSAnalyst;
namespace MIPSComp
{
using namespace Arm64Gen;
using namespace Arm64JitConstants;
void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = _IMM16 << 2;
MIPSGPReg rt = _RT;
MIPSGPReg rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
bool immBranch = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 rsImm = (s32)gpr.GetImm(rs);
s32 rtImm = (s32)gpr.GetImm(rt);
switch (cc)
{
case CC_EQ: immBranchNotTaken = rsImm == rtImm; break;
case CC_NEQ: immBranchNotTaken = rsImm != rtImm; break;
default: immBranchNotTaken = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp().");
}
immBranch = true;
immBranchTaken = !immBranchNotTaken;
}
if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) {
if (!immBranchTaken) {
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (immBranch) {
// Continuing is handled above, this is just static jumping.
if (immBranchTaken || !likely)
CompileDelaySlot(DELAYSLOT_FLUSH);
else
FlushAll();
const u32 destAddr = immBranchTaken ? targetAddr : js.compilerPC + 8;
WriteExit(destAddr, js.nextExit++);
} else {
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
// We might be able to flip the condition (EQ/NEQ are easy.)
const bool canFlip = cc == CC_EQ || cc == CC_NEQ;
// TODO ARM64: Optimize for immediates
gpr.MapInIn(rs, rt);
CMP(gpr.R(rs), gpr.R(rt));
Arm64Gen::FixupBranch ptr;
if (!likely) {
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B(cc);
} else {
FlushAll();
ptr = B(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, js.nextExit++);
}
js.compiling = false;
}
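When both operands are known immediates, BranchRSRTComp resolves the branch at compile time and emits a single static exit. The address arithmetic it relies on, as a standalone sketch (not the commit's code):

#include <cstdint>

// Decide a beq/bne at compile time from two known register values.
// The not-taken address pc + 8 skips both the branch and its delay slot.
static uint32_t FoldBranchTarget(uint32_t pc, int16_t imm16,
                                 int32_t rsImm, int32_t rtImm, bool isBeq) {
    uint32_t targetAddr = pc + (int32_t(imm16) << 2) + 4;
    bool taken = isBeq ? (rsImm == rtImm) : (rsImm != rtImm);
    return taken ? targetAddr : pc + 8;
}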
void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = _IMM16 << 2;
MIPSGPReg rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
bool immBranch = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs)) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 imm = (s32)gpr.GetImm(rs);
switch (cc)
{
case CC_GT: immBranchNotTaken = imm > 0; break;
case CC_GE: immBranchNotTaken = imm >= 0; break;
case CC_LT: immBranchNotTaken = imm < 0; break;
case CC_LE: immBranchNotTaken = imm <= 0; break;
default: immBranchNotTaken = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp().");
}
immBranch = true;
immBranchTaken = !immBranchNotTaken;
}
if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) {
if (!immBranchTaken) {
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
if (andLink)
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (immBranch) {
// Continuing is handled above, this is just static jumping.
if (immBranchTaken && andLink)
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
if (immBranchTaken || !likely)
CompileDelaySlot(DELAYSLOT_FLUSH);
else
FlushAll();
const u32 destAddr = immBranchTaken ? targetAddr : js.compilerPC + 8;
WriteExit(destAddr, js.nextExit++);
} else {
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
CMP(gpr.R(rs), 0);
Arm64Gen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B(cc);
}
else
{
FlushAll();
ptr = B(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
if (andLink)
{
gpr.SetRegImm(SCRATCHREG1, js.compilerPC + 8);
STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, MIPS_REG_RA * 4);
}
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, js.nextExit++);
}
js.compiling = false;
}
void Arm64Jit::Comp_RelBranch(MIPSOpcode op)
{
// The CC flags here should be opposite of the actual branch because they skip the branching action:
// for beq we pass CC_NEQ, so the emitted conditional branch jumps past the taken-path exit when the comparison fails.
switch (op >> 26)
{
case 4: BranchRSRTComp(op, CC_NEQ, false); break;//beq
case 5: BranchRSRTComp(op, CC_EQ, false); break;//bne
case 6: BranchRSZeroComp(op, CC_GT, false, false); break;//blez
case 7: BranchRSZeroComp(op, CC_LE, false, false); break;//bgtz
case 20: BranchRSRTComp(op, CC_NEQ, true); break;//beql
case 21: BranchRSRTComp(op, CC_EQ, true); break;//bnel
case 22: BranchRSZeroComp(op, CC_GT, false, true); break;//blezl
case 23: BranchRSZeroComp(op, CC_LE, false, true); break;//bgtzl
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
}
void Arm64Jit::Comp_RelBranchRI(MIPSOpcode op)
{
switch ((op >> 16) & 0x1F)
{
case 0: BranchRSZeroComp(op, CC_GE, false, false); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltz
case 1: BranchRSZeroComp(op, CC_LT, false, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez
case 2: BranchRSZeroComp(op, CC_GE, false, true); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 8; break;//bltzl
case 3: BranchRSZeroComp(op, CC_LT, false, true); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl
case 16: BranchRSZeroComp(op, CC_GE, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltzal
case 17: BranchRSZeroComp(op, CC_LT, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgezal
case 18: BranchRSZeroComp(op, CC_GE, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else SkipLikely(); break;//bltzall
case 19: BranchRSZeroComp(op, CC_LT, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else SkipLikely(); break;//bgezall
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
}
// If likely is set, discard the branch slot if NOT taken.
void Arm64Jit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely) {
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = _IMM16 << 2;
u32 targetAddr = js.compilerPC + offset + 4;
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(MIPS_REG_FPCOND);
TSTI2R(gpr.R(MIPS_REG_FPCOND), 1, W0);
Arm64Gen::FixupBranch ptr;
if (!likely) {
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B(cc);
} else {
FlushAll();
ptr = B(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, js.nextExit++);
js.compiling = false;
}
void Arm64Jit::Comp_FPUBranch(MIPSOpcode op) {
switch((op >> 16) & 0x1f) {
case 0: BranchFPFlag(op, CC_NEQ, false); break; // bc1f
case 1: BranchFPFlag(op, CC_EQ, false); break; // bc1t
case 2: BranchFPFlag(op, CC_NEQ, true); break; // bc1fl
case 3: BranchFPFlag(op, CC_EQ, true); break; // bc1tl
default:
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
break;
}
}
// If likely is set, discard the branch slot if NOT taken.
void Arm64Jit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely) {
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = _IMM16 << 2;
u32 targetAddr = js.compilerPC + offset + 4;
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
// Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle)
// The behavior is undefined - the CPU may take the second branch even if the first one passes.
// However, it does consistently try each branch, which these games seem to expect.
bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp);
bool delaySlotIsNice = !delaySlotIsBranch && IsDelaySlotNiceVFPU(op, delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", js.compilerPC);
int imm3 = (op >> 18) & 7;
gpr.MapReg(MIPS_REG_VFPUCC);
TSTI2R(gpr.R(MIPS_REG_VFPUCC), 1 << imm3, W0);
Arm64Gen::FixupBranch ptr;
js.inDelaySlot = true;
if (!likely)
{
if (!delaySlotIsNice && !delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B(cc);
}
else
{
FlushAll();
ptr = B(cc);
if (!delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
}
js.inDelaySlot = false;
// Take the branch
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8);
WriteExit(notTakenTarget, js.nextExit++);
js.compiling = false;
}
void Arm64Jit::Comp_VBranch(MIPSOpcode op)
{
switch ((op >> 16) & 3)
{
case 0: BranchVFPUFlag(op, CC_NEQ, false); break; // bvf
case 1: BranchVFPUFlag(op, CC_EQ, false); break; // bvt
case 2: BranchVFPUFlag(op, CC_NEQ, true); break; // bvfl
case 3: BranchVFPUFlag(op, CC_EQ, true); break; // bvtl
}
}
void Arm64Jit::Comp_Jump(MIPSOpcode op) {
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
u32 off = _IMM26 << 2;
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
// Might be a stubbed address or something?
if (!Memory::IsValidAddress(targetAddr)) {
if (js.nextExit == 0) {
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr);
} else {
js.compiling = false;
}
// TODO: Mark this block dirty or something? May be indication it will be changed by imports.
return;
}
switch (op >> 26) {
case 2: //j
CompileDelaySlot(DELAYSLOT_NICE);
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
FlushAll();
WriteExit(targetAddr, js.nextExit++);
break;
case 3: //jal
if (ReplaceJalTo(targetAddr))
return;
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
FlushAll();
WriteExit(targetAddr, js.nextExit++);
break;
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
void Arm64Jit::Comp_JumpReg(MIPSOpcode op)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
MIPSGPReg rs = _RS;
MIPSGPReg rd = _RD;
bool andLink = (op & 0x3f) == 9;
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
if (andLink && rs == rd)
delaySlotIsNice = false;
CONDITIONAL_NICE_DELAYSLOT;
ARM64Reg destReg = X8;
if (IsSyscall(delaySlotOp)) {
gpr.MapReg(rs);
MovToPC(gpr.R(rs)); // For syscall to be able to return.
if (andLink)
gpr.SetImm(rd, js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_FLUSH);
return; // Syscall wrote exit code.
} else if (delaySlotIsNice) {
if (andLink)
gpr.SetImm(rd, js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);
if (!andLink && rs == MIPS_REG_RA && g_Config.bDiscardRegsOnJRRA) {
// According to the MIPS ABI, there are some regs we don't need to preserve.
// Let's discard them so we don't need to write them back.
// NOTE: Not all games follow the MIPS ABI! Tekken 6, for example, will crash
// with this enabled.
gpr.DiscardR(MIPS_REG_COMPILER_SCRATCH);
for (int i = MIPS_REG_A0; i <= MIPS_REG_T7; i++)
gpr.DiscardR((MIPSGPReg)i);
gpr.DiscardR(MIPS_REG_T8);
gpr.DiscardR(MIPS_REG_T9);
}
if (jo.continueJumps && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) {
AddContinuedBlock(gpr.GetImm(rs));
// Account for the increment in the loop.
js.compilerPC = gpr.GetImm(rs) - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
gpr.MapReg(rs);
destReg = gpr.R(rs); // Safe because FlushAll doesn't change any regs
FlushAll();
} else {
// Delay slot - this case is very rare, might be able to free up X8.
gpr.MapReg(rs);
MOV(W8, gpr.R(rs));
if (andLink)
gpr.SetImm(rd, js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
}
switch (op & 0x3f)
{
case 8: //jr
break;
case 9: //jalr
break;
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
WriteExitDestInR(destReg);
js.compiling = false;
}
void Arm64Jit::Comp_Syscall(MIPSOpcode op)
{
if (!g_Config.bSkipDeadbeefFilling)
{
// All of these will be overwritten with DEADBEEF anyway.
gpr.DiscardR(MIPS_REG_COMPILER_SCRATCH);
// We need to keep A0 - T3, which are used for args.
gpr.DiscardR(MIPS_REG_T4);
gpr.DiscardR(MIPS_REG_T5);
gpr.DiscardR(MIPS_REG_T6);
gpr.DiscardR(MIPS_REG_T7);
gpr.DiscardR(MIPS_REG_T8);
gpr.DiscardR(MIPS_REG_T9);
gpr.DiscardR(MIPS_REG_HI);
gpr.DiscardR(MIPS_REG_LO);
}
// If we're in a delay slot, this is off by one.
const int offset = js.inDelaySlot ? -1 : 0;
WriteDownCount(offset);
RestoreRoundingMode();
js.downcountAmount = -offset;
// TODO: Maybe discard v0, v1, and some temps? Definitely at?
FlushAll();
SaveDowncount();
// Skip the CallSyscall where possible.
void *quickFunc = GetQuickSyscallFunc(op);
if (quickFunc)
{
gpr.SetRegImm(W0, (u32)(intptr_t)GetSyscallInfo(op));
// Already flushed, so X1 is safe.
QuickCallFunction(X1, quickFunc);
}
else
{
gpr.SetRegImm(W0, op.encoding);
QuickCallFunction(X1, (void *)&CallSyscall);
}
ApplyRoundingMode();
RestoreDowncount();
WriteSyscallExit();
js.compiling = false;
}
void Arm64Jit::Comp_Break(MIPSOpcode op)
{
Comp_Generic(op);
WriteSyscallExit();
js.compiling = false;
}
} // namespace MIPSComp

Core/MIPS/ARM64/Arm64CompFPU.cpp
View file

@@ -0,0 +1,75 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "Core/Config.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"
#include "Common/CPUDetect.h"
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)
#define _POS ((op>> 6) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)
#define _IMM16 (signed short)(op & 0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }
namespace MIPSComp
{
using namespace Arm64Gen;
using namespace Arm64JitConstants;
void Arm64Jit::Comp_FPU3op(MIPSOpcode op)
{
DISABLE;
}
void Arm64Jit::Comp_FPULS(MIPSOpcode op)
{
DISABLE;
}
void Arm64Jit::Comp_FPUComp(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_FPU2op(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_mxc1(MIPSOpcode op)
{
DISABLE;
}
} // namespace MIPSComp

Core/MIPS/ARM64/Arm64CompLoadStore.cpp
View file

@@ -0,0 +1,83 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
// Optimization ideas:
//
// It's common to see sequences of stores writing or reading to a contiguous set of
// addresses in function prologues/epilogues:
// sw s5, 104(sp)
// sw s4, 100(sp)
// sw s3, 96(sp)
// sw s2, 92(sp)
// sw s1, 88(sp)
// sw s0, 84(sp)
// sw ra, 108(sp)
// mov s4, a0
// mov s3, a1
// ...
// Such sequences could easily be detected and turned into nice contiguous
// sequences of ARM stores instead of the current 3 instructions per sw/lw.
//
// Also, if we kept track of the likely register content of a cached register,
// (pointer or data), we could avoid many BIC instructions.
#include "Core/MemMap.h"
#include "Core/Config.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)
#define _POS ((op>> 6) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)
#define _IMM16 (signed short)(op & 0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }
namespace MIPSComp
{
using namespace Arm64Gen;
using namespace Arm64JitConstants;
void Arm64Jit::Comp_ITypeMemLR(MIPSOpcode op, bool load) {
DISABLE;
}
void Arm64Jit::Comp_ITypeMem(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Cache(MIPSOpcode op) {
DISABLE;
}
}
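The optimization idea at the top of this file maps naturally onto AArch64, where two adjacent 32-bit stores can fuse into a single STP. A sketch of the pairing pass, assuming the run of sw instructions has already been collected and sorted by offset (names hypothetical, not the commit's code):

#include <cstdio>

struct PendingStore { int mipsReg; int offset; };  // one entry per sw xx, N(sp)

// Pair stores whose offsets are 4 apart; each pair becomes one STP
// instead of two STRs, the rest stay single stores.
static void PlanStorePairs(const PendingStore *stores, int count) {
    for (int i = 0; i < count; ) {
        if (i + 1 < count && stores[i + 1].offset == stores[i].offset + 4) {
            printf("STP w%d, w%d, [sp, #%d]\n",
                   stores[i].mipsReg, stores[i + 1].mipsReg, stores[i].offset);
            i += 2;
        } else {
            printf("STR w%d, [sp, #%d]\n", stores[i].mipsReg, stores[i].offset);
            i += 1;
        }
    }
}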

Core/MIPS/ARM64/Arm64CompReplace.cpp
View file

@@ -0,0 +1,33 @@
// Copyright (c) 2013- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "Common/CPUDetect.h"
#include "Core/MemMap.h"
#include "Core/MIPS/JitCommon/JitCommon.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"
namespace MIPSComp {
int Arm64Jit::Replace_fabsf() {
// TODO ARM64
// fpr.MapDirtyIn(0, 12);
// VABS(fpr.R(0), fpr.R(12));
return 4; // Number of instructions in the MIPS function
}
}

Core/MIPS/ARM64/Arm64CompVFPU.cpp
View file

@@ -0,0 +1,231 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <cmath>
#include "math/math_util.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Common/CPUDetect.h"
#include "Core/Config.h"
#include "Core/Reporting.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)
#define _POS ((op>> 6) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)
#define _IMM16 (signed short)(op & 0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)
namespace MIPSComp
{
using namespace Arm64Gen;
using namespace Arm64JitConstants;
void Arm64Jit::Comp_VPFX(MIPSOpcode op)
{
CONDITIONAL_DISABLE;
int data = op & 0xFFFFF;
int regnum = (op >> 24) & 3;
switch (regnum) {
case 0: // S
js.prefixS = data;
js.prefixSFlag = JitState::PREFIX_KNOWN_DIRTY;
break;
case 1: // T
js.prefixT = data;
js.prefixTFlag = JitState::PREFIX_KNOWN_DIRTY;
break;
case 2: // D
js.prefixD = data;
js.prefixDFlag = JitState::PREFIX_KNOWN_DIRTY;
break;
default:
ERROR_LOG(CPU, "VPFX - bad regnum %i : data=%08x", regnum, data);
break;
}
}
void Arm64Jit::Comp_SV(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_SVQ(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_VVectorInit(MIPSOpcode op)
{
DISABLE;
}
void Arm64Jit::Comp_VIdt(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_VMatrixInit(MIPSOpcode op)
{
DISABLE;
}
void Arm64Jit::Comp_VHdp(MIPSOpcode op) {
DISABLE;
}
static const float MEMORY_ALIGNED16(vavg_table[4]) = { 1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f };
void Arm64Jit::Comp_Vhoriz(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_VDot(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_VecDo3(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_VV2Op(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vi2f(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vh2f(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vf2i(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Mftv(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vmfvc(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vmtvc(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vmmov(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_VScl(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vmmul(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vmscl(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vtfm(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_VCrs(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_VDet(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vi2x(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vx2i(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_VCrossQuat(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vcmp(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vcmov(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Viim(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vfim(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vcst(MIPSOpcode op) {
DISABLE;
}
// Very heavily used by FF:CC. Should be replaced by a fast approximation instead of
// calling the math library.
// Apparently this may not work on hardfp. I don't think we have any platforms using this though.
void Arm64Jit::Comp_VRot(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vsgn(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vocp(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_ColorConv(MIPSOpcode op) {
DISABLE;
}
void Arm64Jit::Comp_Vbfy(MIPSOpcode op) {
DISABLE;
}
}

Core/MIPS/ARM64/Arm64Jit.cpp
View file

@@ -0,0 +1,485 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "base/logging.h"
#include "Common/ChunkFile.h"
#include "Common/CPUDetect.h"
#include "Core/Reporting.h"
#include "Core/Config.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/Debugger/SymbolMap.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/HLE/ReplaceTables.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"
#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "ext/disarm.h"
using namespace Arm64JitConstants;
void DisassembleArm64Print(const u8 *data, int size) {
ILOG("ARM64 TODO");
}
namespace MIPSComp
{
using namespace Arm64Gen;
using namespace Arm64JitConstants;
Arm64Jit::Arm64Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &js, &jo), fpr(mips, &js, &jo), mips_(mips) {
logBlocks = 0;
dontLogBlocks = 0;
blocks.Init();
gpr.SetEmitter(this);
fpr.SetEmitter(this);
AllocCodeSpace(1024 * 1024 * 16); // 32MB is the absolute max because that's what an ARM branch instruction can reach, backwards and forwards.
GenerateFixedCode();
js.startDefaultPrefix = mips_->HasDefaultPrefix();
}
Arm64Jit::~Arm64Jit() {
}
void Arm64Jit::DoState(PointerWrap &p)
{
auto s = p.Section("Jit", 1, 2);
if (!s)
return;
p.Do(js.startDefaultPrefix);
if (s >= 2) {
p.Do(js.hasSetRounding);
js.lastSetRounding = 0;
} else {
js.hasSetRounding = 1;
}
}
// This is here so the savestate matches between jit and non-jit.
void Arm64Jit::DoDummyState(PointerWrap &p)
{
auto s = p.Section("Jit", 1, 2);
if (!s)
return;
bool dummy = false;
p.Do(dummy);
if (s >= 2) {
dummy = true;
p.Do(dummy);
}
}
void Arm64Jit::FlushAll()
{
gpr.FlushAll();
fpr.FlushAll();
FlushPrefixV();
}
void Arm64Jit::FlushPrefixV()
{
if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) {
gpr.SetRegImm(SCRATCHREG1, js.prefixS);
STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_SPREFIX]));
js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY);
}
if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) {
gpr.SetRegImm(SCRATCHREG1, js.prefixT);
STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_TPREFIX]));
js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY);
}
if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) {
gpr.SetRegImm(SCRATCHREG1, js.prefixD);
STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_DPREFIX]));
js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY);
}
}
void Arm64Jit::ClearCache()
{
blocks.Clear();
ClearCodeSpace();
GenerateFixedCode();
}
void Arm64Jit::InvalidateCache()
{
blocks.Clear();
}
void Arm64Jit::InvalidateCacheAt(u32 em_address, int length)
{
blocks.InvalidateICache(em_address, length);
}
void Arm64Jit::EatInstruction(MIPSOpcode op) {
MIPSInfo info = MIPSGetInfo(op);
if (info & DELAYSLOT) {
ERROR_LOG_REPORT_ONCE(ateDelaySlot, JIT, "Ate a branch op.");
}
if (js.inDelaySlot) {
ERROR_LOG_REPORT_ONCE(ateInDelaySlot, JIT, "Ate an instruction inside a delay slot.");
}
js.numInstructions++;
js.compilerPC += 4;
js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
void Arm64Jit::CompileDelaySlot(int flags)
{
// TODO ARM64
}
void Arm64Jit::Compile(u32 em_address) {
if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) {
ClearCache();
}
int block_num = blocks.AllocateBlock(em_address);
JitBlock *b = blocks.GetBlock(block_num);
DoJit(em_address, b);
blocks.FinalizeBlock(block_num, jo.enableBlocklink);
bool cleanSlate = false;
if (js.hasSetRounding && !js.lastSetRounding) {
WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks");
// Won't loop, since hasSetRounding is only ever set to 1.
js.lastSetRounding = js.hasSetRounding;
cleanSlate = true;
}
// Drat. The VFPU hit an uneaten prefix at the end of a block.
if (js.startDefaultPrefix && js.MayHavePrefix()) {
WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", js.compilerPC - 4);
js.LogPrefix();
// Let's try that one more time. We won't get back here because we toggled the value.
js.startDefaultPrefix = false;
cleanSlate = true;
}
if (cleanSlate) {
// Our assumptions are all wrong so it's clean-slate time.
ClearCache();
Compile(em_address);
}
}
void Arm64Jit::RunLoopUntil(u64 globalticks)
{
((void (*)())enterCode)();
}
const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b)
{
js.cancel = false;
js.blockStart = js.compilerPC = mips_->pc;
js.lastContinuedPC = 0;
js.initialBlockSize = 0;
js.nextExit = 0;
js.downcountAmount = 0;
js.curBlock = b;
js.compiling = true;
js.inDelaySlot = false;
js.PrefixStart();
// We add a downcount flag check before the block, used when entering from a linked block.
// The last block decremented downcounter, and the flag should still be available.
// Got three variants here of where we position the code, needs detailed benchmarking.
FixupBranch bail;
/*
if (jo.useBackJump) {
// Moves the MOVI2R and B *before* checkedEntry, and just branch backwards there.
// Speedup seems to be zero unfortunately but I guess it may vary from device to device.
// Not intrusive so keeping it around here to experiment with, may help on ARMv6 due to
// large/slow construction of 32-bit immediates?
JumpTarget backJump = GetCodePtr();
gpr.SetRegImm(R0, js.blockStart);
B((const void *)outerLoopPCInR0);
b->checkedEntry = GetCodePtr();
SetCC(CC_LT);
B(backJump);
SetCC(CC_AL);
} else if (jo.useForwardJump) {
b->checkedEntry = GetCodePtr();
SetCC(CC_LT);
bail = B();
SetCC(CC_AL);
} else {
b->checkedEntry = GetCodePtr();
SetCC(CC_LT);
gpr.SetRegImm(R0, js.blockStart);
B((const void *)outerLoopPCInR0);
SetCC(CC_AL);
}*/
// TODO ARM64
b->normalEntry = GetCodePtr();
// TODO: this needs work
MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address);
gpr.Start(analysis);
fpr.Start(analysis);
int partialFlushOffset = 0;
js.numInstructions = 0;
while (js.compiling)
{
gpr.SetCompilerPC(js.compilerPC); // Let it know for log messages
MIPSOpcode inst = Memory::Read_Opcode_JIT(js.compilerPC);
//MIPSInfo info = MIPSGetInfo(inst);
//if (info & IS_VFPU) {
// logBlocks = 1;
//}
js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);
MIPSCompileOp(inst);
js.compilerPC += 4;
js.numInstructions++;
// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
if (GetSpaceLeft() < 0x800 || js.numInstructions >= JitBlockCache::MAX_BLOCK_INSTRUCTIONS)
{
FlushAll();
WriteExit(js.compilerPC, js.nextExit++);
js.compiling = false;
}
}
if (jo.useForwardJump) {
//SetJumpTarget(bail);
//gpr.SetRegImm(R0, js.blockStart);
//B((const void *)outerLoopPCInR0);
}
char temp[256];
if (logBlocks > 0 && dontLogBlocks == 0) {
INFO_LOG(JIT, "=============== mips ===============");
for (u32 cpc = em_address; cpc != js.compilerPC + 4; cpc += 4) {
MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, true);
INFO_LOG(JIT, "M: %08x %s", cpc, temp);
}
}
b->codeSize = GetCodePtr() - b->normalEntry;
if (logBlocks > 0 && dontLogBlocks == 0) {
INFO_LOG(JIT, "=============== ARM64 ===============");
DisassembleArm64Print(b->normalEntry, GetCodePtr() - b->normalEntry);
}
if (logBlocks > 0)
logBlocks--;
if (dontLogBlocks > 0)
dontLogBlocks--;
// Don't forget to zap the newly written instructions in the instruction cache!
FlushIcache();
if (js.lastContinuedPC == 0)
b->originalSize = js.numInstructions;
else
{
// We continued at least once. Add the last proxy and set the originalSize correctly.
blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
b->originalSize = js.initialBlockSize;
}
return b->normalEntry;
}
void Arm64Jit::AddContinuedBlock(u32 dest)
{
// The first block is the root block. When we continue, we create proxy blocks after that.
if (js.lastContinuedPC == 0)
js.initialBlockSize = js.numInstructions;
else
blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
js.lastContinuedPC = dest;
}
bool Arm64Jit::DescribeCodePtr(const u8 *ptr, std::string &name)
{
// TODO: Not used by anything yet.
return false;
}
void Arm64Jit::Comp_RunBlock(MIPSOpcode op)
{
// This shouldn't be necessary, the dispatcher should catch us before we get here.
ERROR_LOG(JIT, "Comp_RunBlock should never be reached!");
}
bool Arm64Jit::ReplaceJalTo(u32 dest) {
return false;
}
void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op)
{
// TODO ARM64
}
void Arm64Jit::Comp_Generic(MIPSOpcode op)
{
FlushAll();
MIPSInterpretFunc func = MIPSGetInterpretFunc(op);
if (func) {
SaveDowncount();
// TODO: Perhaps keep the rounding mode for interp?
RestoreRoundingMode();
// gpr.SetRegImm(SCRATCHREG1, js.compilerPC);
// MovToPC(SCRATCHREG1);
//gpr.SetRegImm(R0, op.encoding);
//QuickCallFunction(R1, (void *)func);
// TODO ARM64
ApplyRoundingMode();
RestoreDowncount();
}
const MIPSInfo info = MIPSGetInfo(op);
if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0)
{
// If it does eat them, it'll happen in MIPSCompileOp().
if ((info & OUT_EAT_PREFIX) == 0)
js.PrefixUnknown();
}
}
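// The commented lines above sketch the interpreter fallback: write the current
// PC back, load the opcode into the first argument register, then call func.
// An ARM64 shape of that call (illustrative assumption, not this commit's code):
//   gpr.SetRegImm(SCRATCHREG1, js.compilerPC);
//   MovToPC(SCRATCHREG1);
//   gpr.SetRegImm(W0, op.encoding);
//   QuickCallFunction(SCRATCHREG2, (const void *)func);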
void Arm64Jit::MovFromPC(ARM64Reg r) {
LDR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}
void Arm64Jit::MovToPC(ARM64Reg r) {
STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}
void Arm64Jit::SaveDowncount() {
if (jo.downcountInRegister)
STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
}
void Arm64Jit::RestoreDowncount() {
if (jo.downcountInRegister)
LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
}
void Arm64Jit::WriteDownCount(int offset) {
// TODO ARM64
}
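// A possible shape for this, mirroring WriteDownCountR below (an illustrative
// sketch, not the commit's code; assumes SCRATCHREG1 is free at every call site):
//   gpr.SetRegImm(SCRATCHREG1, js.downcountAmount + offset);
//   WriteDownCountR(SCRATCHREG1);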
// Abuses X2
void Arm64Jit::WriteDownCountR(ARM64Reg reg) {
if (jo.downcountInRegister) {
SUBS(DOWNCOUNTREG, DOWNCOUNTREG, reg);
} else {
LDR(INDEX_UNSIGNED, X2, CTXREG, offsetof(MIPSState, downcount));
SUBS(X2, X2, reg);
STR(INDEX_UNSIGNED, X2, CTXREG, offsetof(MIPSState, downcount));
}
}
void Arm64Jit::RestoreRoundingMode(bool force) {
// TODO ARM64
}
void Arm64Jit::ApplyRoundingMode(bool force) {
// TODO ARM64
}
void Arm64Jit::UpdateRoundingMode() {
// TODO ARM64
}
// IDEA - could have a WriteDualExit that takes two destinations and two condition flags,
// and just have conditionals that set the PC "twice". This only works when we fall back to the dispatcher
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
// I don't think this gives us that much benefit.
void Arm64Jit::WriteExit(u32 destination, int exit_num)
{
WriteDownCount();
// If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock;
b->exitAddress[exit_num] = destination;
b->exitPtrs[exit_num] = GetWritableCodePtr();
// Link opportunity!
int block = blocks.GetBlockNumberFromStartAddress(destination);
if (block >= 0 && jo.enableBlocklink) {
// It exists! Joy of joy!
B(blocks.GetBlock(block)->checkedEntry);
b->linkStatus[exit_num] = true;
} else {
gpr.SetRegImm(X0, destination);
B((const void *)dispatcherPCInR0);
}
}
void Arm64Jit::WriteExitDestInR(ARM64Reg Reg)
{
MovToPC(Reg);
WriteDownCount();
// TODO: shouldn't need an indirect branch here...
B((const void *)dispatcher);
}
void Arm64Jit::WriteSyscallExit()
{
WriteDownCount();
B((const void *)dispatcherCheckCoreState);
}
void Arm64Jit::Comp_DoNothing(MIPSOpcode op) { }
#define _RS ((op>>21) & 0x1F)
#define _RT ((op>>16) & 0x1F)
#define _RD ((op>>11) & 0x1F)
#define _FS ((op>>11) & 0x1F)
#define _FT ((op>>16) & 0x1F)
#define _FD ((op>>6) & 0x1F)
#define _POS ((op>>6) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)
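// Example: "addu $v0, $a0, $a1" encodes as 0x00851021, from which these pull
// _RS = 4 ($a0), _RT = 5 ($a1) and _RD = 2 ($v0).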
//memory regions:
//
// 08-0A
// 48-4A
// 04-05
// 44-45
// mov eax, addrreg
// shr eax, 28
// mov eax, [table+eax]
// mov dreg, [eax+offreg]
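// In C++ terms, the pseudo-code above is a 16-entry pointer table indexed by
// the top nibble of the address (illustrative only; the table name is assumed):
//   static u8 *memTable[16];
//   inline u8 *TranslateAddr(u32 addr) {
//     return memTable[addr >> 28] + (addr & 0x0FFFFFFF);
//   }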
}
Core/MIPS/ARM64/Arm64Jit.h
@ -0,0 +1,291 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "Common/CPUDetect.h"
#include "Common/ArmCommon.h"
#include "Common/Arm64Emitter.h"
#include "Core/MIPS/JitCommon/JitState.h"
#include "Core/MIPS/JitCommon/JitBlockCache.h"
#include "Core/MIPS/ARM64/Arm64Asm.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"
#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h"
#include "Core/MIPS/MIPSVFPUUtils.h"
#ifndef offsetof
#include "stddef.h"
#endif
namespace MIPSComp
{
struct Arm64JitOptions
{
Arm64JitOptions() {
enableBlocklink = true;
downcountInRegister = true;
useBackJump = false;
useForwardJump = false;
cachePointers = true;
immBranches = false;
continueBranches = false;
continueJumps = false;
continueMaxInstructions = 300;
useNEONVFPU = false; // true
if (!cpu_info.bNEON)
useNEONVFPU = false;
}
bool useNEONVFPU;
bool enableBlocklink;
bool downcountInRegister;
bool useBackJump;
bool useForwardJump;
bool cachePointers;
bool immBranches;
bool continueBranches;
bool continueJumps;
int continueMaxInstructions;
};
class Arm64Jit : public Arm64Gen::ARM64CodeBlock
{
public:
Arm64Jit(MIPSState *mips);
virtual ~Arm64Jit();
void DoState(PointerWrap &p);
static void DoDummyState(PointerWrap &p);
// Compiled ops should ignore delay slots
// the compiler will take care of them by itself
// OR NOT
void Comp_Generic(MIPSOpcode op);
void RunLoopUntil(u64 globalticks);
void Compile(u32 em_address); // Compiles a block at current MIPS PC
const u8 *DoJit(u32 em_address, JitBlock *b);
bool DescribeCodePtr(const u8 *ptr, std::string &name);
void CompileDelaySlot(int flags);
void EatInstruction(MIPSOpcode op);
void AddContinuedBlock(u32 dest);
void Comp_RunBlock(MIPSOpcode op);
void Comp_ReplacementFunc(MIPSOpcode op);
// Ops
void Comp_ITypeMem(MIPSOpcode op);
void Comp_Cache(MIPSOpcode op);
void Comp_RelBranch(MIPSOpcode op);
void Comp_RelBranchRI(MIPSOpcode op);
void Comp_FPUBranch(MIPSOpcode op);
void Comp_FPULS(MIPSOpcode op);
void Comp_FPUComp(MIPSOpcode op);
void Comp_Jump(MIPSOpcode op);
void Comp_JumpReg(MIPSOpcode op);
void Comp_Syscall(MIPSOpcode op);
void Comp_Break(MIPSOpcode op);
void Comp_IType(MIPSOpcode op);
void Comp_RType2(MIPSOpcode op);
void Comp_RType3(MIPSOpcode op);
void Comp_ShiftType(MIPSOpcode op);
void Comp_Allegrex(MIPSOpcode op);
void Comp_Allegrex2(MIPSOpcode op);
void Comp_VBranch(MIPSOpcode op);
void Comp_MulDivType(MIPSOpcode op);
void Comp_Special3(MIPSOpcode op);
void Comp_FPU3op(MIPSOpcode op);
void Comp_FPU2op(MIPSOpcode op);
void Comp_mxc1(MIPSOpcode op);
void Comp_DoNothing(MIPSOpcode op);
void Comp_SV(MIPSOpcode op);
void Comp_SVQ(MIPSOpcode op);
void Comp_VPFX(MIPSOpcode op);
void Comp_VVectorInit(MIPSOpcode op);
void Comp_VMatrixInit(MIPSOpcode op);
void Comp_VDot(MIPSOpcode op);
void Comp_VecDo3(MIPSOpcode op);
void Comp_VV2Op(MIPSOpcode op);
void Comp_Mftv(MIPSOpcode op);
void Comp_Vmfvc(MIPSOpcode op);
void Comp_Vmtvc(MIPSOpcode op);
void Comp_Vmmov(MIPSOpcode op);
void Comp_VScl(MIPSOpcode op);
void Comp_Vmmul(MIPSOpcode op);
void Comp_Vmscl(MIPSOpcode op);
void Comp_Vtfm(MIPSOpcode op);
void Comp_VHdp(MIPSOpcode op);
void Comp_VCrs(MIPSOpcode op);
void Comp_VDet(MIPSOpcode op);
void Comp_Vi2x(MIPSOpcode op);
void Comp_Vx2i(MIPSOpcode op);
void Comp_Vf2i(MIPSOpcode op);
void Comp_Vi2f(MIPSOpcode op);
void Comp_Vh2f(MIPSOpcode op);
void Comp_Vcst(MIPSOpcode op);
void Comp_Vhoriz(MIPSOpcode op);
void Comp_VRot(MIPSOpcode op);
void Comp_VIdt(MIPSOpcode op);
void Comp_Vcmp(MIPSOpcode op);
void Comp_Vcmov(MIPSOpcode op);
void Comp_Viim(MIPSOpcode op);
void Comp_Vfim(MIPSOpcode op);
void Comp_VCrossQuat(MIPSOpcode op);
void Comp_Vsgn(MIPSOpcode op);
void Comp_Vocp(MIPSOpcode op);
void Comp_ColorConv(MIPSOpcode op);
void Comp_Vbfy(MIPSOpcode op);
// Non-NEON: VPFX
// NEON implementations of the VFPU ops.
void CompNEON_SV(MIPSOpcode op);
void CompNEON_SVQ(MIPSOpcode op);
void CompNEON_VVectorInit(MIPSOpcode op);
void CompNEON_VMatrixInit(MIPSOpcode op);
void CompNEON_VDot(MIPSOpcode op);
void CompNEON_VecDo3(MIPSOpcode op);
void CompNEON_VV2Op(MIPSOpcode op);
void CompNEON_Mftv(MIPSOpcode op);
void CompNEON_Vmfvc(MIPSOpcode op);
void CompNEON_Vmtvc(MIPSOpcode op);
void CompNEON_Vmmov(MIPSOpcode op);
void CompNEON_VScl(MIPSOpcode op);
void CompNEON_Vmmul(MIPSOpcode op);
void CompNEON_Vmscl(MIPSOpcode op);
void CompNEON_Vtfm(MIPSOpcode op);
void CompNEON_VHdp(MIPSOpcode op);
void CompNEON_VCrs(MIPSOpcode op);
void CompNEON_VDet(MIPSOpcode op);
void CompNEON_Vi2x(MIPSOpcode op);
void CompNEON_Vx2i(MIPSOpcode op);
void CompNEON_Vf2i(MIPSOpcode op);
void CompNEON_Vi2f(MIPSOpcode op);
void CompNEON_Vh2f(MIPSOpcode op);
void CompNEON_Vcst(MIPSOpcode op);
void CompNEON_Vhoriz(MIPSOpcode op);
void CompNEON_VRot(MIPSOpcode op);
void CompNEON_VIdt(MIPSOpcode op);
void CompNEON_Vcmp(MIPSOpcode op);
void CompNEON_Vcmov(MIPSOpcode op);
void CompNEON_Viim(MIPSOpcode op);
void CompNEON_Vfim(MIPSOpcode op);
void CompNEON_VCrossQuat(MIPSOpcode op);
void CompNEON_Vsgn(MIPSOpcode op);
void CompNEON_Vocp(MIPSOpcode op);
void CompNEON_ColorConv(MIPSOpcode op);
void CompNEON_Vbfy(MIPSOpcode op);
int Replace_fabsf();
JitBlockCache *GetBlockCache() { return &blocks; }
void ClearCache();
void InvalidateCache();
void InvalidateCacheAt(u32 em_address, int length = 4);
void EatPrefix() { js.EatPrefix(); }
private:
void GenerateFixedCode();
void FlushAll();
void FlushPrefixV();
void WriteDownCount(int offset = 0);
void WriteDownCountR(Arm64Gen::ARM64Reg reg);
void RestoreRoundingMode(bool force = false);
void ApplyRoundingMode(bool force = false);
void UpdateRoundingMode();
void MovFromPC(Arm64Gen::ARM64Reg r);
void MovToPC(Arm64Gen::ARM64Reg r);
bool ReplaceJalTo(u32 dest);
void SaveDowncount();
void RestoreDowncount();
void WriteExit(u32 destination, int exit_num);
void WriteExitDestInR(Arm64Gen::ARM64Reg Reg);
void WriteSyscallExit();
// Utility compilation functions
void BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely);
void BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely);
void BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool likely);
void BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely);
// Utilities to reduce duplicated code
void CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, void (ARM64XEmitter::*arith)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg src, Arm64Gen::ARM64Reg op2), bool (ARM64XEmitter::*tryArithI2R)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg src, u32 val), u32 (*eval)(u32 a, u32 b));
void CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, void (ARM64XEmitter::*arithOp2)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rn), bool (ARM64XEmitter::*tryArithI2R)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg rm, u32 val), u32 (*eval)(u32 a, u32 b), bool symmetric = false);
void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz);
void ApplyPrefixD(const u8 *vregs, VectorSize sz);
void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) {
_assert_(js.prefixSFlag & JitState::PREFIX_KNOWN);
GetVectorRegs(regs, sz, vectorReg);
ApplyPrefixST(regs, js.prefixS, sz);
}
void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) {
_assert_(js.prefixTFlag & JitState::PREFIX_KNOWN);
GetVectorRegs(regs, sz, vectorReg);
ApplyPrefixST(regs, js.prefixT, sz);
}
void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg);
// Utils
void SetR0ToEffectiveAddress(MIPSGPReg rs, s16 offset);
void SetCCAndR0ForSafeAddress(MIPSGPReg rs, s16 offset, Arm64Gen::ARM64Reg tempReg, bool reverse = false);
void Comp_ITypeMemLR(MIPSOpcode op, bool load);
JitBlockCache blocks;
Arm64JitOptions jo;
JitState js;
Arm64RegCache gpr;
ArmRegCacheFPU fpr;
MIPSState *mips_;
int dontLogBlocks;
int logBlocks;
public:
// Code pointers
const u8 *enterCode;
const u8 *outerLoop;
const u8 *outerLoopPCInR0;
const u8 *dispatcherCheckCoreState;
const u8 *dispatcherPCInR0;
const u8 *dispatcher;
const u8 *dispatcherNoCheck;
const u8 *breakpointBailout;
};
} // namespace MIPSComp
@ -0,0 +1,503 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "Core/MemMap.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/Reporting.h"
#include "Common/Arm64Emitter.h"
#ifndef offsetof
#include "stddef.h"
#endif
using namespace Arm64Gen;
using namespace Arm64JitConstants;
Arm64RegCache::Arm64RegCache(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo) : mips_(mips), js_(js), jo_(jo) {
}
void Arm64RegCache::Init(ARM64XEmitter *emitter) {
emit_ = emitter;
}
void Arm64RegCache::Start(MIPSAnalyst::AnalysisResults &stats) {
for (int i = 0; i < NUM_ARMREG; i++) {
ar[i].mipsReg = MIPS_REG_INVALID;
ar[i].isDirty = false;
}
for (int i = 0; i < NUM_MIPSREG; i++) {
mr[i].loc = ML_MEM;
mr[i].reg = INVALID_REG;
mr[i].imm = -1;
mr[i].spillLock = false;
}
}
const ARM64Reg *Arm64RegCache::GetMIPSAllocationOrder(int &count) {
// Note that R0 is reserved as scratch for now.
// R12 is also potentially usable.
// R4-R7 are registers we could use for static allocation or downcount.
// R8 is used to preserve flags in nasty branches.
// R9 and upwards are reserved for jit basics.
// R14 (LR) is used as a scratch reg (overwritten on calls/return.)
// TODO ARM64
if (jo_->downcountInRegister) {
static const ARM64Reg allocationOrder[] = {
X1, X2, X3, X4, X5, X6, X12,
};
count = sizeof(allocationOrder) / sizeof(const ARM64Reg);
return allocationOrder;
} else {
static const ARM64Reg allocationOrder2[] = {
X1, X2, X3, X4, X5, X6, X7, X12,
};
count = sizeof(allocationOrder2) / sizeof(const ARM64Reg);
return allocationOrder2;
}
}
void Arm64RegCache::FlushBeforeCall() {
// On AArch64 only X19-X28 are callee-saved, so everything we allocate from
// (X1-X7, X12) is caller-saved and needs flushing.
FlushArmReg(X1);
FlushArmReg(X2);
FlushArmReg(X3);
FlushArmReg(X4);
FlushArmReg(X5);
FlushArmReg(X6);
FlushArmReg(X7);
FlushArmReg(X12);
}
bool Arm64RegCache::IsMapped(MIPSGPReg mipsReg) {
return mr[mipsReg].loc == ML_ARMREG;
}
void Arm64RegCache::SetRegImm(ARM64Reg reg, u32 imm) {
// On ARM64, at least Cortex A57, good old MOVT/MOVW (MOVK in 64-bit) is really fast.
emit_->MOVI2R(reg, imm);
}
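// For reference, MOVI2R is assumed to expand a 32-bit constant into at most
// two instructions here, e.g. for 0x12345678 (sketch of expected output):
//   MOVZ w0, #0x5678
//   MOVK w0, #0x1234, LSL #16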
void Arm64RegCache::MapRegTo(ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags) {
ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false;
if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) {
if (mipsReg == MIPS_REG_ZERO) {
// If we get a request to load the zero register, at least we won't spend
// time on a memory access...
// TODO: EOR?
emit_->MOVI2R(reg, 0);
// This way, if we SetImm() it, we'll keep it.
mr[mipsReg].loc = ML_ARMREG_IMM;
mr[mipsReg].imm = 0;
} else {
switch (mr[mipsReg].loc) {
case ML_MEM:
emit_->LDR(INDEX_UNSIGNED, reg, CTXREG, GetMipsRegOffset(mipsReg));
mr[mipsReg].loc = ML_ARMREG;
break;
case ML_IMM:
SetRegImm(reg, mr[mipsReg].imm);
ar[reg].isDirty = true; // IMM is always dirty.
// If we are mapping dirty, it means we're gonna overwrite.
// So the imm value is no longer valid.
if (mapFlags & MAP_DIRTY)
mr[mipsReg].loc = ML_ARMREG;
else
mr[mipsReg].loc = ML_ARMREG_IMM;
break;
default:
mr[mipsReg].loc = ML_ARMREG;
break;
}
}
} else {
if (mipsReg == MIPS_REG_ZERO) {
// This way, if we SetImm() it, we'll keep it.
mr[mipsReg].loc = ML_ARMREG_IMM;
mr[mipsReg].imm = 0;
} else {
mr[mipsReg].loc = ML_ARMREG;
}
}
ar[reg].mipsReg = mipsReg;
mr[mipsReg].reg = reg;
}
ARM64Reg Arm64RegCache::FindBestToSpill(bool unusedOnly, bool *clobbered) {
int allocCount;
const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount);
static const int UNUSED_LOOKAHEAD_OPS = 30;
*clobbered = false;
for (int i = 0; i < allocCount; i++) {
ARM64Reg reg = allocOrder[i];
if (ar[reg].mipsReg != MIPS_REG_INVALID && mr[ar[reg].mipsReg].spillLock)
continue;
// Awesome, a clobbered reg. Let's use it.
if (MIPSAnalyst::IsRegisterClobbered(ar[reg].mipsReg, compilerPC_, UNUSED_LOOKAHEAD_OPS)) {
*clobbered = true;
return reg;
}
// Not awesome. A used reg. Let's try to avoid spilling.
if (unusedOnly && MIPSAnalyst::IsRegisterUsed(ar[reg].mipsReg, compilerPC_, UNUSED_LOOKAHEAD_OPS)) {
continue;
}
return reg;
}
return INVALID_REG;
}
// TODO: Somewhat smarter spilling - currently simply spills the first available, should do
// round robin or FIFO or something.
ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) {
// Let's see if it's already mapped. If so we just need to update the dirty flag.
// We don't need to check for ML_NOINIT because we assume that anyone who maps
// with that flag immediately writes a "known" value to the register.
if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) {
ARM64Reg armReg = mr[mipsReg].reg;
if (ar[armReg].mipsReg != mipsReg) {
ERROR_LOG_REPORT(JIT, "Register mapping out of sync! %i", mipsReg);
}
if (mapFlags & MAP_DIRTY) {
// Mapping dirty means the old imm value is invalid.
mr[mipsReg].loc = ML_ARMREG;
ar[armReg].isDirty = true;
}
return (ARM64Reg)mr[mipsReg].reg;
}
// Okay, not mapped, so we need to allocate an ARM register.
int allocCount;
const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount);
ARM64Reg desiredReg = INVALID_REG;
// Try to "statically" allocate the first 6 regs after v0.
int desiredOrder = allocCount - (6 - (mipsReg - (int)MIPS_REG_V0));
if (desiredOrder >= 0 && desiredOrder < allocCount)
desiredReg = allocOrder[desiredOrder];
if (desiredReg != INVALID_REG) {
if (ar[desiredReg].mipsReg == MIPS_REG_INVALID) {
// With this placement, we may be able to optimize flush.
MapRegTo(desiredReg, mipsReg, mapFlags);
return desiredReg;
}
}
allocate:
for (int i = 0; i < allocCount; i++) {
ARM64Reg reg = allocOrder[i];
if (ar[reg].mipsReg == MIPS_REG_INVALID) {
// That means it's free. Grab it, and load the value into it (if requested).
MapRegTo(reg, mipsReg, mapFlags);
return reg;
}
}
// Still nothing. Let's spill a reg and goto 10.
// TODO: Use age or something to choose which register to spill?
// TODO: Spill dirty regs first? or opposite?
bool clobbered;
ARM64Reg bestToSpill = FindBestToSpill(true, &clobbered);
if (bestToSpill == INVALID_REG) {
bestToSpill = FindBestToSpill(false, &clobbered);
}
if (bestToSpill != INVALID_REG) {
// ERROR_LOG(JIT, "Out of registers at PC %08x - spills register %i.", mips_->pc, bestToSpill);
// TODO: Broken somehow in Dante's Inferno, but most games work. Bad flags in MIPSTables somewhere?
if (clobbered) {
DiscardR(ar[bestToSpill].mipsReg);
} else {
FlushArmReg(bestToSpill);
}
goto allocate;
}
// Uh oh, we have all them spilllocked....
ERROR_LOG_REPORT(JIT, "Out of spillable registers at PC %08x!!!", mips_->pc);
return INVALID_REG;
}
void Arm64RegCache::MapInIn(MIPSGPReg rd, MIPSGPReg rs) {
SpillLock(rd, rs);
MapReg(rd);
MapReg(rs);
ReleaseSpillLocks();
}
void Arm64RegCache::MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs, bool avoidLoad) {
SpillLock(rd, rs);
bool load = !avoidLoad || rd == rs;
MapReg(rd, load ? MAP_DIRTY : MAP_NOINIT);
MapReg(rs);
ReleaseSpillLocks();
}
void Arm64RegCache::MapDirtyInIn(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad) {
SpillLock(rd, rs, rt);
bool load = !avoidLoad || (rd == rs || rd == rt);
MapReg(rd, load ? MAP_DIRTY : MAP_NOINIT);
MapReg(rt);
MapReg(rs);
ReleaseSpillLocks();
}
void Arm64RegCache::MapDirtyDirtyIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, bool avoidLoad) {
SpillLock(rd1, rd2, rs);
bool load1 = !avoidLoad || rd1 == rs;
bool load2 = !avoidLoad || rd2 == rs;
MapReg(rd1, load1 ? MAP_DIRTY : MAP_NOINIT);
MapReg(rd2, load2 ? MAP_DIRTY : MAP_NOINIT);
MapReg(rs);
ReleaseSpillLocks();
}
void Arm64RegCache::MapDirtyDirtyInIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad) {
SpillLock(rd1, rd2, rs, rt);
bool load1 = !avoidLoad || (rd1 == rs || rd1 == rt);
bool load2 = !avoidLoad || (rd2 == rs || rd2 == rt);
MapReg(rd1, load1 ? MAP_DIRTY : MAP_NOINIT);
MapReg(rd2, load2 ? MAP_DIRTY : MAP_NOINIT);
MapReg(rt);
MapReg(rs);
ReleaseSpillLocks();
}
void Arm64RegCache::FlushArmReg(ARM64Reg r) {
if (ar[r].mipsReg == MIPS_REG_INVALID) {
// Nothing to do, reg not mapped.
if (ar[r].isDirty) {
ERROR_LOG_REPORT(JIT, "Dirty but no mipsreg?");
}
return;
}
auto &mreg = mr[ar[r].mipsReg];
if (mreg.loc == ML_ARMREG_IMM) {
// We know its immediate value, no need to STR now.
mreg.loc = ML_IMM;
mreg.reg = INVALID_REG;
} else {
if (ar[r].isDirty && mreg.loc == ML_ARMREG)
emit_->STR(INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
mreg.loc = ML_MEM;
mreg.reg = INVALID_REG;
mreg.imm = 0;
}
ar[r].isDirty = false;
ar[r].mipsReg = MIPS_REG_INVALID;
}
void Arm64RegCache::DiscardR(MIPSGPReg mipsReg) {
const RegMIPSLoc prevLoc = mr[mipsReg].loc;
if (prevLoc == ML_ARMREG || prevLoc == ML_ARMREG_IMM) {
ARM64Reg armReg = mr[mipsReg].reg;
ar[armReg].isDirty = false;
ar[armReg].mipsReg = MIPS_REG_INVALID;
mr[mipsReg].reg = INVALID_REG;
mr[mipsReg].loc = ML_MEM;
mr[mipsReg].imm = 0;
}
}
void Arm64RegCache::FlushR(MIPSGPReg r) {
switch (mr[r].loc) {
case ML_IMM:
// IMM is always "dirty".
if (r != MIPS_REG_ZERO) {
SetRegImm(SCRATCHREG1, mr[r].imm);
emit_->STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, GetMipsRegOffset(r));
}
break;
case ML_ARMREG:
case ML_ARMREG_IMM:
if (mr[r].reg == INVALID_REG) {
ERROR_LOG_REPORT(JIT, "FlushR: MipsReg %d had bad ArmReg", r);
}
if (ar[mr[r].reg].isDirty) {
if (r != MIPS_REG_ZERO) {
emit_->STR(INDEX_UNSIGNED, (ARM64Reg)mr[r].reg, CTXREG, GetMipsRegOffset(r));
}
ar[mr[r].reg].isDirty = false;
}
ar[mr[r].reg].mipsReg = MIPS_REG_INVALID;
break;
case ML_MEM:
// Already there, nothing to do.
break;
default:
ERROR_LOG_REPORT(JIT, "FlushR: MipsReg %d with invalid location %d", r, mr[r].loc);
break;
}
mr[r].loc = ML_MEM;
mr[r].reg = INVALID_REG;
mr[r].imm = 0;
}
// Note: if allowFlushImm is set, this also flushes imms while checking the sequence.
int Arm64RegCache::FlushGetSequential(MIPSGPReg startMipsReg, bool allowFlushImm) {
// Only start a sequence on a dirty armreg.
// TODO: Could also start with an imm?
const auto &startMipsInfo = mr[startMipsReg];
if ((startMipsInfo.loc != ML_ARMREG && startMipsInfo.loc != ML_ARMREG_IMM) || startMipsInfo.reg == INVALID_REG || !ar[startMipsInfo.reg].isDirty) {
return 0;
}
int allocCount;
const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount);
int c = 1;
// The sequence needs to have ascending arm regs for STMIA.
int lastArmReg = startMipsInfo.reg;
// Can't use HI/LO, only regs in the main r[] array.
for (int r = (int)startMipsReg + 1; r < 32; ++r) {
if ((mr[r].loc == ML_ARMREG || mr[r].loc == ML_ARMREG_IMM) && mr[r].reg != INVALID_REG) {
if ((int)mr[r].reg > lastArmReg && ar[mr[r].reg].isDirty) {
++c;
lastArmReg = mr[r].reg;
continue;
}
// If we're not allowed to flush imms, don't even consider them.
} else if (allowFlushImm && mr[r].loc == ML_IMM && MIPSGPReg(r) != MIPS_REG_ZERO) {
// Okay, let's search for a free (and later) reg to put this imm into.
bool found = false;
for (int j = 0; j < allocCount; ++j) {
ARM64Reg immReg = allocOrder[j];
if ((int)immReg > lastArmReg && ar[immReg].mipsReg == MIPS_REG_INVALID) {
++c;
lastArmReg = immReg;
// Even if the sequence fails, we'll need it in a reg anyway, might as well be this one.
MapRegTo(immReg, MIPSGPReg(r), 0);
found = true;
break;
}
}
if (found) {
continue;
}
}
// If it didn't hit a continue above, the chain is over.
// There's no way to skip a slot with STMIA.
break;
}
return c;
}
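// Note: STMIA is an ARMv7 idiom; on ARM64 the "flush in pairs" TODO below
// would use STP on ascending pairs instead (an illustrative sketch):
//   STP Wa, Wb, [CTXREG, #offset]   // two sequential MIPS regs in one store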
void Arm64RegCache::FlushAll() {
// TODO: Flush in pairs
for (int i = 0; i < NUM_MIPSREG; i++) {
MIPSGPReg mipsReg = MIPSGPReg(i);
FlushR(mipsReg);
}
// Sanity check
for (int i = 0; i < NUM_ARMREG; i++) {
if (ar[i].mipsReg != MIPS_REG_INVALID) {
ERROR_LOG_REPORT(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
}
}
}
void Arm64RegCache::SetImm(MIPSGPReg r, u32 immVal) {
if (r == MIPS_REG_ZERO && immVal != 0)
ERROR_LOG(JIT, "Trying to set immediate %08x to r0", immVal);
if (mr[r].loc == ML_ARMREG_IMM && mr[r].imm == immVal) {
// Already have that value, let's keep it in the reg.
return;
}
// Zap existing value if cached in a reg
if (mr[r].reg != INVALID_REG) {
ar[mr[r].reg].mipsReg = MIPS_REG_INVALID;
ar[mr[r].reg].isDirty = false;
}
mr[r].loc = ML_IMM;
mr[r].imm = immVal;
mr[r].reg = INVALID_REG;
}
bool Arm64RegCache::IsImm(MIPSGPReg r) const {
if (r == MIPS_REG_ZERO) return true;
return mr[r].loc == ML_IMM || mr[r].loc == ML_ARMREG_IMM;
}
u32 Arm64RegCache::GetImm(MIPSGPReg r) const {
if (r == MIPS_REG_ZERO) return 0;
if (mr[r].loc != ML_IMM && mr[r].loc != ML_ARMREG_IMM) {
ERROR_LOG_REPORT(JIT, "Trying to get imm from non-imm register %i", r);
}
return mr[r].imm;
}
int Arm64RegCache::GetMipsRegOffset(MIPSGPReg r) {
if (r < 32)
return r * 4;
switch (r) {
case MIPS_REG_HI:
return offsetof(MIPSState, hi);
case MIPS_REG_LO:
return offsetof(MIPSState, lo);
case MIPS_REG_FPCOND:
return offsetof(MIPSState, fpcond);
case MIPS_REG_VFPUCC:
return offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_CC]);
default:
ERROR_LOG_REPORT(JIT, "bad mips register %i", r);
return 0; // or what?
}
}
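// Worked example: GPR $a0 (register 4) resolves to offset 16, assuming CTXREG
// points at the r[] array at the start of the context; HI, LO, fpcond and
// VFPU_CC fall outside r[] and go through offsetof above.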
void Arm64RegCache::SpillLock(MIPSGPReg r1, MIPSGPReg r2, MIPSGPReg r3, MIPSGPReg r4) {
mr[r1].spillLock = true;
if (r2 != MIPS_REG_INVALID) mr[r2].spillLock = true;
if (r3 != MIPS_REG_INVALID) mr[r3].spillLock = true;
if (r4 != MIPS_REG_INVALID) mr[r4].spillLock = true;
}
void Arm64RegCache::ReleaseSpillLocks() {
for (int i = 0; i < NUM_MIPSREG; i++) {
mr[i].spillLock = false;
}
}
void Arm64RegCache::ReleaseSpillLock(MIPSGPReg reg) {
mr[reg].spillLock = false;
}
ARM64Reg Arm64RegCache::R(MIPSGPReg mipsReg) {
if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) {
return (ARM64Reg)mr[mipsReg].reg;
} else {
ERROR_LOG_REPORT(JIT, "Reg %i not in arm reg. compilerPC = %08x", mipsReg, compilerPC_);
return INVALID_REG; // BAAAD
}
}
@ -0,0 +1,153 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Common/Arm64Emitter.h"
namespace Arm64JitConstants {
// Bogus mappings, TODO ARM64
const Arm64Gen::ARM64Reg JITBASEREG = Arm64Gen::W0;
const Arm64Gen::ARM64Reg CTXREG = Arm64Gen::X1;
const Arm64Gen::ARM64Reg MEMBASEREG = Arm64Gen::X2;
const Arm64Gen::ARM64Reg SCRATCHREG1 = Arm64Gen::W3;
const Arm64Gen::ARM64Reg SCRATCHREG2 = Arm64Gen::W4;
const Arm64Gen::ARM64Reg DOWNCOUNTREG = Arm64Gen::W5;
enum {
TOTAL_MAPPABLE_MIPSREGS = 36,
};
enum RegMIPSLoc {
ML_IMM,
ML_ARMREG,
// In an arm reg, but also has a known immediate value.
ML_ARMREG_IMM,
ML_MEM,
};
// These collide with something on Blackberry.
#undef MAP_NOINIT
#undef MAP_READ
// Initing is the default so the flag is reversed.
enum {
MAP_DIRTY = 1,
MAP_NOINIT = 2 | MAP_DIRTY,
};
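// Because MAP_NOINIT includes the MAP_DIRTY bit, callers test "dirty" with a
// plain AND but must compare against the full mask for "no init", e.g.:
//   if (flags & MAP_DIRTY) ...                     // dirty or no-init
//   if ((flags & MAP_NOINIT) == MAP_NOINIT) ...    // specifically no-init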
}
// R1 to R6: mapped MIPS regs
// R8 = flags (maybe we could do better here?)
// R9 = code pointers
// R10 = MIPS context
// R11 = base pointer
// R14 = scratch (actually LR)
typedef int MIPSReg;
struct RegARM {
MIPSGPReg mipsReg; // if -1, no mipsreg attached.
bool isDirty; // Should the register be written back?
};
struct RegMIPS {
// Where is this MIPS register?
Arm64JitConstants::RegMIPSLoc loc;
// Data (only one of these is used, depending on loc. Could make a union).
u32 imm;
Arm64Gen::ARM64Reg reg; // reg index
bool spillLock; // if true, this register cannot be spilled.
// If loc == ML_MEM, it's back in its location in the CPU context struct.
};
namespace MIPSComp {
struct Arm64JitOptions;
struct JitState;
}
class Arm64RegCache {
public:
Arm64RegCache(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo);
~Arm64RegCache() {}
void Init(Arm64Gen::ARM64XEmitter *emitter);
void Start(MIPSAnalyst::AnalysisResults &stats);
// Protect the arm register containing a MIPS register from spilling, to ensure that
// it's being kept allocated.
void SpillLock(MIPSGPReg reg, MIPSGPReg reg2 = MIPS_REG_INVALID, MIPSGPReg reg3 = MIPS_REG_INVALID, MIPSGPReg reg4 = MIPS_REG_INVALID);
void ReleaseSpillLock(MIPSGPReg reg);
void ReleaseSpillLocks();
void SetImm(MIPSGPReg reg, u32 immVal);
bool IsImm(MIPSGPReg reg) const;
u32 GetImm(MIPSGPReg reg) const;
// Optimally set a register to an imm value (possibly using another register.)
void SetRegImm(Arm64Gen::ARM64Reg reg, u32 imm);
// Returns an ARM register containing the requested MIPS register.
Arm64Gen::ARM64Reg MapReg(MIPSGPReg reg, int mapFlags = 0);
bool IsMapped(MIPSGPReg reg);
bool IsMappedAsPointer(MIPSGPReg reg);
void MapInIn(MIPSGPReg rd, MIPSGPReg rs);
void MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs, bool avoidLoad = true);
void MapDirtyInIn(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad = true);
void MapDirtyDirtyIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, bool avoidLoad = true);
void MapDirtyDirtyInIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad = true);
void FlushArmReg(Arm64Gen::ARM64Reg r);
void FlushR(MIPSGPReg r);
void FlushBeforeCall();
void FlushAll();
void DiscardR(MIPSGPReg r);
Arm64Gen::ARM64Reg R(MIPSGPReg preg); // Returns a cached register, while checking that it's NOT mapped as a pointer
void SetEmitter(Arm64Gen::ARM64XEmitter *emitter) { emit_ = emitter; }
// For better log output only.
void SetCompilerPC(u32 compilerPC) { compilerPC_ = compilerPC; }
int GetMipsRegOffset(MIPSGPReg r);
private:
const Arm64Gen::ARM64Reg *GetMIPSAllocationOrder(int &count);
void MapRegTo(Arm64Gen::ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags);
int FlushGetSequential(MIPSGPReg startMipsReg, bool allowFlushImm);
Arm64Gen::ARM64Reg FindBestToSpill(bool unusedOnly, bool *clobbered);
MIPSState *mips_;
Arm64Gen::ARM64XEmitter *emit_;
MIPSComp::JitState *js_;
MIPSComp::Arm64JitOptions *jo_;
u32 compilerPC_;
enum {
NUM_ARMREG = 32, // 31 actual registers, plus the zero register.
NUM_MIPSREG = Arm64JitConstants::TOTAL_MAPPABLE_MIPSREGS,
};
RegARM ar[NUM_ARMREG];
RegMIPS mr[NUM_MIPSREG];
};
@ -0,0 +1,570 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <cstring>
#include "base/logging.h"
#include "Common/CPUDetect.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/MIPSTables.h"
using namespace Arm64Gen;
using namespace Arm64JitConstants;
ArmRegCacheFPU::ArmRegCacheFPU(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo) : mips_(mips), vr(mr + 32), js_(js), jo_(jo), initialReady(false) {
if (cpu_info.bNEON) {
numARMFpuReg_ = 32;
} else {
numARMFpuReg_ = 16;
}
}
void ArmRegCacheFPU::Start(MIPSAnalyst::AnalysisResults &stats) {
if (!initialReady) {
SetupInitialRegs();
initialReady = true;
}
memcpy(ar, arInitial, sizeof(ar));
memcpy(mr, mrInitial, sizeof(mr));
pendingFlush = false;
}
void ArmRegCacheFPU::SetupInitialRegs() {
for (int i = 0; i < numARMFpuReg_; i++) {
arInitial[i].mipsReg = -1;
arInitial[i].isDirty = false;
}
for (int i = 0; i < NUM_MIPSFPUREG; i++) {
mrInitial[i].loc = ML_MEM;
mrInitial[i].reg = INVALID_REG;
mrInitial[i].spillLock = false;
mrInitial[i].tempLock = false;
}
for (int i = 0; i < MAX_ARMQUADS; i++) {
qr[i].isDirty = false;
qr[i].mipsVec = -1;
qr[i].sz = V_Invalid;
qr[i].spillLock = false;
qr[i].isTemp = false;
memset(qr[i].vregs, 0xff, 4);
}
}
const ARM64Reg *ArmRegCacheFPU::GetMIPSAllocationOrder(int &count) {
// VFP mapping
// VFPU registers and regular FP registers are mapped interchangably on top of the standard
// 16 FPU registers.
// NEON mapping
// We map FPU and VFPU registers entirely separately. FPU is mapped to 12 of the bottom 16 S registers.
// VFPU is mapped to the upper 48 regs, 32 of which can only be reached through NEON
// (or D16-D31 as doubles, but not relevant).
// Might consider shifting the split in the future, giving more regs to NEON allowing it to map more quads.
// We should attempt to map scalars to low Q registers and wider things to high registers,
// since the NEON instructions are all 2-vector or 4-vector and don't do scalar; we want to
// keep the ability to use regular VFP instructions too.
static const ARM64Reg allocationOrder[] = {
// Reserve four temp registers. Useful when building quads until we really figure out
// how to do that best.
S4, S5, S6, S7, // Q1
S8, S9, S10, S11, // Q2
S12, S13, S14, S15, // Q3
S16, S17, S18, S19, // Q4
S20, S21, S22, S23, // Q5
S24, S25, S26, S27, // Q6
S28, S29, S30, S31, // Q7
// Q8-Q15 free for NEON tricks
};
static const ARM64Reg allocationOrderNEONVFPU[] = {
// Reserve four temp registers. Useful when building quads until we really figure out
// how to do that best.
S4, S5, S6, S7, // Q1
S8, S9, S10, S11, // Q2
S12, S13, S14, S15, // Q3
// Q4-Q15 free for VFPU
};
// NOTE: It's important that S2/S3 are not allocated with bNEON, even if !useNEONVFPU.
// They are used by a few instructions, like vh2f.
if (jo_->useNEONVFPU) {
count = sizeof(allocationOrderNEONVFPU) / sizeof(const ARM64Reg);
return allocationOrderNEONVFPU;
} else {
count = sizeof(allocationOrder) / sizeof(const ARM64Reg);
return allocationOrder;
}
}
bool ArmRegCacheFPU::IsMapped(MIPSReg r) {
return mr[r].loc == ML_ARMREG;
}
ARM64Reg ArmRegCacheFPU::MapReg(MIPSReg mipsReg, int mapFlags) {
// INFO_LOG(JIT, "FPR MapReg: %i flags=%i", mipsReg, mapFlags);
if (jo_->useNEONVFPU && mipsReg >= 32) {
ERROR_LOG(JIT, "Cannot map VFPU registers to ARM VFP registers in NEON mode. PC=%08x", js_->compilerPC);
return S0;
}
pendingFlush = true;
// Let's see if it's already mapped. If so we just need to update the dirty flag.
// We don't need to check for ML_NOINIT because we assume that anyone who maps
// with that flag immediately writes a "known" value to the register.
if (mr[mipsReg].loc == ML_ARMREG) {
if (ar[mr[mipsReg].reg].mipsReg != mipsReg) {
ERROR_LOG(JIT, "Reg mapping out of sync! MR %i", mipsReg);
}
if (mapFlags & MAP_DIRTY) {
ar[mr[mipsReg].reg].isDirty = true;
}
//INFO_LOG(JIT, "Already mapped %i to %i", mipsReg, mr[mipsReg].reg);
return (ARM64Reg)(mr[mipsReg].reg + S0);
}
// Okay, not mapped, so we need to allocate an ARM register.
int allocCount;
const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount);
allocate:
for (int i = 0; i < allocCount; i++) {
int reg = allocOrder[i] - S0;
if (ar[reg].mipsReg == -1) {
// That means it's free. Grab it, and load the value into it (if requested).
ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false;
if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) {
if (mr[mipsReg].loc == ML_MEM && mipsReg < TEMP0) {
// emit_->VLDR((ARM64Reg)(reg + S0), CTXREG, GetMipsRegOffset(mipsReg));
}
}
ar[reg].mipsReg = mipsReg;
mr[mipsReg].loc = ML_ARMREG;
mr[mipsReg].reg = reg;
//INFO_LOG(JIT, "Mapped %i to %i", mipsReg, mr[mipsReg].reg);
return (ARM64Reg)(reg + S0);
}
}
// Still nothing. Let's spill a reg and goto 10.
// TODO: Use age or something to choose which register to spill?
// TODO: Spill dirty regs first? or opposite?
int bestToSpill = -1;
for (int i = 0; i < allocCount; i++) {
int reg = allocOrder[i] - S0;
if (ar[reg].mipsReg != -1 && (mr[ar[reg].mipsReg].spillLock || mr[ar[reg].mipsReg].tempLock))
continue;
bestToSpill = reg;
break;
}
if (bestToSpill != -1) {
FlushArmReg((ARM64Reg)(S0 + bestToSpill));
goto allocate;
}
// Uh oh, we have all them spilllocked....
ERROR_LOG(JIT, "Out of spillable registers at PC %08x!!!", js_->compilerPC);
return INVALID_REG;
}
void ArmRegCacheFPU::MapInIn(MIPSReg rd, MIPSReg rs) {
SpillLock(rd, rs);
MapReg(rd);
MapReg(rs);
ReleaseSpillLock(rd);
ReleaseSpillLock(rs);
}
void ArmRegCacheFPU::MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad) {
SpillLock(rd, rs);
bool overlap = avoidLoad && rd == rs;
MapReg(rd, overlap ? MAP_DIRTY : MAP_NOINIT);
MapReg(rs);
ReleaseSpillLock(rd);
ReleaseSpillLock(rs);
}
void ArmRegCacheFPU::MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad) {
SpillLock(rd, rs, rt);
bool overlap = avoidLoad && (rd == rs || rd == rt);
MapReg(rd, overlap ? MAP_DIRTY : MAP_NOINIT);
MapReg(rt);
MapReg(rs);
ReleaseSpillLock(rd);
ReleaseSpillLock(rs);
ReleaseSpillLock(rt);
}
void ArmRegCacheFPU::SpillLockV(const u8 *v, VectorSize sz) {
for (int i = 0; i < GetNumVectorElements(sz); i++) {
vr[v[i]].spillLock = true;
}
}
void ArmRegCacheFPU::SpillLockV(int vec, VectorSize sz) {
u8 v[4];
GetVectorRegs(v, sz, vec);
SpillLockV(v, sz);
}
void ArmRegCacheFPU::MapRegV(int vreg, int flags) {
MapReg(vreg + 32, flags);
}
void ArmRegCacheFPU::LoadToRegV(ARM64Reg armReg, int vreg) {
if (vr[vreg].loc == ML_ARMREG) {
// emit_->VMOV(armReg, (ARM64Reg)(S0 + vr[vreg].reg));
} else {
MapRegV(vreg);
// emit_->VMOV(armReg, V(vreg));
}
}
void ArmRegCacheFPU::MapRegsAndSpillLockV(int vec, VectorSize sz, int flags) {
u8 v[4];
GetVectorRegs(v, sz, vec);
SpillLockV(v, sz);
for (int i = 0; i < GetNumVectorElements(sz); i++) {
MapRegV(v[i], flags);
}
}
void ArmRegCacheFPU::MapRegsAndSpillLockV(const u8 *v, VectorSize sz, int flags) {
SpillLockV(v, sz);
for (int i = 0; i < GetNumVectorElements(sz); i++) {
MapRegV(v[i], flags);
}
}
void ArmRegCacheFPU::MapInInV(int vs, int vt) {
SpillLockV(vs);
SpillLockV(vt);
MapRegV(vs);
MapRegV(vt);
ReleaseSpillLockV(vs);
ReleaseSpillLockV(vt);
}
void ArmRegCacheFPU::MapDirtyInV(int vd, int vs, bool avoidLoad) {
bool overlap = avoidLoad && (vd == vs);
SpillLockV(vd);
SpillLockV(vs);
MapRegV(vd, overlap ? MAP_DIRTY : MAP_NOINIT);
MapRegV(vs);
ReleaseSpillLockV(vd);
ReleaseSpillLockV(vs);
}
void ArmRegCacheFPU::MapDirtyInInV(int vd, int vs, int vt, bool avoidLoad) {
bool overlap = avoidLoad && ((vd == vs) || (vd == vt));
SpillLockV(vd);
SpillLockV(vs);
SpillLockV(vt);
MapRegV(vd, overlap ? MAP_DIRTY : MAP_NOINIT);
MapRegV(vs);
MapRegV(vt);
ReleaseSpillLockV(vd);
ReleaseSpillLockV(vs);
ReleaseSpillLockV(vt);
}
void ArmRegCacheFPU::FlushArmReg(ARM64Reg r) {
if (r >= S0 && r <= S31) {
int reg = r - S0;
if (ar[reg].mipsReg == -1) {
// Nothing to do, reg not mapped.
return;
}
if (ar[reg].isDirty && mr[ar[reg].mipsReg].loc == ML_ARMREG) {
//INFO_LOG(JIT, "Flushing ARM reg %i", reg);
// emit_->VSTR(r, CTXREG, GetMipsRegOffset(ar[reg].mipsReg));
}
// IMMs won't be in an ARM reg.
mr[ar[reg].mipsReg].loc = ML_MEM;
mr[ar[reg].mipsReg].reg = INVALID_REG;
ar[reg].isDirty = false;
ar[reg].mipsReg = -1;
}
}
void ArmRegCacheFPU::FlushV(MIPSReg r) {
FlushR(r + 32);
}
void ArmRegCacheFPU::FlushR(MIPSReg r) {
switch (mr[r].loc) {
case ML_IMM:
// IMM is always "dirty".
// IMM is not allowed for FP (yet).
ERROR_LOG(JIT, "Imm in FP register?");
break;
case ML_ARMREG:
if (mr[r].reg == INVALID_REG) {
ERROR_LOG(JIT, "FlushR: MipsReg had bad ArmReg");
}
if (mr[r].reg >= Q0 && mr[r].reg <= Q15) {
// This should happen rarely, but occasionally we need to flush a single stray
// mipsreg that's been part of a quad.
int quad = mr[r].reg - Q0;
if (qr[quad].isDirty) {
WARN_LOG(JIT, "FlushR found quad register %i - PC=%08x", quad, js_->compilerPC);
//emit_->ADDI2R(R0, CTXREG, GetMipsRegOffset(r), R1);
//emit_->VST1_lane(F_32, (ARM64Reg)mr[r].reg, R0, mr[r].lane, true);
}
} else {
if (ar[mr[r].reg].isDirty) {
//INFO_LOG(JIT, "Flushing dirty reg %i", mr[r].reg);
// emit_->VSTR((ARM64Reg)(mr[r].reg + S0), CTXREG, GetMipsRegOffset(r));
ar[mr[r].reg].isDirty = false;
}
ar[mr[r].reg].mipsReg = -1;
}
break;
case ML_MEM:
// Already there, nothing to do.
break;
default:
//BAD
break;
}
mr[r].loc = ML_MEM;
mr[r].reg = (int)INVALID_REG;
}
int ArmRegCacheFPU::GetNumARMFPURegs() {
if (cpu_info.bNEON)
return 32;
else
return 16;
}
// Scalar only. Need a similar one for sequential Q vectors.
int ArmRegCacheFPU::FlushGetSequential(int a, int maxArmReg) {
int c = 1;
int lastMipsOffset = GetMipsRegOffset(ar[a].mipsReg);
a++;
while (a < maxArmReg) {
if (!ar[a].isDirty || ar[a].mipsReg == -1)
break;
int mipsOffset = GetMipsRegOffset(ar[a].mipsReg);
if (mipsOffset != lastMipsOffset + 4) {
break;
}
lastMipsOffset = mipsOffset;
a++;
c++;
}
return c;
}
void ArmRegCacheFPU::FlushAll() {
if (!pendingFlush) {
// Nothing allocated. FPU regs are not nearly as common as GPR.
return;
}
// Discard temps!
for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; i++) {
DiscardR(i);
}
// Loop through the ARM registers, then use GetMipsRegOffset to determine if MIPS registers are
// sequential. This is necessary because we store VFPU registers in a staggered order to get
// columns sequential (most VFPU math in nearly all games is in columns, not rows).
int numArmRegs;
// We rely on the allocation order being sequential.
const ARM64Reg baseReg = GetMIPSAllocationOrder(numArmRegs)[0];
for (int i = 0; i < numArmRegs; i++) {
int a = (baseReg - S0) + i;
int m = ar[a].mipsReg;
if (ar[a].isDirty) {
if (m == -1) {
ILOG("ARM reg %i is dirty but has no mipsreg", a);
continue;
}
int c = FlushGetSequential(a, GetNumARMFPURegs());
if (c == 1) {
// ILOG("Got single register: %i (%i)", a, m);
//emit_->VSTR((ARM64Reg)(a + S0), CTXREG, GetMipsRegOffset(m));
} else if (c == 2) {
// Probably not worth using VSTMIA for two.
int offset = GetMipsRegOffset(m);
//emit_->VSTR((ARM64Reg)(a + S0), CTXREG, offset);
//emit_->VSTR((ARM64Reg)(a + 1 + S0), CTXREG, offset + 4);
} else {
// ILOG("Got sequence: %i at %i (%i)", c, a, m);
//emit_->ADDI2R(SCRATCHREG1, CTXREG, GetMipsRegOffset(m), SCRATCHREG2);
// ILOG("VSTMIA R0, %i, %i", a, c);
//emit_->VSTMIA(SCRATCHREG1, false, (ARM64Reg)(S0 + a), c);
}
// Skip past, and mark as non-dirty.
for (int j = 0; j < c; j++) {
int b = a + j;
mr[ar[b].mipsReg].loc = ML_MEM;
mr[ar[b].mipsReg].reg = (int)INVALID_REG;
ar[a + j].mipsReg = -1;
ar[a + j].isDirty = false;
}
i += c - 1;
} else {
if (m != -1) {
mr[m].loc = ML_MEM;
mr[m].reg = (int)INVALID_REG;
}
ar[a].mipsReg = -1;
// already not dirty
}
}
// Sanity check
for (int i = 0; i < numARMFpuReg_; i++) {
if (ar[i].mipsReg != -1) {
ERROR_LOG(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
}
}
pendingFlush = false;
}
void ArmRegCacheFPU::DiscardR(MIPSReg r) {
switch (mr[r].loc) {
case ML_IMM:
// IMM is always "dirty".
// IMM is not allowed for FP (yet).
ERROR_LOG(JIT, "Imm in FP register?");
break;
case ML_ARMREG:
if (mr[r].reg == INVALID_REG) {
ERROR_LOG(JIT, "DiscardR: MipsReg had bad ArmReg");
} else {
// Note that we DO NOT write it back here. That's the whole point of Discard.
ar[mr[r].reg].isDirty = false;
ar[mr[r].reg].mipsReg = -1;
}
break;
case ML_MEM:
// Already there, nothing to do.
break;
default:
//BAD
break;
}
mr[r].loc = ML_MEM;
mr[r].reg = (int)INVALID_REG;
mr[r].tempLock = false;
mr[r].spillLock = false;
}
bool ArmRegCacheFPU::IsTempX(ARM64Reg r) const {
return ar[r - S0].mipsReg >= TEMP0;
}
int ArmRegCacheFPU::GetTempR() {
if (jo_->useNEONVFPU) {
ERROR_LOG(JIT, "VFP temps not allowed in NEON mode");
return 0;
}
pendingFlush = true;
for (int r = TEMP0; r < TEMP0 + NUM_TEMPS; ++r) {
if (mr[r].loc == ML_MEM && !mr[r].tempLock) {
mr[r].tempLock = true;
return r;
}
}
ERROR_LOG(CPU, "Out of temp regs! Might need to DiscardR() some");
_assert_msg_(JIT, 0, "Regcache ran out of temp regs, might need to DiscardR() some.");
return -1;
}
int ArmRegCacheFPU::GetMipsRegOffset(MIPSReg r) {
// These are offsets within the MIPSState structure. First there are the GPRS, then FPRS, then the "VFPURs", then the VFPU ctrls.
if (r < 0 || r >= 32 + 128 + NUM_TEMPS) {
ERROR_LOG(JIT, "bad mips register %i, out of range", r);
return 0; // or what?
}
if (r < 32 || r >= 32 + 128) {
return (32 + r) << 2;
} else {
// r is between 32 and 128 + 32
return (32 + 32 + voffset[r - 32]) << 2;
}
}
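// Worked example: FPU reg f5 resolves to (32 + 5) * 4 = 148 bytes into the
// context, just past the 32 GPRs; VFPU regs go through voffset[] so that
// matrix columns end up sequential in memory.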
void ArmRegCacheFPU::SpillLock(MIPSReg r1, MIPSReg r2, MIPSReg r3, MIPSReg r4) {
mr[r1].spillLock = true;
if (r2 != -1) mr[r2].spillLock = true;
if (r3 != -1) mr[r3].spillLock = true;
if (r4 != -1) mr[r4].spillLock = true;
}
// This is actually pretty slow with all the 160 regs...
void ArmRegCacheFPU::ReleaseSpillLocksAndDiscardTemps() {
for (int i = 0; i < NUM_MIPSFPUREG; i++) {
mr[i].spillLock = false;
}
for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; ++i) {
DiscardR(i);
}
for (int i = 0; i < MAX_ARMQUADS; i++) {
qr[i].spillLock = false;
if (qr[i].isTemp) {
qr[i].isTemp = false;
qr[i].sz = V_Invalid;
}
}
}
ARM64Reg ArmRegCacheFPU::R(int mipsReg) {
if (mr[mipsReg].loc == ML_ARMREG) {
return (ARM64Reg)(mr[mipsReg].reg + S0);
} else {
if (mipsReg < 32) {
ERROR_LOG(JIT, "FReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg, js_->compilerPC, MIPSDisasmAt(js_->compilerPC));
} else if (mipsReg < 32 + 128) {
ERROR_LOG(JIT, "VReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC));
} else {
ERROR_LOG(JIT, "Tempreg %i not in ARM reg. compilerPC = %08x : %s", mipsReg - 128 - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC));
}
return INVALID_REG; // BAAAD
}
}
@ -0,0 +1,186 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "../MIPS.h"
#include "../MIPSAnalyst.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"
#include "Core/MIPS/MIPSVFPUUtils.h"
#include "Common/Arm64Emitter.h"
// These collide with something on Blackberry.
#undef MAP_NOINIT
#undef MAP_READ
namespace Arm64JitConstants {
enum {
NUM_TEMPS = 16,
TEMP0 = 32 + 128,
TOTAL_MAPPABLE_MIPSFPUREGS = 32 + 128 + NUM_TEMPS,
};
enum {
MAP_READ = 0,
MAP_MTX_TRANSPOSED = 16,
MAP_PREFER_LOW = 16,
MAP_PREFER_HIGH = 32,
// Force is not yet correctly implemented; if the reg is already mapped, it will not move
MAP_FORCE_LOW = 64, // Only map Q0-Q7 (and probably not Q0-Q3 as they are S registers so that leaves Q8-Q15)
MAP_FORCE_HIGH = 128, // Only map Q8-Q15
};
}
struct FPURegARM64 {
int mipsReg; // if -1, no mipsreg attached.
bool isDirty; // Should the register be written back?
};
struct FPURegQuad64 {
int mipsVec;
VectorSize sz;
u8 vregs[4];
bool isDirty;
bool spillLock;
bool isTemp;
};
struct FPURegMIPS {
// Where is this MIPS register?
Arm64JitConstants::RegMIPSLoc loc;
// Data (only one of these is used, depending on loc. Could make a union).
u32 reg;
int lane;
bool spillLock; // if true, this register cannot be spilled.
bool tempLock;
// If loc == ML_MEM, it's back in its location in the CPU context struct.
};
namespace MIPSComp {
struct Arm64JitOptions;
struct JitState;
}
class ArmRegCacheFPU {
public:
ArmRegCacheFPU(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo);
~ArmRegCacheFPU() {}
void Init(Arm64Gen::ARM64XEmitter *emitter);
void Start(MIPSAnalyst::AnalysisResults &stats);
// Protect the arm register containing a MIPS register from spilling, to ensure that
// it's being kept allocated.
void SpillLock(MIPSReg reg, MIPSReg reg2 = -1, MIPSReg reg3 = -1, MIPSReg reg4 = -1);
void SpillLockV(MIPSReg r) { SpillLock(r + 32); }
void ReleaseSpillLocksAndDiscardTemps();
void ReleaseSpillLock(int mipsreg) {
mr[mipsreg].spillLock = false;
}
void ReleaseSpillLockV(int mipsreg) {
ReleaseSpillLock(mipsreg + 32);
}
void SetImm(MIPSReg reg, u32 immVal);
bool IsImm(MIPSReg reg) const;
u32 GetImm(MIPSReg reg) const;
// Returns an ARM register containing the requested MIPS register.
Arm64Gen::ARM64Reg MapReg(MIPSReg reg, int mapFlags = 0);
void MapInIn(MIPSReg rd, MIPSReg rs);
void MapDirty(MIPSReg rd);
void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true);
void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
bool IsMapped(MIPSReg r);
void FlushArmReg(Arm64Gen::ARM64Reg r);
void FlushR(MIPSReg r);
void DiscardR(MIPSReg r);
Arm64Gen::ARM64Reg R(int preg); // Returns a cached register
// VFPU registers as single ARM VFP registers. Must not be used in the upcoming NEON mode!
void MapRegV(int vreg, int flags = 0);
void LoadToRegV(Arm64Gen::ARM64Reg armReg, int vreg);
void MapInInV(int rt, int rs);
void MapDirtyInV(int rd, int rs, bool avoidLoad = true);
void MapDirtyInInV(int rd, int rs, int rt, bool avoidLoad = true);
bool IsTempX(Arm64Gen::ARM64Reg r) const;
MIPSReg GetTempV() { return GetTempR() - 32; }
// VFPU registers as single VFP registers.
Arm64Gen::ARM64Reg V(int vreg) { return R(vreg + 32); }
int FlushGetSequential(int a, int maxArmReg);
void FlushAll();
// This one is allowed at any point.
void FlushV(MIPSReg r);
// NOTE: These require you to release spill locks manually!
void MapRegsAndSpillLockV(int vec, VectorSize vsz, int flags);
void MapRegsAndSpillLockV(const u8 *v, VectorSize vsz, int flags);
void SpillLockV(const u8 *v, VectorSize vsz);
void SpillLockV(int vec, VectorSize vsz);
void SetEmitter(Arm64Gen::ARM64XEmitter *emitter) { emit_ = emitter; }
int GetMipsRegOffset(MIPSReg r);
private:
MIPSReg GetTempR();
const Arm64Gen::ARM64Reg *GetMIPSAllocationOrder(int &count);
int GetMipsRegOffsetV(MIPSReg r) {
return GetMipsRegOffset(r + 32);
}
int GetNumARMFPURegs();
void SetupInitialRegs();
MIPSState *mips_;
Arm64Gen::ARM64XEmitter *emit_;
MIPSComp::JitState *js_;
MIPSComp::Arm64JitOptions *jo_;
int numARMFpuReg_;
int qTime_;
enum {
// With NEON, we have 64 S = 32 D = 16 Q registers. Only the first 32 S registers
// are individually mappable though.
MAX_ARMFPUREG = 32,
MAX_ARMQUADS = 16,
NUM_MIPSFPUREG = Arm64JitConstants::TOTAL_MAPPABLE_MIPSFPUREGS,
};
FPURegARM64 ar[MAX_ARMFPUREG];
FPURegMIPS mr[NUM_MIPSFPUREG];
FPURegQuad64 qr[MAX_ARMQUADS];
FPURegMIPS *vr;
bool pendingFlush;
bool initialReady;
FPURegARM64 arInitial[MAX_ARMFPUREG];
FPURegMIPS mrInitial[NUM_MIPSFPUREG];
};
@ -30,6 +30,10 @@
#include "Common/ArmEmitter.h"
namespace ArmGen { class ARMXEmitter; }
typedef ArmGen::ARMXCodeBlock NativeCodeBlock;
#elif defined(ARM64)
#include "Common/Arm64Emitter.h"
namespace Arm64Gen { class ARM64XEmitter; }
typedef Arm64Gen::ARM64CodeBlock NativeCodeBlock;
#elif defined(_M_IX86) || defined(_M_X64)
#include "Common/x64Emitter.h"
namespace Gen { class XEmitter; }
@ -44,7 +48,7 @@ namespace FakeGen { class FakeXEmitter; }
typedef FakeGen::FakeXCodeBlock NativeCodeBlock;
#endif
#if defined(ARM)
#if defined(ARM) || defined(ARM64)
const int MAX_JIT_BLOCK_EXITS = 2;
#else
const int MAX_JIT_BLOCK_EXITS = 8;
@ -23,10 +23,13 @@
#include "ext/disarm.h"
#include "ext/udis86/udis86.h"
#include "Core/Util/DisArm64.h"
namespace MIPSComp {
#if defined(ARM)
ArmJit *jit;
#elif defined(ARM64)
Arm64Jit *jit;
#else
Jit *jit;
#endif
@ -35,6 +38,7 @@ namespace MIPSComp {
}
}
#if !defined(ARM64)
// We compile this for x86 as well because it may be useful when developing the ARM JIT on a PC.
std::vector<std::string> DisassembleArm2(const u8 *data, int size) {
std::vector<std::string> lines;
@ -75,8 +79,37 @@ std::vector<std::string> DisassembleArm2(const u8 *data, int size) {
}
return lines;
}
#endif
#ifndef ARM
#if !defined(ARM)
std::vector<std::string> DisassembleArm64(const u8 *data, int size) {
std::vector<std::string> lines;
char temp[256];
int bkpt_count = 0;
for (int i = 0; i < size; i += 4) {
const u32 *codePtr = (const u32 *)(data + i);
u32 inst = codePtr[0];
Arm64Dis((u32)(intptr_t)codePtr, inst, temp, sizeof(temp), false);
std::string buf = temp;
if (buf == "BKPT 1") {
bkpt_count++;
} else {
if (bkpt_count) {
lines.push_back(StringFromFormat("BKPT 1 (x%i)", bkpt_count));
bkpt_count = 0;
}
lines.push_back(buf);
}
}
if (bkpt_count) {
lines.push_back(StringFromFormat("BKPT 1 (x%i)", bkpt_count));
}
return lines;
}
#endif
#if !defined(ARM) && !defined(ARM64)
const char *ppsspp_resolver(struct ud*,
uint64_t addr,
@ -24,6 +24,7 @@
// TODO: Find a better place for these.
std::vector<std::string> DisassembleArm2(const u8 *data, int size);
std::vector<std::string> DisassembleArm64(const u8 *data, int size);
std::vector<std::string> DisassembleX86(const u8 *data, int size);
namespace MIPSComp {
@ -26,6 +26,9 @@ struct JitBlock;
#if defined(ARM)
#include "../ARM/ArmJit.h"
typedef MIPSComp::ArmJit NativeJit;
#elif defined(ARM64)
#include "../ARM64/Arm64Jit.h"
typedef MIPSComp::Arm64Jit NativeJit;
#elif defined(_M_IX86) || defined(_M_X64)
#include "../x86/Jit.h"
typedef MIPSComp::Jit NativeJit;
@ -211,6 +211,8 @@ void MIPSState::Init() {
if (PSP_CoreParameter().cpuCore == CPU_JIT) {
#ifdef ARM
MIPSComp::jit = new MIPSComp::ArmJit(this);
#elif defined(ARM64)
MIPSComp::jit = new MIPSComp::Arm64Jit(this);
#elif defined(_M_IX86) || defined(_M_X64)
MIPSComp::jit = new MIPSComp::Jit(this);
#elif defined(MIPS)
@ -218,6 +220,8 @@ void MIPSState::Init() {
#else
MIPSComp::jit = new MIPSComp::FakeJit(this);
#endif
} else {
MIPSComp::jit = nullptr;
}
}
@ -236,6 +240,8 @@ void MIPSState::UpdateCore(CPUCore desired) {
if (!MIPSComp::jit) {
#ifdef ARM
MIPSComp::jit = new MIPSComp::ArmJit(this);
#elif defined(ARM64)
MIPSComp::jit = new MIPSComp::Arm64Jit(this);
#elif defined(_M_IX86) || defined(_M_X64)
MIPSComp::jit = new MIPSComp::Jit(this);
#elif defined(MIPS)
@ -90,6 +90,8 @@ struct MIPSInstruction {
#ifdef ARM
#define JITFUNC(f) (&ArmJit::f)
#elif defined(ARM64)
#define JITFUNC(f) (&Arm64Jit::f)
#elif defined(_M_X64) || defined(_M_IX86)
#define JITFUNC(f) (&Jit::f)
#elif defined(MIPS)
@ -185,7 +185,7 @@ static bool Memory_TryBase(u32 flags) {
if (!*view.out_ptr_low)
goto bail;
}
#ifdef _M_X64
#if defined(_M_X64) || defined(ARM64)
*view.out_ptr = (u8*)g_arena.CreateView(
position, view.size, base + view.virtual_address);
#else
@ -20,7 +20,7 @@
#include "Core/Util/AudioFormat.h"
#include "Core/Util/AudioFormatNEON.h"
#ifndef ARM
#if !defined(ARM) && !defined(ARM64)
#error Should not be compiled on non-ARM.
#endif
@ -18,7 +18,7 @@
#include <arm_neon.h>
#include "GPU/Common/TextureDecoder.h"
#ifndef ARM
#if !defined(ARM) && !defined(ARM64)
#error Should not be compiled on non-ARM.
#endif
@ -29,7 +29,7 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) {
__builtin_prefetch(checkp, 0, 0);
if (((intptr_t)checkp & 0xf) == 0 && (size & 0x3f) == 0) {
#ifdef IOS
#if defined(IOS) || defined(ARM64)
uint32x4_t cursor = vdupq_n_u32(0);
uint16x8_t cursor2 = vld1q_u16(QuickTexHashInitial);
uint16x8_t update = vdupq_n_u16(0x2455U);
@ -0,0 +1,33 @@
// Copyright (c) 2013- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "base/logging.h"
#include "Common/CPUDetect.h"
#include "Core/Config.h"
#include "Core/Reporting.h"
#include "Common/Arm64Emitter.h"
#include "GPU/GPUState.h"
#include "GPU/Common/VertexDecoderCommon.h"
JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
// TODO ARM64
return NULL;
}
bool VertexDecoderJitCache::CompileStep(const VertexDecoder &dec, int step) {
return false;
}
@ -25,6 +25,8 @@
#include "GPU/ge_constants.h"
#ifdef ARM
#include "Common/ArmEmitter.h"
#elif defined(ARM64)
#include "Common/Arm64Emitter.h"
#elif defined(_M_IX86) || defined(_M_X64)
#include "Common/x64Emitter.h"
#elif defined(MIPS)
@ -446,8 +448,7 @@ struct VertexDecoderOptions {
bool expand8BitNormalsToFloat;
};
class VertexDecoder
{
class VertexDecoder {
public:
VertexDecoder();
@ -587,6 +588,8 @@ public:
#ifdef ARM
class VertexDecoderJitCache : public ArmGen::ARMXCodeBlock {
#elif defined(ARM64)
class VertexDecoderJitCache : public Arm64Gen::ARM64CodeBlock {
#elif defined(_M_IX86) || defined(_M_X64)
class VertexDecoderJitCache : public Gen::XCodeBlock {
#elif defined(MIPS)
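
Note: unlike the NativeJit typedef, this hunk swaps the base class, so VertexDecoderJitCache inherits the selected backend's emitter API directly. A standalone sketch with invented demo classes (not PPSSPP code):

#include <cstdio>

struct ARM64CodeBlockDemo { void EmitNop() { printf("a64 nop\n"); } };
struct XCodeBlockDemo     { void EmitNop() { printf("x86 nop\n"); } };

#if defined(DEMO_ARM64)
class JitCacheDemo : public ARM64CodeBlockDemo {
#else
class JitCacheDemo : public XCodeBlockDemo {
#endif
public:
    void Compile() { EmitNop(); }  // uses whichever emitter was inherited
};

int main() {
    JitCacheDemo cache;
    cache.Compile();
    return 0;
}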

View file

@ -262,6 +262,12 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="Common\VertexDecoderArm64.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="Common\VertexDecoderCommon.cpp" />
<ClCompile Include="Common\VertexDecoderX86.cpp" />
<ClCompile Include="Debugger\Breakpoints.cpp" />

View file

@ -350,6 +350,9 @@
<ClCompile Include="Directx9\DepalettizeShaderDX9.cpp">
<Filter>DirectX9</Filter>
</ClCompile>
<ClCompile Include="Common\VertexDecoderArm64.cpp">
<Filter>Common</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />

View file

@ -283,6 +283,8 @@ const char *GetCompilerABI() {
return "armeabi-v7a";
#elif defined(ARM)
return "armeabi";
#elif defined(ARM64)
return "arm64";
#elif defined(_M_IX86)
return "x86";
#elif defined(_M_X64)

View file

@ -469,6 +469,9 @@ void GameSettingsScreen::CreateViews() {
systemSettings->Add(new ItemHeader(s->T("UI Language")));
systemSettings->Add(new Choice(dev->T("Language", "Language")))->OnClick.Handle(this, &GameSettingsScreen::OnLanguage);
systemSettings->Add(new ItemHeader(s->T("Developer Tools")));
systemSettings->Add(new Choice(s->T("Developer Tools")))->OnClick.Handle(this, &GameSettingsScreen::OnDeveloperTools);
systemSettings->Add(new ItemHeader(s->T("Help the PPSSPP team")));
enableReports_ = Reporting::IsEnabled();
enableReportsCheckbox_ = new CheckBox(&enableReports_, s->T("Enable Compatibility Server Reports"));
@ -491,9 +494,6 @@ void GameSettingsScreen::CreateViews() {
#endif
systemSettings->Add(new CheckBox(&g_Config.bSetRoundingMode, s->T("Respect FPU rounding (disable for old GEB saves)")))->OnClick.Handle(this, &GameSettingsScreen::OnJitAffectingSetting);
systemSettings->Add(new ItemHeader(s->T("Developer Tools")));
systemSettings->Add(new Choice(s->T("Developer Tools")))->OnClick.Handle(this, &GameSettingsScreen::OnDeveloperTools);
systemSettings->Add(new ItemHeader(s->T("General")));
#ifdef ANDROID

View file

@ -51,7 +51,6 @@ ARCH_FILES := \
$(SRC)/GPU/Common/VertexDecoderX86.cpp
endif
# ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
ifeq ($(findstring armeabi-v7a,$(TARGET_ARCH_ABI)),armeabi-v7a)
ARCH_FILES := \
$(SRC)/GPU/Common/TextureDecoderNEON.cpp.neon \
@ -72,9 +71,30 @@ ARCH_FILES := \
$(SRC)/Core/MIPS/ARM/ArmRegCache.cpp \
$(SRC)/Core/MIPS/ARM/ArmRegCacheFPU.cpp \
$(SRC)/GPU/Common/VertexDecoderArm.cpp \
$(SRC)/ext/disarm.cpp \
ArmEmitterTest.cpp
endif
ifeq ($(findstring arm64-v8a,$(TARGET_ARCH_ABI)),arm64-v8a)
ARCH_FILES := \
$(SRC)/GPU/Common/TextureDecoderNEON.cpp \
$(SRC)/Core/Util/AudioFormatNEON.cpp \
$(SRC)/Common/Arm64Emitter.cpp \
$(SRC)/Common/ArmCPUDetect.cpp \
$(SRC)/Core/MIPS/ARM64/Arm64CompALU.cpp \
$(SRC)/Core/MIPS/ARM64/Arm64CompBranch.cpp \
$(SRC)/Core/MIPS/ARM64/Arm64CompFPU.cpp \
$(SRC)/Core/MIPS/ARM64/Arm64CompLoadStore.cpp \
$(SRC)/Core/MIPS/ARM64/Arm64CompVFPU.cpp \
$(SRC)/Core/MIPS/ARM64/Arm64CompReplace.cpp \
$(SRC)/Core/MIPS/ARM64/Arm64Asm.cpp \
$(SRC)/Core/MIPS/ARM64/Arm64Jit.cpp \
$(SRC)/Core/MIPS/ARM64/Arm64RegCache.cpp \
$(SRC)/Core/MIPS/ARM64/Arm64RegCacheFPU.cpp \
$(SRC)/Core/Util/DisArm64.cpp \
$(SRC)/GPU/Common/VertexDecoderArm64.cpp
endif
ifeq ($(TARGET_ARCH_ABI),armeabi)
ARCH_FILES := \
$(SRC)/Common/ArmEmitter.cpp \
@ -112,7 +132,6 @@ EXEC_AND_LIB_FILES := \
$(SRC)/Core/MIPS/MIPSDebugInterface.cpp \
$(SRC)/UI/ui_atlas.cpp \
$(SRC)/UI/OnScreenDisplay.cpp \
$(SRC)/ext/disarm.cpp \
$(SRC)/ext/libkirk/AES.c \
$(SRC)/ext/libkirk/amctrl.c \
$(SRC)/ext/libkirk/SHA1.c \

View file

@ -1,6 +1,7 @@
APP_STL := gnustl_static
APP_PLATFORM := android-9
APP_ABI := armeabi-v7a x86
#APP_ABI := armeabi-v7a x86
#APP_ABI := armeabi-v7a
APP_ABI := arm64-v8a
APP_GNUSTL_CPP_FEATURES :=
NDK_TOOLCHAIN_VERSION := 4.8
# NDK_TOOLCHAIN_VERSION := 4.9

View file

@ -1,10 +1,10 @@
# These are definitions for LOCAL_ variables for PPSSPP.
# They are shared between ppsspp_jni (lib for Android app) and ppsspp_headless.
LOCAL_CFLAGS := -DUSE_FFMPEG -DUSING_GLES2 -DMOBILE_DEVICE -O3 -fsigned-char -Wall -Wno-multichar -Wno-psabi -Wno-unused-variable -fno-strict-aliasing -D__STDC_CONSTANT_MACROS
LOCAL_CFLAGS := -DUSE_FFMPEG -DUSING_GLES2 -DMOBILE_DEVICE -O3 -fsigned-char -Wall -Wno-multichar -Wno-psabi -Wno-unused-variable -fno-strict-aliasing -D__STDC_CONSTANT_MACROS -Wno-format
# Yes, this is really CPPFLAGS (it applies to C++ only).
# The literal-suffix warning comes from Android's default generated code and is just noise.
LOCAL_CPPFLAGS := -fno-exceptions -std=gnu++11 -fno-rtti -Wno-reorder -Wno-literal-suffix
LOCAL_CPPFLAGS := -fno-exceptions -std=gnu++11 -fno-rtti -Wno-reorder -Wno-literal-suffix -Wno-format
LOCAL_C_INCLUDES := \
$(LOCAL_PATH)/../../Common \
$(LOCAL_PATH)/../.. \
@ -49,12 +49,12 @@ ifeq ($(TARGET_ARCH_ABI),x86)
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_ARCH_32 -D_M_IX86 -fomit-frame-pointer -mtune=atom -mfpmath=sse -mssse3
endif
ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
#LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavformat.a
#LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavcodec.a
#LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswresample.a
#LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswscale.a
#LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavutil.a
#LOCAL_C_INCLUDES += $(LOCAL_PATH)/../../ffmpeg/android/arm64/include
LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavformat.a
LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavcodec.a
LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswresample.a
LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswscale.a
LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavutil.a
LOCAL_C_INCLUDES += $(LOCAL_PATH)/../../ffmpeg/android/arm64/include
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_ARCH_64 -DARM64
endif