mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Initial work on ARM64, based on the ARM jit.
This commit is contained in:
parent
f4dfd49a7b
commit
b309c83973
36 changed files with 3669 additions and 36 deletions
|
@ -3,6 +3,8 @@
|
|||
// Refer to the license.txt file included.
|
||||
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#include "Arm64Emitter.h"
|
||||
#include "MathUtil.h"
|
||||
|
@ -193,11 +195,11 @@ void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr
|
|||
bool b64Bit = Is64Bit(Rt);
|
||||
s64 distance = (s64)ptr - (s64)m_code;
|
||||
|
||||
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance);
|
||||
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, (int)distance);
|
||||
|
||||
distance >>= 2;
|
||||
|
||||
_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance);
|
||||
_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance);
|
||||
|
||||
Rt = DecodeReg(Rt);
|
||||
Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | \
|
||||
|
@ -209,11 +211,11 @@ void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const voi
|
|||
bool b64Bit = Is64Bit(Rt);
|
||||
s64 distance = (s64)ptr - (s64)m_code;
|
||||
|
||||
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance);
|
||||
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, (int)distance);
|
||||
|
||||
distance >>= 2;
|
||||
|
||||
_assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s: Received too large distance: %lx", __FUNCTION__, distance);
|
||||
_assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance);
|
||||
|
||||
Rt = DecodeReg(Rt);
|
||||
Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | \
|
||||
|
@ -224,11 +226,11 @@ void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr)
|
|||
{
|
||||
s64 distance = (s64)ptr - s64(m_code);
|
||||
|
||||
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance);
|
||||
_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, (int)distance);
|
||||
|
||||
distance >>= 2;
|
||||
|
||||
_assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance);
|
||||
_assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance);
|
||||
|
||||
Write32((op << 31) | (0x5 << 26) | (distance & 0x3FFFFFF));
|
||||
}
|
||||
|
@ -566,7 +568,7 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch)
|
|||
Not = true;
|
||||
case 0: // CBZ
|
||||
{
|
||||
_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance);
|
||||
_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, (int)distance);
|
||||
bool b64Bit = Is64Bit(branch.reg);
|
||||
ARM64Reg reg = DecodeReg(branch.reg);
|
||||
inst = (b64Bit << 31) | (0x1A << 25) | (Not << 24) | ((distance << 5) & 0xFFFFE0) | reg;
|
||||
|
@ -580,7 +582,7 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch)
|
|||
Not = true;
|
||||
case 3: // TBZ
|
||||
{
|
||||
_assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance);
|
||||
_assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, (int)distance);
|
||||
ARM64Reg reg = DecodeReg(branch.reg);
|
||||
inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | ((branch.bit & 0x1F) << 19) | (distance << 5) | reg;
|
||||
}
|
||||
|
@ -677,7 +679,7 @@ void ARM64XEmitter::B(CCFlags cond, const void* ptr)
|
|||
s64 distance = (s64)ptr - (s64(m_code) + 8);
|
||||
distance >>= 2;
|
||||
|
||||
_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance);
|
||||
_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance);
|
||||
|
||||
Write32((0x54 << 24) | (distance << 5) | cond);
|
||||
}
|
||||
|
@ -1488,7 +1490,7 @@ void ARM64XEmitter::ADRP(ARM64Reg Rd, s32 imm)
|
|||
// Wrapper around MOVZ+MOVK
|
||||
void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
|
||||
{
|
||||
unsigned parts = Is64Bit(Rd) ? 4 : 2;
|
||||
unsigned int parts = Is64Bit(Rd) ? 4 : 2;
|
||||
BitSet32 upload_part(0);
|
||||
bool need_movz = false;
|
||||
|
||||
|
@ -1513,7 +1515,7 @@ void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
|
|||
// XXX: Support rotating immediates to save instructions
|
||||
if (optimize)
|
||||
{
|
||||
for (unsigned i = 0; i < parts; ++i)
|
||||
for (unsigned int i = 0; i < parts; ++i)
|
||||
{
|
||||
if ((imm >> (i * 16)) & 0xFFFF)
|
||||
upload_part[i] = 1;
|
||||
|
|
|
@ -286,6 +286,66 @@
|
|||
<ClCompile Include="Loaders.cpp" />
|
||||
<ClCompile Include="MemMap.cpp" />
|
||||
<ClCompile Include="MemmapFunctions.cpp" />
|
||||
<ClCompile Include="MIPS\ARM64\Arm64Asm.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="MIPS\ARM64\Arm64CompALU.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="MIPS\ARM64\Arm64CompBranch.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="MIPS\ARM64\Arm64CompFPU.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="MIPS\ARM64\Arm64CompLoadStore.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="MIPS\ARM64\Arm64CompReplace.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="MIPS\ARM64\Arm64CompVFPU.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="MIPS\ARM64\Arm64Jit.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="MIPS\ARM64\Arm64RegCache.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="MIPS\ARM64\Arm64RegCacheFPU.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="MIPS\ARM\ArmAsm.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
|
@ -540,6 +600,30 @@
|
|||
<ClInclude Include="Loaders.h" />
|
||||
<ClInclude Include="MemMap.h" />
|
||||
<ClInclude Include="MemMapHelpers.h" />
|
||||
<ClInclude Include="MIPS\ARM64\Arm64Asm.h">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClInclude>
|
||||
<ClInclude Include="MIPS\ARM64\Arm64Jit.h">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClInclude>
|
||||
<ClInclude Include="MIPS\ARM64\Arm64RegCache.h">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClInclude>
|
||||
<ClInclude Include="MIPS\ARM64\Arm64RegCacheFPU.h">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClInclude>
|
||||
<ClInclude Include="MIPS\ARM\ArmAsm.h">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
|
@ -632,4 +716,4 @@
|
|||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
|
|
@ -557,6 +557,16 @@
|
|||
<ClCompile Include="HLE\sceSfmt19937.cpp">
|
||||
<Filter>HLE\Libraries</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="MIPS\ARM64\Arm64CompBranch.cpp" />
|
||||
<ClCompile Include="MIPS\ARM64\Arm64CompFPU.cpp" />
|
||||
<ClCompile Include="MIPS\ARM64\Arm64CompLoadStore.cpp" />
|
||||
<ClCompile Include="MIPS\ARM64\Arm64CompReplace.cpp" />
|
||||
<ClCompile Include="MIPS\ARM64\Arm64CompVFPU.cpp" />
|
||||
<ClCompile Include="MIPS\ARM64\Arm64Jit.cpp" />
|
||||
<ClCompile Include="MIPS\ARM64\Arm64RegCache.cpp" />
|
||||
<ClCompile Include="MIPS\ARM64\Arm64RegCacheFPU.cpp" />
|
||||
<ClCompile Include="MIPS\ARM64\Arm64Asm.cpp" />
|
||||
<ClCompile Include="MIPS\ARM64\Arm64CompALU.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="ELF\ElfReader.h">
|
||||
|
@ -1064,6 +1074,10 @@
|
|||
<ClInclude Include="MemMapHelpers.h">
|
||||
<Filter>Core</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="MIPS\ARM64\Arm64Jit.h" />
|
||||
<ClInclude Include="MIPS\ARM64\Arm64RegCache.h" />
|
||||
<ClInclude Include="MIPS\ARM64\Arm64RegCacheFPU.h" />
|
||||
<ClInclude Include="MIPS\ARM64\Arm64Asm.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="CMakeLists.txt" />
|
||||
|
@ -1071,4 +1085,4 @@
|
|||
<None Include="..\android\jni\Android.mk" />
|
||||
<None Include="GameLogNotes.txt" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
|
|
@ -1015,6 +1015,8 @@ static int Hook_gakuenheaven_download_frame() {
|
|||
|
||||
#ifdef ARM
|
||||
#define JITFUNC(f) (&MIPSComp::ArmJit::f)
|
||||
#elif defined(ARM64)
|
||||
#define JITFUNC(f) (&MIPSComp::Arm64Jit::f)
|
||||
#elif defined(_M_X64) || defined(_M_IX86)
|
||||
#define JITFUNC(f) (&MIPSComp::Jit::f)
|
||||
#elif defined(MIPS)
|
||||
|
|
86
Core/MIPS/ARM64/Arm64Asm.cpp
Normal file
86
Core/MIPS/ARM64/Arm64Asm.cpp
Normal file
|
@ -0,0 +1,86 @@
|
|||
// Copyright (c) 2015- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/System.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Common/MemoryUtil.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/Arm64Emitter.h"
|
||||
#include "Core/MIPS/ARM64/Arm64Jit.h"
|
||||
#include "Core/MIPS/ARM64/Arm64Asm.h"
|
||||
#include "Core/MIPS/JitCommon/JitCommon.h"
|
||||
|
||||
using namespace Arm64Gen;
|
||||
|
||||
//static int temp32; // unused?
|
||||
|
||||
static const bool enableDebug = false;
|
||||
|
||||
//static bool enableStatistics = false; //unused?
|
||||
|
||||
//The standard ARM calling convention allocates the 16 ARM registers as:
|
||||
|
||||
// r15 is the program counter.
|
||||
// r14 is the link register. (The BL instruction, used in a subroutine call, stores the return address in this register).
|
||||
// r13 is the stack pointer. (The Push/Pop instructions in "Thumb" operating mode use this register only).
|
||||
// r12 is the Intra-Procedure-call scratch register.
|
||||
// r4 to r11: used to hold local variables.
|
||||
// r0 to r3: used to hold argument values passed to a subroutine, and also hold results returned from a subroutine.
|
||||
|
||||
// Mappable registers:
|
||||
// R2, R3, R4, R5, R6, R8, R11
|
||||
|
||||
// STATIC ALLOCATION ARM:
|
||||
// R10 : MIPS state
|
||||
// R11 : Memory base pointer.
|
||||
// R7 : Down counter
|
||||
extern volatile CoreState coreState;
|
||||
|
||||
void ShowPC(u32 sp) {
|
||||
if (currentMIPS) {
|
||||
ERROR_LOG(JIT, "ShowPC : %08x ArmSP : %08x", currentMIPS->pc, sp);
|
||||
} else {
|
||||
ERROR_LOG(JIT, "Universe corrupt?");
|
||||
}
|
||||
}
|
||||
|
||||
void DisassembleArm(const u8 *data, int size);
|
||||
|
||||
// PLAN: no more block numbers - crazy opcodes just contain offset within
|
||||
// dynarec buffer
|
||||
// At this offset - 4, there is an int specifying the block number.
|
||||
|
||||
namespace MIPSComp {
|
||||
|
||||
using namespace Arm64JitConstants;
|
||||
|
||||
void Arm64Jit::GenerateFixedCode()
|
||||
{
|
||||
|
||||
// Uncomment if you want to see the output...
|
||||
// INFO_LOG(JIT, "THE DISASM ========================");
|
||||
// DisassembleArm(enterCode, GetCodePtr() - enterCode);
|
||||
// INFO_LOG(JIT, "END OF THE DISASM ========================");
|
||||
|
||||
// Don't forget to zap the instruction cache!
|
||||
FlushIcache();
|
||||
}
|
||||
|
||||
} // namespace MIPSComp
|
22
Core/MIPS/ARM64/Arm64Asm.h
Normal file
22
Core/MIPS/ARM64/Arm64Asm.h
Normal file
|
@ -0,0 +1,22 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
|
||||
// Runtime generated assembly routines, like the Dispatcher.
|
83
Core/MIPS/ARM64/Arm64CompALU.cpp
Normal file
83
Core/MIPS/ARM64/Arm64CompALU.cpp
Normal file
|
@ -0,0 +1,83 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/ARM64/Arm64Jit.h"
|
||||
#include "Core/MIPS/ARM64/Arm64RegCache.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
|
||||
using namespace MIPSAnalyst;
|
||||
|
||||
#define _RS MIPS_GET_RS(op)
|
||||
#define _RT MIPS_GET_RT(op)
|
||||
#define _RD MIPS_GET_RD(op)
|
||||
#define _FS MIPS_GET_FS(op)
|
||||
#define _FT MIPS_GET_FT(op)
|
||||
#define _FD MIPS_GET_FD(op)
|
||||
#define _SA MIPS_GET_SA(op)
|
||||
#define _POS ((op>> 6) & 0x1F)
|
||||
#define _SIZE ((op>>11) & 0x1F)
|
||||
#define _IMM16 (signed short)(op & 0xFFFF)
|
||||
#define _IMM26 (op & 0x03FFFFFF)
|
||||
|
||||
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
||||
// Currently known non working ones should have DISABLE.
|
||||
|
||||
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
|
||||
#define CONDITIONAL_DISABLE ;
|
||||
#define DISABLE { Comp_Generic(op); return; }
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
using namespace Arm64Gen;
|
||||
using namespace Arm64JitConstants;
|
||||
|
||||
void Arm64Jit::Comp_IType(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_RType2(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_RType3(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_ShiftType(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Special3(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Allegrex(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Allegrex2(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_MulDivType(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
}
|
614
Core/MIPS/ARM64/Arm64CompBranch.cpp
Normal file
614
Core/MIPS/ARM64/Arm64CompBranch.cpp
Normal file
|
@ -0,0 +1,614 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include "Core/Reporting.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/HLE/HLE.h"
|
||||
#include "Core/HLE/HLETables.h"
|
||||
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/MIPSAnalyst.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
|
||||
#include "Core/MIPS/ARM64/Arm64Jit.h"
|
||||
#include "Core/MIPS/ARM64/Arm64RegCache.h"
|
||||
#include "Core/MIPS/JitCommon/JitBlockCache.h"
|
||||
|
||||
#include "Common/Arm64Emitter.h"
|
||||
|
||||
#define _RS MIPS_GET_RS(op)
|
||||
#define _RT MIPS_GET_RT(op)
|
||||
#define _RD MIPS_GET_RD(op)
|
||||
#define _FS MIPS_GET_FS(op)
|
||||
#define _FT MIPS_GET_FT(op)
|
||||
#define _FD MIPS_GET_FD(op)
|
||||
#define _SA MIPS_GET_SA(op)
|
||||
#define _POS ((op>> 6) & 0x1F)
|
||||
#define _SIZE ((op>>11) & 0x1F)
|
||||
#define _IMM16 (signed short)(op & 0xFFFF)
|
||||
#define _IMM26 (op & 0x03FFFFFF)
|
||||
|
||||
#define LOOPOPTIMIZATION 0
|
||||
|
||||
// We can disable nice delay slots.
|
||||
// #define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false;
|
||||
#define CONDITIONAL_NICE_DELAYSLOT ;
|
||||
|
||||
using namespace MIPSAnalyst;
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
using namespace Arm64Gen;
|
||||
using namespace Arm64JitConstants;
|
||||
|
||||
void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
|
||||
{
|
||||
if (js.inDelaySlot) {
|
||||
ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = _IMM16 << 2;
|
||||
MIPSGPReg rt = _RT;
|
||||
MIPSGPReg rs = _RS;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
bool immBranch = false;
|
||||
bool immBranchTaken = false;
|
||||
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
|
||||
// The cc flags are opposites: when NOT to take the branch.
|
||||
bool immBranchNotTaken;
|
||||
s32 rsImm = (s32)gpr.GetImm(rs);
|
||||
s32 rtImm = (s32)gpr.GetImm(rt);
|
||||
|
||||
switch (cc)
|
||||
{
|
||||
case CC_EQ: immBranchNotTaken = rsImm == rtImm; break;
|
||||
case CC_NEQ: immBranchNotTaken = rsImm != rtImm; break;
|
||||
default: immBranchNotTaken = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp().");
|
||||
}
|
||||
immBranch = true;
|
||||
immBranchTaken = !immBranchNotTaken;
|
||||
}
|
||||
|
||||
if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) {
|
||||
if (!immBranchTaken) {
|
||||
// Skip the delay slot if likely, otherwise it'll be the next instruction.
|
||||
if (likely)
|
||||
js.compilerPC += 4;
|
||||
return;
|
||||
}
|
||||
|
||||
// Branch taken. Always compile the delay slot, and then go to dest.
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
AddContinuedBlock(targetAddr);
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
|
||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
|
||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
|
||||
if (immBranch) {
|
||||
// Continuing is handled above, this is just static jumping.
|
||||
if (immBranchTaken || !likely)
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
else
|
||||
FlushAll();
|
||||
|
||||
const u32 destAddr = immBranchTaken ? targetAddr : js.compilerPC + 8;
|
||||
WriteExit(destAddr, js.nextExit++);
|
||||
} else {
|
||||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
// We might be able to flip the condition (EQ/NEQ are easy.)
|
||||
const bool canFlip = cc == CC_EQ || cc == CC_NEQ;
|
||||
|
||||
// TODO ARM64: Optimize for immediates
|
||||
gpr.MapInIn(rs, rt);
|
||||
CMP(gpr.R(rs), gpr.R(rt));
|
||||
|
||||
Arm64Gen::FixupBranch ptr;
|
||||
if (!likely) {
|
||||
if (!delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
|
||||
else
|
||||
FlushAll();
|
||||
ptr = B(cc);
|
||||
} else {
|
||||
FlushAll();
|
||||
ptr = B(cc);
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
|
||||
// Take the branch
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
WriteExit(js.compilerPC + 8, js.nextExit++);
|
||||
}
|
||||
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
|
||||
void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool likely)
|
||||
{
|
||||
if (js.inDelaySlot) {
|
||||
ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = _IMM16 << 2;
|
||||
MIPSGPReg rs = _RS;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
bool immBranch = false;
|
||||
bool immBranchTaken = false;
|
||||
if (gpr.IsImm(rs)) {
|
||||
// The cc flags are opposites: when NOT to take the branch.
|
||||
bool immBranchNotTaken;
|
||||
s32 imm = (s32)gpr.GetImm(rs);
|
||||
|
||||
switch (cc)
|
||||
{
|
||||
case CC_GT: immBranchNotTaken = imm > 0; break;
|
||||
case CC_GE: immBranchNotTaken = imm >= 0; break;
|
||||
case CC_LT: immBranchNotTaken = imm < 0; break;
|
||||
case CC_LE: immBranchNotTaken = imm <= 0; break;
|
||||
default: immBranchNotTaken = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp().");
|
||||
}
|
||||
immBranch = true;
|
||||
immBranchTaken = !immBranchNotTaken;
|
||||
}
|
||||
|
||||
if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) {
|
||||
if (!immBranchTaken) {
|
||||
// Skip the delay slot if likely, otherwise it'll be the next instruction.
|
||||
if (likely)
|
||||
js.compilerPC += 4;
|
||||
return;
|
||||
}
|
||||
|
||||
// Branch taken. Always compile the delay slot, and then go to dest.
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
if (andLink)
|
||||
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
|
||||
|
||||
AddContinuedBlock(targetAddr);
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
|
||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
|
||||
if (immBranch) {
|
||||
// Continuing is handled above, this is just static jumping.
|
||||
if (immBranchTaken && andLink)
|
||||
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
|
||||
if (immBranchTaken || !likely)
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
else
|
||||
FlushAll();
|
||||
|
||||
const u32 destAddr = immBranchTaken ? targetAddr : js.compilerPC + 8;
|
||||
WriteExit(destAddr, js.nextExit++);
|
||||
} else {
|
||||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
gpr.MapReg(rs);
|
||||
CMP(gpr.R(rs), 0);
|
||||
|
||||
Arm64Gen::FixupBranch ptr;
|
||||
if (!likely)
|
||||
{
|
||||
if (!delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
|
||||
else
|
||||
FlushAll();
|
||||
ptr = B(cc);
|
||||
}
|
||||
else
|
||||
{
|
||||
FlushAll();
|
||||
ptr = B(cc);
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
|
||||
// Take the branch
|
||||
if (andLink)
|
||||
{
|
||||
gpr.SetRegImm(SCRATCHREG1, js.compilerPC + 8);
|
||||
STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, MIPS_REG_RA * 4);
|
||||
}
|
||||
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
WriteExit(js.compilerPC + 8, js.nextExit++);
|
||||
}
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
|
||||
void Arm64Jit::Comp_RelBranch(MIPSOpcode op)
|
||||
{
|
||||
// The CC flags here should be opposite of the actual branch becuase they skip the branching action.
|
||||
switch (op >> 26)
|
||||
{
|
||||
case 4: BranchRSRTComp(op, CC_NEQ, false); break;//beq
|
||||
case 5: BranchRSRTComp(op, CC_EQ, false); break;//bne
|
||||
|
||||
case 6: BranchRSZeroComp(op, CC_GT, false, false); break;//blez
|
||||
case 7: BranchRSZeroComp(op, CC_LE, false, false); break;//bgtz
|
||||
|
||||
case 20: BranchRSRTComp(op, CC_NEQ, true); break;//beql
|
||||
case 21: BranchRSRTComp(op, CC_EQ, true); break;//bnel
|
||||
|
||||
case 22: BranchRSZeroComp(op, CC_GT, false, true); break;//blezl
|
||||
case 23: BranchRSZeroComp(op, CC_LE, false, true); break;//bgtzl
|
||||
|
||||
default:
|
||||
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_RelBranchRI(MIPSOpcode op)
|
||||
{
|
||||
switch ((op >> 16) & 0x1F)
|
||||
{
|
||||
case 0: BranchRSZeroComp(op, CC_GE, false, false); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltz
|
||||
case 1: BranchRSZeroComp(op, CC_LT, false, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez
|
||||
case 2: BranchRSZeroComp(op, CC_GE, false, true); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 8; break;//bltzl
|
||||
case 3: BranchRSZeroComp(op, CC_LT, false, true); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl
|
||||
case 16: BranchRSZeroComp(op, CC_GE, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltzal
|
||||
case 17: BranchRSZeroComp(op, CC_LT, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgezal
|
||||
case 18: BranchRSZeroComp(op, CC_GE, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else SkipLikely(); break;//bltzall
|
||||
case 19: BranchRSZeroComp(op, CC_LT, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else SkipLikely(); break;//bgezall
|
||||
default:
|
||||
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If likely is set, discard the branch slot if NOT taken.
|
||||
void Arm64Jit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely) {
|
||||
if (js.inDelaySlot) {
|
||||
ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = _IMM16 << 2;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||
bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
gpr.MapReg(MIPS_REG_FPCOND);
|
||||
TSTI2R(gpr.R(MIPS_REG_FPCOND), 1, W0);
|
||||
|
||||
Arm64Gen::FixupBranch ptr;
|
||||
if (!likely) {
|
||||
if (!delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
|
||||
else
|
||||
FlushAll();
|
||||
ptr = B(cc);
|
||||
} else {
|
||||
FlushAll();
|
||||
ptr = B(cc);
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
|
||||
// Take the branch
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
WriteExit(js.compilerPC + 8, js.nextExit++);
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_FPUBranch(MIPSOpcode op) {
|
||||
switch((op >> 16) & 0x1f) {
|
||||
case 0: BranchFPFlag(op, CC_NEQ, false); break; // bc1f
|
||||
case 1: BranchFPFlag(op, CC_EQ, false); break; // bc1t
|
||||
case 2: BranchFPFlag(op, CC_NEQ, true); break; // bc1fl
|
||||
case 3: BranchFPFlag(op, CC_EQ, true); break; // bc1tl
|
||||
default:
|
||||
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If likely is set, discard the branch slot if NOT taken.
|
||||
void Arm64Jit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely) {
|
||||
if (js.inDelaySlot) {
|
||||
ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
int offset = _IMM16 << 2;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||
|
||||
// Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle)
|
||||
// The behavior is undefined - the CPU may take the second branch even if the first one passes.
|
||||
// However, it does consistently try each branch, which these games seem to expect.
|
||||
bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp);
|
||||
bool delaySlotIsNice = !delaySlotIsBranch && IsDelaySlotNiceVFPU(op, delaySlotOp);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
|
||||
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", js.compilerPC);
|
||||
|
||||
int imm3 = (op >> 18) & 7;
|
||||
|
||||
gpr.MapReg(MIPS_REG_VFPUCC);
|
||||
TSTI2R(gpr.R(MIPS_REG_VFPUCC), 1 << imm3, W0);
|
||||
|
||||
Arm64Gen::FixupBranch ptr;
|
||||
js.inDelaySlot = true;
|
||||
if (!likely)
|
||||
{
|
||||
if (!delaySlotIsNice && !delaySlotIsBranch)
|
||||
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
|
||||
else
|
||||
FlushAll();
|
||||
ptr = B(cc);
|
||||
}
|
||||
else
|
||||
{
|
||||
FlushAll();
|
||||
ptr = B(cc);
|
||||
if (!delaySlotIsBranch)
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
js.inDelaySlot = false;
|
||||
|
||||
// Take the branch
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8);
|
||||
WriteExit(notTakenTarget, js.nextExit++);
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_VBranch(MIPSOpcode op)
|
||||
{
|
||||
switch ((op >> 16) & 3)
|
||||
{
|
||||
case 0: BranchVFPUFlag(op, CC_NEQ, false); break; // bvf
|
||||
case 1: BranchVFPUFlag(op, CC_EQ, false); break; // bvt
|
||||
case 2: BranchVFPUFlag(op, CC_NEQ, true); break; // bvfl
|
||||
case 3: BranchVFPUFlag(op, CC_EQ, true); break; // bvtl
|
||||
}
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Jump(MIPSOpcode op) {
|
||||
if (js.inDelaySlot) {
|
||||
ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
u32 off = _IMM26 << 2;
|
||||
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
|
||||
|
||||
// Might be a stubbed address or something?
|
||||
if (!Memory::IsValidAddress(targetAddr)) {
|
||||
if (js.nextExit == 0) {
|
||||
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr);
|
||||
} else {
|
||||
js.compiling = false;
|
||||
}
|
||||
// TODO: Mark this block dirty or something? May be indication it will be changed by imports.
|
||||
return;
|
||||
}
|
||||
|
||||
switch (op >> 26) {
|
||||
case 2: //j
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
|
||||
AddContinuedBlock(targetAddr);
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
FlushAll();
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
break;
|
||||
|
||||
case 3: //jal
|
||||
if (ReplaceJalTo(targetAddr))
|
||||
return;
|
||||
|
||||
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
|
||||
AddContinuedBlock(targetAddr);
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
FlushAll();
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
break;
|
||||
|
||||
default:
|
||||
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
|
||||
break;
|
||||
}
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_JumpReg(MIPSOpcode op)
|
||||
{
|
||||
if (js.inDelaySlot) {
|
||||
ERROR_LOG_REPORT(JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
|
||||
return;
|
||||
}
|
||||
MIPSGPReg rs = _RS;
|
||||
MIPSGPReg rd = _RD;
|
||||
bool andLink = (op & 0x3f) == 9;
|
||||
|
||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
|
||||
if (andLink && rs == rd)
|
||||
delaySlotIsNice = false;
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
|
||||
ARM64Reg destReg = X8;
|
||||
if (IsSyscall(delaySlotOp)) {
|
||||
gpr.MapReg(rs);
|
||||
MovToPC(gpr.R(rs)); // For syscall to be able to return.
|
||||
if (andLink)
|
||||
gpr.SetImm(rd, js.compilerPC + 8);
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
return; // Syscall wrote exit code.
|
||||
} else if (delaySlotIsNice) {
|
||||
if (andLink)
|
||||
gpr.SetImm(rd, js.compilerPC + 8);
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
if (!andLink && rs == MIPS_REG_RA && g_Config.bDiscardRegsOnJRRA) {
|
||||
// According to the MIPS ABI, there are some regs we don't need to preserve.
|
||||
// Let's discard them so we don't need to write them back.
|
||||
// NOTE: Not all games follow the MIPS ABI! Tekken 6, for example, will crash
|
||||
// with this enabled.
|
||||
gpr.DiscardR(MIPS_REG_COMPILER_SCRATCH);
|
||||
for (int i = MIPS_REG_A0; i <= MIPS_REG_T7; i++)
|
||||
gpr.DiscardR((MIPSGPReg)i);
|
||||
gpr.DiscardR(MIPS_REG_T8);
|
||||
gpr.DiscardR(MIPS_REG_T9);
|
||||
}
|
||||
|
||||
if (jo.continueJumps && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) {
|
||||
AddContinuedBlock(gpr.GetImm(rs));
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = gpr.GetImm(rs) - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
|
||||
gpr.MapReg(rs);
|
||||
destReg = gpr.R(rs); // Safe because FlushAll doesn't change any regs
|
||||
FlushAll();
|
||||
} else {
|
||||
// Delay slot - this case is very rare, might be able to free up R8.
|
||||
gpr.MapReg(rs);
|
||||
MOV(W8, gpr.R(rs));
|
||||
if (andLink)
|
||||
gpr.SetImm(rd, js.compilerPC + 8);
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
FlushAll();
|
||||
}
|
||||
|
||||
switch (op & 0x3f)
|
||||
{
|
||||
case 8: //jr
|
||||
break;
|
||||
case 9: //jalr
|
||||
break;
|
||||
default:
|
||||
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
|
||||
break;
|
||||
}
|
||||
|
||||
WriteExitDestInR(destReg);
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
|
||||
void Arm64Jit::Comp_Syscall(MIPSOpcode op)
|
||||
{
|
||||
if (!g_Config.bSkipDeadbeefFilling)
|
||||
{
|
||||
// All of these will be overwritten with DEADBEEF anyway.
|
||||
gpr.DiscardR(MIPS_REG_COMPILER_SCRATCH);
|
||||
// We need to keep A0 - T3, which are used for args.
|
||||
gpr.DiscardR(MIPS_REG_T4);
|
||||
gpr.DiscardR(MIPS_REG_T5);
|
||||
gpr.DiscardR(MIPS_REG_T6);
|
||||
gpr.DiscardR(MIPS_REG_T7);
|
||||
gpr.DiscardR(MIPS_REG_T8);
|
||||
gpr.DiscardR(MIPS_REG_T9);
|
||||
|
||||
gpr.DiscardR(MIPS_REG_HI);
|
||||
gpr.DiscardR(MIPS_REG_LO);
|
||||
}
|
||||
|
||||
// If we're in a delay slot, this is off by one.
|
||||
const int offset = js.inDelaySlot ? -1 : 0;
|
||||
WriteDownCount(offset);
|
||||
RestoreRoundingMode();
|
||||
js.downcountAmount = -offset;
|
||||
|
||||
// TODO: Maybe discard v0, v1, and some temps? Definitely at?
|
||||
FlushAll();
|
||||
|
||||
SaveDowncount();
|
||||
// Skip the CallSyscall where possible.
|
||||
void *quickFunc = GetQuickSyscallFunc(op);
|
||||
if (quickFunc)
|
||||
{
|
||||
gpr.SetRegImm(W0, (u32)(intptr_t)GetSyscallInfo(op));
|
||||
// Already flushed, so X1 is safe.
|
||||
QuickCallFunction(X1, quickFunc);
|
||||
}
|
||||
else
|
||||
{
|
||||
gpr.SetRegImm(W0, op.encoding);
|
||||
QuickCallFunction(X1, (void *)&CallSyscall);
|
||||
}
|
||||
ApplyRoundingMode();
|
||||
RestoreDowncount();
|
||||
|
||||
WriteSyscallExit();
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Break(MIPSOpcode op)
|
||||
{
|
||||
Comp_Generic(op);
|
||||
WriteSyscallExit();
|
||||
js.compiling = false;
|
||||
}
|
||||
|
||||
} // namespace Mipscomp
|
75
Core/MIPS/ARM64/Arm64CompFPU.cpp
Normal file
75
Core/MIPS/ARM64/Arm64CompFPU.cpp
Normal file
|
@ -0,0 +1,75 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include "Core/Config.h"
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
|
||||
#include "Core/MIPS/ARM64/Arm64Jit.h"
|
||||
#include "Core/MIPS/ARM64/Arm64RegCache.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
|
||||
#define _RS MIPS_GET_RS(op)
|
||||
#define _RT MIPS_GET_RT(op)
|
||||
#define _RD MIPS_GET_RD(op)
|
||||
#define _FS MIPS_GET_FS(op)
|
||||
#define _FT MIPS_GET_FT(op)
|
||||
#define _FD MIPS_GET_FD(op)
|
||||
#define _SA MIPS_GET_SA(op)
|
||||
#define _POS ((op>> 6) & 0x1F)
|
||||
#define _SIZE ((op>>11) & 0x1F)
|
||||
#define _IMM16 (signed short)(op & 0xFFFF)
|
||||
#define _IMM26 (op & 0x03FFFFFF)
|
||||
|
||||
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
||||
// Currently known non working ones should have DISABLE.
|
||||
|
||||
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
|
||||
#define CONDITIONAL_DISABLE ;
|
||||
#define DISABLE { Comp_Generic(op); return; }
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
using namespace Arm64Gen;
|
||||
using namespace Arm64JitConstants;
|
||||
|
||||
void Arm64Jit::Comp_FPU3op(MIPSOpcode op)
|
||||
{
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_FPULS(MIPSOpcode op)
|
||||
{
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_FPUComp(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_FPU2op(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_mxc1(MIPSOpcode op)
|
||||
{
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
} // namespace MIPSComp
|
83
Core/MIPS/ARM64/Arm64CompLoadStore.cpp
Normal file
83
Core/MIPS/ARM64/Arm64CompLoadStore.cpp
Normal file
|
@ -0,0 +1,83 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
|
||||
// Optimization ideas:
|
||||
//
|
||||
// It's common to see sequences of stores writing or reading to a contiguous set of
|
||||
// addresses in function prologues/epilogues:
|
||||
// sw s5, 104(sp)
|
||||
// sw s4, 100(sp)
|
||||
// sw s3, 96(sp)
|
||||
// sw s2, 92(sp)
|
||||
// sw s1, 88(sp)
|
||||
// sw s0, 84(sp)
|
||||
// sw ra, 108(sp)
|
||||
// mov s4, a0
|
||||
// mov s3, a1
|
||||
// ...
|
||||
// Such sequences could easily be detected and turned into nice contiguous
|
||||
// sequences of ARM stores instead of the current 3 instructions per sw/lw.
|
||||
//
|
||||
// Also, if we kept track of the likely register content of a cached register,
|
||||
// (pointer or data), we could avoid many BIC instructions.
|
||||
|
||||
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSAnalyst.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/ARM64/Arm64Jit.h"
|
||||
#include "Core/MIPS/ARM64/Arm64RegCache.h"
|
||||
|
||||
#define _RS MIPS_GET_RS(op)
|
||||
#define _RT MIPS_GET_RT(op)
|
||||
#define _RD MIPS_GET_RD(op)
|
||||
#define _FS MIPS_GET_FS(op)
|
||||
#define _FT MIPS_GET_FT(op)
|
||||
#define _FD MIPS_GET_FD(op)
|
||||
#define _SA MIPS_GET_SA(op)
|
||||
#define _POS ((op>> 6) & 0x1F)
|
||||
#define _SIZE ((op>>11) & 0x1F)
|
||||
#define _IMM16 (signed short)(op & 0xFFFF)
|
||||
#define _IMM26 (op & 0x03FFFFFF)
|
||||
|
||||
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
||||
// Currently known non working ones should have DISABLE.
|
||||
|
||||
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
|
||||
#define CONDITIONAL_DISABLE ;
|
||||
#define DISABLE { Comp_Generic(op); return; }
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
using namespace Arm64Gen;
|
||||
using namespace Arm64JitConstants;
|
||||
|
||||
void Arm64Jit::Comp_ITypeMemLR(MIPSOpcode op, bool load) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_ITypeMem(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Cache(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
}
|
33
Core/MIPS/ARM64/Arm64CompReplace.cpp
Normal file
33
Core/MIPS/ARM64/Arm64CompReplace.cpp
Normal file
|
@ -0,0 +1,33 @@
|
|||
// Copyright (c) 2013- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/MIPS/JitCommon/JitCommon.h"
|
||||
#include "Core/MIPS/ARM64/Arm64Jit.h"
|
||||
#include "Core/MIPS/ARM64/Arm64RegCache.h"
|
||||
|
||||
namespace MIPSComp {
|
||||
|
||||
int Arm64Jit::Replace_fabsf() {
|
||||
// TODO ARM64
|
||||
// fpr.MapDirtyIn(0, 12);
|
||||
// VABS(fpr.R(0), fpr.R(12));
|
||||
return 4; // Number of instructions in the MIPS function
|
||||
}
|
||||
|
||||
}
|
231
Core/MIPS/ARM64/Arm64CompVFPU.cpp
Normal file
231
Core/MIPS/ARM64/Arm64CompVFPU.cpp
Normal file
|
@ -0,0 +1,231 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <cmath>
|
||||
#include "math/math_util.h"
|
||||
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
#include "Core/MIPS/MIPSAnalyst.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/Reporting.h"
|
||||
|
||||
#include "Core/MIPS/ARM64/Arm64Jit.h"
|
||||
#include "Core/MIPS/ARM64/Arm64RegCache.h"
|
||||
|
||||
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
||||
// Currently known non working ones should have DISABLE.
|
||||
|
||||
// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
|
||||
|
||||
#define CONDITIONAL_DISABLE ;
|
||||
#define DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; }
|
||||
#define _RS MIPS_GET_RS(op)
|
||||
#define _RT MIPS_GET_RT(op)
|
||||
#define _RD MIPS_GET_RD(op)
|
||||
#define _FS MIPS_GET_FS(op)
|
||||
#define _FT MIPS_GET_FT(op)
|
||||
#define _FD MIPS_GET_FD(op)
|
||||
#define _SA MIPS_GET_SA(op)
|
||||
#define _POS ((op>> 6) & 0x1F)
|
||||
#define _SIZE ((op>>11) & 0x1F)
|
||||
#define _IMM16 (signed short)(op & 0xFFFF)
|
||||
#define _IMM26 (op & 0x03FFFFFF)
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
using namespace Arm64Gen;
|
||||
using namespace Arm64JitConstants;
|
||||
|
||||
void Arm64Jit::Comp_VPFX(MIPSOpcode op)
|
||||
{
|
||||
CONDITIONAL_DISABLE;
|
||||
int data = op & 0xFFFFF;
|
||||
int regnum = (op >> 24) & 3;
|
||||
switch (regnum) {
|
||||
case 0: // S
|
||||
js.prefixS = data;
|
||||
js.prefixSFlag = JitState::PREFIX_KNOWN_DIRTY;
|
||||
break;
|
||||
case 1: // T
|
||||
js.prefixT = data;
|
||||
js.prefixTFlag = JitState::PREFIX_KNOWN_DIRTY;
|
||||
break;
|
||||
case 2: // D
|
||||
js.prefixD = data;
|
||||
js.prefixDFlag = JitState::PREFIX_KNOWN_DIRTY;
|
||||
break;
|
||||
default:
|
||||
ERROR_LOG(CPU, "VPFX - bad regnum %i : data=%08x", regnum, data);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_SV(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_SVQ(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_VVectorInit(MIPSOpcode op)
|
||||
{
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_VIdt(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_VMatrixInit(MIPSOpcode op)
|
||||
{
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_VHdp(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
static const float MEMORY_ALIGNED16(vavg_table[4]) = { 1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f };
|
||||
|
||||
void Arm64Jit::Comp_Vhoriz(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_VDot(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_VecDo3(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_VV2Op(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vi2f(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vh2f(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vf2i(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Mftv(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vmfvc(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vmtvc(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vmmov(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_VScl(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vmmul(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vmscl(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vtfm(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_VCrs(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_VDet(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vi2x(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vx2i(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_VCrossQuat(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vcmp(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vcmov(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Viim(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vfim(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vcst(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
// Very heavily used by FF:CC. Should be replaced by a fast approximation instead of
|
||||
// calling the math library.
|
||||
// Apparently this may not work on hardfp. I don't think we have any platforms using this though.
|
||||
void Arm64Jit::Comp_VRot(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vsgn(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vocp(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_ColorConv(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Arm64Jit::Comp_Vbfy(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
}
|
||||
}
|
485
Core/MIPS/ARM64/Arm64Jit.cpp
Normal file
485
Core/MIPS/ARM64/Arm64Jit.cpp
Normal file
|
@ -0,0 +1,485 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "Common/ChunkFile.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
|
||||
#include "Core/Reporting.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/Debugger/SymbolMap.h"
|
||||
#include "Core/MemMap.h"
|
||||
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSCodeUtils.h"
|
||||
#include "Core/MIPS/MIPSInt.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
#include "Core/HLE/ReplaceTables.h"
|
||||
#include "Core/MIPS/ARM64/Arm64RegCache.h"
|
||||
#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h"
|
||||
|
||||
#include "Core/MIPS/ARM64/Arm64Jit.h"
|
||||
|
||||
#include "ext/disarm.h"
|
||||
|
||||
using namespace Arm64JitConstants;
|
||||
|
||||
void DisassembleArm64Print(const u8 *data, int size) {
|
||||
ILOG("ARM64 TODO");
|
||||
}
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
using namespace Arm64Gen;
|
||||
using namespace Arm64JitConstants;
|
||||
|
||||
Arm64Jit::Arm64Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &js, &jo), fpr(mips, &js, &jo), mips_(mips) {
|
||||
logBlocks = 0;
|
||||
dontLogBlocks = 0;
|
||||
blocks.Init();
|
||||
gpr.SetEmitter(this);
|
||||
fpr.SetEmitter(this);
|
||||
AllocCodeSpace(1024 * 1024 * 16); // 32MB is the absolute max because that's what an ARM branch instruction can reach, backwards and forwards.
|
||||
GenerateFixedCode();
|
||||
|
||||
js.startDefaultPrefix = mips_->HasDefaultPrefix();
|
||||
}
|
||||
|
||||
Arm64Jit::~Arm64Jit() {
|
||||
}
|
||||
|
||||
void Arm64Jit::DoState(PointerWrap &p)
|
||||
{
|
||||
auto s = p.Section("Jit", 1, 2);
|
||||
if (!s)
|
||||
return;
|
||||
|
||||
p.Do(js.startDefaultPrefix);
|
||||
if (s >= 2) {
|
||||
p.Do(js.hasSetRounding);
|
||||
js.lastSetRounding = 0;
|
||||
} else {
|
||||
js.hasSetRounding = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// This is here so the savestate matches between jit and non-jit.
|
||||
void Arm64Jit::DoDummyState(PointerWrap &p)
|
||||
{
|
||||
auto s = p.Section("Jit", 1, 2);
|
||||
if (!s)
|
||||
return;
|
||||
|
||||
bool dummy = false;
|
||||
p.Do(dummy);
|
||||
if (s >= 2) {
|
||||
dummy = true;
|
||||
p.Do(dummy);
|
||||
}
|
||||
}
|
||||
|
||||
void Arm64Jit::FlushAll()
|
||||
{
|
||||
gpr.FlushAll();
|
||||
fpr.FlushAll();
|
||||
FlushPrefixV();
|
||||
}
|
||||
|
||||
void Arm64Jit::FlushPrefixV()
|
||||
{
|
||||
if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) {
|
||||
gpr.SetRegImm(SCRATCHREG1, js.prefixS);
|
||||
STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_SPREFIX]));
|
||||
js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY);
|
||||
}
|
||||
|
||||
if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) {
|
||||
gpr.SetRegImm(SCRATCHREG1, js.prefixT);
|
||||
STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_TPREFIX]));
|
||||
js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY);
|
||||
}
|
||||
|
||||
if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) {
|
||||
gpr.SetRegImm(SCRATCHREG1, js.prefixD);
|
||||
STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_DPREFIX]));
|
||||
js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY);
|
||||
}
|
||||
}
|
||||
|
||||
void Arm64Jit::ClearCache()
|
||||
{
|
||||
blocks.Clear();
|
||||
ClearCodeSpace();
|
||||
GenerateFixedCode();
|
||||
}
|
||||
|
||||
void Arm64Jit::InvalidateCache()
|
||||
{
|
||||
blocks.Clear();
|
||||
}
|
||||
|
||||
void Arm64Jit::InvalidateCacheAt(u32 em_address, int length)
|
||||
{
|
||||
blocks.InvalidateICache(em_address, length);
|
||||
}
|
||||
|
||||
void Arm64Jit::EatInstruction(MIPSOpcode op) {
|
||||
MIPSInfo info = MIPSGetInfo(op);
|
||||
if (info & DELAYSLOT) {
|
||||
ERROR_LOG_REPORT_ONCE(ateDelaySlot, JIT, "Ate a branch op.");
|
||||
}
|
||||
if (js.inDelaySlot) {
|
||||
ERROR_LOG_REPORT_ONCE(ateInDelaySlot, JIT, "Ate an instruction inside a delay slot.");
|
||||
}
|
||||
|
||||
js.numInstructions++;
|
||||
js.compilerPC += 4;
|
||||
js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
|
||||
}
|
||||
|
||||
void Arm64Jit::CompileDelaySlot(int flags)
|
||||
{
|
||||
// TODO ARM64
|
||||
}
|
||||
|
||||
|
||||
// Compiles a block starting at em_address, allocating and finalizing a
// JitBlock for it. May recompile the whole cache from scratch (at most twice:
// once per one-way flag below) if assumptions made while jitting turn out wrong.
void Arm64Jit::Compile(u32 em_address) {
	// Preemptively flush if we're almost out of code space or block slots.
	if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) {
		ClearCache();
	}

	int block_num = blocks.AllocateBlock(em_address);
	JitBlock *b = blocks.GetBlock(block_num);
	DoJit(em_address, b);
	blocks.FinalizeBlock(block_num, jo.enableBlocklink);

	bool cleanSlate = false;

	if (js.hasSetRounding && !js.lastSetRounding) {
		WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks");
		// Won't loop, since hasSetRounding is only ever set to 1.
		js.lastSetRounding = js.hasSetRounding;
		cleanSlate = true;
	}

	// Drat. The VFPU hit an uneaten prefix at the end of a block.
	if (js.startDefaultPrefix && js.MayHavePrefix()) {
		WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", js.compilerPC - 4);
		js.LogPrefix();

		// Let's try that one more time. We won't get back here because we toggled the value.
		js.startDefaultPrefix = false;
		cleanSlate = true;
	}

	if (cleanSlate) {
		// Our assumptions are all wrong so it's clean-slate time.
		ClearCache();
		// Recurse to recompile this block under the new assumptions; bounded
		// because both triggering flags above are one-way toggles.
		Compile(em_address);
	}
}
|
||||
|
||||
// Enters the generated dispatcher loop; returns when it bails out.
// NOTE(review): globalticks is currently unused by this backend — the exit
// condition lives inside the generated code. Confirm this matches the other jits.
void Arm64Jit::RunLoopUntil(u64 globalticks)
{
	((void (*)())enterCode)();
}
|
||||
|
||||
// Compiles one MIPS block into ARM64 code and fills in b's metadata.
// Returns the entry point of the emitted code (the "normal" entry that skips
// the downcount check). Emission order below is significant.
const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b)
{
	// Reset per-block compiler state.
	js.cancel = false;
	js.blockStart = js.compilerPC = mips_->pc;
	js.lastContinuedPC = 0;
	js.initialBlockSize = 0;
	js.nextExit = 0;
	js.downcountAmount = 0;
	js.curBlock = b;
	js.compiling = true;
	js.inDelaySlot = false;
	js.PrefixStart();

	// We add a downcount flag check before the block, used when entering from a linked block.
	// The last block decremented downcounter, and the flag should still be available.
	// Got three variants here of where we position the code, needs detailed benchmarking.

	// NOTE(review): 'bail' and the block below are vestiges of the ARM32 port;
	// b->checkedEntry is never assigned in this WIP version — confirm before
	// enabling block linking.
	FixupBranch bail;
	/*
	if (jo.useBackJump) {
		// Moves the MOVI2R and B *before* checkedEntry, and just branch backwards there.
		// Speedup seems to be zero unfortunately but I guess it may vary from device to device.
		// Not intrusive so keeping it around here to experiment with, may help on ARMv6 due to
		// large/slow construction of 32-bit immediates?
		JumpTarget backJump = GetCodePtr();
		gpr.SetRegImm(R0, js.blockStart);
		B((const void *)outerLoopPCInR0);
		b->checkedEntry = GetCodePtr();
		SetCC(CC_LT);
		B(backJump);
		SetCC(CC_AL);
	} else if (jo.useForwardJump) {
		b->checkedEntry = GetCodePtr();
		SetCC(CC_LT);
		bail = B();
		SetCC(CC_AL);
	} else {
		b->checkedEntry = GetCodePtr();
		SetCC(CC_LT);
		gpr.SetRegImm(R0, js.blockStart);
		B((const void *)outerLoopPCInR0);
		SetCC(CC_AL);
	}*/
	// TODO ARM64

	b->normalEntry = GetCodePtr();
	// TODO: this needs work
	MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address);

	gpr.Start(analysis);
	fpr.Start(analysis);

	int partialFlushOffset = 0;  // NOTE(review): currently unused (ARM32 leftover).

	js.numInstructions = 0;
	// Main compile loop: one MIPS instruction per iteration until a branch (or
	// anything else) clears js.compiling.
	while (js.compiling)
	{
		gpr.SetCompilerPC(js.compilerPC);  // Let it know for log messages
		MIPSOpcode inst = Memory::Read_Opcode_JIT(js.compilerPC);
		//MIPSInfo info = MIPSGetInfo(inst);
		//if (info & IS_VFPU) {
		//	logBlocks = 1;
		//}

		js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);

		MIPSCompileOp(inst);

		js.compilerPC += 4;
		js.numInstructions++;

		// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
		if (GetSpaceLeft() < 0x800 || js.numInstructions >= JitBlockCache::MAX_BLOCK_INSTRUCTIONS)
		{
			FlushAll();
			WriteExit(js.compilerPC, js.nextExit++);
			js.compiling = false;
		}
	}

	if (jo.useForwardJump) {
		//SetJumpTarget(bail);
		//gpr.SetRegImm(R0, js.blockStart);
		//B((const void *)outerLoopPCInR0);
	}

	// Optional debug logging of the source MIPS and the emitted ARM64.
	char temp[256];
	if (logBlocks > 0 && dontLogBlocks == 0) {
		INFO_LOG(JIT, "=============== mips ===============");
		for (u32 cpc = em_address; cpc != js.compilerPC + 4; cpc += 4) {
			MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, true);
			INFO_LOG(JIT, "M: %08x %s", cpc, temp);
		}
	}

	b->codeSize = GetCodePtr() - b->normalEntry;

	if (logBlocks > 0 && dontLogBlocks == 0) {
		INFO_LOG(JIT, "=============== ARM ===============");
		DisassembleArm64Print(b->normalEntry, GetCodePtr() - b->normalEntry);
	}
	if (logBlocks > 0)
		logBlocks--;
	if (dontLogBlocks > 0)
		dontLogBlocks--;

	// Don't forget to zap the newly written instructions in the instruction cache!
	FlushIcache();

	if (js.lastContinuedPC == 0)
		b->originalSize = js.numInstructions;
	else
	{
		// We continued at least once. Add the last proxy and set the originalSize correctly.
		blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
		b->originalSize = js.initialBlockSize;
	}
	return b->normalEntry;
}
|
||||
|
||||
// Records that compilation continues across a branch into 'dest' within the
// same emitted block, registering a proxy block for the range just finished.
void Arm64Jit::AddContinuedBlock(u32 dest)
{
	// The first block is the root block. When we continue, we create proxy blocks after that.
	if (js.lastContinuedPC == 0)
		js.initialBlockSize = js.numInstructions;
	else
		blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
	js.lastContinuedPC = dest;
}
|
||||
|
||||
// Maps a pointer into the emitted code back to a human-readable name.
// Always reports failure for now.
bool Arm64Jit::DescribeCodePtr(const u8 *ptr, std::string &name)
{
	// TODO: Not used by anything yet.
	return false;
}
|
||||
|
||||
// Placeholder handler for the "emuhack run block" pseudo-op.
void Arm64Jit::Comp_RunBlock(MIPSOpcode op)
{
	// This shouldn't be necessary, the dispatcher should catch us before we get here.
	ERROR_LOG(JIT, "Comp_RunBlock should never be reached!");
}
|
||||
|
||||
// Attempts to inline a replacement function at a JAL target.
// Not implemented yet on ARM64, so JALs are always compiled normally.
bool Arm64Jit::ReplaceJalTo(u32 dest) {
	return false;
}
|
||||
|
||||
// Compiles a call to a replacement (HLE) function.
// Not yet implemented in this WIP ARM64 port — currently emits nothing.
void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op)
{
	// TODO ARM64
}
|
||||
|
||||
// Fallback: compiles 'op' as a call into the MIPS interpreter.
// NOTE(review): the actual call emission is still TODO here, so right now this
// flushes registers and fixes up rounding/downcount but executes nothing.
void Arm64Jit::Comp_Generic(MIPSOpcode op)
{
	FlushAll();
	MIPSInterpretFunc func = MIPSGetInterpretFunc(op);
	if (func) {
		SaveDowncount();
		// TODO: Perhaps keep the rounding mode for interp?
		RestoreRoundingMode();
		// gpr.SetRegImm(SCRATCHREG1, js.compilerPC);
		// MovToPC(SCRATCHREG1);
		//gpr.SetRegImm(R0, op.encoding);
		//QuickCallFunction(R1, (void *)func);
		// TODO ARM64
		ApplyRoundingMode();
		RestoreDowncount();
	}

	const MIPSInfo info = MIPSGetInfo(op);
	if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0)
	{
		// If it does eat them, it'll happen in MIPSCompileOp().
		if ((info & OUT_EAT_PREFIX) == 0)
			js.PrefixUnknown();
	}
}
|
||||
|
||||
// Loads the emulated PC from the MIPS context into register r.
void Arm64Jit::MovFromPC(ARM64Reg r) {
	LDR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}
|
||||
|
||||
// Stores register r into the emulated PC in the MIPS context.
void Arm64Jit::MovToPC(ARM64Reg r) {
	STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}
|
||||
|
||||
// Spills the register-resident downcount back to the context (no-op when the
// downcount is kept in memory).
void Arm64Jit::SaveDowncount() {
	if (jo.downcountInRegister)
		STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
}
|
||||
|
||||
// Reloads the downcount from the context into its register (no-op when the
// downcount is kept in memory).
void Arm64Jit::RestoreDowncount() {
	if (jo.downcountInRegister)
		LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
}
|
||||
|
||||
// Emits code to subtract the block's cycle estimate (plus offset) from the
// downcount. Not yet implemented in this WIP ARM64 port.
void Arm64Jit::WriteDownCount(int offset) {
	// TODO ARM64
}
|
||||
|
||||
// Emits code to subtract 'reg' from the downcount, setting flags.
// Note: clobbers X2 when the downcount is not register-resident.
void Arm64Jit::WriteDownCountR(ARM64Reg reg) {
	if (jo.downcountInRegister) {
		SUBS(DOWNCOUNTREG, DOWNCOUNTREG, reg);
	} else {
		LDR(INDEX_UNSIGNED, X2, CTXREG, offsetof(MIPSState, downcount));
		SUBS(X2, X2, reg);
		STR(INDEX_UNSIGNED, X2, CTXREG, offsetof(MIPSState, downcount));
	}
}
|
||||
|
||||
// Emits code restoring the host's default FP rounding mode.
// Not yet implemented in this WIP ARM64 port.
void Arm64Jit::RestoreRoundingMode(bool force) {
	// TODO ARM64
}
|
||||
|
||||
// Emits code applying the emulated CPU's FP rounding mode.
// Not yet implemented in this WIP ARM64 port.
void Arm64Jit::ApplyRoundingMode(bool force) {
	// TODO ARM64
}
|
||||
|
||||
// Emits code re-reading the rounding mode after the guest changes fcr31.
// Not yet implemented in this WIP ARM64 port.
void Arm64Jit::UpdateRoundingMode() {
	// TODO ARM64
}
|
||||
|
||||
// IDEA - could have a WriteDualExit that takes two destinations and two condition flags,
// and just have conditional that set PC "twice". This only works when we fall back to dispatcher
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
// I don't think this gives us that much benefit.

// Emits the exit code for exit slot 'exit_num' of the current block, jumping
// to 'destination'. Links directly to the target block if it is already
// compiled and linking is enabled; otherwise falls back to the dispatcher.
void Arm64Jit::WriteExit(u32 destination, int exit_num)
{
	WriteDownCount();
	//If nobody has taken care of this yet (this can be removed when all branches are done)
	JitBlock *b = js.curBlock;
	b->exitAddress[exit_num] = destination;
	// Remember where this exit was emitted so it can be patched later when the
	// target gets compiled.
	b->exitPtrs[exit_num] = GetWritableCodePtr();

	// Link opportunity!
	int block = blocks.GetBlockNumberFromStartAddress(destination);
	if (block >= 0 && jo.enableBlocklink) {
		// It exists! Joy of joy!
		B(blocks.GetBlock(block)->checkedEntry);
		b->linkStatus[exit_num] = true;
	} else {
		gpr.SetRegImm(X0, destination);
		B((const void *)dispatcherPCInR0);
	}
}
|
||||
|
||||
// Emits an exit whose destination PC is held in 'Reg' (e.g. for jr): writes it
// to the context PC and jumps to the dispatcher.
void Arm64Jit::WriteExitDestInR(ARM64Reg Reg)
{
	MovToPC(Reg);
	WriteDownCount();
	// TODO: shouldn't need an indirect branch here...
	B((const void *)dispatcher);
}
|
||||
|
||||
// Emits the block exit used after a syscall: goes through the dispatcher
// variant that also re-checks the core state (pause/shutdown requests).
void Arm64Jit::WriteSyscallExit()
{
	WriteDownCount();
	B((const void *)dispatcherCheckCoreState);
}
|
||||
|
||||
// Intentionally empty: used for ops that require no emitted code.
void Arm64Jit::Comp_DoNothing(MIPSOpcode op) { }
|
||||
|
||||
#define _RS ((op>>21) & 0x1F)
|
||||
#define _RT ((op>>16) & 0x1F)
|
||||
#define _RD ((op>>11) & 0x1F)
|
||||
#define _FS ((op>>11) & 0x1F)
|
||||
#define _FT ((op>>16) & 0x1F)
|
||||
#define _FD ((op>>6) & 0x1F)
|
||||
#define _POS ((op>>6) & 0x1F)
|
||||
#define _SIZE ((op>>11) & 0x1F)
|
||||
|
||||
//memory regions:
|
||||
//
|
||||
// 08-0A
|
||||
// 48-4A
|
||||
// 04-05
|
||||
// 44-45
|
||||
// mov eax, addrreg
|
||||
// shr eax, 28
|
||||
// mov eax, [table+eax]
|
||||
// mov dreg, [eax+offreg]
|
||||
|
||||
}
|
291
Core/MIPS/ARM64/Arm64Jit.h
Normal file
291
Core/MIPS/ARM64/Arm64Jit.h
Normal file
|
@ -0,0 +1,291 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/ArmCommon.h"
|
||||
#include "Common/Arm64Emitter.h"
|
||||
#include "Core/MIPS/JitCommon/JitState.h"
|
||||
#include "Core/MIPS/JitCommon/JitBlockCache.h"
|
||||
#include "Core/MIPS/ARM64/Arm64Asm.h"
|
||||
#include "Core/MIPS/ARM64/Arm64RegCache.h"
|
||||
#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h"
|
||||
#include "Core/MIPS/MIPSVFPUUtils.h"
|
||||
|
||||
#ifndef offsetof
|
||||
#include "stddef.h"
|
||||
#endif
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
|
||||
// Tunable options for the ARM64 jit. Defaults are set in the constructor;
// several options are not yet honored by this WIP backend.
struct Arm64JitOptions
{
	Arm64JitOptions() {
		enableBlocklink = true;
		downcountInRegister = true;
		useBackJump = false;
		useForwardJump = false;
		cachePointers = true;
		immBranches = false;
		continueBranches = false;
		continueJumps = false;
		continueMaxInstructions = 300;

		useNEONVFPU = false;  // true
		if (!cpu_info.bNEON)
			useNEONVFPU = false;
	}

	// Use the NEON paths for VFPU ops (disabled for now, see constructor).
	bool useNEONVFPU;
	// Patch direct jumps between already-compiled blocks.
	bool enableBlocklink;
	// Keep the cycle downcount in a dedicated register instead of memory.
	bool downcountInRegister;
	// Placement variants for the pre-block downcount check (experimental).
	bool useBackJump;
	bool useForwardJump;
	bool cachePointers;
	// Resolve branches with immediate targets at compile time.
	bool immBranches;
	// Continue compiling across branches/jumps into the same emitted block.
	bool continueBranches;
	bool continueJumps;
	// Upper bound on instructions in a continued block.
	int continueMaxInstructions;
};
||||
|
||||
// ARM64 (AArch64) JIT compiler for the emulated MIPS CPU. Translates MIPS
// blocks into native code inside the inherited code block, managed through
// JitBlockCache. Ported from the 32-bit ARM jit; many ops are still stubs.
class Arm64Jit : public Arm64Gen::ARM64CodeBlock
{
public:
	Arm64Jit(MIPSState *mips);
	virtual ~Arm64Jit();

	// Savestate support.
	void DoState(PointerWrap &p);
	static void DoDummyState(PointerWrap &p);

	// Compiled ops should ignore delay slots
	// the compiler will take care of them by itself
	// OR NOT
	void Comp_Generic(MIPSOpcode op);

	void RunLoopUntil(u64 globalticks);

	void Compile(u32 em_address);	// Compiles a block at current MIPS PC
	const u8 *DoJit(u32 em_address, JitBlock *b);

	bool DescribeCodePtr(const u8 *ptr, std::string &name);

	void CompileDelaySlot(int flags);
	void EatInstruction(MIPSOpcode op);
	void AddContinuedBlock(u32 dest);

	void Comp_RunBlock(MIPSOpcode op);
	void Comp_ReplacementFunc(MIPSOpcode op);

	// Ops
	void Comp_ITypeMem(MIPSOpcode op);
	void Comp_Cache(MIPSOpcode op);

	void Comp_RelBranch(MIPSOpcode op);
	void Comp_RelBranchRI(MIPSOpcode op);
	void Comp_FPUBranch(MIPSOpcode op);
	void Comp_FPULS(MIPSOpcode op);
	void Comp_FPUComp(MIPSOpcode op);
	void Comp_Jump(MIPSOpcode op);
	void Comp_JumpReg(MIPSOpcode op);
	void Comp_Syscall(MIPSOpcode op);
	void Comp_Break(MIPSOpcode op);

	void Comp_IType(MIPSOpcode op);
	void Comp_RType2(MIPSOpcode op);
	void Comp_RType3(MIPSOpcode op);
	void Comp_ShiftType(MIPSOpcode op);
	void Comp_Allegrex(MIPSOpcode op);
	void Comp_Allegrex2(MIPSOpcode op);
	void Comp_VBranch(MIPSOpcode op);
	void Comp_MulDivType(MIPSOpcode op);
	void Comp_Special3(MIPSOpcode op);

	void Comp_FPU3op(MIPSOpcode op);
	void Comp_FPU2op(MIPSOpcode op);
	void Comp_mxc1(MIPSOpcode op);

	void Comp_DoNothing(MIPSOpcode op);

	// VFPU ops (non-NEON paths).
	void Comp_SV(MIPSOpcode op);
	void Comp_SVQ(MIPSOpcode op);
	void Comp_VPFX(MIPSOpcode op);
	void Comp_VVectorInit(MIPSOpcode op);
	void Comp_VMatrixInit(MIPSOpcode op);
	void Comp_VDot(MIPSOpcode op);
	void Comp_VecDo3(MIPSOpcode op);
	void Comp_VV2Op(MIPSOpcode op);
	void Comp_Mftv(MIPSOpcode op);
	void Comp_Vmfvc(MIPSOpcode op);
	void Comp_Vmtvc(MIPSOpcode op);
	void Comp_Vmmov(MIPSOpcode op);
	void Comp_VScl(MIPSOpcode op);
	void Comp_Vmmul(MIPSOpcode op);
	void Comp_Vmscl(MIPSOpcode op);
	void Comp_Vtfm(MIPSOpcode op);
	void Comp_VHdp(MIPSOpcode op);
	void Comp_VCrs(MIPSOpcode op);
	void Comp_VDet(MIPSOpcode op);
	void Comp_Vi2x(MIPSOpcode op);
	void Comp_Vx2i(MIPSOpcode op);
	void Comp_Vf2i(MIPSOpcode op);
	void Comp_Vi2f(MIPSOpcode op);
	void Comp_Vh2f(MIPSOpcode op);
	void Comp_Vcst(MIPSOpcode op);
	void Comp_Vhoriz(MIPSOpcode op);
	void Comp_VRot(MIPSOpcode op);
	void Comp_VIdt(MIPSOpcode op);
	void Comp_Vcmp(MIPSOpcode op);
	void Comp_Vcmov(MIPSOpcode op);
	void Comp_Viim(MIPSOpcode op);
	void Comp_Vfim(MIPSOpcode op);
	void Comp_VCrossQuat(MIPSOpcode op);
	void Comp_Vsgn(MIPSOpcode op);
	void Comp_Vocp(MIPSOpcode op);
	void Comp_ColorConv(MIPSOpcode op);
	void Comp_Vbfy(MIPSOpcode op);

	// Non-NEON: VPFX

	// NEON implementations of the VFPU ops.
	void CompNEON_SV(MIPSOpcode op);
	void CompNEON_SVQ(MIPSOpcode op);
	void CompNEON_VVectorInit(MIPSOpcode op);
	void CompNEON_VMatrixInit(MIPSOpcode op);
	void CompNEON_VDot(MIPSOpcode op);
	void CompNEON_VecDo3(MIPSOpcode op);
	void CompNEON_VV2Op(MIPSOpcode op);
	void CompNEON_Mftv(MIPSOpcode op);
	void CompNEON_Vmfvc(MIPSOpcode op);
	void CompNEON_Vmtvc(MIPSOpcode op);
	void CompNEON_Vmmov(MIPSOpcode op);
	void CompNEON_VScl(MIPSOpcode op);
	void CompNEON_Vmmul(MIPSOpcode op);
	void CompNEON_Vmscl(MIPSOpcode op);
	void CompNEON_Vtfm(MIPSOpcode op);
	void CompNEON_VHdp(MIPSOpcode op);
	void CompNEON_VCrs(MIPSOpcode op);
	void CompNEON_VDet(MIPSOpcode op);
	void CompNEON_Vi2x(MIPSOpcode op);
	void CompNEON_Vx2i(MIPSOpcode op);
	void CompNEON_Vf2i(MIPSOpcode op);
	void CompNEON_Vi2f(MIPSOpcode op);
	void CompNEON_Vh2f(MIPSOpcode op);
	void CompNEON_Vcst(MIPSOpcode op);
	void CompNEON_Vhoriz(MIPSOpcode op);
	void CompNEON_VRot(MIPSOpcode op);
	void CompNEON_VIdt(MIPSOpcode op);
	void CompNEON_Vcmp(MIPSOpcode op);
	void CompNEON_Vcmov(MIPSOpcode op);
	void CompNEON_Viim(MIPSOpcode op);
	void CompNEON_Vfim(MIPSOpcode op);
	void CompNEON_VCrossQuat(MIPSOpcode op);
	void CompNEON_Vsgn(MIPSOpcode op);
	void CompNEON_Vocp(MIPSOpcode op);
	void CompNEON_ColorConv(MIPSOpcode op);
	void CompNEON_Vbfy(MIPSOpcode op);

	int Replace_fabsf();

	JitBlockCache *GetBlockCache() { return &blocks; }

	void ClearCache();
	void InvalidateCache();
	void InvalidateCacheAt(u32 em_address, int length = 4);

	void EatPrefix() { js.EatPrefix(); }

private:
	void GenerateFixedCode();
	void FlushAll();
	void FlushPrefixV();

	void WriteDownCount(int offset = 0);
	void WriteDownCountR(Arm64Gen::ARM64Reg reg);
	void RestoreRoundingMode(bool force = false);
	void ApplyRoundingMode(bool force = false);
	void UpdateRoundingMode();
	void MovFromPC(Arm64Gen::ARM64Reg r);
	void MovToPC(Arm64Gen::ARM64Reg r);

	bool ReplaceJalTo(u32 dest);

	void SaveDowncount();
	void RestoreDowncount();

	void WriteExit(u32 destination, int exit_num);
	void WriteExitDestInR(Arm64Gen::ARM64Reg Reg);
	void WriteSyscallExit();

	// Utility compilation functions
	void BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely);
	void BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely);
	void BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool likely);
	void BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely);

	// Utilities to reduce duplicated code
	void CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, void (ARM64XEmitter::*arith)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg src, Arm64Gen::ARM64Reg op2), bool (ARM64XEmitter::*tryArithI2R)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg src, u32 val), u32 (*eval)(u32 a, u32 b));
	void CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, void (ARM64XEmitter::*arithOp2)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rn), bool (ARM64XEmitter::*tryArithI2R)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg rm, u32 val), u32 (*eval)(u32 a, u32 b), bool symmetric = false);

	void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz);
	void ApplyPrefixD(const u8 *vregs, VectorSize sz);
	void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) {
		_assert_(js.prefixSFlag & JitState::PREFIX_KNOWN);
		GetVectorRegs(regs, sz, vectorReg);
		ApplyPrefixST(regs, js.prefixS, sz);
	}
	void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) {
		_assert_(js.prefixTFlag & JitState::PREFIX_KNOWN);
		GetVectorRegs(regs, sz, vectorReg);
		ApplyPrefixST(regs, js.prefixT, sz);
	}
	void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg);

	// Utils
	void SetR0ToEffectiveAddress(MIPSGPReg rs, s16 offset);
	void SetCCAndR0ForSafeAddress(MIPSGPReg rs, s16 offset, Arm64Gen::ARM64Reg tempReg, bool reverse = false);
	void Comp_ITypeMemLR(MIPSOpcode op, bool load);

	JitBlockCache blocks;
	Arm64JitOptions jo;
	JitState js;

	Arm64RegCache gpr;
	// NOTE(review): still the ARM32 FPU cache type name — presumably renamed to
	// Arm64RegCacheFPU later; confirm against Arm64RegCacheFPU.h.
	ArmRegCacheFPU fpr;

	MIPSState *mips_;

	// Block-logging throttles used by DoJit.
	int dontLogBlocks;
	int logBlocks;

public:
	// Code pointers
	const u8 *enterCode;

	const u8 *outerLoop;
	const u8 *outerLoopPCInR0;
	const u8 *dispatcherCheckCoreState;
	const u8 *dispatcherPCInR0;
	const u8 *dispatcher;
	const u8 *dispatcherNoCheck;

	const u8 *breakpointBailout;
};
|
||||
|
||||
} // namespace MIPSComp
|
||||
|
503
Core/MIPS/ARM64/Arm64RegCache.cpp
Normal file
503
Core/MIPS/ARM64/Arm64RegCache.cpp
Normal file
|
@ -0,0 +1,503 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/MIPS/ARM64/Arm64RegCache.h"
|
||||
#include "Core/MIPS/ARM64/Arm64Jit.h"
|
||||
#include "Core/MIPS/MIPSAnalyst.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "Common/Arm64Emitter.h"
|
||||
|
||||
#ifndef offsetof
|
||||
#include "stddef.h"
|
||||
#endif
|
||||
|
||||
using namespace Arm64Gen;
|
||||
using namespace Arm64JitConstants;
|
||||
|
||||
// The emitter is attached separately via Init() since it may not exist yet.
Arm64RegCache::Arm64RegCache(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo) : mips_(mips), js_(js), jo_(jo) {
}
|
||||
|
||||
// Attaches the emitter used to generate loads/stores for mapping and flushing.
void Arm64RegCache::Init(ARM64XEmitter *emitter) {
	emit_ = emitter;
}
|
||||
|
||||
// Resets the cache at the start of a block: every ARM register becomes
// unmapped and clean, every MIPS register lives in memory.
// NOTE(review): 'stats' is currently unused — analysis-driven allocation TODO.
void Arm64RegCache::Start(MIPSAnalyst::AnalysisResults &stats) {
	for (int i = 0; i < NUM_ARMREG; i++) {
		ar[i].mipsReg = MIPS_REG_INVALID;
		ar[i].isDirty = false;
	}
	for (int i = 0; i < NUM_MIPSREG; i++) {
		mr[i].loc = ML_MEM;
		mr[i].reg = INVALID_REG;
		mr[i].imm = -1;
		mr[i].spillLock = false;
	}
}
|
||||
|
||||
// Returns the (preference-ordered) list of ARM64 registers available for
// mapping MIPS GPRs, and stores its length in 'count'. The pool shrinks by one
// register when the downcount is kept register-resident.
const ARM64Reg *Arm64RegCache::GetMIPSAllocationOrder(int &count) {
	// NOTE(review): the notes below are inherited from the 32-bit ARM backend;
	// revisit the reserved-register assignments for AAPCS64.
	// Note that R0 is reserved as scratch for now.
	// R12 is also potentially usable.
	// R4-R7 are registers we could use for static allocation or downcount.
	// R8 is used to preserve flags in nasty branches.
	// R9 and upwards are reserved for jit basics.
	// R14 (LR) is used as a scratch reg (overwritten on calls/return.)

	// TODO ARM64
	if (jo_->downcountInRegister) {
		static const ARM64Reg allocationOrder[] = {
			X1, X2, X3, X4, X5, X6, X12,
		};
		// Divide by the element size, not sizeof(int): ARM64Reg is an enum whose
		// underlying size need not equal int's, which would corrupt the count.
		count = (int)(sizeof(allocationOrder) / sizeof(allocationOrder[0]));
		return allocationOrder;
	} else {
		static const ARM64Reg allocationOrder2[] = {
			X1, X2, X3, X4, X5, X6, X7, X12,
		};
		count = (int)(sizeof(allocationOrder2) / sizeof(allocationOrder2[0]));
		return allocationOrder2;
	}
}
|
||||
|
||||
// Flushes the allocatable registers that a C function call may clobber.
// NOTE(review): this set (and the ARM32 "R4-R11 are preserved" rationale) is
// inherited from the 32-bit jit; on AArch64, X0-X17 are caller-saved per
// AAPCS64 — confirm once the allocation order is finalized.
void Arm64RegCache::FlushBeforeCall() {
	// R4-R11 are preserved. Others need flushing.
	FlushArmReg(X1);
	FlushArmReg(X2);
	FlushArmReg(X3);
	FlushArmReg(X12);
}
|
||||
|
||||
// True if the MIPS register currently lives in an ARM register (plain mapping
// only — a register in the ML_ARMREG_IMM state is not reported as mapped here).
bool Arm64RegCache::IsMapped(MIPSGPReg mipsReg) {
	return mr[mipsReg].loc == ML_ARMREG;
}
|
||||
|
||||
// Emits code to load a 32-bit immediate into 'reg'.
void Arm64RegCache::SetRegImm(ARM64Reg reg, u32 imm) {
	// On ARM64, at least Cortex A57, good old MOVT/MOVW (MOVK in 64-bit) is really fast.
	emit_->MOVI2R(reg, imm);
}
|
||||
|
||||
// Binds 'mipsReg' to the (assumed free) ARM register 'reg', loading the
// current value unless MAP_NOINIT requests an uninitialized mapping.
// Updates both sides of the mapping tables (ar[] and mr[]).
void Arm64RegCache::MapRegTo(ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags) {
	ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false;
	// MAP_NOINIT implies MAP_DIRTY, hence the full-mask comparison.
	if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) {
		if (mipsReg == MIPS_REG_ZERO) {
			// If we get a request to load the zero register, at least we won't spend
			// time on a memory access...
			// TODO: EOR?
			emit_->MOVI2R(reg, 0);

			// This way, if we SetImm() it, we'll keep it.
			mr[mipsReg].loc = ML_ARMREG_IMM;
			mr[mipsReg].imm = 0;
		} else {
			switch (mr[mipsReg].loc) {
			case ML_MEM:
				// Value lives in the context struct; load it.
				emit_->LDR(INDEX_UNSIGNED, reg, CTXREG, GetMipsRegOffset(mipsReg));
				mr[mipsReg].loc = ML_ARMREG;
				break;
			case ML_IMM:
				// Materialize the known immediate into the register.
				SetRegImm(reg, mr[mipsReg].imm);
				ar[reg].isDirty = true;  // IMM is always dirty.

				// If we are mapping dirty, it means we're gonna overwrite.
				// So the imm value is no longer valid.
				if (mapFlags & MAP_DIRTY)
					mr[mipsReg].loc = ML_ARMREG;
				else
					mr[mipsReg].loc = ML_ARMREG_IMM;
				break;
			default:
				mr[mipsReg].loc = ML_ARMREG;
				break;
			}
		}
	} else {
		// Uninitialized mapping: caller promises to overwrite before reading.
		if (mipsReg == MIPS_REG_ZERO) {
			// This way, if we SetImm() it, we'll keep it.
			mr[mipsReg].loc = ML_ARMREG_IMM;
			mr[mipsReg].imm = 0;
		} else {
			mr[mipsReg].loc = ML_ARMREG;
		}
	}
	ar[reg].mipsReg = mipsReg;
	mr[mipsReg].reg = reg;
}
|
||||
|
||||
// Picks an allocatable ARM register to evict. Prefers registers whose MIPS
// counterpart is about to be clobbered anyway (sets *clobbered so the caller
// can discard instead of spilling). With unusedOnly, registers still needed in
// the next UNUSED_LOOKAHEAD_OPS instructions are skipped entirely.
// Returns INVALID_REG if every candidate is spill-locked.
ARM64Reg Arm64RegCache::FindBestToSpill(bool unusedOnly, bool *clobbered) {
	int allocCount;
	const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount);

	static const int UNUSED_LOOKAHEAD_OPS = 30;

	*clobbered = false;
	for (int i = 0; i < allocCount; i++) {
		ARM64Reg reg = allocOrder[i];
		if (ar[reg].mipsReg != MIPS_REG_INVALID && mr[ar[reg].mipsReg].spillLock)
			continue;

		// Awesome, a clobbered reg. Let's use it.
		// NOTE(review): ar[reg].mipsReg can be MIPS_REG_INVALID here when the
		// slot is free — presumably IsRegisterClobbered tolerates that; confirm.
		if (MIPSAnalyst::IsRegisterClobbered(ar[reg].mipsReg, compilerPC_, UNUSED_LOOKAHEAD_OPS)) {
			*clobbered = true;
			return reg;
		}

		// Not awesome. A used reg. Let's try to avoid spilling.
		if (unusedOnly && MIPSAnalyst::IsRegisterUsed(ar[reg].mipsReg, compilerPC_, UNUSED_LOOKAHEAD_OPS)) {
			continue;
		}

		return reg;
	}

	return INVALID_REG;
}
|
||||
|
||||
// TODO: Somewhat smarter spilling - currently simply spills the first available, should do
// round robin or FIFO or something.

// Ensures 'mipsReg' is resident in an ARM register and returns it, honoring
// MAP_DIRTY/MAP_NOINIT. Spills another register if the pool is exhausted;
// returns INVALID_REG only when everything is spill-locked (a compiler bug).
ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) {
	// Let's see if it's already mapped. If so we just need to update the dirty flag.
	// We don't need to check for ML_NOINIT because we assume that anyone who maps
	// with that flag immediately writes a "known" value to the register.
	if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) {
		ARM64Reg armReg = mr[mipsReg].reg;
		if (ar[armReg].mipsReg != mipsReg) {
			ERROR_LOG_REPORT(JIT, "Register mapping out of sync! %i", mipsReg);
		}
		if (mapFlags & MAP_DIRTY) {
			// Mapping dirty means the old imm value is invalid.
			mr[mipsReg].loc = ML_ARMREG;
			ar[armReg].isDirty = true;
		}
		return (ARM64Reg)mr[mipsReg].reg;
	}

	// Okay, not mapped, so we need to allocate an ARM register.

	int allocCount;
	const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount);

	ARM64Reg desiredReg = INVALID_REG;
	// Try to "statically" allocate the first 6 regs after v0.
	int desiredOrder = allocCount - (6 - (mipsReg - (int)MIPS_REG_V0));
	if (desiredOrder >= 0 && desiredOrder < allocCount)
		desiredReg = allocOrder[desiredOrder];

	if (desiredReg != INVALID_REG) {
		if (ar[desiredReg].mipsReg == MIPS_REG_INVALID) {
			// With this placement, we may be able to optimize flush.
			MapRegTo(desiredReg, mipsReg, mapFlags);
			return desiredReg;
		}
	}

allocate:
	// First pass: look for any free register in the allocation order.
	for (int i = 0; i < allocCount; i++) {
		ARM64Reg reg = allocOrder[i];

		if (ar[reg].mipsReg == MIPS_REG_INVALID) {
			// That means it's free. Grab it, and load the value into it (if requested).
			MapRegTo(reg, mipsReg, mapFlags);
			return reg;
		}
	}

	// Still nothing. Let's spill a reg and goto 10.
	// TODO: Use age or something to choose which register to spill?
	// TODO: Spill dirty regs first? or opposite?
	bool clobbered;
	ARM64Reg bestToSpill = FindBestToSpill(true, &clobbered);
	if (bestToSpill == INVALID_REG) {
		bestToSpill = FindBestToSpill(false, &clobbered);
	}

	if (bestToSpill != INVALID_REG) {
		// ERROR_LOG(JIT, "Out of registers at PC %08x - spills register %i.", mips_->pc, bestToSpill);
		// TODO: Broken somehow in Dante's Inferno, but most games work. Bad flags in MIPSTables somewhere?
		if (clobbered) {
			// The old value will never be read again; no store needed.
			DiscardR(ar[bestToSpill].mipsReg);
		} else {
			FlushArmReg(bestToSpill);
		}
		// Retry: the freed register will be found by the loop above.
		goto allocate;
	}

	// Uh oh, we have all them spilllocked....
	ERROR_LOG_REPORT(JIT, "Out of spillable registers at PC %08x!!!", mips_->pc);
	return INVALID_REG;
}
|
||||
|
||||
// Maps two registers for reading, spill-locking both so mapping the second
// can't evict the first.
void Arm64RegCache::MapInIn(MIPSGPReg rd, MIPSGPReg rs) {
	SpillLock(rd, rs);
	MapReg(rd);
	MapReg(rs);
	ReleaseSpillLocks();
}
|
||||
|
||||
// Maps rd for writing and rs for reading. With avoidLoad, rd's old value is
// only skipped when rd is a different register from rs (otherwise the source
// value must still be loaded).
void Arm64RegCache::MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs, bool avoidLoad) {
	SpillLock(rd, rs);
	MapReg(rd, (avoidLoad && rd != rs) ? MAP_NOINIT : MAP_DIRTY);
	MapReg(rs);
	ReleaseSpillLocks();
}
|
||||
|
||||
// Maps rd for writing and rs/rt for reading. With avoidLoad, rd's old value is
// only skipped when rd aliases neither source register.
void Arm64RegCache::MapDirtyInIn(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad) {
	SpillLock(rd, rs, rt);
	const bool overlap = (rd == rs) || (rd == rt);
	MapReg(rd, (avoidLoad && !overlap) ? MAP_NOINIT : MAP_DIRTY);
	MapReg(rt);
	MapReg(rs);
	ReleaseSpillLocks();
}
|
||||
|
||||
// Maps rd1/rd2 for writing and rs for reading; each destination skips its old
// value (with avoidLoad) only when it doesn't alias the source.
void Arm64RegCache::MapDirtyDirtyIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, bool avoidLoad) {
	SpillLock(rd1, rd2, rs);
	MapReg(rd1, (avoidLoad && rd1 != rs) ? MAP_NOINIT : MAP_DIRTY);
	MapReg(rd2, (avoidLoad && rd2 != rs) ? MAP_NOINIT : MAP_DIRTY);
	MapReg(rs);
	ReleaseSpillLocks();
}
|
||||
|
||||
// Maps rd1/rd2 for writing and rs/rt for reading; each destination skips its
// old value (with avoidLoad) only when it aliases neither source.
void Arm64RegCache::MapDirtyDirtyInIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad) {
	SpillLock(rd1, rd2, rs, rt);
	const bool overlap1 = (rd1 == rs) || (rd1 == rt);
	const bool overlap2 = (rd2 == rs) || (rd2 == rt);
	MapReg(rd1, (avoidLoad && !overlap1) ? MAP_NOINIT : MAP_DIRTY);
	MapReg(rd2, (avoidLoad && !overlap2) ? MAP_NOINIT : MAP_DIRTY);
	MapReg(rt);
	MapReg(rs);
	ReleaseSpillLocks();
}
|
||||
|
||||
// Evicts the MIPS register held in ARM register 'r', storing it back to the
// context if dirty, and marks 'r' free. No-op if 'r' is not mapped.
void Arm64RegCache::FlushArmReg(ARM64Reg r) {
	if (ar[r].mipsReg == MIPS_REG_INVALID) {
		// Nothing to do, reg not mapped.
		if (ar[r].isDirty) {
			ERROR_LOG_REPORT(JIT, "Dirty but no mipsreg?");
		}
		return;
	}
	// The early return above guarantees a valid mapping here — the original's
	// second "!= MIPS_REG_INVALID" check was always true and has been removed.
	auto &mreg = mr[ar[r].mipsReg];
	if (mreg.loc == ML_ARMREG_IMM) {
		// We know its immediate value, no need to STR now.
		mreg.loc = ML_IMM;
		mreg.reg = INVALID_REG;
	} else {
		if (ar[r].isDirty && mreg.loc == ML_ARMREG)
			emit_->STR(INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
		mreg.loc = ML_MEM;
		mreg.reg = INVALID_REG;
		mreg.imm = 0;
	}
	ar[r].isDirty = false;
	ar[r].mipsReg = MIPS_REG_INVALID;
}
|
||||
|
||||
// Drops any cached copy of mipsReg WITHOUT writing it back; the canonical
// value is considered to be the one already in the context struct.
void Arm64RegCache::DiscardR(MIPSGPReg mipsReg) {
	const RegMIPSLoc prevLoc = mr[mipsReg].loc;
	if (prevLoc != ML_ARMREG && prevLoc != ML_ARMREG_IMM)
		return;  // Not in a host register; nothing cached to discard.
	// Detach the host register without storing it back.
	const ARM64Reg held = mr[mipsReg].reg;
	ar[held].isDirty = false;
	ar[held].mipsReg = MIPS_REG_INVALID;
	// The MIPS reg now lives only in memory.
	mr[mipsReg].reg = INVALID_REG;
	mr[mipsReg].loc = ML_MEM;
	mr[mipsReg].imm = 0;
}
|
||||
|
||||
// Writes the current value of MIPS reg r back to the context struct (when
// dirty) and unmaps it, always leaving mr[r] in the ML_MEM state.
void Arm64RegCache::FlushR(MIPSGPReg r) {
	switch (mr[r].loc) {
	case ML_IMM:
		// IMM is always "dirty".
		if (r != MIPS_REG_ZERO) {
			// Materialize the immediate in a scratch reg, then store it.
			SetRegImm(SCRATCHREG1, mr[r].imm);
			emit_->STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, GetMipsRegOffset(r));
		}
		break;

	case ML_ARMREG:
	case ML_ARMREG_IMM:
		if (mr[r].reg == INVALID_REG) {
			ERROR_LOG_REPORT(JIT, "FlushR: MipsReg %d had bad ArmReg", r);
		}
		// NOTE(review): if reg really were INVALID_REG, ar[mr[r].reg] below
		// would index out of bounds — confirm this can't happen in practice.
		if (ar[mr[r].reg].isDirty) {
			if (r != MIPS_REG_ZERO) {
				emit_->STR(INDEX_UNSIGNED, (ARM64Reg)mr[r].reg, CTXREG, GetMipsRegOffset(r));
			}
			ar[mr[r].reg].isDirty = false;
		}
		ar[mr[r].reg].mipsReg = MIPS_REG_INVALID;
		break;

	case ML_MEM:
		// Already there, nothing to do.
		break;

	default:
		ERROR_LOG_REPORT(JIT, "FlushR: MipsReg %d with invalid location %d", r, mr[r].loc);
		break;
	}
	// Regardless of previous location, r is now only in memory.
	mr[r].loc = ML_MEM;
	mr[r].reg = INVALID_REG;
	mr[r].imm = 0;
}
|
||||
|
||||
// Counts how many MIPS regs starting at startMipsReg form a flushable
// sequence: dirty, mapped to strictly ascending ARM regs (as required by a
// multi-store), covering only the main 32-entry r[] array.
// Returns 0 if startMipsReg itself doesn't qualify.
// Note: if allowFlushImm is set, this also flushes imms while checking the sequence.
int Arm64RegCache::FlushGetSequential(MIPSGPReg startMipsReg, bool allowFlushImm) {
	// Only start a sequence on a dirty armreg.
	// TODO: Could also start with an imm?
	const auto &startMipsInfo = mr[startMipsReg];
	if ((startMipsInfo.loc != ML_ARMREG && startMipsInfo.loc != ML_ARMREG_IMM) || startMipsInfo.reg == INVALID_REG || !ar[startMipsInfo.reg].isDirty) {
		return 0;
	}

	int allocCount;
	const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount);

	int c = 1;
	// The sequence needs to have ascending arm regs for STMIA.
	int lastArmReg = startMipsInfo.reg;
	// Can't use HI/LO, only regs in the main r[] array.
	for (int r = (int)startMipsReg + 1; r < 32; ++r) {
		if ((mr[r].loc == ML_ARMREG || mr[r].loc == ML_ARMREG_IMM) && mr[r].reg != INVALID_REG) {
			if ((int)mr[r].reg > lastArmReg && ar[mr[r].reg].isDirty) {
				++c;
				lastArmReg = mr[r].reg;
				continue;
			}
			// If we're not allowed to flush imms, don't even consider them.
		} else if (allowFlushImm && mr[r].loc == ML_IMM && MIPSGPReg(r) != MIPS_REG_ZERO) {
			// Okay, let's search for a free (and later) reg to put this imm into.
			bool found = false;
			for (int j = 0; j < allocCount; ++j) {
				ARM64Reg immReg = allocOrder[j];
				if ((int)immReg > lastArmReg && ar[immReg].mipsReg == MIPS_REG_INVALID) {
					++c;
					lastArmReg = immReg;

					// Even if the sequence fails, we'll need it in a reg anyway, might as well be this one.
					MapRegTo(immReg, MIPSGPReg(r), 0);
					found = true;
					break;
				}
			}
			if (found) {
				continue;
			}
		}

		// If it didn't hit a continue above, the chain is over.
		// There's no way to skip a slot with STMIA.
		break;
	}

	return c;
}
|
||||
|
||||
void Arm64RegCache::FlushAll() {
|
||||
// TODO: Flush in pairs
|
||||
for (int i = 0; i < NUM_MIPSREG; i++) {
|
||||
MIPSGPReg mipsReg = MIPSGPReg(i);
|
||||
FlushR(mipsReg);
|
||||
}
|
||||
|
||||
// Sanity check
|
||||
for (int i = 0; i < NUM_ARMREG; i++) {
|
||||
if (ar[i].mipsReg != MIPS_REG_INVALID) {
|
||||
ERROR_LOG_REPORT(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Records that MIPS reg r now holds the constant immVal, without emitting any
// code. Any ARM register previously caching r is detached without writeback,
// since the new immediate supersedes its value.
void Arm64RegCache::SetImm(MIPSGPReg r, u32 immVal) {
	if (r == MIPS_REG_ZERO && immVal != 0)
		ERROR_LOG(JIT, "Trying to set immediate %08x to r0", immVal);

	if (mr[r].loc == ML_ARMREG_IMM && mr[r].imm == immVal) {
		// Already have that value, let's keep it in the reg.
		return;
	}
	// Zap existing value if cached in a reg
	if (mr[r].reg != INVALID_REG) {
		ar[mr[r].reg].mipsReg = MIPS_REG_INVALID;
		ar[mr[r].reg].isDirty = false;
	}
	mr[r].loc = ML_IMM;
	mr[r].imm = immVal;
	mr[r].reg = INVALID_REG;
}
|
||||
|
||||
// True when r's value is known at compile time. MIPS r0 is hardwired to zero,
// so it always counts as an immediate.
bool Arm64RegCache::IsImm(MIPSGPReg r) const {
	if (r == MIPS_REG_ZERO)
		return true;
	const RegMIPSLoc loc = mr[r].loc;
	return loc == ML_IMM || loc == ML_ARMREG_IMM;
}
|
||||
|
||||
// Returns the known constant value of r. Callers should check IsImm() first;
// asking for a non-imm register logs an error and returns the stale imm field.
u32 Arm64RegCache::GetImm(MIPSGPReg r) const {
	if (r == MIPS_REG_ZERO)
		return 0;
	const RegMIPSLoc loc = mr[r].loc;
	const bool hasImm = loc == ML_IMM || loc == ML_ARMREG_IMM;
	if (!hasImm) {
		ERROR_LOG_REPORT(JIT, "Trying to get imm from non-imm register %i", r);
	}
	return mr[r].imm;
}
|
||||
|
||||
// Returns the byte offset of MIPS reg r within the MIPSState context struct,
// used as the load/store offset from CTXREG.
int Arm64RegCache::GetMipsRegOffset(MIPSGPReg r) {
	// The 32 GPRs sit at the start of the struct, 4 bytes apart.
	if (r < 32)
		return r * 4;
	switch (r) {
	case MIPS_REG_HI:
		return offsetof(MIPSState, hi);
	case MIPS_REG_LO:
		return offsetof(MIPSState, lo);
	case MIPS_REG_FPCOND:
		return offsetof(MIPSState, fpcond);
	case MIPS_REG_VFPUCC:
		return offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_CC]);
	default:
		ERROR_LOG_REPORT(JIT, "bad mips register %i", r);
		return 0; // or what?
	}
}
|
||||
|
||||
// Marks up to four MIPS regs as unspillable. r1 is required; the trailing
// parameters default to MIPS_REG_INVALID and are skipped when absent.
void Arm64RegCache::SpillLock(MIPSGPReg r1, MIPSGPReg r2, MIPSGPReg r3, MIPSGPReg r4) {
	mr[r1].spillLock = true;
	const MIPSGPReg optional[] = { r2, r3, r4 };
	for (MIPSGPReg r : optional) {
		if (r != MIPS_REG_INVALID)
			mr[r].spillLock = true;
	}
}
|
||||
|
||||
void Arm64RegCache::ReleaseSpillLocks() {
|
||||
for (int i = 0; i < NUM_MIPSREG; i++) {
|
||||
mr[i].spillLock = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Releases the spill lock on a single MIPS register.
void Arm64RegCache::ReleaseSpillLock(MIPSGPReg reg) {
	mr[reg].spillLock = false;
}
|
||||
|
||||
// Returns the ARM register currently holding mipsReg. The reg must already be
// mapped (via MapReg and friends); otherwise this logs and returns INVALID_REG.
ARM64Reg Arm64RegCache::R(MIPSGPReg mipsReg) {
	if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) {
		return (ARM64Reg)mr[mipsReg].reg;
	} else {
		ERROR_LOG_REPORT(JIT, "Reg %i not in arm reg. compilerPC = %08x", mipsReg, compilerPC_);
		return INVALID_REG; // BAAAD
	}
}
|
153
Core/MIPS/ARM64/Arm64RegCache.h
Normal file
153
Core/MIPS/ARM64/Arm64RegCache.h
Normal file
|
@ -0,0 +1,153 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSAnalyst.h"
|
||||
#include "Common/Arm64Emitter.h"
|
||||
|
||||
namespace Arm64JitConstants {

// Bogus mappings, TODO ARM64
const Arm64Gen::ARM64Reg JITBASEREG = Arm64Gen::W0;
const Arm64Gen::ARM64Reg CTXREG = Arm64Gen::X1;      // Points at the MIPSState context struct.
const Arm64Gen::ARM64Reg MEMBASEREG = Arm64Gen::X2;
const Arm64Gen::ARM64Reg SCRATCHREG1 = Arm64Gen::W3;
const Arm64Gen::ARM64Reg SCRATCHREG2 = Arm64Gen::W4;
const Arm64Gen::ARM64Reg DOWNCOUNTREG = Arm64Gen::W5;

enum {
	// 32 GPRs plus HI/LO/FPCOND/VFPUCC (see Arm64RegCache::GetMipsRegOffset).
	TOTAL_MAPPABLE_MIPSREGS = 36,
};

// Where a MIPS register's current value lives.
enum RegMIPSLoc {
	ML_IMM,     // Known constant only; not in a host reg.
	ML_ARMREG,  // In an ARM reg.
	// In an arm reg, but also has a known immediate value.
	ML_ARMREG_IMM,
	ML_MEM,     // Only in the MIPSState context struct.
};

// These collide with something on Blackberry.
#undef MAP_NOINIT
#undef MAP_READ

// Initing is the default so the flag is reversed.
enum {
	MAP_DIRTY = 1,
	MAP_NOINIT = 2 | MAP_DIRTY,  // Dirty, and skip loading the old value.
};

}
|
||||
|
||||
// R1 to R6: mapped MIPS regs
|
||||
// R8 = flags (maybe we could do better here?)
|
||||
// R9 = code pointers
|
||||
// R10 = MIPS context
|
||||
// R11 = base pointer
|
||||
// R14 = scratch (actually LR)
|
||||
|
||||
|
||||
typedef int MIPSReg;
|
||||
|
||||
// State of one host (ARM64) register in the cache.
struct RegARM {
	MIPSGPReg mipsReg;  // if -1, no mipsreg attached.
	bool isDirty;  // Should the register be written back?
};
|
||||
|
||||
// State of one MIPS register in the cache.
struct RegMIPS {
	// Where is this MIPS register?
	Arm64JitConstants::RegMIPSLoc loc;
	// Data (only one of these is used, depending on loc. Could make a union).
	u32 imm;
	Arm64Gen::ARM64Reg reg;  // reg index
	bool spillLock;  // if true, this register cannot be spilled.
	// If loc == ML_MEM, it's back in its location in the CPU context struct.
};
|
||||
|
||||
// Forward declarations so this header doesn't need to pull in the jit headers.
namespace MIPSComp {
	struct Arm64JitOptions;
	struct JitState;
}
|
||||
|
||||
// GPR register cache for the ARM64 MIPS jit: tracks which MIPS GPRs currently
// live in ARM64 registers, as known immediates, or only in the MIPSState
// context struct, and emits the loads/stores needed to move them around.
class Arm64RegCache {
public:
	Arm64RegCache(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo);
	~Arm64RegCache() {}

	void Init(Arm64Gen::ARM64XEmitter *emitter);
	void Start(MIPSAnalyst::AnalysisResults &stats);

	// Protect the arm register containing a MIPS register from spilling, to ensure that
	// it's being kept allocated.
	void SpillLock(MIPSGPReg reg, MIPSGPReg reg2 = MIPS_REG_INVALID, MIPSGPReg reg3 = MIPS_REG_INVALID, MIPSGPReg reg4 = MIPS_REG_INVALID);
	void ReleaseSpillLock(MIPSGPReg reg);
	void ReleaseSpillLocks();

	// Immediate tracking: a MIPS reg may be a known constant without occupying
	// a host register.
	void SetImm(MIPSGPReg reg, u32 immVal);
	bool IsImm(MIPSGPReg reg) const;
	u32 GetImm(MIPSGPReg reg) const;
	// Optimally set a register to an imm value (possibly using another register.)
	void SetRegImm(Arm64Gen::ARM64Reg reg, u32 imm);

	// Returns an ARM register containing the requested MIPS register.
	Arm64Gen::ARM64Reg MapReg(MIPSGPReg reg, int mapFlags = 0);

	bool IsMapped(MIPSGPReg reg);
	bool IsMappedAsPointer(MIPSGPReg reg);

	// Convenience wrappers that map several regs at once under spill locks.
	void MapInIn(MIPSGPReg rd, MIPSGPReg rs);
	void MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs, bool avoidLoad = true);
	void MapDirtyInIn(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad = true);
	void MapDirtyDirtyIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, bool avoidLoad = true);
	void MapDirtyDirtyInIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad = true);
	void FlushArmReg(Arm64Gen::ARM64Reg r);
	void FlushR(MIPSGPReg r);
	void FlushBeforeCall();
	void FlushAll();
	void DiscardR(MIPSGPReg r);

	Arm64Gen::ARM64Reg R(MIPSGPReg preg); // Returns a cached register, while checking that it's NOT mapped as a pointer

	void SetEmitter(Arm64Gen::ARM64XEmitter *emitter) { emit_ = emitter; }

	// For better log output only.
	void SetCompilerPC(u32 compilerPC) { compilerPC_ = compilerPC; }

	int GetMipsRegOffset(MIPSGPReg r);

private:
	const Arm64Gen::ARM64Reg *GetMIPSAllocationOrder(int &count);
	void MapRegTo(Arm64Gen::ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags);
	int FlushGetSequential(MIPSGPReg startMipsReg, bool allowFlushImm);
	Arm64Gen::ARM64Reg FindBestToSpill(bool unusedOnly, bool *clobbered);

	MIPSState *mips_;
	Arm64Gen::ARM64XEmitter *emit_;
	MIPSComp::JitState *js_;
	MIPSComp::Arm64JitOptions *jo_;
	u32 compilerPC_;  // For log output only; see SetCompilerPC.

	enum {
		NUM_ARMREG = 32, // 31 actual registers, plus the zero register.
		NUM_MIPSREG = Arm64JitConstants::TOTAL_MAPPABLE_MIPSREGS,
	};

	RegARM ar[NUM_ARMREG];    // Host-reg -> MIPS-reg side of the mapping.
	RegMIPS mr[NUM_MIPSREG];  // MIPS-reg -> location side of the mapping.
};
|
570
Core/MIPS/ARM64/Arm64RegCacheFPU.cpp
Normal file
570
Core/MIPS/ARM64/Arm64RegCacheFPU.cpp
Normal file
|
@ -0,0 +1,570 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h"
|
||||
#include "Core/MIPS/ARM64/Arm64Jit.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
|
||||
using namespace Arm64Gen;
|
||||
using namespace Arm64JitConstants;
|
||||
|
||||
// vr aliases mr + 32 so VFPU registers can be indexed from 0 via vr[].
ArmRegCacheFPU::ArmRegCacheFPU(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo) : mips_(mips), vr(mr + 32), js_(js), jo_(jo), initialReady(false) {
	// With NEON we can use all 32 single-precision regs, otherwise only 16.
	if (cpu_info.bNEON) {
		numARMFpuReg_ = 32;
	} else {
		numARMFpuReg_ = 16;
	}
}
|
||||
|
||||
// Resets the cache to the pristine "everything in memory" state at the start
// of a block. The pristine tables are built lazily on first use.
void ArmRegCacheFPU::Start(MIPSAnalyst::AnalysisResults &stats) {
	if (!initialReady) {
		SetupInitialRegs();
		initialReady = true;
	}

	memcpy(ar, arInitial, sizeof(ar));
	memcpy(mr, mrInitial, sizeof(mr));
	pendingFlush = false;
}
|
||||
|
||||
// Builds the pristine state templates copied into ar/mr by Start(): no host
// reg attached, every MIPS FPU reg in memory, all locks clear, quads invalid.
void ArmRegCacheFPU::SetupInitialRegs() {
	for (int i = 0; i < numARMFpuReg_; i++) {
		arInitial[i].mipsReg = -1;
		arInitial[i].isDirty = false;
	}
	for (int i = 0; i < NUM_MIPSFPUREG; i++) {
		mrInitial[i].loc = ML_MEM;
		mrInitial[i].reg = INVALID_REG;
		mrInitial[i].spillLock = false;
		mrInitial[i].tempLock = false;
	}
	for (int i = 0; i < MAX_ARMQUADS; i++) {
		qr[i].isDirty = false;
		qr[i].mipsVec = -1;
		qr[i].sz = V_Invalid;
		qr[i].spillLock = false;
		qr[i].isTemp = false;
		// 0xff marks all four vreg slots as unused.
		memset(qr[i].vregs, 0xff, 4);
	}
}
|
||||
|
||||
// Returns the preferred allocation order of host FP registers and its length
// (via count), depending on whether VFPU regs are mapped through NEON.
const ARM64Reg *ArmRegCacheFPU::GetMIPSAllocationOrder(int &count) {
	// VFP mapping
	// VFPU registers and regular FP registers are mapped interchangably on top of the standard
	// 16 FPU registers.

	// NEON mapping
	// We map FPU and VFPU registers entirely separately. FPU is mapped to 12 of the bottom 16 S registers.
	// VFPU is mapped to the upper 48 regs, 32 of which can only be reached through NEON
	// (or D16-D31 as doubles, but not relevant).
	// Might consider shifting the split in the future, giving more regs to NEON allowing it to map more quads.

	// We should attempt to map scalars to low Q registers and wider things to high registers,
	// as the NEON instructions are all 2-vector or 4-vector, they don't do scalar, we want to be
	// able to use regular VFP instructions too.
	static const ARM64Reg allocationOrder[] = {
		// Reserve four temp registers. Useful when building quads until we really figure out
		// how to do that best.
		S4,  S5,  S6,  S7,   // Q1
		S8,  S9,  S10, S11,  // Q2
		S12, S13, S14, S15,  // Q3
		S16, S17, S18, S19,  // Q4
		S20, S21, S22, S23,  // Q5
		S24, S25, S26, S27,  // Q6
		S28, S29, S30, S31,  // Q7
		// Q8-Q15 free for NEON tricks
	};

	static const ARM64Reg allocationOrderNEONVFPU[] = {
		// Reserve four temp registers. Useful when building quads until we really figure out
		// how to do that best.
		S4,  S5,  S6,  S7,   // Q1
		S8,  S9,  S10, S11,  // Q2
		S12, S13, S14, S15,  // Q3
		// Q4-Q15 free for VFPU
	};

	// NOTE: It's important that S2/S3 are not allocated with bNEON, even if !useNEONVFPU.
	// They are used by a few instructions, like vh2f.
	if (jo_->useNEONVFPU) {
		count = sizeof(allocationOrderNEONVFPU) / sizeof(const ARM64Reg);
		return allocationOrderNEONVFPU;
	} else {
		count = sizeof(allocationOrder) / sizeof(const ARM64Reg);
		return allocationOrder;
	}
}
|
||||
|
||||
// True if MIPS FPU reg r currently lives in a host register.
bool ArmRegCacheFPU::IsMapped(MIPSReg r) {
	return mr[r].loc == ML_ARMREG;
}
|
||||
|
||||
// Returns the host VFP register (S0-based) holding mipsReg, mapping it first
// if needed. Prefers free regs from the allocation order; otherwise spills the
// first non-locked reg and retries.
ARM64Reg ArmRegCacheFPU::MapReg(MIPSReg mipsReg, int mapFlags) {
	// INFO_LOG(JIT, "FPR MapReg: %i flags=%i", mipsReg, mapFlags);
	if (jo_->useNEONVFPU && mipsReg >= 32) {
		ERROR_LOG(JIT, "Cannot map VFPU registers to ARM VFP registers in NEON mode. PC=%08x", js_->compilerPC);
		return S0;
	}

	pendingFlush = true;
	// Let's see if it's already mapped. If so we just need to update the dirty flag.
	// We don't need to check for ML_NOINIT because we assume that anyone who maps
	// with that flag immediately writes a "known" value to the register.
	if (mr[mipsReg].loc == ML_ARMREG) {
		if (ar[mr[mipsReg].reg].mipsReg != mipsReg) {
			ERROR_LOG(JIT, "Reg mapping out of sync! MR %i", mipsReg);
		}
		if (mapFlags & MAP_DIRTY) {
			ar[mr[mipsReg].reg].isDirty = true;
		}
		//INFO_LOG(JIT, "Already mapped %i to %i", mipsReg, mr[mipsReg].reg);
		return (ARM64Reg)(mr[mipsReg].reg + S0);
	}

	// Okay, not mapped, so we need to allocate an ARM register.

	int allocCount;
	const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount);

allocate:
	for (int i = 0; i < allocCount; i++) {
		int reg = allocOrder[i] - S0;

		if (ar[reg].mipsReg == -1) {
			// That means it's free. Grab it, and load the value into it (if requested).
			ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false;
			if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) {
				if (mr[mipsReg].loc == ML_MEM && mipsReg < TEMP0) {
					// Load not yet implemented in the ARM64 port; emitter call stubbed out.
					// emit_->VLDR((ARM64Reg)(reg + S0), CTXREG, GetMipsRegOffset(mipsReg));
				}
			}
			ar[reg].mipsReg = mipsReg;
			mr[mipsReg].loc = ML_ARMREG;
			mr[mipsReg].reg = reg;
			//INFO_LOG(JIT, "Mapped %i to %i", mipsReg, mr[mipsReg].reg);
			return (ARM64Reg)(reg + S0);
		}
	}

	// Still nothing. Let's spill a reg and goto 10.
	// TODO: Use age or something to choose which register to spill?
	// TODO: Spill dirty regs first? or opposite?
	int bestToSpill = -1;
	for (int i = 0; i < allocCount; i++) {
		int reg = allocOrder[i] - S0;
		// Skip regs whose MIPS reg is protected by a spill or temp lock.
		if (ar[reg].mipsReg != -1 && (mr[ar[reg].mipsReg].spillLock || mr[ar[reg].mipsReg].tempLock))
			continue;
		bestToSpill = reg;
		break;
	}

	if (bestToSpill != -1) {
		FlushArmReg((ARM64Reg)(S0 + bestToSpill));
		goto allocate;
	}

	// Uh oh, we have all them spilllocked....
	ERROR_LOG(JIT, "Out of spillable registers at PC %08x!!!", js_->compilerPC);
	return INVALID_REG;
}
|
||||
|
||||
// Maps both FPU regs for reading, under spill locks.
void ArmRegCacheFPU::MapInIn(MIPSReg rd, MIPSReg rs) {
	SpillLock(rd, rs);
	MapReg(rd);
	MapReg(rs);
	ReleaseSpillLock(rd);
	ReleaseSpillLock(rs);
}
|
||||
|
||||
// Maps rd for writing and rs for reading, under spill locks. rd's old value is
// loaded only when it overlaps rs (MAP_NOINIT otherwise).
// NOTE(review): unlike the GPR cache's MapDirtyIn, avoidLoad == false still
// results in MAP_NOINIT here — confirm this asymmetry is intended.
void ArmRegCacheFPU::MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad) {
	SpillLock(rd, rs);
	bool overlap = avoidLoad && rd == rs;
	MapReg(rd, overlap ? MAP_DIRTY : MAP_NOINIT);
	MapReg(rs);
	ReleaseSpillLock(rd);
	ReleaseSpillLock(rs);
}
|
||||
|
||||
// Maps rd for writing and rs/rt for reading, under spill locks. rd's old value
// is loaded only when it overlaps an input (MAP_NOINIT otherwise).
void ArmRegCacheFPU::MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad) {
	SpillLock(rd, rs, rt);
	bool overlap = avoidLoad && (rd == rs || rd == rt);
	MapReg(rd, overlap ? MAP_DIRTY : MAP_NOINIT);
	MapReg(rt);
	MapReg(rs);
	ReleaseSpillLock(rd);
	ReleaseSpillLock(rs);
	ReleaseSpillLock(rt);
}
|
||||
|
||||
// Spill-locks each element of a VFPU vector given its register list.
void ArmRegCacheFPU::SpillLockV(const u8 *v, VectorSize sz) {
	for (int i = 0; i < GetNumVectorElements(sz); i++) {
		vr[v[i]].spillLock = true;
	}
}
|
||||
|
||||
// Spill-locks a VFPU vector given its matrix/vector id; expands the id into
// its element registers first.
void ArmRegCacheFPU::SpillLockV(int vec, VectorSize sz) {
	u8 v[4];
	GetVectorRegs(v, sz, vec);
	SpillLockV(v, sz);
}
|
||||
|
||||
// Maps VFPU reg vreg (VFPU regs live at mr[32 + vreg]).
void ArmRegCacheFPU::MapRegV(int vreg, int flags) {
	MapReg(vreg + 32, flags);
}
|
||||
|
||||
// Copies VFPU reg vreg's value into a specific host register, mapping vreg
// first if needed. The actual moves are still stubbed out in the ARM64 port
// (emitter calls commented out).
void ArmRegCacheFPU::LoadToRegV(ARM64Reg armReg, int vreg) {
	if (vr[vreg].loc == ML_ARMREG) {
		// emit_->VMOV(armReg, (ARM64Reg)(S0 + vr[vreg].reg));
	} else {
		MapRegV(vreg);
		// emit_->VMOV(armReg, V(vreg));
	}
}
|
||||
|
||||
// Spill-locks and maps every element of a VFPU vector given its vector id.
// The locks are left held; the caller releases them.
void ArmRegCacheFPU::MapRegsAndSpillLockV(int vec, VectorSize sz, int flags) {
	u8 v[4];
	GetVectorRegs(v, sz, vec);
	SpillLockV(v, sz);
	for (int i = 0; i < GetNumVectorElements(sz); i++) {
		MapRegV(v[i], flags);
	}
}
|
||||
|
||||
// Spill-locks and maps every element of a VFPU vector given its register
// list. The locks are left held; the caller releases them.
void ArmRegCacheFPU::MapRegsAndSpillLockV(const u8 *v, VectorSize sz, int flags) {
	SpillLockV(v, sz);
	for (int i = 0; i < GetNumVectorElements(sz); i++) {
		MapRegV(v[i], flags);
	}
}
|
||||
|
||||
// Maps two VFPU regs for reading, under spill locks.
void ArmRegCacheFPU::MapInInV(int vs, int vt) {
	SpillLockV(vs);
	SpillLockV(vt);
	MapRegV(vs);
	MapRegV(vt);
	ReleaseSpillLockV(vs);
	ReleaseSpillLockV(vt);
}
|
||||
|
||||
// Maps vd for writing and vs for reading, under spill locks. vd's old value
// is loaded only when it overlaps vs.
void ArmRegCacheFPU::MapDirtyInV(int vd, int vs, bool avoidLoad) {
	bool overlap = avoidLoad && (vd == vs);
	SpillLockV(vd);
	SpillLockV(vs);
	MapRegV(vd, overlap ? MAP_DIRTY : MAP_NOINIT);
	MapRegV(vs);
	ReleaseSpillLockV(vd);
	ReleaseSpillLockV(vs);
}
|
||||
|
||||
// Maps vd for writing and vs/vt for reading, under spill locks. vd's old
// value is loaded only when it overlaps an input.
void ArmRegCacheFPU::MapDirtyInInV(int vd, int vs, int vt, bool avoidLoad) {
	bool overlap = avoidLoad && ((vd == vs) || (vd == vt));
	SpillLockV(vd);
	SpillLockV(vs);
	SpillLockV(vt);
	MapRegV(vd, overlap ? MAP_DIRTY : MAP_NOINIT);
	MapRegV(vs);
	MapRegV(vt);
	ReleaseSpillLockV(vd);
	ReleaseSpillLockV(vs);
	ReleaseSpillLockV(vt);
}
|
||||
|
||||
// Unbinds a host FP register, updating the bookkeeping so its MIPS reg is
// considered to live in memory. The actual store is still stubbed out in the
// ARM64 port (VSTR commented out).
void ArmRegCacheFPU::FlushArmReg(ARM64Reg r) {
	if (r >= S0 && r <= S31) {
		int reg = r - S0;
		if (ar[reg].mipsReg == -1) {
			// Nothing to do, reg not mapped.
			return;
		}
		// NOTE(review): this condition is always true after the early return
		// above, so the else branch below is unreachable.
		if (ar[reg].mipsReg != -1) {
			if (ar[reg].isDirty && mr[ar[reg].mipsReg].loc == ML_ARMREG)
			{
				//INFO_LOG(JIT, "Flushing ARM reg %i", reg);
				// emit_->VSTR(r, CTXREG, GetMipsRegOffset(ar[reg].mipsReg));
			}
			// IMMs won't be in an ARM reg.
			mr[ar[reg].mipsReg].loc = ML_MEM;
			mr[ar[reg].mipsReg].reg = INVALID_REG;
		} else {
			ERROR_LOG(JIT, "Dirty but no mipsreg?");
		}
		ar[reg].isDirty = false;
		ar[reg].mipsReg = -1;
	}
}
|
||||
|
||||
// Flushes VFPU reg r (VFPU regs live at mr[32 + r]).
void ArmRegCacheFPU::FlushV(MIPSReg r) {
	FlushR(r + 32);
}
|
||||
|
||||
// Writes MIPS FPU reg r back (when dirty) and unmaps it, leaving mr[r] in the
// ML_MEM state. Stores are still stubbed out in the ARM64 port.
void ArmRegCacheFPU::FlushR(MIPSReg r) {
	switch (mr[r].loc) {
	case ML_IMM:
		// IMM is always "dirty".
		// IMM is not allowed for FP (yet).
		ERROR_LOG(JIT, "Imm in FP register?");
		break;

	case ML_ARMREG:
		if (mr[r].reg == INVALID_REG) {
			ERROR_LOG(JIT, "FlushR: MipsReg had bad ArmReg");
		}

		if (mr[r].reg >= Q0 && mr[r].reg <= Q15) {
			// This should happen rarely, but occasionally we need to flush a single stray
			// mipsreg that's been part of a quad.
			int quad = mr[r].reg - Q0;
			if (qr[quad].isDirty) {
				WARN_LOG(JIT, "FlushR found quad register %i - PC=%08x", quad, js_->compilerPC);
				//emit_->ADDI2R(R0, CTXREG, GetMipsRegOffset(r), R1);
				//emit_->VST1_lane(F_32, (ARM64Reg)mr[r].reg, R0, mr[r].lane, true);
			}
		} else {
			if (ar[mr[r].reg].isDirty) {
				//INFO_LOG(JIT, "Flushing dirty reg %i", mr[r].reg);
				// emit_->VSTR((ARM64Reg)(mr[r].reg + S0), CTXREG, GetMipsRegOffset(r));
				ar[mr[r].reg].isDirty = false;
			}
			ar[mr[r].reg].mipsReg = -1;
		}
		break;

	case ML_MEM:
		// Already there, nothing to do.
		break;

	default:
		//BAD
		break;
	}
	mr[r].loc = ML_MEM;
	mr[r].reg = (int)INVALID_REG;
}
|
||||
|
||||
// 32 single-precision host regs with NEON, 16 without.
int ArmRegCacheFPU::GetNumARMFPURegs() {
	if (cpu_info.bNEON)
		return 32;
	else
		return 16;
}
|
||||
|
||||
// Scalar only. Need a similar one for sequential Q vectors.
// Counts how many consecutive dirty host regs starting at index a map to
// consecutive 4-byte context offsets, so they can be flushed in one go.
int ArmRegCacheFPU::FlushGetSequential(int a, int maxArmReg) {
	int c = 1;
	int lastMipsOffset = GetMipsRegOffset(ar[a].mipsReg);
	a++;
	while (a < maxArmReg) {
		// The chain ends at the first clean or unattached host reg.
		if (!ar[a].isDirty || ar[a].mipsReg == -1)
			break;
		int mipsOffset = GetMipsRegOffset(ar[a].mipsReg);
		if (mipsOffset != lastMipsOffset + 4) {
			break;
		}

		lastMipsOffset = mipsOffset;
		a++;
		c++;
	}
	return c;
}
|
||||
|
||||
// Flushes every dirty FPU/VFPU register back to the context struct and clears
// all mappings. Temps are discarded, not flushed. Store emission is still
// stubbed out in the ARM64 port (emitter calls commented out).
void ArmRegCacheFPU::FlushAll() {
	if (!pendingFlush) {
		// Nothing allocated. FPU regs are not nearly as common as GPR.
		return;
	}

	// Discard temps!
	for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; i++) {
		DiscardR(i);
	}

	// Loop through the ARM registers, then use GetMipsRegOffset to determine if MIPS registers are
	// sequential. This is necessary because we store VFPU registers in a staggered order to get
	// columns sequential (most VFPU math in nearly all games is in columns, not rows).

	int numArmRegs;
	// We rely on the allocation order being sequential.
	const ARM64Reg baseReg = GetMIPSAllocationOrder(numArmRegs)[0];

	for (int i = 0; i < numArmRegs; i++) {
		int a = (baseReg - S0) + i;
		int m = ar[a].mipsReg;

		if (ar[a].isDirty) {
			if (m == -1) {
				ILOG("ARM reg %i is dirty but has no mipsreg", a);
				continue;
			}

			// Flush runs of sequential regs together when possible.
			int c = FlushGetSequential(a, GetNumARMFPURegs());
			if (c == 1) {
				// ILOG("Got single register: %i (%i)", a, m);
				//emit_->VSTR((ARM64Reg)(a + S0), CTXREG, GetMipsRegOffset(m));
			} else if (c == 2) {
				// Probably not worth using VSTMIA for two.
				int offset = GetMipsRegOffset(m);
				//emit_->VSTR((ARM64Reg)(a + S0), CTXREG, offset);
				//emit_->VSTR((ARM64Reg)(a + 1 + S0), CTXREG, offset + 4);
			} else {
				// ILOG("Got sequence: %i at %i (%i)", c, a, m);
				//emit_->ADDI2R(SCRATCHREG1, CTXREG, GetMipsRegOffset(m), SCRATCHREG2);
				// ILOG("VSTMIA R0, %i, %i", a, c);
				//emit_->VSTMIA(SCRATCHREG1, false, (ARM64Reg)(S0 + a), c);
			}

			// Skip past, and mark as non-dirty.
			for (int j = 0; j < c; j++) {
				int b = a + j;
				mr[ar[b].mipsReg].loc = ML_MEM;
				mr[ar[b].mipsReg].reg = (int)INVALID_REG;
				ar[a + j].mipsReg = -1;
				ar[a + j].isDirty = false;
			}
			i += c - 1;
		} else {
			if (m != -1) {
				mr[m].loc = ML_MEM;
				mr[m].reg = (int)INVALID_REG;
			}
			ar[a].mipsReg = -1;
			// already not dirty
		}
	}

	// Sanity check
	for (int i = 0; i < numARMFpuReg_; i++) {
		if (ar[i].mipsReg != -1) {
			ERROR_LOG(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
		}
	}
	pendingFlush = false;
}
|
||||
|
||||
// Drops any cached copy of FPU reg r WITHOUT writing it back, and clears its
// temp/spill locks. The memory copy becomes canonical.
void ArmRegCacheFPU::DiscardR(MIPSReg r) {
	switch (mr[r].loc) {
	case ML_IMM:
		// IMM is always "dirty".
		// IMM is not allowed for FP (yet).
		ERROR_LOG(JIT, "Imm in FP register?");
		break;

	case ML_ARMREG:
		if (mr[r].reg == INVALID_REG) {
			ERROR_LOG(JIT, "DiscardR: MipsReg had bad ArmReg");
		} else {
			// Note that we DO NOT write it back here. That's the whole point of Discard.
			ar[mr[r].reg].isDirty = false;
			ar[mr[r].reg].mipsReg = -1;
		}
		break;

	case ML_MEM:
		// Already there, nothing to do.
		break;

	default:
		//BAD
		break;
	}
	mr[r].loc = ML_MEM;
	mr[r].reg = (int)INVALID_REG;
	mr[r].tempLock = false;
	mr[r].spillLock = false;
}
|
||||
|
||||
// True if host reg r currently caches one of the internal temps (TEMP0 and up).
bool ArmRegCacheFPU::IsTempX(ARM64Reg r) const {
	return ar[r - S0].mipsReg >= TEMP0;
}
|
||||
|
||||
// Allocates one of the internal temp MIPS regs (TEMP0..TEMP0+NUM_TEMPS-1) and
// temp-locks it. Asserts if all temps are taken.
int ArmRegCacheFPU::GetTempR() {
	if (jo_->useNEONVFPU) {
		ERROR_LOG(JIT, "VFP temps not allowed in NEON mode");
		return 0;
	}
	pendingFlush = true;
	for (int r = TEMP0; r < TEMP0 + NUM_TEMPS; ++r) {
		// A temp is free when it isn't mapped and isn't locked.
		if (mr[r].loc == ML_MEM && !mr[r].tempLock) {
			mr[r].tempLock = true;
			return r;
		}
	}

	ERROR_LOG(CPU, "Out of temp regs! Might need to DiscardR() some");
	_assert_msg_(JIT, 0, "Regcache ran out of temp regs, might need to DiscardR() some.");
	return -1;
}
|
||||
|
||||
// Returns the byte offset of FPU/VFPU/temp reg r within MIPSState, used as
// the load/store offset from CTXREG.
int ArmRegCacheFPU::GetMipsRegOffset(MIPSReg r) {
	// These are offsets within the MIPSState structure. First there are the GPRS, then FPRS, then the "VFPURs", then the VFPU ctrls.
	if (r < 0 || r > 32 + 128 + NUM_TEMPS) {
		ERROR_LOG(JIT, "bad mips register %i, out of range", r);
		return 0; // or what?
	}

	if (r < 32 || r >= 32 + 128) {
		// Plain FPRs and temps: laid out linearly after the 32 GPRs.
		return (32 + r) << 2;
	} else {
		// r is between 32 and 128 + 32
		// VFPU regs are stored staggered; voffset[] maps to their actual slot.
		return (32 + 32 + voffset[r - 32]) << 2;
	}
}
|
||||
|
||||
// Marks up to four FPU regs as unspillable; r1 is required, the trailing
// parameters are skipped when -1.
void ArmRegCacheFPU::SpillLock(MIPSReg r1, MIPSReg r2, MIPSReg r3, MIPSReg r4) {
	mr[r1].spillLock = true;
	if (r2 != -1) mr[r2].spillLock = true;
	if (r3 != -1) mr[r3].spillLock = true;
	if (r4 != -1) mr[r4].spillLock = true;
}
|
||||
|
||||
// This is actually pretty slow with all the 160 regs...
// Clears every spill lock, discards all temps, and unlocks/invalidates any
// temporary quad mappings.
void ArmRegCacheFPU::ReleaseSpillLocksAndDiscardTemps() {
	for (int i = 0; i < NUM_MIPSFPUREG; i++) {
		mr[i].spillLock = false;
	}
	for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; ++i) {
		DiscardR(i);
	}
	for (int i = 0; i < MAX_ARMQUADS; i++) {
		qr[i].spillLock = false;
		if (qr[i].isTemp) {
			qr[i].isTemp = false;
			qr[i].sz = V_Invalid;
		}
	}
}
|
||||
|
||||
// Returns the host register (S0-based) currently holding mipsReg. The reg
// must already be mapped; otherwise this logs (with a range-specific message)
// and returns INVALID_REG.
ARM64Reg ArmRegCacheFPU::R(int mipsReg) {
	if (mr[mipsReg].loc == ML_ARMREG) {
		return (ARM64Reg)(mr[mipsReg].reg + S0);
	} else {
		if (mipsReg < 32) {
			ERROR_LOG(JIT, "FReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg, js_->compilerPC, MIPSDisasmAt(js_->compilerPC));
		} else if (mipsReg < 32 + 128) {
			ERROR_LOG(JIT, "VReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC));
		} else {
			ERROR_LOG(JIT, "Tempreg %i not in ARM reg. compilerPC = %08x : %s", mipsReg - 128 - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC));
		}
		return INVALID_REG; // BAAAD
	}
}
|
186
Core/MIPS/ARM64/Arm64RegCacheFPU.h
Normal file
186
Core/MIPS/ARM64/Arm64RegCacheFPU.h
Normal file
|
@ -0,0 +1,186 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#pragma once
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../MIPS.h"
|
||||
#include "../MIPSAnalyst.h"
|
||||
#include "Core/MIPS/ARM64/Arm64RegCache.h"
|
||||
#include "Core/MIPS/MIPSVFPUUtils.h"
|
||||
#include "Common/Arm64Emitter.h"
|
||||
|
||||
// These collide with something on Blackberry.
|
||||
#undef MAP_NOINIT
|
||||
#undef MAP_READ
|
||||
|
||||
namespace Arm64JitConstants {
|
||||
|
||||
enum {
|
||||
NUM_TEMPS = 16,
|
||||
TEMP0 = 32 + 128,
|
||||
TOTAL_MAPPABLE_MIPSFPUREGS = 32 + 128 + NUM_TEMPS,
|
||||
};
|
||||
|
||||
enum {
|
||||
MAP_READ = 0,
|
||||
MAP_MTX_TRANSPOSED = 16,
|
||||
MAP_PREFER_LOW = 16,
|
||||
MAP_PREFER_HIGH = 32,
|
||||
|
||||
// Force is not yet correctly implemented, if the reg is already mapped it will not move
|
||||
MAP_FORCE_LOW = 64, // Only map Q0-Q7 (and probably not Q0-Q3 as they are S registers so that leaves Q8-Q15)
|
||||
MAP_FORCE_HIGH = 128, // Only map Q8-Q15
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
struct FPURegARM64 {
|
||||
int mipsReg; // if -1, no mipsreg attached.
|
||||
bool isDirty; // Should the register be written back?
|
||||
};
|
||||
|
||||
struct FPURegQuad64 {
|
||||
int mipsVec;
|
||||
VectorSize sz;
|
||||
u8 vregs[4];
|
||||
bool isDirty;
|
||||
bool spillLock;
|
||||
bool isTemp;
|
||||
};
|
||||
|
||||
struct FPURegMIPS {
|
||||
// Where is this MIPS register?
|
||||
Arm64JitConstants::RegMIPSLoc loc;
|
||||
// Data (only one of these is used, depending on loc. Could make a union).
|
||||
u32 reg;
|
||||
int lane;
|
||||
|
||||
bool spillLock; // if true, this register cannot be spilled.
|
||||
bool tempLock;
|
||||
// If loc == ML_MEM, it's back in its location in the CPU context struct.
|
||||
};
|
||||
|
||||
namespace MIPSComp {
|
||||
struct Arm64JitOptions;
|
||||
struct JitState;
|
||||
}
|
||||
|
||||
class ArmRegCacheFPU {
|
||||
public:
|
||||
ArmRegCacheFPU(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo);
|
||||
~ArmRegCacheFPU() {}
|
||||
|
||||
void Init(Arm64Gen::ARM64XEmitter *emitter);
|
||||
|
||||
void Start(MIPSAnalyst::AnalysisResults &stats);
|
||||
|
||||
// Protect the arm register containing a MIPS register from spilling, to ensure that
|
||||
// it's being kept allocated.
|
||||
void SpillLock(MIPSReg reg, MIPSReg reg2 = -1, MIPSReg reg3 = -1, MIPSReg reg4 = -1);
|
||||
void SpillLockV(MIPSReg r) { SpillLock(r + 32); }
|
||||
|
||||
void ReleaseSpillLocksAndDiscardTemps();
|
||||
void ReleaseSpillLock(int mipsreg) {
|
||||
mr[mipsreg].spillLock = false;
|
||||
}
|
||||
void ReleaseSpillLockV(int mipsreg) {
|
||||
ReleaseSpillLock(mipsreg + 32);
|
||||
}
|
||||
|
||||
void SetImm(MIPSReg reg, u32 immVal);
|
||||
bool IsImm(MIPSReg reg) const;
|
||||
u32 GetImm(MIPSReg reg) const;
|
||||
|
||||
// Returns an ARM register containing the requested MIPS register.
|
||||
Arm64Gen::ARM64Reg MapReg(MIPSReg reg, int mapFlags = 0);
|
||||
void MapInIn(MIPSReg rd, MIPSReg rs);
|
||||
void MapDirty(MIPSReg rd);
|
||||
void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true);
|
||||
void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
|
||||
bool IsMapped(MIPSReg r);
|
||||
void FlushArmReg(Arm64Gen::ARM64Reg r);
|
||||
void FlushR(MIPSReg r);
|
||||
void DiscardR(MIPSReg r);
|
||||
Arm64Gen::ARM64Reg R(int preg); // Returns a cached register
|
||||
|
||||
// VFPU register as single ARM VFP registers. Must not be used in the upcoming NEON mode!
|
||||
void MapRegV(int vreg, int flags = 0);
|
||||
void LoadToRegV(Arm64Gen::ARM64Reg armReg, int vreg);
|
||||
void MapInInV(int rt, int rs);
|
||||
void MapDirtyInV(int rd, int rs, bool avoidLoad = true);
|
||||
void MapDirtyInInV(int rd, int rs, int rt, bool avoidLoad = true);
|
||||
|
||||
bool IsTempX(Arm64Gen::ARM64Reg r) const;
|
||||
MIPSReg GetTempV() { return GetTempR() - 32; }
|
||||
// VFPU registers as single VFP registers.
|
||||
Arm64Gen::ARM64Reg V(int vreg) { return R(vreg + 32); }
|
||||
|
||||
int FlushGetSequential(int a, int maxArmReg);
|
||||
void FlushAll();
|
||||
|
||||
// This one is allowed at any point.
|
||||
void FlushV(MIPSReg r);
|
||||
|
||||
// NOTE: These require you to release spill locks manually!
|
||||
void MapRegsAndSpillLockV(int vec, VectorSize vsz, int flags);
|
||||
void MapRegsAndSpillLockV(const u8 *v, VectorSize vsz, int flags);
|
||||
|
||||
void SpillLockV(const u8 *v, VectorSize vsz);
|
||||
void SpillLockV(int vec, VectorSize vsz);
|
||||
|
||||
void SetEmitter(Arm64Gen::ARM64XEmitter *emitter) { emit_ = emitter; }
|
||||
|
||||
int GetMipsRegOffset(MIPSReg r);
|
||||
|
||||
private:
|
||||
MIPSReg GetTempR();
|
||||
const Arm64Gen::ARM64Reg *GetMIPSAllocationOrder(int &count);
|
||||
int GetMipsRegOffsetV(MIPSReg r) {
|
||||
return GetMipsRegOffset(r + 32);
|
||||
}
|
||||
int GetNumARMFPURegs();
|
||||
|
||||
void SetupInitialRegs();
|
||||
|
||||
MIPSState *mips_;
|
||||
Arm64Gen::ARM64XEmitter *emit_;
|
||||
MIPSComp::JitState *js_;
|
||||
MIPSComp::Arm64JitOptions *jo_;
|
||||
|
||||
int numARMFpuReg_;
|
||||
int qTime_;
|
||||
|
||||
enum {
|
||||
// With NEON, we have 64 S = 32 D = 16 Q registers. Only the first 32 S registers
|
||||
// are individually mappable though.
|
||||
MAX_ARMFPUREG = 32,
|
||||
MAX_ARMQUADS = 16,
|
||||
NUM_MIPSFPUREG = Arm64JitConstants::TOTAL_MAPPABLE_MIPSFPUREGS,
|
||||
};
|
||||
|
||||
FPURegARM64 ar[MAX_ARMFPUREG];
|
||||
FPURegMIPS mr[NUM_MIPSFPUREG];
|
||||
FPURegQuad64 qr[MAX_ARMQUADS];
|
||||
FPURegMIPS *vr;
|
||||
|
||||
bool pendingFlush;
|
||||
bool initialReady;
|
||||
FPURegARM64 arInitial[MAX_ARMFPUREG];
|
||||
FPURegMIPS mrInitial[NUM_MIPSFPUREG];
|
||||
};
|
|
@ -30,6 +30,10 @@
|
|||
#include "Common/ArmEmitter.h"
|
||||
namespace ArmGen { class ARMXEmitter; }
|
||||
typedef ArmGen::ARMXCodeBlock NativeCodeBlock;
|
||||
#elif defined(ARM64)
|
||||
#include "Common/Arm64Emitter.h"
|
||||
namespace Arm64Gen { class ARM64XEmitter; }
|
||||
typedef Arm64Gen::ARM64CodeBlock NativeCodeBlock;
|
||||
#elif defined(_M_IX86) || defined(_M_X64)
|
||||
#include "Common/x64Emitter.h"
|
||||
namespace Gen { class XEmitter; }
|
||||
|
@ -44,7 +48,7 @@ namespace FakeGen { class FakeXEmitter; }
|
|||
typedef FakeGen::FakeXCodeBlock NativeCodeBlock;
|
||||
#endif
|
||||
|
||||
#if defined(ARM)
|
||||
#if defined(ARM) || defined(ARM64)
|
||||
const int MAX_JIT_BLOCK_EXITS = 2;
|
||||
#else
|
||||
const int MAX_JIT_BLOCK_EXITS = 8;
|
||||
|
|
|
@ -23,10 +23,13 @@
|
|||
|
||||
#include "ext/disarm.h"
|
||||
#include "ext/udis86/udis86.h"
|
||||
#include "Core/Util/DisArm64.h"
|
||||
|
||||
namespace MIPSComp {
|
||||
#if defined(ARM)
|
||||
ArmJit *jit;
|
||||
#elif defined(ARM64)
|
||||
Arm64Jit *jit;
|
||||
#else
|
||||
Jit *jit;
|
||||
#endif
|
||||
|
@ -35,6 +38,7 @@ namespace MIPSComp {
|
|||
}
|
||||
}
|
||||
|
||||
#if !defined(ARM64)
|
||||
// We compile this for x86 as well because it may be useful when developing the ARM JIT on a PC.
|
||||
std::vector<std::string> DisassembleArm2(const u8 *data, int size) {
|
||||
std::vector<std::string> lines;
|
||||
|
@ -75,8 +79,37 @@ std::vector<std::string> DisassembleArm2(const u8 *data, int size) {
|
|||
}
|
||||
return lines;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef ARM
|
||||
#if !defined(ARM)
|
||||
std::vector<std::string> DisassembleArm64(const u8 *data, int size) {
|
||||
std::vector<std::string> lines;
|
||||
|
||||
char temp[256];
|
||||
int bkpt_count = 0;
|
||||
for (int i = 0; i < size; i += 4) {
|
||||
const u32 *codePtr = (const u32 *)(data + i);
|
||||
u32 inst = codePtr[0];
|
||||
Arm64Dis((u32)(intptr_t)codePtr, inst, temp, sizeof(temp), false);
|
||||
std::string buf = temp;
|
||||
if (buf == "BKPT 1") {
|
||||
bkpt_count++;
|
||||
} else {
|
||||
if (bkpt_count) {
|
||||
lines.push_back(StringFromFormat("BKPT 1 (x%i)", bkpt_count));
|
||||
bkpt_count = 0;
|
||||
}
|
||||
lines.push_back(buf);
|
||||
}
|
||||
}
|
||||
if (bkpt_count) {
|
||||
lines.push_back(StringFromFormat("BKPT 1 (x%i)", bkpt_count));
|
||||
}
|
||||
return lines;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !defined(ARM) && !defined(ARM64)
|
||||
|
||||
const char *ppsspp_resolver(struct ud*,
|
||||
uint64_t addr,
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
// TODO: Find a better place for these.
|
||||
std::vector<std::string> DisassembleArm2(const u8 *data, int size);
|
||||
std::vector<std::string> DisassembleArm64(const u8 *data, int size);
|
||||
std::vector<std::string> DisassembleX86(const u8 *data, int size);
|
||||
|
||||
namespace MIPSComp {
|
||||
|
|
|
@ -26,6 +26,9 @@ struct JitBlock;
|
|||
#if defined(ARM)
|
||||
#include "../ARM/ArmJit.h"
|
||||
typedef MIPSComp::ArmJit NativeJit;
|
||||
#elif defined(ARM64)
|
||||
#include "../ARM64/Arm64Jit.h"
|
||||
typedef MIPSComp::Arm64Jit NativeJit;
|
||||
#elif defined(_M_IX86) || defined(_M_X64)
|
||||
#include "../x86/Jit.h"
|
||||
typedef MIPSComp::Jit NativeJit;
|
||||
|
|
|
@ -211,6 +211,8 @@ void MIPSState::Init() {
|
|||
if (PSP_CoreParameter().cpuCore == CPU_JIT) {
|
||||
#ifdef ARM
|
||||
MIPSComp::jit = new MIPSComp::ArmJit(this);
|
||||
#elif defined(ARM64)
|
||||
MIPSComp::jit = new MIPSComp::Arm64Jit(this);
|
||||
#elif defined(_M_IX86) || defined(_M_X64)
|
||||
MIPSComp::jit = new MIPSComp::Jit(this);
|
||||
#elif defined(MIPS)
|
||||
|
@ -218,6 +220,8 @@ void MIPSState::Init() {
|
|||
#else
|
||||
MIPSComp::jit = new MIPSComp::FakeJit(this);
|
||||
#endif
|
||||
} else {
|
||||
MIPSComp::jit = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -236,6 +240,8 @@ void MIPSState::UpdateCore(CPUCore desired) {
|
|||
if (!MIPSComp::jit) {
|
||||
#ifdef ARM
|
||||
MIPSComp::jit = new MIPSComp::ArmJit(this);
|
||||
#elif defined(ARM64)
|
||||
MIPSComp::jit = new MIPSComp::Arm64Jit(this);
|
||||
#elif defined(_M_IX86) || defined(_M_X64)
|
||||
MIPSComp::jit = new MIPSComp::Jit(this);
|
||||
#elif defined(MIPS)
|
||||
|
|
|
@ -90,6 +90,8 @@ struct MIPSInstruction {
|
|||
|
||||
#ifdef ARM
|
||||
#define JITFUNC(f) (&ArmJit::f)
|
||||
#elif defined(ARM64)
|
||||
#define JITFUNC(f) (&Arm64Jit::f)
|
||||
#elif defined(_M_X64) || defined(_M_IX86)
|
||||
#define JITFUNC(f) (&Jit::f)
|
||||
#elif defined(MIPS)
|
||||
|
|
|
@ -185,7 +185,7 @@ static bool Memory_TryBase(u32 flags) {
|
|||
if (!*view.out_ptr_low)
|
||||
goto bail;
|
||||
}
|
||||
#ifdef _M_X64
|
||||
#if defined(_M_X64) || defined(ARM64)
|
||||
*view.out_ptr = (u8*)g_arena.CreateView(
|
||||
position, view.size, base + view.virtual_address);
|
||||
#else
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#include "Core/Util/AudioFormat.h"
|
||||
#include "Core/Util/AudioFormatNEON.h"
|
||||
|
||||
#ifndef ARM
|
||||
#if !defined(ARM) && !defined(ARM64)
|
||||
#error Should not be compiled on non-ARM.
|
||||
#endif
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
#include <arm_neon.h>
|
||||
#include "GPU/Common/TextureDecoder.h"
|
||||
|
||||
#ifndef ARM
|
||||
#if !defined(ARM) && !defined(ARM64)
|
||||
#error Should not be compiled on non-ARM.
|
||||
#endif
|
||||
|
||||
|
@ -29,7 +29,7 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) {
|
|||
__builtin_prefetch(checkp, 0, 0);
|
||||
|
||||
if (((intptr_t)checkp & 0xf) == 0 && (size & 0x3f) == 0) {
|
||||
#ifdef IOS
|
||||
#if defined(IOS) || defined(ARM64)
|
||||
uint32x4_t cursor = vdupq_n_u32(0);
|
||||
uint16x8_t cursor2 = vld1q_u16(QuickTexHashInitial);
|
||||
uint16x8_t update = vdupq_n_u16(0x2455U);
|
||||
|
|
33
GPU/Common/VertexDecoderArm64.cpp
Normal file
33
GPU/Common/VertexDecoderArm64.cpp
Normal file
|
@ -0,0 +1,33 @@
|
|||
// Copyright (c) 2013- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "Common/Arm64Emitter.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/Common/VertexDecoderCommon.h"
|
||||
|
||||
JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
|
||||
// TODO ARM64
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool VertexDecoderJitCache::CompileStep(const VertexDecoder &dec, int step) {
|
||||
return false;
|
||||
}
|
|
@ -25,6 +25,8 @@
|
|||
#include "GPU/ge_constants.h"
|
||||
#ifdef ARM
|
||||
#include "Common/ArmEmitter.h"
|
||||
#elif defined(ARM64)
|
||||
#include "Common/Arm64Emitter.h"
|
||||
#elif defined(_M_IX86) || defined(_M_X64)
|
||||
#include "Common/x64Emitter.h"
|
||||
#elif defined(MIPS)
|
||||
|
@ -446,8 +448,7 @@ struct VertexDecoderOptions {
|
|||
bool expand8BitNormalsToFloat;
|
||||
};
|
||||
|
||||
class VertexDecoder
|
||||
{
|
||||
class VertexDecoder {
|
||||
public:
|
||||
VertexDecoder();
|
||||
|
||||
|
@ -587,6 +588,8 @@ public:
|
|||
|
||||
#ifdef ARM
|
||||
class VertexDecoderJitCache : public ArmGen::ARMXCodeBlock {
|
||||
#elif defined(ARM64)
|
||||
class VertexDecoderJitCache : public Arm64Gen::ARM64CodeBlock {
|
||||
#elif defined(_M_IX86) || defined(_M_X64)
|
||||
class VertexDecoderJitCache : public Gen::XCodeBlock {
|
||||
#elif defined(MIPS)
|
||||
|
|
|
@ -262,6 +262,12 @@
|
|||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Common\VertexDecoderArm64.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Common\VertexDecoderCommon.cpp" />
|
||||
<ClCompile Include="Common\VertexDecoderX86.cpp" />
|
||||
<ClCompile Include="Debugger\Breakpoints.cpp" />
|
||||
|
|
|
@ -350,6 +350,9 @@
|
|||
<ClCompile Include="Directx9\DepalettizeShaderDX9.cpp">
|
||||
<Filter>DirectX9</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Common\VertexDecoderArm64.cpp">
|
||||
<Filter>Common</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="CMakeLists.txt" />
|
||||
|
|
|
@ -283,6 +283,8 @@ const char *GetCompilerABI() {
|
|||
return "armeabi-v7a";
|
||||
#elif defined(ARM)
|
||||
return "armeabi";
|
||||
#elif defined(ARM64)
|
||||
return "arm64";
|
||||
#elif defined(_M_IX86)
|
||||
return "x86";
|
||||
#elif defined(_M_X64)
|
||||
|
|
|
@ -469,6 +469,9 @@ void GameSettingsScreen::CreateViews() {
|
|||
systemSettings->Add(new ItemHeader(s->T("UI Language")));
|
||||
systemSettings->Add(new Choice(dev->T("Language", "Language")))->OnClick.Handle(this, &GameSettingsScreen::OnLanguage);
|
||||
|
||||
systemSettings->Add(new ItemHeader(s->T("Developer Tools")));
|
||||
systemSettings->Add(new Choice(s->T("Developer Tools")))->OnClick.Handle(this, &GameSettingsScreen::OnDeveloperTools);
|
||||
|
||||
systemSettings->Add(new ItemHeader(s->T("Help the PPSSPP team")));
|
||||
enableReports_ = Reporting::IsEnabled();
|
||||
enableReportsCheckbox_ = new CheckBox(&enableReports_, s->T("Enable Compatibility Server Reports"));
|
||||
|
@ -491,9 +494,6 @@ void GameSettingsScreen::CreateViews() {
|
|||
#endif
|
||||
systemSettings->Add(new CheckBox(&g_Config.bSetRoundingMode, s->T("Respect FPU rounding (disable for old GEB saves)")))->OnClick.Handle(this, &GameSettingsScreen::OnJitAffectingSetting);
|
||||
|
||||
systemSettings->Add(new ItemHeader(s->T("Developer Tools")));
|
||||
systemSettings->Add(new Choice(s->T("Developer Tools")))->OnClick.Handle(this, &GameSettingsScreen::OnDeveloperTools);
|
||||
|
||||
systemSettings->Add(new ItemHeader(s->T("General")));
|
||||
|
||||
#ifdef ANDROID
|
||||
|
|
|
@ -51,7 +51,6 @@ ARCH_FILES := \
|
|||
$(SRC)/GPU/Common/VertexDecoderX86.cpp
|
||||
endif
|
||||
|
||||
# ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
|
||||
ifeq ($(findstring armeabi-v7a,$(TARGET_ARCH_ABI)),armeabi-v7a)
|
||||
ARCH_FILES := \
|
||||
$(SRC)/GPU/Common/TextureDecoderNEON.cpp.neon \
|
||||
|
@ -72,9 +71,30 @@ ARCH_FILES := \
|
|||
$(SRC)/Core/MIPS/ARM/ArmRegCache.cpp \
|
||||
$(SRC)/Core/MIPS/ARM/ArmRegCacheFPU.cpp \
|
||||
$(SRC)/GPU/Common/VertexDecoderArm.cpp \
|
||||
$(SRC)/ext/disarm.cpp \
|
||||
ArmEmitterTest.cpp
|
||||
endif
|
||||
|
||||
ifeq ($(findstring arm64-v8a,$(TARGET_ARCH_ABI)),arm64-v8a)
|
||||
ARCH_FILES := \
|
||||
$(SRC)/GPU/Common/TextureDecoderNEON.cpp \
|
||||
$(SRC)/Core/Util/AudioFormatNEON.cpp \
|
||||
$(SRC)/Common/Arm64Emitter.cpp \
|
||||
$(SRC)/Common/ArmCPUDetect.cpp \
|
||||
$(SRC)/Core/MIPS/ARM64/Arm64CompALU.cpp \
|
||||
$(SRC)/Core/MIPS/ARM64/Arm64CompBranch.cpp \
|
||||
$(SRC)/Core/MIPS/ARM64/Arm64CompFPU.cpp \
|
||||
$(SRC)/Core/MIPS/ARM64/Arm64CompLoadStore.cpp \
|
||||
$(SRC)/Core/MIPS/ARM64/Arm64CompVFPU.cpp \
|
||||
$(SRC)/Core/MIPS/ARM64/Arm64CompReplace.cpp \
|
||||
$(SRC)/Core/MIPS/ARM64/Arm64Asm.cpp \
|
||||
$(SRC)/Core/MIPS/ARM64/Arm64Jit.cpp \
|
||||
$(SRC)/Core/MIPS/ARM64/Arm64RegCache.cpp \
|
||||
$(SRC)/Core/MIPS/ARM64/Arm64RegCacheFPU.cpp \
|
||||
$(SRC)/Core/Util/DisArm64.cpp \
|
||||
$(SRC)/GPU/Common/VertexDecoderArm64.cpp
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH_ABI),armeabi)
|
||||
ARCH_FILES := \
|
||||
$(SRC)/Common/ArmEmitter.cpp \
|
||||
|
@ -112,7 +132,6 @@ EXEC_AND_LIB_FILES := \
|
|||
$(SRC)/Core/MIPS/MIPSDebugInterface.cpp \
|
||||
$(SRC)/UI/ui_atlas.cpp \
|
||||
$(SRC)/UI/OnScreenDisplay.cpp \
|
||||
$(SRC)/ext/disarm.cpp \
|
||||
$(SRC)/ext/libkirk/AES.c \
|
||||
$(SRC)/ext/libkirk/amctrl.c \
|
||||
$(SRC)/ext/libkirk/SHA1.c \
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
APP_STL := gnustl_static
|
||||
APP_PLATFORM := android-9
|
||||
APP_ABI := armeabi-v7a x86
|
||||
#APP_ABI := armeabi-v7a x86
|
||||
#APP_ABI := armeabi-v7a
|
||||
APP_ABI := arm64-v8a
|
||||
APP_GNUSTL_CPP_FEATURES :=
|
||||
NDK_TOOLCHAIN_VERSION := 4.8
|
||||
# NDK_TOOLCHAIN_VERSION := 4.9
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
# These are definitions for LOCAL_ variables for PPSSPP.
|
||||
# They are shared between ppsspp_jni (lib for Android app) and ppsspp_headless.
|
||||
|
||||
LOCAL_CFLAGS := -DUSE_FFMPEG -DUSING_GLES2 -DMOBILE_DEVICE -O3 -fsigned-char -Wall -Wno-multichar -Wno-psabi -Wno-unused-variable -fno-strict-aliasing -D__STDC_CONSTANT_MACROS
|
||||
LOCAL_CFLAGS := -DUSE_FFMPEG -DUSING_GLES2 -DMOBILE_DEVICE -O3 -fsigned-char -Wall -Wno-multichar -Wno-psabi -Wno-unused-variable -fno-strict-aliasing -D__STDC_CONSTANT_MACROS -Wno-format
|
||||
# yes, it's really CPPFLAGS for C++
|
||||
# literal-suffix is generated by Android default code and causes noise.
|
||||
LOCAL_CPPFLAGS := -fno-exceptions -std=gnu++11 -fno-rtti -Wno-reorder -Wno-literal-suffix
|
||||
LOCAL_CPPFLAGS := -fno-exceptions -std=gnu++11 -fno-rtti -Wno-reorder -Wno-literal-suffix -Wno-format
|
||||
LOCAL_C_INCLUDES := \
|
||||
$(LOCAL_PATH)/../../Common \
|
||||
$(LOCAL_PATH)/../.. \
|
||||
|
@ -49,12 +49,12 @@ ifeq ($(TARGET_ARCH_ABI),x86)
|
|||
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_ARCH_32 -D_M_IX86 -fomit-frame-pointer -mtune=atom -mfpmath=sse -mssse3
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
|
||||
#LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavformat.a
|
||||
#LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavcodec.a
|
||||
#LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswresample.a
|
||||
#LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswscale.a
|
||||
#LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavutil.a
|
||||
#LOCAL_C_INCLUDES += $(LOCAL_PATH)/../../ffmpeg/android/arm64/include
|
||||
LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavformat.a
|
||||
LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavcodec.a
|
||||
LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswresample.a
|
||||
LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswscale.a
|
||||
LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavutil.a
|
||||
LOCAL_C_INCLUDES += $(LOCAL_PATH)/../../ffmpeg/android/arm64/include
|
||||
|
||||
LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_ARCH_64 -DARM64
|
||||
endif
|
||||
|
|
Loading…
Add table
Reference in a new issue