diff --git a/Common/Arm64Emitter.cpp b/Common/Arm64Emitter.cpp index 7efbeba3a5..ae810d2fd3 100644 --- a/Common/Arm64Emitter.cpp +++ b/Common/Arm64Emitter.cpp @@ -3,6 +3,8 @@ // Refer to the license.txt file included. #include +#include +#include #include "Arm64Emitter.h" #include "MathUtil.h" @@ -193,11 +195,11 @@ void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr bool b64Bit = Is64Bit(Rt); s64 distance = (s64)ptr - (s64)m_code; - _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, (int)distance); distance >>= 2; - _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance); Rt = DecodeReg(Rt); Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | \ @@ -209,11 +211,11 @@ void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const voi bool b64Bit = Is64Bit(Rt); s64 distance = (s64)ptr - (s64)m_code; - _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, (int)distance); distance >>= 2; - _assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s: Received too large distance: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance); Rt = DecodeReg(Rt); Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | \ @@ -224,11 +226,11 @@ void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr) { s64 distance = (s64)ptr - s64(m_code); - _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, (int)distance); distance >>= 2; - _assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance); Write32((op << 31) | (0x5 << 26) | (distance & 0x3FFFFFF)); } @@ -566,7 +568,7 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) Not = true; case 0: // CBZ { - _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance); + _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, (int)distance); bool b64Bit = Is64Bit(branch.reg); ARM64Reg reg = DecodeReg(branch.reg); inst = (b64Bit << 31) | (0x1A << 25) | (Not << 24) | ((distance << 5) & 0xFFFFE0) | reg; @@ -580,7 +582,7 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch) Not = true; case 3: // TBZ { - _assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance); + _assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, (int)distance); ARM64Reg 
reg = DecodeReg(branch.reg); inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | ((branch.bit & 0x1F) << 19) | (distance << 5) | reg; } @@ -677,7 +679,7 @@ void ARM64XEmitter::B(CCFlags cond, const void* ptr) s64 distance = (s64)ptr - (s64(m_code) + 8); distance >>= 2; - _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance); + _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance); Write32((0x54 << 24) | (distance << 5) | cond); } @@ -1488,7 +1490,7 @@ void ARM64XEmitter::ADRP(ARM64Reg Rd, s32 imm) // Wrapper around MOVZ+MOVK void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize) { - unsigned parts = Is64Bit(Rd) ? 4 : 2; + unsigned int parts = Is64Bit(Rd) ? 4 : 2; BitSet32 upload_part(0); bool need_movz = false; @@ -1513,7 +1515,7 @@ void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize) // XXX: Support rotating immediates to save instructions if (optimize) { - for (unsigned i = 0; i < parts; ++i) + for (unsigned int i = 0; i < parts; ++i) { if ((imm >> (i * 16)) & 0xFFFF) upload_part[i] = 1; diff --git a/Core/Core.vcxproj b/Core/Core.vcxproj index dfe8680722..c1681d1049 100644 --- a/Core/Core.vcxproj +++ b/Core/Core.vcxproj @@ -286,6 +286,66 @@ + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + true true @@ -540,6 +600,30 @@ + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + + + true + true + true + true + true true @@ -632,4 +716,4 @@ - \ No newline at end of file + diff --git a/Core/Core.vcxproj.filters b/Core/Core.vcxproj.filters index 4bbb788e14..0069834c31 100644 --- a/Core/Core.vcxproj.filters +++ b/Core/Core.vcxproj.filters @@ -557,6 +557,16 @@ HLE\Libraries + + + + + + + + + + @@ -1064,6 +1074,10 @@ Core + + + + @@ -1071,4 +1085,4 @@ - \ No newline at end of file + diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index e6c354d190..1f22e7432f 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -1015,6 +1015,8 @@ static int Hook_gakuenheaven_download_frame() { #ifdef ARM #define JITFUNC(f) (&MIPSComp::ArmJit::f) +#elif defined(ARM64) +#define JITFUNC(f) (&MIPSComp::Arm64Jit::f) #elif defined(_M_X64) || defined(_M_IX86) #define JITFUNC(f) (&MIPSComp::Jit::f) #elif defined(MIPS) diff --git a/Core/MIPS/ARM64/Arm64Asm.cpp b/Core/MIPS/ARM64/Arm64Asm.cpp new file mode 100644 index 0000000000..f7642ffedc --- /dev/null +++ b/Core/MIPS/ARM64/Arm64Asm.cpp @@ -0,0 +1,86 @@ +// Copyright (c) 2015- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + + +#include "Core/MemMap.h" +#include "Core/MIPS/MIPS.h" +#include "Core/System.h" +#include "Core/CoreTiming.h" +#include "Common/MemoryUtil.h" +#include "Common/CPUDetect.h" +#include "Common/Arm64Emitter.h" +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64Asm.h" +#include "Core/MIPS/JitCommon/JitCommon.h" + +using namespace Arm64Gen; + +//static int temp32; // unused? + +static const bool enableDebug = false; + +//static bool enableStatistics = false; //unused? + +//The standard ARM calling convention allocates the 16 ARM registers as: + +// r15 is the program counter. +// r14 is the link register. (The BL instruction, used in a subroutine call, stores the return address in this register). +// r13 is the stack pointer. (The Push/Pop instructions in "Thumb" operating mode use this register only). +// r12 is the Intra-Procedure-call scratch register. +// r4 to r11: used to hold local variables. +// r0 to r3: used to hold argument values passed to a subroutine, and also hold results returned from a subroutine. + +// Mappable registers: +// R2, R3, R4, R5, R6, R8, R11 + +// STATIC ALLOCATION ARM: +// R10 : MIPS state +// R11 : Memory base pointer. +// R7 : Down counter +extern volatile CoreState coreState; + +void ShowPC(u32 sp) { + if (currentMIPS) { + ERROR_LOG(JIT, "ShowPC : %08x ArmSP : %08x", currentMIPS->pc, sp); + } else { + ERROR_LOG(JIT, "Universe corrupt?"); + } +} + +void DisassembleArm(const u8 *data, int size); + +// PLAN: no more block numbers - crazy opcodes just contain offset within +// dynarec buffer +// At this offset - 4, there is an int specifying the block number. + +namespace MIPSComp { + +using namespace Arm64JitConstants; + +void Arm64Jit::GenerateFixedCode() +{ + + // Uncomment if you want to see the output... + // INFO_LOG(JIT, "THE DISASM ========================"); + // DisassembleArm(enterCode, GetCodePtr() - enterCode); + // INFO_LOG(JIT, "END OF THE DISASM ========================"); + + // Don't forget to zap the instruction cache! + FlushIcache(); +} + +} // namespace MIPSComp diff --git a/Core/MIPS/ARM64/Arm64Asm.h b/Core/MIPS/ARM64/Arm64Asm.h new file mode 100644 index 0000000000..dd3b2046d8 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64Asm.h @@ -0,0 +1,22 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "Core/MIPS/MIPS.h" + +// Runtime generated assembly routines, like the Dispatcher. diff --git a/Core/MIPS/ARM64/Arm64CompALU.cpp b/Core/MIPS/ARM64/Arm64CompALU.cpp new file mode 100644 index 0000000000..15cb41197e --- /dev/null +++ b/Core/MIPS/ARM64/Arm64CompALU.cpp @@ -0,0 +1,83 @@ +// Copyright (c) 2012- PPSSPP Project. 
+ +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include + +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Common/CPUDetect.h" + +using namespace MIPSAnalyst; + +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. + +//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; } +#define CONDITIONAL_DISABLE ; +#define DISABLE { Comp_Generic(op); return; } + +namespace MIPSComp +{ +using namespace Arm64Gen; +using namespace Arm64JitConstants; + +void Arm64Jit::Comp_IType(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_RType2(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_RType3(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_ShiftType(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_Special3(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_Allegrex(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_Allegrex2(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_MulDivType(MIPSOpcode op) { + DISABLE; +} +} diff --git a/Core/MIPS/ARM64/Arm64CompBranch.cpp b/Core/MIPS/ARM64/Arm64CompBranch.cpp new file mode 100644 index 0000000000..3fd83a68b1 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64CompBranch.cpp @@ -0,0 +1,614 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
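+// Worked example of the branch-target arithmetic used throughout this file
+// (illustration only, not emitted code; the helper name is made up):
+//
+//   static u32 BranchTargetFromImm16(u32 compilerPC, u32 op) {
+//     int offset = (signed short)(op & 0xFFFF) << 2;  // i.e. _IMM16 << 2
+//     return compilerPC + offset + 4;                 // relative to the delay slot
+//   }
+//
+// A branch at 0x08804000 with imm16 = 3 therefore targets 0x08804010, and with
+// imm16 = 0xFFFF (-1) it targets 0x08804000, i.e. the branch instruction itself.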
+ +#include "Core/Reporting.h" +#include "Core/Config.h" +#include "Core/MemMap.h" +#include "Core/HLE/HLE.h" +#include "Core/HLE/HLETables.h" + +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/MIPSAnalyst.h" +#include "Core/MIPS/MIPSTables.h" + +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Core/MIPS/JitCommon/JitBlockCache.h" + +#include "Common/Arm64Emitter.h" + +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +#define LOOPOPTIMIZATION 0 + +// We can disable nice delay slots. +// #define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false; +#define CONDITIONAL_NICE_DELAYSLOT ; + +using namespace MIPSAnalyst; + +namespace MIPSComp +{ + using namespace Arm64Gen; + using namespace Arm64JitConstants; + +void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely) +{ + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); + return; + } + int offset = _IMM16 << 2; + MIPSGPReg rt = _RT; + MIPSGPReg rs = _RS; + u32 targetAddr = js.compilerPC + offset + 4; + + bool immBranch = false; + bool immBranchTaken = false; + if (gpr.IsImm(rs) && gpr.IsImm(rt)) { + // The cc flags are opposites: when NOT to take the branch. + bool immBranchNotTaken; + s32 rsImm = (s32)gpr.GetImm(rs); + s32 rtImm = (s32)gpr.GetImm(rt); + + switch (cc) + { + case CC_EQ: immBranchNotTaken = rsImm == rtImm; break; + case CC_NEQ: immBranchNotTaken = rsImm != rtImm; break; + default: immBranchNotTaken = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp()."); + } + immBranch = true; + immBranchTaken = !immBranchNotTaken; + } + + if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) { + if (!immBranchTaken) { + // Skip the delay slot if likely, otherwise it'll be the next instruction. + if (likely) + js.compilerPC += 4; + return; + } + + // Branch taken. Always compile the delay slot, and then go to dest. + CompileDelaySlot(DELAYSLOT_NICE); + AddContinuedBlock(targetAddr); + // Account for the increment in the loop. + js.compilerPC = targetAddr - 4; + // In case the delay slot was a break or something. + js.compiling = true; + return; + } + + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4); + bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); + CONDITIONAL_NICE_DELAYSLOT; + + if (immBranch) { + // Continuing is handled above, this is just static jumping. + if (immBranchTaken || !likely) + CompileDelaySlot(DELAYSLOT_FLUSH); + else + FlushAll(); + + const u32 destAddr = immBranchTaken ? targetAddr : js.compilerPC + 8; + WriteExit(destAddr, js.nextExit++); + } else { + if (!likely && delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_NICE); + + // We might be able to flip the condition (EQ/NEQ are easy.) 
+ const bool canFlip = cc == CC_EQ || cc == CC_NEQ; + + // TODO ARM64: Optimize for immediates + gpr.MapInIn(rs, rt); + CMP(gpr.R(rs), gpr.R(rt)); + + Arm64Gen::FixupBranch ptr; + if (!likely) { + if (!delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + else + FlushAll(); + ptr = B(cc); + } else { + FlushAll(); + ptr = B(cc); + CompileDelaySlot(DELAYSLOT_FLUSH); + } + + // Take the branch + WriteExit(targetAddr, js.nextExit++); + + SetJumpTarget(ptr); + // Not taken + WriteExit(js.compilerPC + 8, js.nextExit++); + } + + js.compiling = false; +} + + +void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool likely) +{ + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); + return; + } + int offset = _IMM16 << 2; + MIPSGPReg rs = _RS; + u32 targetAddr = js.compilerPC + offset + 4; + + bool immBranch = false; + bool immBranchTaken = false; + if (gpr.IsImm(rs)) { + // The cc flags are opposites: when NOT to take the branch. + bool immBranchNotTaken; + s32 imm = (s32)gpr.GetImm(rs); + + switch (cc) + { + case CC_GT: immBranchNotTaken = imm > 0; break; + case CC_GE: immBranchNotTaken = imm >= 0; break; + case CC_LT: immBranchNotTaken = imm < 0; break; + case CC_LE: immBranchNotTaken = imm <= 0; break; + default: immBranchNotTaken = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp()."); + } + immBranch = true; + immBranchTaken = !immBranchNotTaken; + } + + if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) { + if (!immBranchTaken) { + // Skip the delay slot if likely, otherwise it'll be the next instruction. + if (likely) + js.compilerPC += 4; + return; + } + + // Branch taken. Always compile the delay slot, and then go to dest. + CompileDelaySlot(DELAYSLOT_NICE); + if (andLink) + gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8); + + AddContinuedBlock(targetAddr); + // Account for the increment in the loop. + js.compilerPC = targetAddr - 4; + // In case the delay slot was a break or something. + js.compiling = true; + return; + } + + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); + bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); + CONDITIONAL_NICE_DELAYSLOT; + + if (immBranch) { + // Continuing is handled above, this is just static jumping. + if (immBranchTaken && andLink) + gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8); + if (immBranchTaken || !likely) + CompileDelaySlot(DELAYSLOT_FLUSH); + else + FlushAll(); + + const u32 destAddr = immBranchTaken ? targetAddr : js.compilerPC + 8; + WriteExit(destAddr, js.nextExit++); + } else { + if (!likely && delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_NICE); + + gpr.MapReg(rs); + CMP(gpr.R(rs), 0); + + Arm64Gen::FixupBranch ptr; + if (!likely) + { + if (!delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + else + FlushAll(); + ptr = B(cc); + } + else + { + FlushAll(); + ptr = B(cc); + CompileDelaySlot(DELAYSLOT_FLUSH); + } + + // Take the branch + if (andLink) + { + gpr.SetRegImm(SCRATCHREG1, js.compilerPC + 8); + STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, MIPS_REG_RA * 4); + } + + WriteExit(targetAddr, js.nextExit++); + + SetJumpTarget(ptr); + // Not taken + WriteExit(js.compilerPC + 8, js.nextExit++); + } + js.compiling = false; +} + + +void Arm64Jit::Comp_RelBranch(MIPSOpcode op) +{ + // The CC flags here should be opposite of the actual branch becuase they skip the branching action. 
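+ // For example, beq (case 4 below) goes through BranchRSRTComp(op, CC_NEQ, false):
+ // the emitted CMP rs, rt followed by B(NE) jumps over the taken-path WriteExit to
+ // the not-taken one, so the MIPS branch is taken exactly when the ARM branch is not.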
+ switch (op >> 26) + { + case 4: BranchRSRTComp(op, CC_NEQ, false); break;//beq + case 5: BranchRSRTComp(op, CC_EQ, false); break;//bne + + case 6: BranchRSZeroComp(op, CC_GT, false, false); break;//blez + case 7: BranchRSZeroComp(op, CC_LE, false, false); break;//bgtz + + case 20: BranchRSRTComp(op, CC_NEQ, true); break;//beql + case 21: BranchRSRTComp(op, CC_EQ, true); break;//bnel + + case 22: BranchRSZeroComp(op, CC_GT, false, true); break;//blezl + case 23: BranchRSZeroComp(op, CC_LE, false, true); break;//bgtzl + + default: + _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); + break; + } +} + +void Arm64Jit::Comp_RelBranchRI(MIPSOpcode op) +{ + switch ((op >> 16) & 0x1F) + { + case 0: BranchRSZeroComp(op, CC_GE, false, false); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltz + case 1: BranchRSZeroComp(op, CC_LT, false, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez + case 2: BranchRSZeroComp(op, CC_GE, false, true); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 8; break;//bltzl + case 3: BranchRSZeroComp(op, CC_LT, false, true); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl + case 16: BranchRSZeroComp(op, CC_GE, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltzal + case 17: BranchRSZeroComp(op, CC_LT, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgezal + case 18: BranchRSZeroComp(op, CC_GE, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else SkipLikely(); break;//bltzall + case 19: BranchRSZeroComp(op, CC_LT, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else SkipLikely(); break;//bgezall + default: + _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); + break; + } +} + +// If likely is set, discard the branch slot if NOT taken. 
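+// The FPU compare flag lives in the MIPS_REG_FPCOND pseudo-register; BranchFPFlag
+// tests bit 0 of it with TSTI2R and, as with the integer branches above, cc is the
+// condition under which the branch is NOT taken (bc1f passes CC_NEQ, bc1t CC_EQ).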
+void Arm64Jit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely) { + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); + return; + } + int offset = _IMM16 << 2; + u32 targetAddr = js.compilerPC + offset + 4; + + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); + bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp); + CONDITIONAL_NICE_DELAYSLOT; + if (!likely && delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_NICE); + + gpr.MapReg(MIPS_REG_FPCOND); + TSTI2R(gpr.R(MIPS_REG_FPCOND), 1, W0); + + Arm64Gen::FixupBranch ptr; + if (!likely) { + if (!delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + else + FlushAll(); + ptr = B(cc); + } else { + FlushAll(); + ptr = B(cc); + CompileDelaySlot(DELAYSLOT_FLUSH); + } + + // Take the branch + WriteExit(targetAddr, js.nextExit++); + + SetJumpTarget(ptr); + // Not taken + WriteExit(js.compilerPC + 8, js.nextExit++); + js.compiling = false; +} + +void Arm64Jit::Comp_FPUBranch(MIPSOpcode op) { + switch((op >> 16) & 0x1f) { + case 0: BranchFPFlag(op, CC_NEQ, false); break; // bc1f + case 1: BranchFPFlag(op, CC_EQ, false); break; // bc1t + case 2: BranchFPFlag(op, CC_NEQ, true); break; // bc1fl + case 3: BranchFPFlag(op, CC_EQ, true); break; // bc1tl + default: + _dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted"); + break; + } +} + +// If likely is set, discard the branch slot if NOT taken. +void Arm64Jit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely) { + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); + return; + } + int offset = _IMM16 << 2; + u32 targetAddr = js.compilerPC + offset + 4; + + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); + + // Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle) + // The behavior is undefined - the CPU may take the second branch even if the first one passes. + // However, it does consistently try each branch, which these games seem to expect. + bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp); + bool delaySlotIsNice = !delaySlotIsBranch && IsDelaySlotNiceVFPU(op, delaySlotOp); + CONDITIONAL_NICE_DELAYSLOT; + if (!likely && delaySlotIsNice) + CompileDelaySlot(DELAYSLOT_NICE); + if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1) + ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", js.compilerPC); + + int imm3 = (op >> 18) & 7; + + gpr.MapReg(MIPS_REG_VFPUCC); + TSTI2R(gpr.R(MIPS_REG_VFPUCC), 1 << imm3, W0); + + Arm64Gen::FixupBranch ptr; + js.inDelaySlot = true; + if (!likely) + { + if (!delaySlotIsNice && !delaySlotIsBranch) + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + else + FlushAll(); + ptr = B(cc); + } + else + { + FlushAll(); + ptr = B(cc); + if (!delaySlotIsBranch) + CompileDelaySlot(DELAYSLOT_FLUSH); + } + js.inDelaySlot = false; + + // Take the branch + WriteExit(targetAddr, js.nextExit++); + + SetJumpTarget(ptr); + // Not taken + u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 
4 : 8); + WriteExit(notTakenTarget, js.nextExit++); + js.compiling = false; +} + +void Arm64Jit::Comp_VBranch(MIPSOpcode op) +{ + switch ((op >> 16) & 3) + { + case 0: BranchVFPUFlag(op, CC_NEQ, false); break; // bvf + case 1: BranchVFPUFlag(op, CC_EQ, false); break; // bvt + case 2: BranchVFPUFlag(op, CC_NEQ, true); break; // bvfl + case 3: BranchVFPUFlag(op, CC_EQ, true); break; // bvtl + } +} + +void Arm64Jit::Comp_Jump(MIPSOpcode op) { + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); + return; + } + u32 off = _IMM26 << 2; + u32 targetAddr = (js.compilerPC & 0xF0000000) | off; + + // Might be a stubbed address or something? + if (!Memory::IsValidAddress(targetAddr)) { + if (js.nextExit == 0) { + ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr); + } else { + js.compiling = false; + } + // TODO: Mark this block dirty or something? May be indication it will be changed by imports. + return; + } + + switch (op >> 26) { + case 2: //j + CompileDelaySlot(DELAYSLOT_NICE); + if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) { + AddContinuedBlock(targetAddr); + // Account for the increment in the loop. + js.compilerPC = targetAddr - 4; + // In case the delay slot was a break or something. + js.compiling = true; + return; + } + FlushAll(); + WriteExit(targetAddr, js.nextExit++); + break; + + case 3: //jal + if (ReplaceJalTo(targetAddr)) + return; + + gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8); + CompileDelaySlot(DELAYSLOT_NICE); + if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) { + AddContinuedBlock(targetAddr); + // Account for the increment in the loop. + js.compilerPC = targetAddr - 4; + // In case the delay slot was a break or something. + js.compiling = true; + return; + } + FlushAll(); + WriteExit(targetAddr, js.nextExit++); + break; + + default: + _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); + break; + } + js.compiling = false; +} + +void Arm64Jit::Comp_JumpReg(MIPSOpcode op) +{ + if (js.inDelaySlot) { + ERROR_LOG_REPORT(JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); + return; + } + MIPSGPReg rs = _RS; + MIPSGPReg rd = _RD; + bool andLink = (op & 0x3f) == 9; + + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); + bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); + if (andLink && rs == rd) + delaySlotIsNice = false; + CONDITIONAL_NICE_DELAYSLOT; + + ARM64Reg destReg = X8; + if (IsSyscall(delaySlotOp)) { + gpr.MapReg(rs); + MovToPC(gpr.R(rs)); // For syscall to be able to return. + if (andLink) + gpr.SetImm(rd, js.compilerPC + 8); + CompileDelaySlot(DELAYSLOT_FLUSH); + return; // Syscall wrote exit code. + } else if (delaySlotIsNice) { + if (andLink) + gpr.SetImm(rd, js.compilerPC + 8); + CompileDelaySlot(DELAYSLOT_NICE); + + if (!andLink && rs == MIPS_REG_RA && g_Config.bDiscardRegsOnJRRA) { + // According to the MIPS ABI, there are some regs we don't need to preserve. + // Let's discard them so we don't need to write them back. + // NOTE: Not all games follow the MIPS ABI! Tekken 6, for example, will crash + // with this enabled. 
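+ // Concretely, a0-t7 plus t8/t9 are caller-saved argument/temporary registers in
+ // the MIPS ABI, so the loop below just drops them from the register cache rather
+ // than writing their values back to the MIPSState context.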
+ gpr.DiscardR(MIPS_REG_COMPILER_SCRATCH); + for (int i = MIPS_REG_A0; i <= MIPS_REG_T7; i++) + gpr.DiscardR((MIPSGPReg)i); + gpr.DiscardR(MIPS_REG_T8); + gpr.DiscardR(MIPS_REG_T9); + } + + if (jo.continueJumps && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) { + AddContinuedBlock(gpr.GetImm(rs)); + // Account for the increment in the loop. + js.compilerPC = gpr.GetImm(rs) - 4; + // In case the delay slot was a break or something. + js.compiling = true; + return; + } + + gpr.MapReg(rs); + destReg = gpr.R(rs); // Safe because FlushAll doesn't change any regs + FlushAll(); + } else { + // Delay slot - this case is very rare, might be able to free up R8. + gpr.MapReg(rs); + MOV(W8, gpr.R(rs)); + if (andLink) + gpr.SetImm(rd, js.compilerPC + 8); + CompileDelaySlot(DELAYSLOT_NICE); + FlushAll(); + } + + switch (op & 0x3f) + { + case 8: //jr + break; + case 9: //jalr + break; + default: + _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); + break; + } + + WriteExitDestInR(destReg); + js.compiling = false; +} + + +void Arm64Jit::Comp_Syscall(MIPSOpcode op) +{ + if (!g_Config.bSkipDeadbeefFilling) + { + // All of these will be overwritten with DEADBEEF anyway. + gpr.DiscardR(MIPS_REG_COMPILER_SCRATCH); + // We need to keep A0 - T3, which are used for args. + gpr.DiscardR(MIPS_REG_T4); + gpr.DiscardR(MIPS_REG_T5); + gpr.DiscardR(MIPS_REG_T6); + gpr.DiscardR(MIPS_REG_T7); + gpr.DiscardR(MIPS_REG_T8); + gpr.DiscardR(MIPS_REG_T9); + + gpr.DiscardR(MIPS_REG_HI); + gpr.DiscardR(MIPS_REG_LO); + } + + // If we're in a delay slot, this is off by one. + const int offset = js.inDelaySlot ? -1 : 0; + WriteDownCount(offset); + RestoreRoundingMode(); + js.downcountAmount = -offset; + + // TODO: Maybe discard v0, v1, and some temps? Definitely at? + FlushAll(); + + SaveDowncount(); + // Skip the CallSyscall where possible. + void *quickFunc = GetQuickSyscallFunc(op); + if (quickFunc) + { + gpr.SetRegImm(W0, (u32)(intptr_t)GetSyscallInfo(op)); + // Already flushed, so X1 is safe. + QuickCallFunction(X1, quickFunc); + } + else + { + gpr.SetRegImm(W0, op.encoding); + QuickCallFunction(X1, (void *)&CallSyscall); + } + ApplyRoundingMode(); + RestoreDowncount(); + + WriteSyscallExit(); + js.compiling = false; +} + +void Arm64Jit::Comp_Break(MIPSOpcode op) +{ + Comp_Generic(op); + WriteSyscallExit(); + js.compiling = false; +} + +} // namespace Mipscomp diff --git a/Core/MIPS/ARM64/Arm64CompFPU.cpp b/Core/MIPS/ARM64/Arm64CompFPU.cpp new file mode 100644 index 0000000000..a7a0947820 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64CompFPU.cpp @@ -0,0 +1,75 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ +#include "Core/Config.h" +#include "Core/MemMap.h" +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/MIPSTables.h" + +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Common/CPUDetect.h" + +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. + +//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; } +#define CONDITIONAL_DISABLE ; +#define DISABLE { Comp_Generic(op); return; } + +namespace MIPSComp +{ + using namespace Arm64Gen; + using namespace Arm64JitConstants; + +void Arm64Jit::Comp_FPU3op(MIPSOpcode op) +{ + DISABLE; +} + +void Arm64Jit::Comp_FPULS(MIPSOpcode op) +{ + DISABLE; +} + +void Arm64Jit::Comp_FPUComp(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_FPU2op(MIPSOpcode op) { + DISABLE; +} + +void Arm64Jit::Comp_mxc1(MIPSOpcode op) +{ + DISABLE; +} + +} // namespace MIPSComp diff --git a/Core/MIPS/ARM64/Arm64CompLoadStore.cpp b/Core/MIPS/ARM64/Arm64CompLoadStore.cpp new file mode 100644 index 0000000000..5741b8af63 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64CompLoadStore.cpp @@ -0,0 +1,83 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + + +// Optimization ideas: +// +// It's common to see sequences of stores writing or reading to a contiguous set of +// addresses in function prologues/epilogues: +// sw s5, 104(sp) +// sw s4, 100(sp) +// sw s3, 96(sp) +// sw s2, 92(sp) +// sw s1, 88(sp) +// sw s0, 84(sp) +// sw ra, 108(sp) +// mov s4, a0 +// mov s3, a1 +// ... +// Such sequences could easily be detected and turned into nice contiguous +// sequences of ARM stores instead of the current 3 instructions per sw/lw. +// +// Also, if we kept track of the likely register content of a cached register, +// (pointer or data), we could avoid many BIC instructions. 
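+//
+// A hedged sketch of that idea on ARM64 (nothing in this file implements it yet):
+// once two neighbouring values are mapped to host registers, a pair such as
+//   sw s0, 84(sp)
+//   sw s1, 88(sp)
+// could be emitted as a single store-pair instruction,
+//   STP w_s0, w_s1, [base, #offset]
+// halving the number of stores for typical prologues/epilogues.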
+ + +#include "Core/MemMap.h" +#include "Core/Config.h" +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSAnalyst.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" + +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. + +// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } +#define CONDITIONAL_DISABLE ; +#define DISABLE { Comp_Generic(op); return; } + +namespace MIPSComp +{ + using namespace Arm64Gen; + using namespace Arm64JitConstants; + + void Arm64Jit::Comp_ITypeMemLR(MIPSOpcode op, bool load) { + DISABLE; + } + + void Arm64Jit::Comp_ITypeMem(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Cache(MIPSOpcode op) { + DISABLE; + } +} diff --git a/Core/MIPS/ARM64/Arm64CompReplace.cpp b/Core/MIPS/ARM64/Arm64CompReplace.cpp new file mode 100644 index 0000000000..38527cea94 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64CompReplace.cpp @@ -0,0 +1,33 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Common/CPUDetect.h" +#include "Core/MemMap.h" +#include "Core/MIPS/JitCommon/JitCommon.h" +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" + +namespace MIPSComp { + +int Arm64Jit::Replace_fabsf() { + // TODO ARM64 + // fpr.MapDirtyIn(0, 12); + // VABS(fpr.R(0), fpr.R(12)); + return 4; // Number of instructions in the MIPS function +} + +} \ No newline at end of file diff --git a/Core/MIPS/ARM64/Arm64CompVFPU.cpp b/Core/MIPS/ARM64/Arm64CompVFPU.cpp new file mode 100644 index 0000000000..4ab8953240 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64CompVFPU.cpp @@ -0,0 +1,231 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
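+// Most handlers below are still DISABLE stubs; the one piece of real logic here is
+// Comp_VPFX, which captures the raw prefix bits (op & 0xFFFFF) into js.prefixS/T/D
+// and marks them PREFIX_KNOWN_DIRTY. FlushPrefixV in Arm64Jit.cpp later writes any
+// dirty prefix back to the corresponding vfpuCtrl entry in MIPSState.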
+ +#include +#include "math/math_util.h" + +#include "Core/MemMap.h" +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSTables.h" +#include "Core/MIPS/MIPSAnalyst.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Common/CPUDetect.h" +#include "Core/Config.h" +#include "Core/Reporting.h" + +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" + +// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. +// Currently known non working ones should have DISABLE. + +// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; } + +#define CONDITIONAL_DISABLE ; +#define DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; } +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) + +namespace MIPSComp +{ + using namespace Arm64Gen; + using namespace Arm64JitConstants; + + void Arm64Jit::Comp_VPFX(MIPSOpcode op) + { + CONDITIONAL_DISABLE; + int data = op & 0xFFFFF; + int regnum = (op >> 24) & 3; + switch (regnum) { + case 0: // S + js.prefixS = data; + js.prefixSFlag = JitState::PREFIX_KNOWN_DIRTY; + break; + case 1: // T + js.prefixT = data; + js.prefixTFlag = JitState::PREFIX_KNOWN_DIRTY; + break; + case 2: // D + js.prefixD = data; + js.prefixDFlag = JitState::PREFIX_KNOWN_DIRTY; + break; + default: + ERROR_LOG(CPU, "VPFX - bad regnum %i : data=%08x", regnum, data); + break; + } + } + + void Arm64Jit::Comp_SV(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_SVQ(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VVectorInit(MIPSOpcode op) + { + DISABLE; + } + + void Arm64Jit::Comp_VIdt(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VMatrixInit(MIPSOpcode op) + { + DISABLE; + } + + void Arm64Jit::Comp_VHdp(MIPSOpcode op) { + DISABLE; + } + + static const float MEMORY_ALIGNED16(vavg_table[4]) = { 1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f }; + + void Arm64Jit::Comp_Vhoriz(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VDot(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VecDo3(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VV2Op(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vi2f(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vh2f(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vf2i(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Mftv(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vmfvc(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vmtvc(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vmmov(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VScl(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vmmul(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vmscl(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vtfm(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VCrs(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VDet(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vi2x(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vx2i(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_VCrossQuat(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vcmp(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vcmov(MIPSOpcode op) { 
+ DISABLE; + } + + void Arm64Jit::Comp_Viim(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vfim(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vcst(MIPSOpcode op) { + DISABLE; + } + + // Very heavily used by FF:CC. Should be replaced by a fast approximation instead of + // calling the math library. + // Apparently this may not work on hardfp. I don't think we have any platforms using this though. + void Arm64Jit::Comp_VRot(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vsgn(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vocp(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_ColorConv(MIPSOpcode op) { + DISABLE; + } + + void Arm64Jit::Comp_Vbfy(MIPSOpcode op) { + DISABLE; + } +} diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp new file mode 100644 index 0000000000..2bd62769ea --- /dev/null +++ b/Core/MIPS/ARM64/Arm64Jit.cpp @@ -0,0 +1,485 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "base/logging.h" +#include "Common/ChunkFile.h" +#include "Common/CPUDetect.h" + +#include "Core/Reporting.h" +#include "Core/Config.h" +#include "Core/Core.h" +#include "Core/CoreTiming.h" +#include "Core/Debugger/SymbolMap.h" +#include "Core/MemMap.h" + +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSCodeUtils.h" +#include "Core/MIPS/MIPSInt.h" +#include "Core/MIPS/MIPSTables.h" +#include "Core/HLE/ReplaceTables.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h" + +#include "Core/MIPS/ARM64/Arm64Jit.h" + +#include "ext/disarm.h" + +using namespace Arm64JitConstants; + +void DisassembleArm64Print(const u8 *data, int size) { + ILOG("ARM64 TODO"); +} + +namespace MIPSComp +{ +using namespace Arm64Gen; +using namespace Arm64JitConstants; + +Arm64Jit::Arm64Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &js, &jo), fpr(mips, &js, &jo), mips_(mips) { + logBlocks = 0; + dontLogBlocks = 0; + blocks.Init(); + gpr.SetEmitter(this); + fpr.SetEmitter(this); + AllocCodeSpace(1024 * 1024 * 16); // 32MB is the absolute max because that's what an ARM branch instruction can reach, backwards and forwards. + GenerateFixedCode(); + + js.startDefaultPrefix = mips_->HasDefaultPrefix(); +} + +Arm64Jit::~Arm64Jit() { +} + +void Arm64Jit::DoState(PointerWrap &p) +{ + auto s = p.Section("Jit", 1, 2); + if (!s) + return; + + p.Do(js.startDefaultPrefix); + if (s >= 2) { + p.Do(js.hasSetRounding); + js.lastSetRounding = 0; + } else { + js.hasSetRounding = 1; + } +} + +// This is here so the savestate matches between jit and non-jit. 
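+// It opens the same "Jit" section with the same version range and serializes
+// placeholder values in the same order as DoState above, so a state saved with the
+// interpreter core stays loadable under the JIT and vice versa.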
+void Arm64Jit::DoDummyState(PointerWrap &p) +{ + auto s = p.Section("Jit", 1, 2); + if (!s) + return; + + bool dummy = false; + p.Do(dummy); + if (s >= 2) { + dummy = true; + p.Do(dummy); + } +} + +void Arm64Jit::FlushAll() +{ + gpr.FlushAll(); + fpr.FlushAll(); + FlushPrefixV(); +} + +void Arm64Jit::FlushPrefixV() +{ + if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) { + gpr.SetRegImm(SCRATCHREG1, js.prefixS); + STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_SPREFIX])); + js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY); + } + + if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) { + gpr.SetRegImm(SCRATCHREG1, js.prefixT); + STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_TPREFIX])); + js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY); + } + + if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) { + gpr.SetRegImm(SCRATCHREG1, js.prefixD); + STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_DPREFIX])); + js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY); + } +} + +void Arm64Jit::ClearCache() +{ + blocks.Clear(); + ClearCodeSpace(); + GenerateFixedCode(); +} + +void Arm64Jit::InvalidateCache() +{ + blocks.Clear(); +} + +void Arm64Jit::InvalidateCacheAt(u32 em_address, int length) +{ + blocks.InvalidateICache(em_address, length); +} + +void Arm64Jit::EatInstruction(MIPSOpcode op) { + MIPSInfo info = MIPSGetInfo(op); + if (info & DELAYSLOT) { + ERROR_LOG_REPORT_ONCE(ateDelaySlot, JIT, "Ate a branch op."); + } + if (js.inDelaySlot) { + ERROR_LOG_REPORT_ONCE(ateInDelaySlot, JIT, "Ate an instruction inside a delay slot."); + } + + js.numInstructions++; + js.compilerPC += 4; + js.downcountAmount += MIPSGetInstructionCycleEstimate(op); +} + +void Arm64Jit::CompileDelaySlot(int flags) +{ + // TODO ARM64 +} + + +void Arm64Jit::Compile(u32 em_address) { + if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) { + ClearCache(); + } + + int block_num = blocks.AllocateBlock(em_address); + JitBlock *b = blocks.GetBlock(block_num); + DoJit(em_address, b); + blocks.FinalizeBlock(block_num, jo.enableBlocklink); + + bool cleanSlate = false; + + if (js.hasSetRounding && !js.lastSetRounding) { + WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks"); + // Won't loop, since hasSetRounding is only ever set to 1. + js.lastSetRounding = js.hasSetRounding; + cleanSlate = true; + } + + // Drat. The VFPU hit an uneaten prefix at the end of a block. + if (js.startDefaultPrefix && js.MayHavePrefix()) { + WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", js.compilerPC - 4); + js.LogPrefix(); + + // Let's try that one more time. We won't get back here because we toggled the value. + js.startDefaultPrefix = false; + cleanSlate = true; + } + + if (cleanSlate) { + // Our assumptions are all wrong so it's clean-slate time. + ClearCache(); + Compile(em_address); + } +} + +void Arm64Jit::RunLoopUntil(u64 globalticks) +{ + ((void (*)())enterCode)(); +} + +const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) +{ + js.cancel = false; + js.blockStart = js.compilerPC = mips_->pc; + js.lastContinuedPC = 0; + js.initialBlockSize = 0; + js.nextExit = 0; + js.downcountAmount = 0; + js.curBlock = b; + js.compiling = true; + js.inDelaySlot = false; + js.PrefixStart(); + + // We add a downcount flag check before the block, used when entering from a linked block. 
+ // The last block decremented downcounter, and the flag should still be available. + // Got three variants here of where we position the code, needs detailed benchmarking. + + FixupBranch bail; + /* + if (jo.useBackJump) { + // Moves the MOVI2R and B *before* checkedEntry, and just branch backwards there. + // Speedup seems to be zero unfortunately but I guess it may vary from device to device. + // Not intrusive so keeping it around here to experiment with, may help on ARMv6 due to + // large/slow construction of 32-bit immediates? + JumpTarget backJump = GetCodePtr(); + gpr.SetRegImm(R0, js.blockStart); + B((const void *)outerLoopPCInR0); + b->checkedEntry = GetCodePtr(); + SetCC(CC_LT); + B(backJump); + SetCC(CC_AL); + } else if (jo.useForwardJump) { + b->checkedEntry = GetCodePtr(); + SetCC(CC_LT); + bail = B(); + SetCC(CC_AL); + } else { + b->checkedEntry = GetCodePtr(); + SetCC(CC_LT); + gpr.SetRegImm(R0, js.blockStart); + B((const void *)outerLoopPCInR0); + SetCC(CC_AL); + }*/ + // TODO ARM64 + + b->normalEntry = GetCodePtr(); + // TODO: this needs work + MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address); + + gpr.Start(analysis); + fpr.Start(analysis); + + int partialFlushOffset = 0; + + js.numInstructions = 0; + while (js.compiling) + { + gpr.SetCompilerPC(js.compilerPC); // Let it know for log messages + MIPSOpcode inst = Memory::Read_Opcode_JIT(js.compilerPC); + //MIPSInfo info = MIPSGetInfo(inst); + //if (info & IS_VFPU) { + // logBlocks = 1; + //} + + js.downcountAmount += MIPSGetInstructionCycleEstimate(inst); + + MIPSCompileOp(inst); + + js.compilerPC += 4; + js.numInstructions++; + + // Safety check, in case we get a bunch of really large jit ops without a lot of branching. + if (GetSpaceLeft() < 0x800 || js.numInstructions >= JitBlockCache::MAX_BLOCK_INSTRUCTIONS) + { + FlushAll(); + WriteExit(js.compilerPC, js.nextExit++); + js.compiling = false; + } + } + + if (jo.useForwardJump) { + //SetJumpTarget(bail); + //gpr.SetRegImm(R0, js.blockStart); + //B((const void *)outerLoopPCInR0); + } + + char temp[256]; + if (logBlocks > 0 && dontLogBlocks == 0) { + INFO_LOG(JIT, "=============== mips ==============="); + for (u32 cpc = em_address; cpc != js.compilerPC + 4; cpc += 4) { + MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, true); + INFO_LOG(JIT, "M: %08x %s", cpc, temp); + } + } + + b->codeSize = GetCodePtr() - b->normalEntry; + + if (logBlocks > 0 && dontLogBlocks == 0) { + INFO_LOG(JIT, "=============== ARM ==============="); + DisassembleArm64Print(b->normalEntry, GetCodePtr() - b->normalEntry); + } + if (logBlocks > 0) + logBlocks--; + if (dontLogBlocks > 0) + dontLogBlocks--; + + // Don't forget to zap the newly written instructions in the instruction cache! + FlushIcache(); + + if (js.lastContinuedPC == 0) + b->originalSize = js.numInstructions; + else + { + // We continued at least once. Add the last proxy and set the originalSize correctly. + blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr()); + b->originalSize = js.initialBlockSize; + } + return b->normalEntry; +} + +void Arm64Jit::AddContinuedBlock(u32 dest) +{ + // The first block is the root block. When we continue, we create proxy blocks after that. 
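+ // (The ProxyBlock entries appear to tie the continued address range back to the
+ // root block, so that invalidating any address in that range also invalidates this
+ // block; this is an assumption about JitBlockCache, not something established here.)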
+ if (js.lastContinuedPC == 0) + js.initialBlockSize = js.numInstructions; + else + blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr()); + js.lastContinuedPC = dest; +} + +bool Arm64Jit::DescribeCodePtr(const u8 *ptr, std::string &name) +{ + // TODO: Not used by anything yet. + return false; +} + +void Arm64Jit::Comp_RunBlock(MIPSOpcode op) +{ + // This shouldn't be necessary, the dispatcher should catch us before we get here. + ERROR_LOG(JIT, "Comp_RunBlock should never be reached!"); +} + +bool Arm64Jit::ReplaceJalTo(u32 dest) { + return false; +} + +void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op) +{ + // TODO ARM64 +} + +void Arm64Jit::Comp_Generic(MIPSOpcode op) +{ + FlushAll(); + MIPSInterpretFunc func = MIPSGetInterpretFunc(op); + if (func) { + SaveDowncount(); + // TODO: Perhaps keep the rounding mode for interp? + RestoreRoundingMode(); + // gpr.SetRegImm(SCRATCHREG1, js.compilerPC); + // MovToPC(SCRATCHREG1); + //gpr.SetRegImm(R0, op.encoding); + //QuickCallFunction(R1, (void *)func); + // TODO ARM64 + ApplyRoundingMode(); + RestoreDowncount(); + } + + const MIPSInfo info = MIPSGetInfo(op); + if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0) + { + // If it does eat them, it'll happen in MIPSCompileOp(). + if ((info & OUT_EAT_PREFIX) == 0) + js.PrefixUnknown(); + } +} + +void Arm64Jit::MovFromPC(ARM64Reg r) { + LDR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc)); +} + +void Arm64Jit::MovToPC(ARM64Reg r) { + STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc)); +} + +void Arm64Jit::SaveDowncount() { + if (jo.downcountInRegister) + STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); +} + +void Arm64Jit::RestoreDowncount() { + if (jo.downcountInRegister) + LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); +} + +void Arm64Jit::WriteDownCount(int offset) { + // TODO ARM64 +} + +// Abuses R2 +void Arm64Jit::WriteDownCountR(ARM64Reg reg) { + if (jo.downcountInRegister) { + SUBS(DOWNCOUNTREG, DOWNCOUNTREG, reg); + } else { + LDR(INDEX_UNSIGNED, X2, CTXREG, offsetof(MIPSState, downcount)); + SUBS(X2, X2, reg); + STR(INDEX_UNSIGNED, X2, CTXREG, offsetof(MIPSState, downcount)); + } +} + +void Arm64Jit::RestoreRoundingMode(bool force) { + // TODO ARM64 +} + +void Arm64Jit::ApplyRoundingMode(bool force) { + // TODO ARM64 +} + +void Arm64Jit::UpdateRoundingMode() { + // TODO ARM64 +} + +// IDEA - could have a WriteDualExit that takes two destinations and two condition flags, +// and just have conditional that set PC "twice". This only works when we fall back to dispatcher +// though, as we need to have the SUBS flag set in the end. So with block linking in the mix, +// I don't think this gives us that much benefit. +void Arm64Jit::WriteExit(u32 destination, int exit_num) +{ + WriteDownCount(); + //If nobody has taken care of this yet (this can be removed when all branches are done) + JitBlock *b = js.curBlock; + b->exitAddress[exit_num] = destination; + b->exitPtrs[exit_num] = GetWritableCodePtr(); + + // Link opportunity! + int block = blocks.GetBlockNumberFromStartAddress(destination); + if (block >= 0 && jo.enableBlocklink) { + // It exists! Joy of joy! + B(blocks.GetBlock(block)->checkedEntry); + b->linkStatus[exit_num] = true; + } else { + gpr.SetRegImm(X0, destination); + B((const void *)dispatcherPCInR0); + } +} + +void Arm64Jit::WriteExitDestInR(ARM64Reg Reg) +{ + MovToPC(Reg); + WriteDownCount(); + // TODO: shouldn't need an indirect branch here... 
+ B((const void *)dispatcher); +} + +void Arm64Jit::WriteSyscallExit() +{ + WriteDownCount(); + B((const void *)dispatcherCheckCoreState); +} + +void Arm64Jit::Comp_DoNothing(MIPSOpcode op) { } + +#define _RS ((op>>21) & 0x1F) +#define _RT ((op>>16) & 0x1F) +#define _RD ((op>>11) & 0x1F) +#define _FS ((op>>11) & 0x1F) +#define _FT ((op>>16) & 0x1F) +#define _FD ((op>>6) & 0x1F) +#define _POS ((op>>6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) + +//memory regions: +// +// 08-0A +// 48-4A +// 04-05 +// 44-45 +// mov eax, addrreg + // shr eax, 28 +// mov eax, [table+eax] +// mov dreg, [eax+offreg] + +} diff --git a/Core/MIPS/ARM64/Arm64Jit.h b/Core/MIPS/ARM64/Arm64Jit.h new file mode 100644 index 0000000000..fa4313d9d3 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64Jit.h @@ -0,0 +1,291 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "Common/CPUDetect.h" +#include "Common/ArmCommon.h" +#include "Common/Arm64Emitter.h" +#include "Core/MIPS/JitCommon/JitState.h" +#include "Core/MIPS/JitCommon/JitBlockCache.h" +#include "Core/MIPS/ARM64/Arm64Asm.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h" +#include "Core/MIPS/MIPSVFPUUtils.h" + +#ifndef offsetof +#include "stddef.h" +#endif + +namespace MIPSComp +{ + +struct Arm64JitOptions +{ + Arm64JitOptions() { + enableBlocklink = true; + downcountInRegister = true; + useBackJump = false; + useForwardJump = false; + cachePointers = true; + immBranches = false; + continueBranches = false; + continueJumps = false; + continueMaxInstructions = 300; + + useNEONVFPU = false; // true + if (!cpu_info.bNEON) + useNEONVFPU = false; + } + + bool useNEONVFPU; + bool enableBlocklink; + bool downcountInRegister; + bool useBackJump; + bool useForwardJump; + bool cachePointers; + bool immBranches; + bool continueBranches; + bool continueJumps; + int continueMaxInstructions; +}; + +class Arm64Jit : public Arm64Gen::ARM64CodeBlock +{ +public: + Arm64Jit(MIPSState *mips); + virtual ~Arm64Jit(); + + void DoState(PointerWrap &p); + static void DoDummyState(PointerWrap &p); + + // Compiled ops should ignore delay slots + // the compiler will take care of them by itself + // OR NOT + void Comp_Generic(MIPSOpcode op); + + void RunLoopUntil(u64 globalticks); + + void Compile(u32 em_address); // Compiles a block at current MIPS PC + const u8 *DoJit(u32 em_address, JitBlock *b); + + bool DescribeCodePtr(const u8 *ptr, std::string &name); + + void CompileDelaySlot(int flags); + void EatInstruction(MIPSOpcode op); + void AddContinuedBlock(u32 dest); + + void Comp_RunBlock(MIPSOpcode op); + void Comp_ReplacementFunc(MIPSOpcode op); + + // Ops + void Comp_ITypeMem(MIPSOpcode op); + void Comp_Cache(MIPSOpcode op); + + void Comp_RelBranch(MIPSOpcode op); + void Comp_RelBranchRI(MIPSOpcode op); + void 
Comp_FPUBranch(MIPSOpcode op); + void Comp_FPULS(MIPSOpcode op); + void Comp_FPUComp(MIPSOpcode op); + void Comp_Jump(MIPSOpcode op); + void Comp_JumpReg(MIPSOpcode op); + void Comp_Syscall(MIPSOpcode op); + void Comp_Break(MIPSOpcode op); + + void Comp_IType(MIPSOpcode op); + void Comp_RType2(MIPSOpcode op); + void Comp_RType3(MIPSOpcode op); + void Comp_ShiftType(MIPSOpcode op); + void Comp_Allegrex(MIPSOpcode op); + void Comp_Allegrex2(MIPSOpcode op); + void Comp_VBranch(MIPSOpcode op); + void Comp_MulDivType(MIPSOpcode op); + void Comp_Special3(MIPSOpcode op); + + void Comp_FPU3op(MIPSOpcode op); + void Comp_FPU2op(MIPSOpcode op); + void Comp_mxc1(MIPSOpcode op); + + void Comp_DoNothing(MIPSOpcode op); + + void Comp_SV(MIPSOpcode op); + void Comp_SVQ(MIPSOpcode op); + void Comp_VPFX(MIPSOpcode op); + void Comp_VVectorInit(MIPSOpcode op); + void Comp_VMatrixInit(MIPSOpcode op); + void Comp_VDot(MIPSOpcode op); + void Comp_VecDo3(MIPSOpcode op); + void Comp_VV2Op(MIPSOpcode op); + void Comp_Mftv(MIPSOpcode op); + void Comp_Vmfvc(MIPSOpcode op); + void Comp_Vmtvc(MIPSOpcode op); + void Comp_Vmmov(MIPSOpcode op); + void Comp_VScl(MIPSOpcode op); + void Comp_Vmmul(MIPSOpcode op); + void Comp_Vmscl(MIPSOpcode op); + void Comp_Vtfm(MIPSOpcode op); + void Comp_VHdp(MIPSOpcode op); + void Comp_VCrs(MIPSOpcode op); + void Comp_VDet(MIPSOpcode op); + void Comp_Vi2x(MIPSOpcode op); + void Comp_Vx2i(MIPSOpcode op); + void Comp_Vf2i(MIPSOpcode op); + void Comp_Vi2f(MIPSOpcode op); + void Comp_Vh2f(MIPSOpcode op); + void Comp_Vcst(MIPSOpcode op); + void Comp_Vhoriz(MIPSOpcode op); + void Comp_VRot(MIPSOpcode op); + void Comp_VIdt(MIPSOpcode op); + void Comp_Vcmp(MIPSOpcode op); + void Comp_Vcmov(MIPSOpcode op); + void Comp_Viim(MIPSOpcode op); + void Comp_Vfim(MIPSOpcode op); + void Comp_VCrossQuat(MIPSOpcode op); + void Comp_Vsgn(MIPSOpcode op); + void Comp_Vocp(MIPSOpcode op); + void Comp_ColorConv(MIPSOpcode op); + void Comp_Vbfy(MIPSOpcode op); + + // Non-NEON: VPFX + + // NEON implementations of the VFPU ops. 
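+ // These mirror the scalar Comp_* VFPU handlers above one-to-one; which set ends up
+ // dispatched is expected to depend on jo.useNEONVFPU, which Arm64JitOptions currently
+ // forces off.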
+ void CompNEON_SV(MIPSOpcode op); + void CompNEON_SVQ(MIPSOpcode op); + void CompNEON_VVectorInit(MIPSOpcode op); + void CompNEON_VMatrixInit(MIPSOpcode op); + void CompNEON_VDot(MIPSOpcode op); + void CompNEON_VecDo3(MIPSOpcode op); + void CompNEON_VV2Op(MIPSOpcode op); + void CompNEON_Mftv(MIPSOpcode op); + void CompNEON_Vmfvc(MIPSOpcode op); + void CompNEON_Vmtvc(MIPSOpcode op); + void CompNEON_Vmmov(MIPSOpcode op); + void CompNEON_VScl(MIPSOpcode op); + void CompNEON_Vmmul(MIPSOpcode op); + void CompNEON_Vmscl(MIPSOpcode op); + void CompNEON_Vtfm(MIPSOpcode op); + void CompNEON_VHdp(MIPSOpcode op); + void CompNEON_VCrs(MIPSOpcode op); + void CompNEON_VDet(MIPSOpcode op); + void CompNEON_Vi2x(MIPSOpcode op); + void CompNEON_Vx2i(MIPSOpcode op); + void CompNEON_Vf2i(MIPSOpcode op); + void CompNEON_Vi2f(MIPSOpcode op); + void CompNEON_Vh2f(MIPSOpcode op); + void CompNEON_Vcst(MIPSOpcode op); + void CompNEON_Vhoriz(MIPSOpcode op); + void CompNEON_VRot(MIPSOpcode op); + void CompNEON_VIdt(MIPSOpcode op); + void CompNEON_Vcmp(MIPSOpcode op); + void CompNEON_Vcmov(MIPSOpcode op); + void CompNEON_Viim(MIPSOpcode op); + void CompNEON_Vfim(MIPSOpcode op); + void CompNEON_VCrossQuat(MIPSOpcode op); + void CompNEON_Vsgn(MIPSOpcode op); + void CompNEON_Vocp(MIPSOpcode op); + void CompNEON_ColorConv(MIPSOpcode op); + void CompNEON_Vbfy(MIPSOpcode op); + + int Replace_fabsf(); + + JitBlockCache *GetBlockCache() { return &blocks; } + + void ClearCache(); + void InvalidateCache(); + void InvalidateCacheAt(u32 em_address, int length = 4); + + void EatPrefix() { js.EatPrefix(); } + +private: + void GenerateFixedCode(); + void FlushAll(); + void FlushPrefixV(); + + void WriteDownCount(int offset = 0); + void WriteDownCountR(Arm64Gen::ARM64Reg reg); + void RestoreRoundingMode(bool force = false); + void ApplyRoundingMode(bool force = false); + void UpdateRoundingMode(); + void MovFromPC(Arm64Gen::ARM64Reg r); + void MovToPC(Arm64Gen::ARM64Reg r); + + bool ReplaceJalTo(u32 dest); + + void SaveDowncount(); + void RestoreDowncount(); + + void WriteExit(u32 destination, int exit_num); + void WriteExitDestInR(Arm64Gen::ARM64Reg Reg); + void WriteSyscallExit(); + + // Utility compilation functions + void BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely); + void BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely); + void BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool likely); + void BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely); + + // Utilities to reduce duplicated code + void CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, void (ARM64XEmitter::*arith)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg src, Arm64Gen::ARM64Reg op2), bool (ARM64XEmitter::*tryArithI2R)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg src, u32 val), u32 (*eval)(u32 a, u32 b)); + void CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, void (ARM64XEmitter::*arithOp2)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rn), bool (ARM64XEmitter::*tryArithI2R)(Arm64Gen::ARM64Reg dst, Arm64Gen::ARM64Reg rm, u32 val), u32 (*eval)(u32 a, u32 b), bool symmetric = false); + + void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz); + void ApplyPrefixD(const u8 *vregs, VectorSize sz); + void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixSFlag & JitState::PREFIX_KNOWN); + GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixS, sz); + } + void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixTFlag & JitState::PREFIX_KNOWN); + 
GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixT, sz); + } + void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg); + + // Utils + void SetR0ToEffectiveAddress(MIPSGPReg rs, s16 offset); + void SetCCAndR0ForSafeAddress(MIPSGPReg rs, s16 offset, Arm64Gen::ARM64Reg tempReg, bool reverse = false); + void Comp_ITypeMemLR(MIPSOpcode op, bool load); + + JitBlockCache blocks; + Arm64JitOptions jo; + JitState js; + + Arm64RegCache gpr; + ArmRegCacheFPU fpr; + + MIPSState *mips_; + + int dontLogBlocks; + int logBlocks; + +public: + // Code pointers + const u8 *enterCode; + + const u8 *outerLoop; + const u8 *outerLoopPCInR0; + const u8 *dispatcherCheckCoreState; + const u8 *dispatcherPCInR0; + const u8 *dispatcher; + const u8 *dispatcherNoCheck; + + const u8 *breakpointBailout; +}; + +} // namespace MIPSComp + diff --git a/Core/MIPS/ARM64/Arm64RegCache.cpp b/Core/MIPS/ARM64/Arm64RegCache.cpp new file mode 100644 index 0000000000..43852bd070 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64RegCache.cpp @@ -0,0 +1,503 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Core/MemMap.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/MIPSAnalyst.h" +#include "Core/Reporting.h" +#include "Common/Arm64Emitter.h" + +#ifndef offsetof +#include "stddef.h" +#endif + +using namespace Arm64Gen; +using namespace Arm64JitConstants; + +Arm64RegCache::Arm64RegCache(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo) : mips_(mips), js_(js), jo_(jo) { +} + +void Arm64RegCache::Init(ARM64XEmitter *emitter) { + emit_ = emitter; +} + +void Arm64RegCache::Start(MIPSAnalyst::AnalysisResults &stats) { + for (int i = 0; i < NUM_ARMREG; i++) { + ar[i].mipsReg = MIPS_REG_INVALID; + ar[i].isDirty = false; + } + for (int i = 0; i < NUM_MIPSREG; i++) { + mr[i].loc = ML_MEM; + mr[i].reg = INVALID_REG; + mr[i].imm = -1; + mr[i].spillLock = false; + } +} + +const ARM64Reg *Arm64RegCache::GetMIPSAllocationOrder(int &count) { + // Note that R0 is reserved as scratch for now. + // R12 is also potentially usable. + // R4-R7 are registers we could use for static allocation or downcount. + // R8 is used to preserve flags in nasty branches. + // R9 and upwards are reserved for jit basics. + // R14 (LR) is used as a scratch reg (overwritten on calls/return.) + + // TODO ARM64 + if (jo_->downcountInRegister) { + static const ARM64Reg allocationOrder[] = { + X1, X2, X3, X4, X5, X6, X12, + }; + count = sizeof(allocationOrder) / sizeof(const int); + return allocationOrder; + } else { + static const ARM64Reg allocationOrder2[] = { + X1, X2, X3, X4, X5, X6, X7, X12, + }; + count = sizeof(allocationOrder2) / sizeof(const int); + return allocationOrder2; + } +} + +void Arm64RegCache::FlushBeforeCall() { + // R4-R11 are preserved. 
Others need flushing. + FlushArmReg(X1); + FlushArmReg(X2); + FlushArmReg(X3); + FlushArmReg(X12); +} + +bool Arm64RegCache::IsMapped(MIPSGPReg mipsReg) { + return mr[mipsReg].loc == ML_ARMREG; +} + +void Arm64RegCache::SetRegImm(ARM64Reg reg, u32 imm) { + // On ARM64, at least Cortex A57, good old MOVT/MOVW (MOVK in 64-bit) is really fast. + emit_->MOVI2R(reg, imm); +} + +void Arm64RegCache::MapRegTo(ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags) { + ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false; + if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) { + if (mipsReg == MIPS_REG_ZERO) { + // If we get a request to load the zero register, at least we won't spend + // time on a memory access... + // TODO: EOR? + emit_->MOVI2R(reg, 0); + + // This way, if we SetImm() it, we'll keep it. + mr[mipsReg].loc = ML_ARMREG_IMM; + mr[mipsReg].imm = 0; + } else { + switch (mr[mipsReg].loc) { + case ML_MEM: + emit_->LDR(INDEX_UNSIGNED, reg, CTXREG, GetMipsRegOffset(mipsReg)); + mr[mipsReg].loc = ML_ARMREG; + break; + case ML_IMM: + SetRegImm(reg, mr[mipsReg].imm); + ar[reg].isDirty = true; // IMM is always dirty. + + // If we are mapping dirty, it means we're gonna overwrite. + // So the imm value is no longer valid. + if (mapFlags & MAP_DIRTY) + mr[mipsReg].loc = ML_ARMREG; + else + mr[mipsReg].loc = ML_ARMREG_IMM; + break; + default: + mr[mipsReg].loc = ML_ARMREG; + break; + } + } + } else { + if (mipsReg == MIPS_REG_ZERO) { + // This way, if we SetImm() it, we'll keep it. + mr[mipsReg].loc = ML_ARMREG_IMM; + mr[mipsReg].imm = 0; + } else { + mr[mipsReg].loc = ML_ARMREG; + } + } + ar[reg].mipsReg = mipsReg; + mr[mipsReg].reg = reg; +} + +ARM64Reg Arm64RegCache::FindBestToSpill(bool unusedOnly, bool *clobbered) { + int allocCount; + const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount); + + static const int UNUSED_LOOKAHEAD_OPS = 30; + + *clobbered = false; + for (int i = 0; i < allocCount; i++) { + ARM64Reg reg = allocOrder[i]; + if (ar[reg].mipsReg != MIPS_REG_INVALID && mr[ar[reg].mipsReg].spillLock) + continue; + + // Awesome, a clobbered reg. Let's use it. + if (MIPSAnalyst::IsRegisterClobbered(ar[reg].mipsReg, compilerPC_, UNUSED_LOOKAHEAD_OPS)) { + *clobbered = true; + return reg; + } + + // Not awesome. A used reg. Let's try to avoid spilling. + if (unusedOnly && MIPSAnalyst::IsRegisterUsed(ar[reg].mipsReg, compilerPC_, UNUSED_LOOKAHEAD_OPS)) { + continue; + } + + return reg; + } + + return INVALID_REG; +} + +// TODO: Somewhat smarter spilling - currently simply spills the first available, should do +// round robin or FIFO or something. +ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) { + // Let's see if it's already mapped. If so we just need to update the dirty flag. + // We don't need to check for ML_NOINIT because we assume that anyone who maps + // with that flag immediately writes a "known" value to the register. + if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) { + ARM64Reg armReg = mr[mipsReg].reg; + if (ar[armReg].mipsReg != mipsReg) { + ERROR_LOG_REPORT(JIT, "Register mapping out of sync! %i", mipsReg); + } + if (mapFlags & MAP_DIRTY) { + // Mapping dirty means the old imm value is invalid. + mr[mipsReg].loc = ML_ARMREG; + ar[armReg].isDirty = true; + } + return (ARM64Reg)mr[mipsReg].reg; + } + + // Okay, not mapped, so we need to allocate an ARM register. 
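+ // Allocation below goes through three stages: first try the "static" home register
+ // derived from the MIPS register number, then any free register in allocation order,
+ // and finally spill (or discard, if the analyzer says the value is clobbered anyway).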
+ + int allocCount; + const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount); + + ARM64Reg desiredReg = INVALID_REG; + // Try to "statically" allocate the first 6 regs after v0. + int desiredOrder = allocCount - (6 - (mipsReg - (int)MIPS_REG_V0)); + if (desiredOrder >= 0 && desiredOrder < allocCount) + desiredReg = allocOrder[desiredOrder]; + + if (desiredReg != INVALID_REG) { + if (ar[desiredReg].mipsReg == MIPS_REG_INVALID) { + // With this placement, we may be able to optimize flush. + MapRegTo(desiredReg, mipsReg, mapFlags); + return desiredReg; + } + } + +allocate: + for (int i = 0; i < allocCount; i++) { + ARM64Reg reg = allocOrder[i]; + + if (ar[reg].mipsReg == MIPS_REG_INVALID) { + // That means it's free. Grab it, and load the value into it (if requested). + MapRegTo(reg, mipsReg, mapFlags); + return reg; + } + } + + // Still nothing. Let's spill a reg and goto 10. + // TODO: Use age or something to choose which register to spill? + // TODO: Spill dirty regs first? or opposite? + bool clobbered; + ARM64Reg bestToSpill = FindBestToSpill(true, &clobbered); + if (bestToSpill == INVALID_REG) { + bestToSpill = FindBestToSpill(false, &clobbered); + } + + if (bestToSpill != INVALID_REG) { + // ERROR_LOG(JIT, "Out of registers at PC %08x - spills register %i.", mips_->pc, bestToSpill); + // TODO: Broken somehow in Dante's Inferno, but most games work. Bad flags in MIPSTables somewhere? + if (clobbered) { + DiscardR(ar[bestToSpill].mipsReg); + } else { + FlushArmReg(bestToSpill); + } + goto allocate; + } + + // Uh oh, we have all them spilllocked.... + ERROR_LOG_REPORT(JIT, "Out of spillable registers at PC %08x!!!", mips_->pc); + return INVALID_REG; +} + +void Arm64RegCache::MapInIn(MIPSGPReg rd, MIPSGPReg rs) { + SpillLock(rd, rs); + MapReg(rd); + MapReg(rs); + ReleaseSpillLocks(); +} + +void Arm64RegCache::MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs, bool avoidLoad) { + SpillLock(rd, rs); + bool load = !avoidLoad || rd == rs; + MapReg(rd, load ? MAP_DIRTY : MAP_NOINIT); + MapReg(rs); + ReleaseSpillLocks(); +} + +void Arm64RegCache::MapDirtyInIn(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad) { + SpillLock(rd, rs, rt); + bool load = !avoidLoad || (rd == rs || rd == rt); + MapReg(rd, load ? MAP_DIRTY : MAP_NOINIT); + MapReg(rt); + MapReg(rs); + ReleaseSpillLocks(); +} + +void Arm64RegCache::MapDirtyDirtyIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, bool avoidLoad) { + SpillLock(rd1, rd2, rs); + bool load1 = !avoidLoad || rd1 == rs; + bool load2 = !avoidLoad || rd2 == rs; + MapReg(rd1, load1 ? MAP_DIRTY : MAP_NOINIT); + MapReg(rd2, load2 ? MAP_DIRTY : MAP_NOINIT); + MapReg(rs); + ReleaseSpillLocks(); +} + +void Arm64RegCache::MapDirtyDirtyInIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad) { + SpillLock(rd1, rd2, rs, rt); + bool load1 = !avoidLoad || (rd1 == rs || rd1 == rt); + bool load2 = !avoidLoad || (rd2 == rs || rd2 == rt); + MapReg(rd1, load1 ? MAP_DIRTY : MAP_NOINIT); + MapReg(rd2, load2 ? MAP_DIRTY : MAP_NOINIT); + MapReg(rt); + MapReg(rs); + ReleaseSpillLocks(); +} + +void Arm64RegCache::FlushArmReg(ARM64Reg r) { + if (ar[r].mipsReg == MIPS_REG_INVALID) { + // Nothing to do, reg not mapped. + if (ar[r].isDirty) { + ERROR_LOG_REPORT(JIT, "Dirty but no mipsreg?"); + } + return; + } + if (ar[r].mipsReg != MIPS_REG_INVALID) { + auto &mreg = mr[ar[r].mipsReg]; + if (mreg.loc == ML_ARMREG_IMM) { + // We know its immedate value, no need to STR now. 
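+ // Demoting ML_ARMREG_IMM to ML_IMM keeps the known constant around; if the value is
+ // still needed, FlushR() will store it later through a scratch register instead.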
+ mreg.loc = ML_IMM; + mreg.reg = INVALID_REG; + } else { + if (ar[r].isDirty && mreg.loc == ML_ARMREG) + emit_->STR(INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(ar[r].mipsReg)); + mreg.loc = ML_MEM; + mreg.reg = INVALID_REG; + mreg.imm = 0; + } + } + ar[r].isDirty = false; + ar[r].mipsReg = MIPS_REG_INVALID; +} + +void Arm64RegCache::DiscardR(MIPSGPReg mipsReg) { + const RegMIPSLoc prevLoc = mr[mipsReg].loc; + if (prevLoc == ML_ARMREG || prevLoc == ML_ARMREG_IMM) { + ARM64Reg armReg = mr[mipsReg].reg; + ar[armReg].isDirty = false; + ar[armReg].mipsReg = MIPS_REG_INVALID; + mr[mipsReg].reg = INVALID_REG; + mr[mipsReg].loc = ML_MEM; + mr[mipsReg].imm = 0; + } +} + +void Arm64RegCache::FlushR(MIPSGPReg r) { + switch (mr[r].loc) { + case ML_IMM: + // IMM is always "dirty". + if (r != MIPS_REG_ZERO) { + SetRegImm(SCRATCHREG1, mr[r].imm); + emit_->STR(INDEX_UNSIGNED, SCRATCHREG1, CTXREG, GetMipsRegOffset(r)); + } + break; + + case ML_ARMREG: + case ML_ARMREG_IMM: + if (mr[r].reg == INVALID_REG) { + ERROR_LOG_REPORT(JIT, "FlushR: MipsReg %d had bad ArmReg", r); + } + if (ar[mr[r].reg].isDirty) { + if (r != MIPS_REG_ZERO) { + emit_->STR(INDEX_UNSIGNED, (ARM64Reg)mr[r].reg, CTXREG, GetMipsRegOffset(r)); + } + ar[mr[r].reg].isDirty = false; + } + ar[mr[r].reg].mipsReg = MIPS_REG_INVALID; + break; + + case ML_MEM: + // Already there, nothing to do. + break; + + default: + ERROR_LOG_REPORT(JIT, "FlushR: MipsReg %d with invalid location %d", r, mr[r].loc); + break; + } + mr[r].loc = ML_MEM; + mr[r].reg = INVALID_REG; + mr[r].imm = 0; +} + +// Note: if allowFlushImm is set, this also flushes imms while checking the sequence. +int Arm64RegCache::FlushGetSequential(MIPSGPReg startMipsReg, bool allowFlushImm) { + // Only start a sequence on a dirty armreg. + // TODO: Could also start with an imm? + const auto &startMipsInfo = mr[startMipsReg]; + if ((startMipsInfo.loc != ML_ARMREG && startMipsInfo.loc != ML_ARMREG_IMM) || startMipsInfo.reg == INVALID_REG || !ar[startMipsInfo.reg].isDirty) { + return 0; + } + + int allocCount; + const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount); + + int c = 1; + // The sequence needs to have ascending arm regs for STMIA. + int lastArmReg = startMipsInfo.reg; + // Can't use HI/LO, only regs in the main r[] array. + for (int r = (int)startMipsReg + 1; r < 32; ++r) { + if ((mr[r].loc == ML_ARMREG || mr[r].loc == ML_ARMREG_IMM) && mr[r].reg != INVALID_REG) { + if ((int)mr[r].reg > lastArmReg && ar[mr[r].reg].isDirty) { + ++c; + lastArmReg = mr[r].reg; + continue; + } + // If we're not allowed to flush imms, don't even consider them. + } else if (allowFlushImm && mr[r].loc == ML_IMM && MIPSGPReg(r) != MIPS_REG_ZERO) { + // Okay, let's search for a free (and later) reg to put this imm into. + bool found = false; + for (int j = 0; j < allocCount; ++j) { + ARM64Reg immReg = allocOrder[j]; + if ((int)immReg > lastArmReg && ar[immReg].mipsReg == MIPS_REG_INVALID) { + ++c; + lastArmReg = immReg; + + // Even if the sequence fails, we'll need it in a reg anyway, might as well be this one. + MapRegTo(immReg, MIPSGPReg(r), 0); + found = true; + break; + } + } + if (found) { + continue; + } + } + + // If it didn't hit a continue above, the chain is over. + // There's no way to skip a slot with STMIA. 
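+ // (ARM64 has no STMIA; the equivalent for such a run would be STP pairs, which is what
+ // the "Flush in pairs" TODO in FlushAll() below alludes to.)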
+ break; + } + + return c; +} + +void Arm64RegCache::FlushAll() { + // TODO: Flush in pairs + for (int i = 0; i < NUM_MIPSREG; i++) { + MIPSGPReg mipsReg = MIPSGPReg(i); + FlushR(mipsReg); + } + + // Sanity check + for (int i = 0; i < NUM_ARMREG; i++) { + if (ar[i].mipsReg != MIPS_REG_INVALID) { + ERROR_LOG_REPORT(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg); + } + } +} + +void Arm64RegCache::SetImm(MIPSGPReg r, u32 immVal) { + if (r == MIPS_REG_ZERO && immVal != 0) + ERROR_LOG(JIT, "Trying to set immediate %08x to r0", immVal); + + if (mr[r].loc == ML_ARMREG_IMM && mr[r].imm == immVal) { + // Already have that value, let's keep it in the reg. + return; + } + // Zap existing value if cached in a reg + if (mr[r].reg != INVALID_REG) { + ar[mr[r].reg].mipsReg = MIPS_REG_INVALID; + ar[mr[r].reg].isDirty = false; + } + mr[r].loc = ML_IMM; + mr[r].imm = immVal; + mr[r].reg = INVALID_REG; +} + +bool Arm64RegCache::IsImm(MIPSGPReg r) const { + if (r == MIPS_REG_ZERO) return true; + return mr[r].loc == ML_IMM || mr[r].loc == ML_ARMREG_IMM; +} + +u32 Arm64RegCache::GetImm(MIPSGPReg r) const { + if (r == MIPS_REG_ZERO) return 0; + if (mr[r].loc != ML_IMM && mr[r].loc != ML_ARMREG_IMM) { + ERROR_LOG_REPORT(JIT, "Trying to get imm from non-imm register %i", r); + } + return mr[r].imm; +} + +int Arm64RegCache::GetMipsRegOffset(MIPSGPReg r) { + if (r < 32) + return r * 4; + switch (r) { + case MIPS_REG_HI: + return offsetof(MIPSState, hi); + case MIPS_REG_LO: + return offsetof(MIPSState, lo); + case MIPS_REG_FPCOND: + return offsetof(MIPSState, fpcond); + case MIPS_REG_VFPUCC: + return offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_CC]); + default: + ERROR_LOG_REPORT(JIT, "bad mips register %i", r); + return 0; // or what? + } +} + +void Arm64RegCache::SpillLock(MIPSGPReg r1, MIPSGPReg r2, MIPSGPReg r3, MIPSGPReg r4) { + mr[r1].spillLock = true; + if (r2 != MIPS_REG_INVALID) mr[r2].spillLock = true; + if (r3 != MIPS_REG_INVALID) mr[r3].spillLock = true; + if (r4 != MIPS_REG_INVALID) mr[r4].spillLock = true; +} + +void Arm64RegCache::ReleaseSpillLocks() { + for (int i = 0; i < NUM_MIPSREG; i++) { + mr[i].spillLock = false; + } +} + +void Arm64RegCache::ReleaseSpillLock(MIPSGPReg reg) { + mr[reg].spillLock = false; +} + +ARM64Reg Arm64RegCache::R(MIPSGPReg mipsReg) { + if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) { + return (ARM64Reg)mr[mipsReg].reg; + } else { + ERROR_LOG_REPORT(JIT, "Reg %i not in arm reg. compilerPC = %08x", mipsReg, compilerPC_); + return INVALID_REG; // BAAAD + } +} diff --git a/Core/MIPS/ARM64/Arm64RegCache.h b/Core/MIPS/ARM64/Arm64RegCache.h new file mode 100644 index 0000000000..1de81828f2 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64RegCache.h @@ -0,0 +1,153 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
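+
+// GPR register cache for the ARM64 MIPS JIT: tracks where each MIPS register currently
+// lives - in memory, in a host ARM register, and/or as a known immediate value.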
+ +#pragma once + +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSAnalyst.h" +#include "Common/Arm64Emitter.h" + +namespace Arm64JitConstants { + +// Bogus mappings, TODO ARM64 +const Arm64Gen::ARM64Reg JITBASEREG = Arm64Gen::W0; +const Arm64Gen::ARM64Reg CTXREG = Arm64Gen::X1; +const Arm64Gen::ARM64Reg MEMBASEREG = Arm64Gen::X2; +const Arm64Gen::ARM64Reg SCRATCHREG1 = Arm64Gen::W3; +const Arm64Gen::ARM64Reg SCRATCHREG2 = Arm64Gen::W4; +const Arm64Gen::ARM64Reg DOWNCOUNTREG = Arm64Gen::W5; + +enum { + TOTAL_MAPPABLE_MIPSREGS = 36, +}; + +enum RegMIPSLoc { + ML_IMM, + ML_ARMREG, + // In an arm reg, but also has a known immediate value. + ML_ARMREG_IMM, + ML_MEM, +}; + +// These collide with something on Blackberry. +#undef MAP_NOINIT +#undef MAP_READ + +// Initing is the default so the flag is reversed. +enum { + MAP_DIRTY = 1, + MAP_NOINIT = 2 | MAP_DIRTY, +}; + +} + +// R1 to R6: mapped MIPS regs +// R8 = flags (maybe we could do better here?) +// R9 = code pointers +// R10 = MIPS context +// R11 = base pointer +// R14 = scratch (actually LR) + + +typedef int MIPSReg; + +struct RegARM { + MIPSGPReg mipsReg; // if -1, no mipsreg attached. + bool isDirty; // Should the register be written back? +}; + +struct RegMIPS { + // Where is this MIPS register? + Arm64JitConstants::RegMIPSLoc loc; + // Data (only one of these is used, depending on loc. Could make a union). + u32 imm; + Arm64Gen::ARM64Reg reg; // reg index + bool spillLock; // if true, this register cannot be spilled. + // If loc == ML_MEM, it's back in its location in the CPU context struct. +}; + +namespace MIPSComp { + struct Arm64JitOptions; + struct JitState; +} + +class Arm64RegCache { +public: + Arm64RegCache(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo); + ~Arm64RegCache() {} + + void Init(Arm64Gen::ARM64XEmitter *emitter); + void Start(MIPSAnalyst::AnalysisResults &stats); + + // Protect the arm register containing a MIPS register from spilling, to ensure that + // it's being kept allocated. + void SpillLock(MIPSGPReg reg, MIPSGPReg reg2 = MIPS_REG_INVALID, MIPSGPReg reg3 = MIPS_REG_INVALID, MIPSGPReg reg4 = MIPS_REG_INVALID); + void ReleaseSpillLock(MIPSGPReg reg); + void ReleaseSpillLocks(); + + void SetImm(MIPSGPReg reg, u32 immVal); + bool IsImm(MIPSGPReg reg) const; + u32 GetImm(MIPSGPReg reg) const; + // Optimally set a register to an imm value (possibly using another register.) + void SetRegImm(Arm64Gen::ARM64Reg reg, u32 imm); + + // Returns an ARM register containing the requested MIPS register. + Arm64Gen::ARM64Reg MapReg(MIPSGPReg reg, int mapFlags = 0); + + bool IsMapped(MIPSGPReg reg); + bool IsMappedAsPointer(MIPSGPReg reg); + + void MapInIn(MIPSGPReg rd, MIPSGPReg rs); + void MapDirtyIn(MIPSGPReg rd, MIPSGPReg rs, bool avoidLoad = true); + void MapDirtyInIn(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad = true); + void MapDirtyDirtyIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, bool avoidLoad = true); + void MapDirtyDirtyInIn(MIPSGPReg rd1, MIPSGPReg rd2, MIPSGPReg rs, MIPSGPReg rt, bool avoidLoad = true); + void FlushArmReg(Arm64Gen::ARM64Reg r); + void FlushR(MIPSGPReg r); + void FlushBeforeCall(); + void FlushAll(); + void DiscardR(MIPSGPReg r); + + Arm64Gen::ARM64Reg R(MIPSGPReg preg); // Returns a cached register, while checking that it's NOT mapped as a pointer + + void SetEmitter(Arm64Gen::ARM64XEmitter *emitter) { emit_ = emitter; } + + // For better log output only. 
+ void SetCompilerPC(u32 compilerPC) { compilerPC_ = compilerPC; } + + int GetMipsRegOffset(MIPSGPReg r); + +private: + const Arm64Gen::ARM64Reg *GetMIPSAllocationOrder(int &count); + void MapRegTo(Arm64Gen::ARM64Reg reg, MIPSGPReg mipsReg, int mapFlags); + int FlushGetSequential(MIPSGPReg startMipsReg, bool allowFlushImm); + Arm64Gen::ARM64Reg FindBestToSpill(bool unusedOnly, bool *clobbered); + + MIPSState *mips_; + Arm64Gen::ARM64XEmitter *emit_; + MIPSComp::JitState *js_; + MIPSComp::Arm64JitOptions *jo_; + u32 compilerPC_; + + enum { + NUM_ARMREG = 32, // 31 actual registers, plus the zero register. + NUM_MIPSREG = Arm64JitConstants::TOTAL_MAPPABLE_MIPSREGS, + }; + + RegARM ar[NUM_ARMREG]; + RegMIPS mr[NUM_MIPSREG]; +}; diff --git a/Core/MIPS/ARM64/Arm64RegCacheFPU.cpp b/Core/MIPS/ARM64/Arm64RegCacheFPU.cpp new file mode 100644 index 0000000000..7c9bdcd1ef --- /dev/null +++ b/Core/MIPS/ARM64/Arm64RegCacheFPU.cpp @@ -0,0 +1,570 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include + +#include "base/logging.h" +#include "Common/CPUDetect.h" +#include "Core/MIPS/MIPS.h" +#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h" +#include "Core/MIPS/ARM64/Arm64Jit.h" +#include "Core/MIPS/MIPSTables.h" + +using namespace Arm64Gen; +using namespace Arm64JitConstants; + +ArmRegCacheFPU::ArmRegCacheFPU(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo) : mips_(mips), vr(mr + 32), js_(js), jo_(jo), initialReady(false) { + if (cpu_info.bNEON) { + numARMFpuReg_ = 32; + } else { + numARMFpuReg_ = 16; + } +} + +void ArmRegCacheFPU::Start(MIPSAnalyst::AnalysisResults &stats) { + if (!initialReady) { + SetupInitialRegs(); + initialReady = true; + } + + memcpy(ar, arInitial, sizeof(ar)); + memcpy(mr, mrInitial, sizeof(mr)); + pendingFlush = false; +} + +void ArmRegCacheFPU::SetupInitialRegs() { + for (int i = 0; i < numARMFpuReg_; i++) { + arInitial[i].mipsReg = -1; + arInitial[i].isDirty = false; + } + for (int i = 0; i < NUM_MIPSFPUREG; i++) { + mrInitial[i].loc = ML_MEM; + mrInitial[i].reg = INVALID_REG; + mrInitial[i].spillLock = false; + mrInitial[i].tempLock = false; + } + for (int i = 0; i < MAX_ARMQUADS; i++) { + qr[i].isDirty = false; + qr[i].mipsVec = -1; + qr[i].sz = V_Invalid; + qr[i].spillLock = false; + qr[i].isTemp = false; + memset(qr[i].vregs, 0xff, 4); + } +} + +const ARM64Reg *ArmRegCacheFPU::GetMIPSAllocationOrder(int &count) { + // VFP mapping + // VFPU registers and regular FP registers are mapped interchangably on top of the standard + // 16 FPU registers. + + // NEON mapping + // We map FPU and VFPU registers entirely separately. FPU is mapped to 12 of the bottom 16 S registers. + // VFPU is mapped to the upper 48 regs, 32 of which can only be reached through NEON + // (or D16-D31 as doubles, but not relevant). 
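+ // (These notes are carried over from the 32-bit ARM register cache; the S/D/Q split
+ // will need rethinking for AArch64's register file once the FP emitters are in place.)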
+ // Might consider shifting the split in the future, giving more regs to NEON allowing it to map more quads. + + // We should attempt to map scalars to low Q registers and wider things to high registers, + // as the NEON instructions are all 2-vector or 4-vector, they don't do scalar, we want to be + // able to use regular VFP instructions too. + static const ARM64Reg allocationOrder[] = { + // Reserve four temp registers. Useful when building quads until we really figure out + // how to do that best. + S4, S5, S6, S7, // Q1 + S8, S9, S10, S11, // Q2 + S12, S13, S14, S15, // Q3 + S16, S17, S18, S19, // Q4 + S20, S21, S22, S23, // Q5 + S24, S25, S26, S27, // Q6 + S28, S29, S30, S31, // Q7 + // Q8-Q15 free for NEON tricks + }; + + static const ARM64Reg allocationOrderNEONVFPU[] = { + // Reserve four temp registers. Useful when building quads until we really figure out + // how to do that best. + S4, S5, S6, S7, // Q1 + S8, S9, S10, S11, // Q2 + S12, S13, S14, S15, // Q3 + // Q4-Q15 free for VFPU + }; + + // NOTE: It's important that S2/S3 are not allocated with bNEON, even if !useNEONVFPU. + // They are used by a few instructions, like vh2f. + if (jo_->useNEONVFPU) { + count = sizeof(allocationOrderNEONVFPU) / sizeof(const ARM64Reg); + return allocationOrderNEONVFPU; + } else { + count = sizeof(allocationOrder) / sizeof(const ARM64Reg); + return allocationOrder; + } +} + +bool ArmRegCacheFPU::IsMapped(MIPSReg r) { + return mr[r].loc == ML_ARMREG; +} + +ARM64Reg ArmRegCacheFPU::MapReg(MIPSReg mipsReg, int mapFlags) { + // INFO_LOG(JIT, "FPR MapReg: %i flags=%i", mipsReg, mapFlags); + if (jo_->useNEONVFPU && mipsReg >= 32) { + ERROR_LOG(JIT, "Cannot map VFPU registers to ARM VFP registers in NEON mode. PC=%08x", js_->compilerPC); + return S0; + } + + pendingFlush = true; + // Let's see if it's already mapped. If so we just need to update the dirty flag. + // We don't need to check for ML_NOINIT because we assume that anyone who maps + // with that flag immediately writes a "known" value to the register. + if (mr[mipsReg].loc == ML_ARMREG) { + if (ar[mr[mipsReg].reg].mipsReg != mipsReg) { + ERROR_LOG(JIT, "Reg mapping out of sync! MR %i", mipsReg); + } + if (mapFlags & MAP_DIRTY) { + ar[mr[mipsReg].reg].isDirty = true; + } + //INFO_LOG(JIT, "Already mapped %i to %i", mipsReg, mr[mipsReg].reg); + return (ARM64Reg)(mr[mipsReg].reg + S0); + } + + // Okay, not mapped, so we need to allocate an ARM register. + + int allocCount; + const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount); + +allocate: + for (int i = 0; i < allocCount; i++) { + int reg = allocOrder[i] - S0; + + if (ar[reg].mipsReg == -1) { + // That means it's free. Grab it, and load the value into it (if requested). + ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false; + if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) { + if (mr[mipsReg].loc == ML_MEM && mipsReg < TEMP0) { + // emit_->VLDR((ARM64Reg)(reg + S0), CTXREG, GetMipsRegOffset(mipsReg)); + } + } + ar[reg].mipsReg = mipsReg; + mr[mipsReg].loc = ML_ARMREG; + mr[mipsReg].reg = reg; + //INFO_LOG(JIT, "Mapped %i to %i", mipsReg, mr[mipsReg].reg); + return (ARM64Reg)(reg + S0); + } + } + + + // Still nothing. Let's spill a reg and goto 10. + // TODO: Use age or something to choose which register to spill? + // TODO: Spill dirty regs first? or opposite? 
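+ // Unlike the GPR cache there is no clobber analysis here yet: we simply take the first
+ // allocatable register that is neither spill-locked nor temp-locked.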
+ int bestToSpill = -1; + for (int i = 0; i < allocCount; i++) { + int reg = allocOrder[i] - S0; + if (ar[reg].mipsReg != -1 && (mr[ar[reg].mipsReg].spillLock || mr[ar[reg].mipsReg].tempLock)) + continue; + bestToSpill = reg; + break; + } + + if (bestToSpill != -1) { + FlushArmReg((ARM64Reg)(S0 + bestToSpill)); + goto allocate; + } + + // Uh oh, we have all them spilllocked.... + ERROR_LOG(JIT, "Out of spillable registers at PC %08x!!!", js_->compilerPC); + return INVALID_REG; +} + +void ArmRegCacheFPU::MapInIn(MIPSReg rd, MIPSReg rs) { + SpillLock(rd, rs); + MapReg(rd); + MapReg(rs); + ReleaseSpillLock(rd); + ReleaseSpillLock(rs); +} + +void ArmRegCacheFPU::MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad) { + SpillLock(rd, rs); + bool overlap = avoidLoad && rd == rs; + MapReg(rd, overlap ? MAP_DIRTY : MAP_NOINIT); + MapReg(rs); + ReleaseSpillLock(rd); + ReleaseSpillLock(rs); +} + +void ArmRegCacheFPU::MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad) { + SpillLock(rd, rs, rt); + bool overlap = avoidLoad && (rd == rs || rd == rt); + MapReg(rd, overlap ? MAP_DIRTY : MAP_NOINIT); + MapReg(rt); + MapReg(rs); + ReleaseSpillLock(rd); + ReleaseSpillLock(rs); + ReleaseSpillLock(rt); +} + +void ArmRegCacheFPU::SpillLockV(const u8 *v, VectorSize sz) { + for (int i = 0; i < GetNumVectorElements(sz); i++) { + vr[v[i]].spillLock = true; + } +} + +void ArmRegCacheFPU::SpillLockV(int vec, VectorSize sz) { + u8 v[4]; + GetVectorRegs(v, sz, vec); + SpillLockV(v, sz); +} + +void ArmRegCacheFPU::MapRegV(int vreg, int flags) { + MapReg(vreg + 32, flags); +} + +void ArmRegCacheFPU::LoadToRegV(ARM64Reg armReg, int vreg) { + if (vr[vreg].loc == ML_ARMREG) { + // emit_->VMOV(armReg, (ARM64Reg)(S0 + vr[vreg].reg)); + } else { + MapRegV(vreg); + // emit_->VMOV(armReg, V(vreg)); + } +} + +void ArmRegCacheFPU::MapRegsAndSpillLockV(int vec, VectorSize sz, int flags) { + u8 v[4]; + GetVectorRegs(v, sz, vec); + SpillLockV(v, sz); + for (int i = 0; i < GetNumVectorElements(sz); i++) { + MapRegV(v[i], flags); + } +} + +void ArmRegCacheFPU::MapRegsAndSpillLockV(const u8 *v, VectorSize sz, int flags) { + SpillLockV(v, sz); + for (int i = 0; i < GetNumVectorElements(sz); i++) { + MapRegV(v[i], flags); + } +} + +void ArmRegCacheFPU::MapInInV(int vs, int vt) { + SpillLockV(vs); + SpillLockV(vt); + MapRegV(vs); + MapRegV(vt); + ReleaseSpillLockV(vs); + ReleaseSpillLockV(vt); +} + +void ArmRegCacheFPU::MapDirtyInV(int vd, int vs, bool avoidLoad) { + bool overlap = avoidLoad && (vd == vs); + SpillLockV(vd); + SpillLockV(vs); + MapRegV(vd, overlap ? MAP_DIRTY : MAP_NOINIT); + MapRegV(vs); + ReleaseSpillLockV(vd); + ReleaseSpillLockV(vs); +} + +void ArmRegCacheFPU::MapDirtyInInV(int vd, int vs, int vt, bool avoidLoad) { + bool overlap = avoidLoad && ((vd == vs) || (vd == vt)); + SpillLockV(vd); + SpillLockV(vs); + SpillLockV(vt); + MapRegV(vd, overlap ? MAP_DIRTY : MAP_NOINIT); + MapRegV(vs); + MapRegV(vt); + ReleaseSpillLockV(vd); + ReleaseSpillLockV(vs); + ReleaseSpillLockV(vt); +} + +void ArmRegCacheFPU::FlushArmReg(ARM64Reg r) { + if (r >= S0 && r <= S31) { + int reg = r - S0; + if (ar[reg].mipsReg == -1) { + // Nothing to do, reg not mapped. + return; + } + if (ar[reg].mipsReg != -1) { + if (ar[reg].isDirty && mr[ar[reg].mipsReg].loc == ML_ARMREG) + { + //INFO_LOG(JIT, "Flushing ARM reg %i", reg); + // emit_->VSTR(r, CTXREG, GetMipsRegOffset(ar[reg].mipsReg)); + } + // IMMs won't be in an ARM reg. 
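+ // The store itself is still commented out above, so for now only the bookkeeping is
+ // updated here; nothing is actually written back to the context yet.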
+ mr[ar[reg].mipsReg].loc = ML_MEM; + mr[ar[reg].mipsReg].reg = INVALID_REG; + } else { + ERROR_LOG(JIT, "Dirty but no mipsreg?"); + } + ar[reg].isDirty = false; + ar[reg].mipsReg = -1; + } +} + +void ArmRegCacheFPU::FlushV(MIPSReg r) { + FlushR(r + 32); +} + +void ArmRegCacheFPU::FlushR(MIPSReg r) { + switch (mr[r].loc) { + case ML_IMM: + // IMM is always "dirty". + // IMM is not allowed for FP (yet). + ERROR_LOG(JIT, "Imm in FP register?"); + break; + + case ML_ARMREG: + if (mr[r].reg == INVALID_REG) { + ERROR_LOG(JIT, "FlushR: MipsReg had bad ArmReg"); + } + + if (mr[r].reg >= Q0 && mr[r].reg <= Q15) { + // This should happen rarely, but occasionally we need to flush a single stray + // mipsreg that's been part of a quad. + int quad = mr[r].reg - Q0; + if (qr[quad].isDirty) { + WARN_LOG(JIT, "FlushR found quad register %i - PC=%08x", quad, js_->compilerPC); + //emit_->ADDI2R(R0, CTXREG, GetMipsRegOffset(r), R1); + //emit_->VST1_lane(F_32, (ARM64Reg)mr[r].reg, R0, mr[r].lane, true); + } + } else { + if (ar[mr[r].reg].isDirty) { + //INFO_LOG(JIT, "Flushing dirty reg %i", mr[r].reg); + // emit_->VSTR((ARM64Reg)(mr[r].reg + S0), CTXREG, GetMipsRegOffset(r)); + ar[mr[r].reg].isDirty = false; + } + ar[mr[r].reg].mipsReg = -1; + } + break; + + case ML_MEM: + // Already there, nothing to do. + break; + + default: + //BAD + break; + } + mr[r].loc = ML_MEM; + mr[r].reg = (int)INVALID_REG; +} + +int ArmRegCacheFPU::GetNumARMFPURegs() { + if (cpu_info.bNEON) + return 32; + else + return 16; +} + +// Scalar only. Need a similar one for sequential Q vectors. +int ArmRegCacheFPU::FlushGetSequential(int a, int maxArmReg) { + int c = 1; + int lastMipsOffset = GetMipsRegOffset(ar[a].mipsReg); + a++; + while (a < maxArmReg) { + if (!ar[a].isDirty || ar[a].mipsReg == -1) + break; + int mipsOffset = GetMipsRegOffset(ar[a].mipsReg); + if (mipsOffset != lastMipsOffset + 4) { + break; + } + + lastMipsOffset = mipsOffset; + a++; + c++; + } + return c; +} + +void ArmRegCacheFPU::FlushAll() { + if (!pendingFlush) { + // Nothing allocated. FPU regs are not nearly as common as GPR. + return; + } + + // Discard temps! + for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; i++) { + DiscardR(i); + } + + // Loop through the ARM registers, then use GetMipsRegOffset to determine if MIPS registers are + // sequential. This is necessary because we store VFPU registers in a staggered order to get + // columns sequential (most VFPU math in nearly all games is in columns, not rows). + + int numArmRegs; + // We rely on the allocation order being sequential. + const ARM64Reg baseReg = GetMIPSAllocationOrder(numArmRegs)[0]; + + for (int i = 0; i < numArmRegs; i++) { + int a = (baseReg - S0) + i; + int m = ar[a].mipsReg; + + if (ar[a].isDirty) { + if (m == -1) { + ILOG("ARM reg %i is dirty but has no mipsreg", a); + continue; + } + + int c = FlushGetSequential(a, GetNumARMFPURegs()); + if (c == 1) { + // ILOG("Got single register: %i (%i)", a, m); + //emit_->VSTR((ARM64Reg)(a + S0), CTXREG, GetMipsRegOffset(m)); + } else if (c == 2) { + // Probably not worth using VSTMIA for two. + int offset = GetMipsRegOffset(m); + //emit_->VSTR((ARM64Reg)(a + S0), CTXREG, offset); + //emit_->VSTR((ARM64Reg)(a + 1 + S0), CTXREG, offset + 4); + } else { + // ILOG("Got sequence: %i at %i (%i)", c, a, m); + //emit_->ADDI2R(SCRATCHREG1, CTXREG, GetMipsRegOffset(m), SCRATCHREG2); + // ILOG("VSTMIA R0, %i, %i", a, c); + //emit_->VSTMIA(SCRATCHREG1, false, (ARM64Reg)(S0 + a), c); + } + + // Skip past, and mark as non-dirty. 
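+ // Every register in the run goes back to ML_MEM, and the outer loop index then skips
+ // over the whole sequence (i += c - 1 below).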
+ for (int j = 0; j < c; j++) { + int b = a + j; + mr[ar[b].mipsReg].loc = ML_MEM; + mr[ar[b].mipsReg].reg = (int)INVALID_REG; + ar[a + j].mipsReg = -1; + ar[a + j].isDirty = false; + } + i += c - 1; + } else { + if (m != -1) { + mr[m].loc = ML_MEM; + mr[m].reg = (int)INVALID_REG; + } + ar[a].mipsReg = -1; + // already not dirty + } + } + + // Sanity check + for (int i = 0; i < numARMFpuReg_; i++) { + if (ar[i].mipsReg != -1) { + ERROR_LOG(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg); + } + } + pendingFlush = false; +} + +void ArmRegCacheFPU::DiscardR(MIPSReg r) { + switch (mr[r].loc) { + case ML_IMM: + // IMM is always "dirty". + // IMM is not allowed for FP (yet). + ERROR_LOG(JIT, "Imm in FP register?"); + break; + + case ML_ARMREG: + if (mr[r].reg == INVALID_REG) { + ERROR_LOG(JIT, "DiscardR: MipsReg had bad ArmReg"); + } else { + // Note that we DO NOT write it back here. That's the whole point of Discard. + ar[mr[r].reg].isDirty = false; + ar[mr[r].reg].mipsReg = -1; + } + break; + + case ML_MEM: + // Already there, nothing to do. + break; + + default: + //BAD + break; + } + mr[r].loc = ML_MEM; + mr[r].reg = (int)INVALID_REG; + mr[r].tempLock = false; + mr[r].spillLock = false; +} + +bool ArmRegCacheFPU::IsTempX(ARM64Reg r) const { + return ar[r - S0].mipsReg >= TEMP0; +} + +int ArmRegCacheFPU::GetTempR() { + if (jo_->useNEONVFPU) { + ERROR_LOG(JIT, "VFP temps not allowed in NEON mode"); + return 0; + } + pendingFlush = true; + for (int r = TEMP0; r < TEMP0 + NUM_TEMPS; ++r) { + if (mr[r].loc == ML_MEM && !mr[r].tempLock) { + mr[r].tempLock = true; + return r; + } + } + + ERROR_LOG(CPU, "Out of temp regs! Might need to DiscardR() some"); + _assert_msg_(JIT, 0, "Regcache ran out of temp regs, might need to DiscardR() some."); + return -1; +} + +int ArmRegCacheFPU::GetMipsRegOffset(MIPSReg r) { + // These are offsets within the MIPSState structure. First there are the GPRS, then FPRS, then the "VFPURs", then the VFPU ctrls. + if (r < 0 || r > 32 + 128 + NUM_TEMPS) { + ERROR_LOG(JIT, "bad mips register %i, out of range", r); + return 0; // or what? + } + + if (r < 32 || r >= 32 + 128) { + return (32 + r) << 2; + } else { + // r is between 32 and 128 + 32 + return (32 + 32 + voffset[r - 32]) << 2; + } +} + +void ArmRegCacheFPU::SpillLock(MIPSReg r1, MIPSReg r2, MIPSReg r3, MIPSReg r4) { + mr[r1].spillLock = true; + if (r2 != -1) mr[r2].spillLock = true; + if (r3 != -1) mr[r3].spillLock = true; + if (r4 != -1) mr[r4].spillLock = true; +} + +// This is actually pretty slow with all the 160 regs... +void ArmRegCacheFPU::ReleaseSpillLocksAndDiscardTemps() { + for (int i = 0; i < NUM_MIPSFPUREG; i++) { + mr[i].spillLock = false; + } + for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; ++i) { + DiscardR(i); + } + for (int i = 0; i < MAX_ARMQUADS; i++) { + qr[i].spillLock = false; + if (qr[i].isTemp) { + qr[i].isTemp = false; + qr[i].sz = V_Invalid; + } + } +} + +ARM64Reg ArmRegCacheFPU::R(int mipsReg) { + if (mr[mipsReg].loc == ML_ARMREG) { + return (ARM64Reg)(mr[mipsReg].reg + S0); + } else { + if (mipsReg < 32) { + ERROR_LOG(JIT, "FReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg, js_->compilerPC, MIPSDisasmAt(js_->compilerPC)); + } else if (mipsReg < 32 + 128) { + ERROR_LOG(JIT, "VReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC)); + } else { + ERROR_LOG(JIT, "Tempreg %i not in ARM reg. 
compilerPC = %08x : %s", mipsReg - 128 - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC)); + } + return INVALID_REG; // BAAAD + } +} diff --git a/Core/MIPS/ARM64/Arm64RegCacheFPU.h b/Core/MIPS/ARM64/Arm64RegCacheFPU.h new file mode 100644 index 0000000000..e51d268cb5 --- /dev/null +++ b/Core/MIPS/ARM64/Arm64RegCacheFPU.h @@ -0,0 +1,186 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#pragma once + +#include "../MIPS.h" +#include "../MIPSAnalyst.h" +#include "Core/MIPS/ARM64/Arm64RegCache.h" +#include "Core/MIPS/MIPSVFPUUtils.h" +#include "Common/Arm64Emitter.h" + +// These collide with something on Blackberry. +#undef MAP_NOINIT +#undef MAP_READ + +namespace Arm64JitConstants { + +enum { + NUM_TEMPS = 16, + TEMP0 = 32 + 128, + TOTAL_MAPPABLE_MIPSFPUREGS = 32 + 128 + NUM_TEMPS, +}; + +enum { + MAP_READ = 0, + MAP_MTX_TRANSPOSED = 16, + MAP_PREFER_LOW = 16, + MAP_PREFER_HIGH = 32, + + // Force is not yet correctly implemented, if the reg is already mapped it will not move + MAP_FORCE_LOW = 64, // Only map Q0-Q7 (and probably not Q0-Q3 as they are S registers so that leaves Q8-Q15) + MAP_FORCE_HIGH = 128, // Only map Q8-Q15 +}; + +} + +struct FPURegARM64 { + int mipsReg; // if -1, no mipsreg attached. + bool isDirty; // Should the register be written back? +}; + +struct FPURegQuad64 { + int mipsVec; + VectorSize sz; + u8 vregs[4]; + bool isDirty; + bool spillLock; + bool isTemp; +}; + +struct FPURegMIPS { + // Where is this MIPS register? + Arm64JitConstants::RegMIPSLoc loc; + // Data (only one of these is used, depending on loc. Could make a union). + u32 reg; + int lane; + + bool spillLock; // if true, this register cannot be spilled. + bool tempLock; + // If loc == ML_MEM, it's back in its location in the CPU context struct. +}; + +namespace MIPSComp { + struct Arm64JitOptions; + struct JitState; +} + +class ArmRegCacheFPU { +public: + ArmRegCacheFPU(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::Arm64JitOptions *jo); + ~ArmRegCacheFPU() {} + + void Init(Arm64Gen::ARM64XEmitter *emitter); + + void Start(MIPSAnalyst::AnalysisResults &stats); + + // Protect the arm register containing a MIPS register from spilling, to ensure that + // it's being kept allocated. + void SpillLock(MIPSReg reg, MIPSReg reg2 = -1, MIPSReg reg3 = -1, MIPSReg reg4 = -1); + void SpillLockV(MIPSReg r) { SpillLock(r + 32); } + + void ReleaseSpillLocksAndDiscardTemps(); + void ReleaseSpillLock(int mipsreg) { + mr[mipsreg].spillLock = false; + } + void ReleaseSpillLockV(int mipsreg) { + ReleaseSpillLock(mipsreg + 32); + } + + void SetImm(MIPSReg reg, u32 immVal); + bool IsImm(MIPSReg reg) const; + u32 GetImm(MIPSReg reg) const; + + // Returns an ARM register containing the requested MIPS register. 
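+ // Plain FPU registers use indices 0-31; VFPU registers are addressed as index + 32
+ // (see MapRegV()/V() below), and the temporaries start at TEMP0 above that.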
+ Arm64Gen::ARM64Reg MapReg(MIPSReg reg, int mapFlags = 0); + void MapInIn(MIPSReg rd, MIPSReg rs); + void MapDirty(MIPSReg rd); + void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true); + void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true); + bool IsMapped(MIPSReg r); + void FlushArmReg(Arm64Gen::ARM64Reg r); + void FlushR(MIPSReg r); + void DiscardR(MIPSReg r); + Arm64Gen::ARM64Reg R(int preg); // Returns a cached register + + // VFPU register as single ARM VFP registers. Must not be used in the upcoming NEON mode! + void MapRegV(int vreg, int flags = 0); + void LoadToRegV(Arm64Gen::ARM64Reg armReg, int vreg); + void MapInInV(int rt, int rs); + void MapDirtyInV(int rd, int rs, bool avoidLoad = true); + void MapDirtyInInV(int rd, int rs, int rt, bool avoidLoad = true); + + bool IsTempX(Arm64Gen::ARM64Reg r) const; + MIPSReg GetTempV() { return GetTempR() - 32; } + // VFPU registers as single VFP registers. + Arm64Gen::ARM64Reg V(int vreg) { return R(vreg + 32); } + + int FlushGetSequential(int a, int maxArmReg); + void FlushAll(); + + // This one is allowed at any point. + void FlushV(MIPSReg r); + + // NOTE: These require you to release spill locks manually! + void MapRegsAndSpillLockV(int vec, VectorSize vsz, int flags); + void MapRegsAndSpillLockV(const u8 *v, VectorSize vsz, int flags); + + void SpillLockV(const u8 *v, VectorSize vsz); + void SpillLockV(int vec, VectorSize vsz); + + void SetEmitter(Arm64Gen::ARM64XEmitter *emitter) { emit_ = emitter; } + + int GetMipsRegOffset(MIPSReg r); + +private: + MIPSReg GetTempR(); + const Arm64Gen::ARM64Reg *GetMIPSAllocationOrder(int &count); + int GetMipsRegOffsetV(MIPSReg r) { + return GetMipsRegOffset(r + 32); + } + int GetNumARMFPURegs(); + + void SetupInitialRegs(); + + MIPSState *mips_; + Arm64Gen::ARM64XEmitter *emit_; + MIPSComp::JitState *js_; + MIPSComp::Arm64JitOptions *jo_; + + int numARMFpuReg_; + int qTime_; + + enum { + // With NEON, we have 64 S = 32 D = 16 Q registers. Only the first 32 S registers + // are individually mappable though. 
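+ // These constants size the ar[]/mr[]/qr[] arrays below; MAX_ARMQUADS covers the quad
+ // (Q register) bookkeeping touched by FlushR() and ReleaseSpillLocksAndDiscardTemps().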
+ MAX_ARMFPUREG = 32, + MAX_ARMQUADS = 16, + NUM_MIPSFPUREG = Arm64JitConstants::TOTAL_MAPPABLE_MIPSFPUREGS, + }; + + FPURegARM64 ar[MAX_ARMFPUREG]; + FPURegMIPS mr[NUM_MIPSFPUREG]; + FPURegQuad64 qr[MAX_ARMQUADS]; + FPURegMIPS *vr; + + bool pendingFlush; + bool initialReady; + FPURegARM64 arInitial[MAX_ARMFPUREG]; + FPURegMIPS mrInitial[NUM_MIPSFPUREG]; +}; diff --git a/Core/MIPS/JitCommon/JitBlockCache.h b/Core/MIPS/JitCommon/JitBlockCache.h index 40f12569e4..65c6559cd9 100644 --- a/Core/MIPS/JitCommon/JitBlockCache.h +++ b/Core/MIPS/JitCommon/JitBlockCache.h @@ -30,6 +30,10 @@ #include "Common/ArmEmitter.h" namespace ArmGen { class ARMXEmitter; } typedef ArmGen::ARMXCodeBlock NativeCodeBlock; +#elif defined(ARM64) +#include "Common/Arm64Emitter.h" +namespace Arm64Gen { class ARM64XEmitter; } +typedef Arm64Gen::ARM64CodeBlock NativeCodeBlock; #elif defined(_M_IX86) || defined(_M_X64) #include "Common/x64Emitter.h" namespace Gen { class XEmitter; } @@ -44,7 +48,7 @@ namespace FakeGen { class FakeXEmitter; } typedef FakeGen::FakeXCodeBlock NativeCodeBlock; #endif -#if defined(ARM) +#if defined(ARM) || defined(ARM64) const int MAX_JIT_BLOCK_EXITS = 2; #else const int MAX_JIT_BLOCK_EXITS = 8; diff --git a/Core/MIPS/JitCommon/JitCommon.cpp b/Core/MIPS/JitCommon/JitCommon.cpp index eb9643b2b4..b7a7e76aae 100644 --- a/Core/MIPS/JitCommon/JitCommon.cpp +++ b/Core/MIPS/JitCommon/JitCommon.cpp @@ -23,10 +23,13 @@ #include "ext/disarm.h" #include "ext/udis86/udis86.h" +#include "Core/Util/DisArm64.h" namespace MIPSComp { #if defined(ARM) ArmJit *jit; +#elif defined(ARM64) + Arm64Jit *jit; #else Jit *jit; #endif @@ -35,6 +38,7 @@ namespace MIPSComp { } } +#if !defined(ARM64) // We compile this for x86 as well because it may be useful when developing the ARM JIT on a PC. std::vector DisassembleArm2(const u8 *data, int size) { std::vector lines; @@ -75,8 +79,37 @@ std::vector DisassembleArm2(const u8 *data, int size) { } return lines; } +#endif -#ifndef ARM +#if !defined(ARM) +std::vector DisassembleArm64(const u8 *data, int size) { + std::vector lines; + + char temp[256]; + int bkpt_count = 0; + for (int i = 0; i < size; i += 4) { + const u32 *codePtr = (const u32 *)(data + i); + u32 inst = codePtr[0]; + Arm64Dis((u32)(intptr_t)codePtr, inst, temp, sizeof(temp), false); + std::string buf = temp; + if (buf == "BKPT 1") { + bkpt_count++; + } else { + if (bkpt_count) { + lines.push_back(StringFromFormat("BKPT 1 (x%i)", bkpt_count)); + bkpt_count = 0; + } + lines.push_back(buf); + } + } + if (bkpt_count) { + lines.push_back(StringFromFormat("BKPT 1 (x%i)", bkpt_count)); + } + return lines; +} +#endif + +#if !defined(ARM) && !defined(ARM64) const char *ppsspp_resolver(struct ud*, uint64_t addr, diff --git a/Core/MIPS/JitCommon/JitCommon.h b/Core/MIPS/JitCommon/JitCommon.h index 59b687c85a..a3a1c35828 100644 --- a/Core/MIPS/JitCommon/JitCommon.h +++ b/Core/MIPS/JitCommon/JitCommon.h @@ -24,6 +24,7 @@ // TODO: Find a better place for these. 
std::vector DisassembleArm2(const u8 *data, int size); +std::vector DisassembleArm64(const u8 *data, int size); std::vector DisassembleX86(const u8 *data, int size); namespace MIPSComp { diff --git a/Core/MIPS/JitCommon/NativeJit.h b/Core/MIPS/JitCommon/NativeJit.h index 603b5f4713..67cc8f8556 100644 --- a/Core/MIPS/JitCommon/NativeJit.h +++ b/Core/MIPS/JitCommon/NativeJit.h @@ -26,6 +26,9 @@ struct JitBlock; #if defined(ARM) #include "../ARM/ArmJit.h" typedef MIPSComp::ArmJit NativeJit; +#elif defined(ARM64) +#include "../ARM64/Arm64Jit.h" +typedef MIPSComp::Arm64Jit NativeJit; #elif defined(_M_IX86) || defined(_M_X64) #include "../x86/Jit.h" typedef MIPSComp::Jit NativeJit; diff --git a/Core/MIPS/MIPS.cpp b/Core/MIPS/MIPS.cpp index edc6f500fc..347eb052b2 100644 --- a/Core/MIPS/MIPS.cpp +++ b/Core/MIPS/MIPS.cpp @@ -211,6 +211,8 @@ void MIPSState::Init() { if (PSP_CoreParameter().cpuCore == CPU_JIT) { #ifdef ARM MIPSComp::jit = new MIPSComp::ArmJit(this); +#elif defined(ARM64) + MIPSComp::jit = new MIPSComp::Arm64Jit(this); #elif defined(_M_IX86) || defined(_M_X64) MIPSComp::jit = new MIPSComp::Jit(this); #elif defined(MIPS) @@ -218,6 +220,8 @@ void MIPSState::Init() { #else MIPSComp::jit = new MIPSComp::FakeJit(this); #endif + } else { + MIPSComp::jit = nullptr; } } @@ -236,6 +240,8 @@ void MIPSState::UpdateCore(CPUCore desired) { if (!MIPSComp::jit) { #ifdef ARM MIPSComp::jit = new MIPSComp::ArmJit(this); +#elif defined(ARM64) + MIPSComp::jit = new MIPSComp::Arm64Jit(this); #elif defined(_M_IX86) || defined(_M_X64) MIPSComp::jit = new MIPSComp::Jit(this); #elif defined(MIPS) diff --git a/Core/MIPS/MIPSTables.cpp b/Core/MIPS/MIPSTables.cpp index b79b7ee537..07b8bd18dd 100644 --- a/Core/MIPS/MIPSTables.cpp +++ b/Core/MIPS/MIPSTables.cpp @@ -90,6 +90,8 @@ struct MIPSInstruction { #ifdef ARM #define JITFUNC(f) (&ArmJit::f) +#elif defined(ARM64) +#define JITFUNC(f) (&Arm64Jit::f) #elif defined(_M_X64) || defined(_M_IX86) #define JITFUNC(f) (&Jit::f) #elif defined(MIPS) diff --git a/Core/MemMap.cpp b/Core/MemMap.cpp index 6c1455f5f2..3ef061b88b 100644 --- a/Core/MemMap.cpp +++ b/Core/MemMap.cpp @@ -185,7 +185,7 @@ static bool Memory_TryBase(u32 flags) { if (!*view.out_ptr_low) goto bail; } -#ifdef _M_X64 +#if defined(_M_X64) || defined(ARM64) *view.out_ptr = (u8*)g_arena.CreateView( position, view.size, base + view.virtual_address); #else diff --git a/Core/Util/AudioFormatNEON.cpp b/Core/Util/AudioFormatNEON.cpp index c30a3236da..3dd73dee6b 100644 --- a/Core/Util/AudioFormatNEON.cpp +++ b/Core/Util/AudioFormatNEON.cpp @@ -20,7 +20,7 @@ #include "Core/Util/AudioFormat.h" #include "Core/Util/AudioFormatNEON.h" -#ifndef ARM +#if !defined(ARM) && !defined(ARM64) #error Should not be compiled on non-ARM. #endif diff --git a/GPU/Common/TextureDecoderNEON.cpp b/GPU/Common/TextureDecoderNEON.cpp index 92bf04d523..c1fb50a7ee 100644 --- a/GPU/Common/TextureDecoderNEON.cpp +++ b/GPU/Common/TextureDecoderNEON.cpp @@ -18,7 +18,7 @@ #include #include "GPU/Common/TextureDecoder.h" -#ifndef ARM +#if !defined(ARM) && !defined(ARM64) #error Should not be compiled on non-ARM. 
#endif @@ -29,7 +29,7 @@ u32 QuickTexHashNEON(const void *checkp, u32 size) { __builtin_prefetch(checkp, 0, 0); if (((intptr_t)checkp & 0xf) == 0 && (size & 0x3f) == 0) { -#ifdef IOS +#if defined(IOS) || defined(ARM64) uint32x4_t cursor = vdupq_n_u32(0); uint16x8_t cursor2 = vld1q_u16(QuickTexHashInitial); uint16x8_t update = vdupq_n_u16(0x2455U); diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp new file mode 100644 index 0000000000..ee47742a85 --- /dev/null +++ b/GPU/Common/VertexDecoderArm64.cpp @@ -0,0 +1,33 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "base/logging.h" +#include "Common/CPUDetect.h" +#include "Core/Config.h" +#include "Core/Reporting.h" +#include "Common/Arm64Emitter.h" +#include "GPU/GPUState.h" +#include "GPU/Common/VertexDecoderCommon.h" + +JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) { + // TODO ARM64 + return NULL; +} + +bool VertexDecoderJitCache::CompileStep(const VertexDecoder &dec, int step) { + return false; +} diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h index 225d71f78b..426c1ae418 100644 --- a/GPU/Common/VertexDecoderCommon.h +++ b/GPU/Common/VertexDecoderCommon.h @@ -25,6 +25,8 @@ #include "GPU/ge_constants.h" #ifdef ARM #include "Common/ArmEmitter.h" +#elif defined(ARM64) +#include "Common/Arm64Emitter.h" #elif defined(_M_IX86) || defined(_M_X64) #include "Common/x64Emitter.h" #elif defined(MIPS) @@ -446,8 +448,7 @@ struct VertexDecoderOptions { bool expand8BitNormalsToFloat; }; -class VertexDecoder -{ +class VertexDecoder { public: VertexDecoder(); @@ -587,6 +588,8 @@ public: #ifdef ARM class VertexDecoderJitCache : public ArmGen::ARMXCodeBlock { +#elif defined(ARM64) +class VertexDecoderJitCache : public Arm64Gen::ARM64CodeBlock { #elif defined(_M_IX86) || defined(_M_X64) class VertexDecoderJitCache : public Gen::XCodeBlock { #elif defined(MIPS) diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index fd1886b67c..31217b07aa 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -262,6 +262,12 @@ true true + + true + true + true + true + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index 3ee6fe52ba..045ee59d1b 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -350,6 +350,9 @@ DirectX9 + + Common + diff --git a/UI/DevScreens.cpp b/UI/DevScreens.cpp index 4f0674a7c3..534c403f51 100644 --- a/UI/DevScreens.cpp +++ b/UI/DevScreens.cpp @@ -283,6 +283,8 @@ const char *GetCompilerABI() { return "armeabi-v7a"; #elif defined(ARM) return "armeabi"; +#elif defined(ARM64) + return "arm64"; #elif defined(_M_IX86) return "x86"; #elif defined(_M_X64) diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index e05bd84d54..df93a9686a 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ 
@@ -469,6 +469,9 @@ void GameSettingsScreen::CreateViews() {
 	systemSettings->Add(new ItemHeader(s->T("UI Language")));
 	systemSettings->Add(new Choice(dev->T("Language", "Language")))->OnClick.Handle(this, &GameSettingsScreen::OnLanguage);
+	systemSettings->Add(new ItemHeader(s->T("Developer Tools")));
+	systemSettings->Add(new Choice(s->T("Developer Tools")))->OnClick.Handle(this, &GameSettingsScreen::OnDeveloperTools);
+
 	systemSettings->Add(new ItemHeader(s->T("Help the PPSSPP team")));
 	enableReports_ = Reporting::IsEnabled();
 	enableReportsCheckbox_ = new CheckBox(&enableReports_, s->T("Enable Compatibility Server Reports"));
@@ -491,9 +494,6 @@ void GameSettingsScreen::CreateViews() {
 #endif
 	systemSettings->Add(new CheckBox(&g_Config.bSetRoundingMode, s->T("Respect FPU rounding (disable for old GEB saves)")))->OnClick.Handle(this, &GameSettingsScreen::OnJitAffectingSetting);
-	systemSettings->Add(new ItemHeader(s->T("Developer Tools")));
-	systemSettings->Add(new Choice(s->T("Developer Tools")))->OnClick.Handle(this, &GameSettingsScreen::OnDeveloperTools);
-
 	systemSettings->Add(new ItemHeader(s->T("General")));
 
 #ifdef ANDROID
diff --git a/android/jni/Android.mk b/android/jni/Android.mk
index de40bf89e7..377ff68b2c 100644
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@@ -51,7 +51,6 @@ ARCH_FILES := \
   $(SRC)/GPU/Common/VertexDecoderX86.cpp
 endif
 
-# ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
 ifeq ($(findstring armeabi-v7a,$(TARGET_ARCH_ABI)),armeabi-v7a)
 ARCH_FILES := \
   $(SRC)/GPU/Common/TextureDecoderNEON.cpp.neon \
@@ -72,9 +71,30 @@ ARCH_FILES := \
   $(SRC)/Core/MIPS/ARM/ArmRegCache.cpp \
   $(SRC)/Core/MIPS/ARM/ArmRegCacheFPU.cpp \
   $(SRC)/GPU/Common/VertexDecoderArm.cpp \
+  $(SRC)/ext/disarm.cpp \
   ArmEmitterTest.cpp
 endif
 
+ifeq ($(findstring arm64-v8a,$(TARGET_ARCH_ABI)),arm64-v8a)
+ARCH_FILES := \
+  $(SRC)/GPU/Common/TextureDecoderNEON.cpp \
+  $(SRC)/Core/Util/AudioFormatNEON.cpp \
+  $(SRC)/Common/Arm64Emitter.cpp \
+  $(SRC)/Common/ArmCPUDetect.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64CompALU.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64CompBranch.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64CompFPU.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64CompLoadStore.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64CompVFPU.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64CompReplace.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64Asm.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64Jit.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64RegCache.cpp \
+  $(SRC)/Core/MIPS/ARM64/Arm64RegCacheFPU.cpp \
+  $(SRC)/Core/Util/DisArm64.cpp \
+  $(SRC)/GPU/Common/VertexDecoderArm64.cpp
+endif
+
 ifeq ($(TARGET_ARCH_ABI),armeabi)
 ARCH_FILES := \
   $(SRC)/Common/ArmEmitter.cpp \
@@ -112,7 +132,6 @@ EXEC_AND_LIB_FILES := \
   $(SRC)/Core/MIPS/MIPSDebugInterface.cpp \
   $(SRC)/UI/ui_atlas.cpp \
   $(SRC)/UI/OnScreenDisplay.cpp \
-  $(SRC)/ext/disarm.cpp \
   $(SRC)/ext/libkirk/AES.c \
   $(SRC)/ext/libkirk/amctrl.c \
   $(SRC)/ext/libkirk/SHA1.c \
diff --git a/android/jni/Application.mk b/android/jni/Application.mk
index 7ae0f7e0aa..64df16e077 100644
--- a/android/jni/Application.mk
+++ b/android/jni/Application.mk
@@ -1,6 +1,7 @@
 APP_STL := gnustl_static
 APP_PLATFORM := android-9
-APP_ABI := armeabi-v7a x86
+#APP_ABI := armeabi-v7a x86
 #APP_ABI := armeabi-v7a
+APP_ABI := arm64-v8a
 APP_GNUSTL_CPP_FEATURES :=
-NDK_TOOLCHAIN_VERSION := 4.8
+# NDK_TOOLCHAIN_VERSION := 4.9
diff --git a/android/jni/Locals.mk b/android/jni/Locals.mk
index cbc59e0e6a..f8a940f305 100644
--- a/android/jni/Locals.mk
+++ b/android/jni/Locals.mk
@@ -1,10 +1,10 @@
 # These are definitions for LOCAL_ variables for PPSSPP.
 # They are shared between ppsspp_jni (lib for Android app) and ppsspp_headless.
-LOCAL_CFLAGS := -DUSE_FFMPEG -DUSING_GLES2 -DMOBILE_DEVICE -O3 -fsigned-char -Wall -Wno-multichar -Wno-psabi -Wno-unused-variable -fno-strict-aliasing -D__STDC_CONSTANT_MACROS
+LOCAL_CFLAGS := -DUSE_FFMPEG -DUSING_GLES2 -DMOBILE_DEVICE -O3 -fsigned-char -Wall -Wno-multichar -Wno-psabi -Wno-unused-variable -fno-strict-aliasing -D__STDC_CONSTANT_MACROS -Wno-format
 
 # yes, it's really CPPFLAGS for C++
 # literal-suffix is generated by Android default code and causes noise.
-LOCAL_CPPFLAGS := -fno-exceptions -std=gnu++11 -fno-rtti -Wno-reorder -Wno-literal-suffix
+LOCAL_CPPFLAGS := -fno-exceptions -std=gnu++11 -fno-rtti -Wno-reorder -Wno-literal-suffix -Wno-format
 LOCAL_C_INCLUDES := \
   $(LOCAL_PATH)/../../Common \
   $(LOCAL_PATH)/../.. \
@@ -49,12 +49,12 @@ ifeq ($(TARGET_ARCH_ABI),x86)
 LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_ARCH_32 -D_M_IX86 -fomit-frame-pointer -mtune=atom -mfpmath=sse -mssse3
 endif
 
 ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
-  #LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavformat.a
-  #LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavcodec.a
-  #LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswresample.a
-  #LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswscale.a
-  #LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavutil.a
-  #LOCAL_C_INCLUDES += $(LOCAL_PATH)/../../ffmpeg/android/arm64/include
+  LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavformat.a
+  LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavcodec.a
+  LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswresample.a
+  LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libswscale.a
+  LOCAL_LDLIBS += $(LOCAL_PATH)/../../ffmpeg/android/arm64/lib/libavutil.a
+  LOCAL_C_INCLUDES += $(LOCAL_PATH)/../../ffmpeg/android/arm64/include
   LOCAL_CFLAGS := $(LOCAL_CFLAGS) -D_ARCH_64 -DARM64
 endif
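
Note on the MIPS.cpp hunk above: since MIPSComp::jit is now explicitly reset to nullptr whenever a non-JIT CPU core is selected, any code path that may run under either core has to null-check the pointer before dispatching into the JIT. Below is a minimal standalone sketch of that pattern; it is not part of the diff, and JitInterface, RunCpuLoop and RunInterpreterLoop are illustrative names, not PPSSPP's actual API.

// sketch.cpp - hypothetical caller-side null check for MIPSComp::jit
#include <cstdio>

namespace MIPSComp {
	// Stand-in for the real jit class; RunLoopUntil is just a placeholder here.
	struct JitInterface {
		void RunLoopUntil(unsigned long long globalTicks) { (void)globalTicks; }
	};
	// Mirrors the new explicit "MIPSComp::jit = nullptr" assignment in MIPSState::Init().
	JitInterface *jit = nullptr;
}

static void RunInterpreterLoop(unsigned long long ticks) {
	std::printf("interpreter core for %llu ticks\n", ticks);
}

static void RunCpuLoop(unsigned long long ticks) {
	if (MIPSComp::jit)
		MIPSComp::jit->RunLoopUntil(ticks);  // JIT core is available
	else
		RunInterpreterLoop(ticks);           // jit == nullptr: take the interpreter path
}

int main() {
	RunCpuLoop(1000);
	return 0;
}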