Merge pull request #6989 from hrydgard/x86-emitter-merge

Merge from Dolphin's x86-64 emitter
2025-04-02 11:01:50 -04:00 · 2014-10-12 19:52:59 +02:00 · 2014-10-12 19:52:59 +02:00 · 6cb2c9c97d
commit 6cb2c9c97d
parent b7f2731860 7bde976069
12 changed files with 1254 additions and 737 deletions
--- a/Common/CPUDetect.cpp
+++ b/Common/CPUDetect.cpp
@ -49,6 +49,17 @@ void do_cpuid(u32 regs[4], u32 cpuid_leaf) {
 #ifdef _M_SSE
 #include <xmmintrin.h>
 #define _XCR_XFEATURE_ENABLED_MASK 0
 // Executes the XGETBV instruction for the extended control register selected
 // by `index` (passed in ECX) and returns the result as a single 64-bit value:
 // EDX supplies the upper 32 bits and EAX the lower 32 bits.
 static unsigned long long _xgetbv(unsigned int index)
 {
 	unsigned int eax, edx;
 	__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
 	return ((unsigned long long)edx << 32) | eax;
 }
 #else
 #define _XCR_XFEATURE_ENABLED_MASK 0
 #endif
 #if defined __FreeBSD__
@ -172,6 +183,38 @@ void CPUInfo::Detect() {
 				bFMA = true;
 		}
 		if ((cpu_id[2] >> 25) & 1) bAES = true;
 		if ((cpu_id[3] >> 24) & 1)
 		{
 			// We can use FXSAVE.
 			bFXSR = true;
 		}
 		// AVX support requires 3 separate checks:
 		//  - Is the AVX bit set in CPUID?
 		//  - Is the XSAVE bit set in CPUID?
 		//  - XGETBV result has the XCR bit set.
 		if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1))
 		{
 			if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6)
 			{
 				bAVX = true;
 				if ((cpu_id[2] >> 12) & 1)
 					bFMA = true;
 			}
 		}
 		if (max_std_fn >= 7)
 		{
 			do_cpuid(cpu_id, 0x00000007);
 			// careful; we can't enable AVX2 unless the XSAVE/XGETBV checks above passed
 			if ((cpu_id[1] >> 5) & 1)
 				bAVX2 = bAVX;
 			if ((cpu_id[1] >> 3) & 1)
 				bBMI1 = true;
 			if ((cpu_id[1] >> 8) & 1)
 				bBMI2 = true;
 		}
 	}
 	if (max_ex_fn >= 0x80000004) {
 		// Extract brand string
--- a/Common/CPUDetect.h
+++ b/Common/CPUDetect.h
@ -56,10 +56,15 @@ struct CPUInfo {
 	bool bLZCNT;
 	bool bSSE4A;
 	bool bAVX;
 	bool bAVX2;
 	bool bFMA;
 	bool bAES;
 	bool bLAHFSAHF64;
 	bool bLongMode;
 	bool bBMI1;
 	bool bBMI2;
 	bool bMOVBE;
 	bool bFXSR;
 	// ARM specific CPUInfo
 	bool bSwp;
--- a/Common/x64Emitter.cpp
+++ b/Common/x64Emitter.cpp
--- a/Common/x64Emitter.h
+++ b/Common/x64Emitter.h
@ -22,8 +22,8 @@
 #include "Common.h"
-#if !defined(_M_IX86) && !defined(_M_X64)
+#ifdef _M_X64
-#error "Don't build this on arm."
+#define _ARCH_64
 #endif
 #ifdef _M_X64
@ -55,6 +55,9 @@ enum X64Reg
 	XMM0=0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
 	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
 	YMM0=0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
 	YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15,
 	INVALID_REG = 0xFFFFFFFF
 };
@ -117,8 +120,7 @@ enum NormalOp {
 	nrmXCHG,
 };
-enum
+enum {
 {
 	CMP_EQ = 0,
 	CMP_LT = 1,
 	CMP_LE = 2,
@ -129,8 +131,19 @@ enum
 	CMP_ORD = 7,
 };
 // Selects which x87 load/store operation to emit.
 // NOTE(review): the non-negative values look like the ModRM /digit opcode
 // extensions of the corresponding FLD/FST/FSTP encodings, and the *80 entries
 // look like the 80-bit forms - confirm against WriteFloatModRM.
 // floatINVALID (-1) is the only negative value.
 enum FloatOp {
 	floatLD = 0,
 	floatST = 2,
 	floatSTP = 3,
 	floatLD80 = 5,
 	floatSTP80 = 7,
 	floatINVALID = -1,
 };
 class XEmitter;
 // RIP addressing does not benefit from micro op fusion on Core arch
 struct OpArg
 {
 	OpArg() {}  // dummy op arg, used for storage
@ -140,11 +153,18 @@ struct OpArg
 		scale = (u8)_scale;
 		offsetOrBaseReg = (u16)rmReg;
 		indexReg = (u16)scaledReg;
-		//if scale == 0 never mind offseting
+		//if scale == 0 never mind offsetting
 		offset = _offset;
 	}
 	// Field-wise equality: two OpArgs compare equal only when operandReg,
 	// scale, offsetOrBaseReg, indexReg and offset all match.
 	// Takes `b` by value and is not declared const.
 	bool operator==(OpArg b)
 	{
 		return operandReg == b.operandReg && scale == b.scale && offsetOrBaseReg == b.offsetOrBaseReg &&
 		       indexReg == b.indexReg && offset == b.offset;
 	}
 	void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const;
-	void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=(X64Reg)0xFF, bool warn_64bit_offset = true) const;
+	void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const;
 	void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const;
 	void WriteFloatModRM(XEmitter *emit, FloatOp op);
 	void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits);
 	// This one is public - must be written to
 	u64 offset;  // use RIP-relative as much as possible - 64-bit immediates are not available.
@ -153,7 +173,10 @@ struct OpArg
 	void WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const;
 	bool IsImm() const {return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || scale == SCALE_IMM64;}
 	bool IsSimpleReg() const {return scale == SCALE_NONE;}
-	bool IsSimpleReg(X64Reg reg) const {
+	bool IsSimpleReg(X64Reg reg) const
 	{
 		if (!IsSimpleReg())
 			return false;
 		return GetSimpleReg() == reg;
 	}
@ -192,6 +215,7 @@ struct OpArg
 	void IncreaseOffset(int sz) {
 		offset += sz;
 	}
 private:
 	u8 scale;
 	u16 offsetOrBaseReg;
@ -203,21 +227,30 @@ template <typename T>
 inline OpArg M(const T *ptr)    {return OpArg((u64)(const void *)ptr, (int)SCALE_RIP);}
 inline OpArg R(X64Reg value)    {return OpArg(0, SCALE_NONE, value);}
 inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);}
-inline OpArg MDisp(X64Reg value, int offset) {
+
 // Builds an OpArg with scale SCALE_ATREG on register `value` plus a
 // displacement. The int `offset` is converted to u32 before it is handed to
 // the OpArg constructor.
 // NOTE(review): presumably a [reg + disp] memory operand - confirm.
 inline OpArg MDisp(X64Reg value, int offset)
 {
 	return OpArg((u32)offset, SCALE_ATREG, value);
 }
-inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) {
+
 // Builds an OpArg from all four addressing components, forwarding them to the
 // OpArg constructor in the order (offset, scale, base, scaled).
 // NOTE(review): presumably encodes [base + scaled * scale + offset] - confirm.
 inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset)
 {
 	return OpArg(offset, scale, base, scaled);
 }
-inline OpArg MScaled(X64Reg scaled, int scale, int offset) {
+
 // Builds an OpArg from a scaled register and an offset, with no caller-supplied
 // base register.
 //  - scale == SCALE_1: returns the SCALE_ATREG form on `scaled` with `offset`.
 //  - otherwise: ORs 0x20 into `scale` and passes RAX in the base-register slot.
 // NOTE(review): 0x20 presumably flags "no base register", making RAX only a
 // placeholder - confirm against the OpArg encoder.
 inline OpArg MScaled(X64Reg scaled, int scale, int offset)
 {
 	if (scale == SCALE_1)
 		return OpArg(offset, SCALE_ATREG, scaled);
 	else
 		return OpArg(offset, scale | 0x20, RAX, scaled);
 }
-inline OpArg MRegSum(X64Reg base, X64Reg offset) {
+
 // Convenience wrapper around MComplex(base, offset, 1, 0). Note that `offset`
 // is a register here, not a numeric displacement; the scale argument is 1 and
 // the displacement is 0.
 inline OpArg MRegSum(X64Reg base, X64Reg offset)
 {
 	return MComplex(base, offset, 1, 0);
 }
 inline OpArg Imm8 (u8 imm)  {return OpArg(imm, SCALE_IMM8);}
 inline OpArg Imm16(u16 imm) {return OpArg(imm, SCALE_IMM16);} //rarely used
 inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);}
@ -229,19 +262,23 @@ inline OpArg SImmAuto(s32 imm) {
 	return OpArg(imm, (imm >= 128 || imm < -128) ? SCALE_IMM32 : SCALE_IMM8);
 }
-#ifdef _M_X64
+#ifdef _ARCH_64
 inline OpArg ImmPtr(const void* imm) {return Imm64((u64)imm);}
 #else
 inline OpArg ImmPtr(const void* imm) {return Imm32((u32)imm);}
 #endif
-inline u32 PtrOffset(const void *ptr, const void *base) {
+
-#ifdef _M_X64
+inline u32 PtrOffset(const void* ptr, const void* base)
 {
 #ifdef _ARCH_64
 	s64 distance = (s64)ptr-(s64)base;
 	if (distance >= 0x80000000LL ||
-	    distance < -0x80000000LL) {
+	    distance < -0x80000000LL)
-		_assert_msg_(JIT, 0, "pointer offset out of range");
+	{
 		_assert_msg_(DYNA_REC, 0, "pointer offset out of range");
 		return 0;
 	}
 	return (u32)distance;
 #else
 	return (u32)ptr-(u32)base;
@ -259,6 +296,18 @@ struct FixupBranch
 	int type; //0 = 8bit 1 = 32bit
 };
 // Comparison selectors numbered consecutively from EQ = 0 through ORD = 7.
 // NOTE(review): presumably the imm8 predicate of the SSE CMPxx instructions,
 // mirroring the CMP_* constants declared earlier in this header - confirm.
 enum SSECompare
 {
 	EQ = 0,
 	LT,
 	LE,
 	UNORD,
 	NEQ,
 	NLT,
 	NLE,
 	ORD,
 };
 typedef const u8* JumpTarget;
 class XEmitter
@ -266,33 +315,40 @@ class XEmitter
 	friend struct OpArg;  // for Write8 etc
 private:
 	u8 *code;
 	bool flags_locked;
 	void CheckFlags();
 	void Rex(int w, int r, int x, int b);
 	void WriteSimple1Byte(int bits, u8 byte, X64Reg reg);
 	void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
 	void WriteMulDivType(int bits, OpArg src, int ext);
-	void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2);
+	void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
 	void WriteShift(int bits, OpArg dest, OpArg &shift, int ext);
 	void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext);
 	void WriteMXCSR(OpArg arg, int ext);
-	void WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0);
+	void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
-	void WriteSSEOp2(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0);
+	void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
 	void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
 	void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
 	void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
 	void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
 	void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
 	void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
 	void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg);
 	void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
 	void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
 protected:
-	inline void Write8(u8 value)   {
+	inline void Write8(u8 value)   {*code++ = value;}
 		//if (value == 0xcc) {
 		//	value = 0xcc;   // set breakpoint here to find where mysterious 0xcc are written
 		//}
 		*code++ = value;
 	}
 	inline void Write16(u16 value) {*(u16*)code = (value); code += 2;}
 	inline void Write32(u32 value) {*(u32*)code = (value); code += 4;}
 	inline void Write64(u64 value) {*(u64*)code = (value); code += 8;}
 public:
-	XEmitter() { code = NULL; }
+	XEmitter() { code = nullptr; flags_locked = false; }
-	XEmitter(u8 *code_ptr) { code = code_ptr; }
+	XEmitter(u8 *code_ptr) { code = code_ptr; flags_locked = false; }
 	virtual ~XEmitter() {}
 	void WriteModRM(int mod, int rm, int reg);
@ -306,6 +362,9 @@ public:
 	const u8 *GetCodePtr() const;
 	u8 *GetWritableCodePtr();
 	void LockFlags() { flags_locked = true; }
 	void UnlockFlags() { flags_locked = false; }
 	// Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU
 	// INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr.,
 	// INC and DEC are slow on Intel Core, but not on AMD. They create a
@ -316,7 +375,7 @@ public:
 	void INT3();
 	// Do nothing
-	void NOP(int count = 1); //nop padding - TODO: fast nop slides, for amd and intel (check their manuals)
+	void NOP(size_t count = 1);
 	// Save energy in wait-loops on P4 only. Probably not too useful.
 	void PAUSE();
@ -453,6 +512,14 @@ public:
 	void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src); //automatically uses MOVSXD if necessary
 	void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src);
 	// Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE.
 	void MOVBE(int dbits, const OpArg& dest, const OpArg& src);
 	// Available only on AMD >= Phenom or Intel >= Haswell
 	void LZCNT(int bits, X64Reg dest, OpArg src);
 	// Note: this one is actually part of BMI1
 	void TZCNT(int bits, X64Reg dest, OpArg src);
 	// WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
 	void STMXCSR(OpArg memloc);
 	void LDMXCSR(OpArg memloc);
@ -461,7 +528,31 @@ public:
 	void LOCK();
 	void REP();
 	void REPNE();
 	void FSOverride();
 	void GSOverride();
 	// x87
 	// Bit masks named after fields of the x87 status word. Each entry is a
 	// single-bit mask except x87_TopOfStack, which covers the three bits
 	// 0x800, 0x1000 and 0x2000.
 	// NOTE(review): presumably intended for testing the value obtained via
 	// FNSTSW_AX - confirm at the call sites.
 	enum x87StatusWordBits {
 		x87_InvalidOperation = 0x1,
 		x87_DenormalizedOperand = 0x2,
 		x87_DivisionByZero = 0x4,
 		x87_Overflow = 0x8,
 		x87_Underflow = 0x10,
 		x87_Precision = 0x20,
 		x87_StackFault = 0x40,
 		x87_ErrorSummary = 0x80,
 		x87_C0 = 0x100,
 		x87_C1 = 0x200,
 		x87_C2 = 0x400,
 		x87_TopOfStack = 0x2000 | 0x1000 | 0x800,
 		x87_C3 = 0x4000,
 		x87_FPUBusy = 0x8000,
 	};
 	void FLD(int bits, OpArg src);
 	void FST(int bits, OpArg dest);
 	void FSTP(int bits, OpArg dest);
 	void FNSTSW_AX();
 	void FWAIT();
 	// SSE/SSE2: Floating point arithmetic
@ -493,18 +584,6 @@ public:
 	inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); }
 	inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); }
 	// I don't think these exist
 	/*
 	void ANDSD(X64Reg regOp, OpArg arg);  
 	void ANDNSS(X64Reg regOp, OpArg arg); 
 	void ANDNSD(X64Reg regOp, OpArg arg); 
 	void ORSS(X64Reg regOp, OpArg arg);   
 	void ORSD(X64Reg regOp, OpArg arg);   
 	void XORSS(X64Reg regOp, OpArg arg);   
 	void XORSD(X64Reg regOp, OpArg arg);   
 	*/
 	// SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
 	void ADDPS(X64Reg regOp, OpArg arg);
 	void ADDPD(X64Reg regOp, OpArg arg);
@ -541,11 +620,8 @@ public:
 	// SSE/SSE2: Useful alternative to shuffle in some cases.
 	void MOVDDUP(X64Reg regOp, OpArg arg);
 	// THESE TWO ARE NEW AND UNTESTED
 	void UNPCKLPS(X64Reg dest, OpArg src);
 	void UNPCKHPS(X64Reg dest, OpArg src);
 	// These are OK.
 	void UNPCKLPD(X64Reg dest, OpArg src);
 	void UNPCKHPD(X64Reg dest, OpArg src);
@ -555,19 +631,17 @@ public:
 	void UCOMISS(X64Reg regOp, OpArg arg);
 	void UCOMISD(X64Reg regOp, OpArg arg);
-	// SSE/SSE2: Moves. Use the right data type for your data to avoid slight penalties on some CPUs.
+	// SSE/SSE2: Moves. Use the right data type for your data, in most cases.
 	// Singles
 	void MOVAPS(X64Reg regOp, OpArg arg);
 	void MOVAPS(OpArg arg, X64Reg regOp);
 	void MOVUPS(X64Reg regOp, OpArg arg);
 	void MOVUPS(OpArg arg, X64Reg regOp);
 	// Doubles
 	void MOVAPD(X64Reg regOp, OpArg arg);
 	void MOVAPS(OpArg arg, X64Reg regOp);
 	void MOVAPD(OpArg arg, X64Reg regOp);
 	void MOVUPS(X64Reg regOp, OpArg arg);
 	void MOVUPD(X64Reg regOp, OpArg arg);
 	void MOVUPS(OpArg arg, X64Reg regOp);
 	void MOVUPD(OpArg arg, X64Reg regOp);
-	// Integers (NOTE: untested - I added these then it turned out I didn't have a use for them after all).
+
 	void MOVDQA(X64Reg regOp, OpArg arg);
 	void MOVDQA(OpArg arg, X64Reg regOp);
 	void MOVDQU(X64Reg regOp, OpArg arg);
@ -578,6 +652,14 @@ public:
 	void MOVSS(OpArg arg, X64Reg regOp);
 	void MOVSD(OpArg arg, X64Reg regOp);
 	void MOVLPD(X64Reg regOp, OpArg arg);
 	void MOVHPD(X64Reg regOp, OpArg arg);
 	void MOVLPD(OpArg arg, X64Reg regOp);
 	void MOVHPD(OpArg arg, X64Reg regOp);
 	void MOVHLPS(X64Reg regOp1, X64Reg regOp2);
 	void MOVLHPS(X64Reg regOp1, X64Reg regOp2);
 	void MOVD_xmm(X64Reg dest, const OpArg &arg);
 	void MOVQ_xmm(X64Reg dest, OpArg arg);
 	void MOVD_xmm(const OpArg &arg, X64Reg src);
@ -595,37 +677,34 @@ public:
 	void CVTPS2PD(X64Reg dest, OpArg src);
 	void CVTPD2PS(X64Reg dest, OpArg src);
 	void CVTSS2SD(X64Reg dest, OpArg src);
 	void CVTSI2SS(X64Reg dest, OpArg src);
 	void CVTSD2SS(X64Reg dest, OpArg src);
-	void CVTSD2SI(X64Reg dest, OpArg src);
+	void CVTSI2SD(X64Reg dest, OpArg src);
 	void CVTDQ2PD(X64Reg regOp, OpArg arg);
 	void CVTPD2DQ(X64Reg regOp, OpArg arg);
 	void CVTDQ2PS(X64Reg regOp, OpArg arg);
 	void CVTPS2DQ(X64Reg regOp, OpArg arg);
 	void CVTSI2SS(X64Reg xregdest, OpArg arg);  // Yeah, destination really is a GPR like EAX!
 	void CVTSS2SI(X64Reg xregdest, OpArg arg);  // Yeah, destination really is a GPR like EAX!
 	void CVTTSS2SI(X64Reg xregdest, OpArg arg);  // Yeah, destination really is a GPR like EAX!
 	void CVTTSD2SI(X64Reg xregdest, OpArg arg);  // Yeah, destination really is a GPR like EAX!
 	void CVTTPS2DQ(X64Reg regOp, OpArg arg);
-	void CVTTPD2DQ(X64Reg xregdest, OpArg arg);
+	void CVTTPD2DQ(X64Reg regOp, OpArg arg);
 	// Destinations are X64 regs (rax, rbx, ...) for these instructions.
 	void CVTSS2SI(X64Reg xregdest, OpArg src);
 	void CVTSD2SI(X64Reg xregdest, OpArg src);
 	void CVTTSS2SI(X64Reg xregdest, OpArg arg);
 	void CVTTSD2SI(X64Reg xregdest, OpArg arg);
 	// SSE2: Packed integer instructions
 	void PACKSSDW(X64Reg dest, OpArg arg);
 	void PACKSSWB(X64Reg dest, OpArg arg);
-	//void PACKUSDW(X64Reg dest, OpArg arg);
+	void PACKUSDW(X64Reg dest, OpArg arg);
 	void PACKUSWB(X64Reg dest, OpArg arg);
 	void PUNPCKLBW(X64Reg dest, const OpArg &arg);
 	void PUNPCKLWD(X64Reg dest, const OpArg &arg);
 	void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
-	void PMOVSXBW(X64Reg dest, const OpArg &arg);
+	void PTEST(X64Reg dest, OpArg arg);
 	void PMOVSXBD(X64Reg dest, const OpArg &arg);
 	void PMOVSXWD(X64Reg dest, const OpArg &arg);
 	void PMOVZXBW(X64Reg dest, const OpArg &arg);
 	void PMOVZXBD(X64Reg dest, const OpArg &arg);
 	void PMOVZXWD(X64Reg dest, const OpArg &arg);
 	void PAND(X64Reg dest, OpArg arg);
 	void PANDN(X64Reg dest, OpArg arg);
 	void PXOR(X64Reg dest, OpArg arg);
@ -675,32 +754,84 @@ public:
 	// SSE4 has PMAXSB and PMINSB and PMAXUW and PMINUW too if we need them.
 	void PMOVMSKB(X64Reg dest, OpArg arg);
 	void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle);
 	void PSHUFB(X64Reg dest, OpArg arg);
 	void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle);
 	void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle);
 	void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle);
 	void PSRLW(X64Reg reg, int shift);
 	void PSRLD(X64Reg reg, int shift);
 	void PSRLQ(X64Reg reg, int shift);
 	void PSRLQ(X64Reg reg, OpArg arg);
 	void PSRLDQ(X64Reg reg, int shift);
 	void PSLLW(X64Reg reg, int shift);
 	void PSLLD(X64Reg reg, int shift);
 	void PSLLQ(X64Reg reg, int shift);
 	void PSRLDQ(X64Reg reg, int shift);
 	void PSLLDQ(X64Reg reg, int shift);
 	void PSRAW(X64Reg reg, int shift);
 	void PSRAD(X64Reg reg, int shift);
-	void RTDSC();
+	// SSE4: data type conversions
 	void PMOVSXBW(X64Reg dest, OpArg arg);
 	void PMOVSXBD(X64Reg dest, OpArg arg);
 	void PMOVSXBQ(X64Reg dest, OpArg arg);
 	void PMOVSXWD(X64Reg dest, OpArg arg);
 	void PMOVSXWQ(X64Reg dest, OpArg arg);
 	void PMOVSXDQ(X64Reg dest, OpArg arg);
 	void PMOVZXBW(X64Reg dest, OpArg arg);
 	void PMOVZXBD(X64Reg dest, OpArg arg);
 	void PMOVZXBQ(X64Reg dest, OpArg arg);
 	void PMOVZXWD(X64Reg dest, OpArg arg);
 	void PMOVZXWQ(X64Reg dest, OpArg arg);
 	void PMOVZXDQ(X64Reg dest, OpArg arg);
 	// SSE4: variable blend instructions (xmm0 implicit argument)
 	void PBLENDVB(X64Reg dest, OpArg arg);
 	void BLENDVPS(X64Reg dest, OpArg arg);
 	void BLENDVPD(X64Reg dest, OpArg arg);
 	// AVX
 	void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle);
 	void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	// VEX GPR instructions
 	void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
 	void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
 	void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
 	void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate);
 	void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
 	void BLSR(int bits, X64Reg regOp, OpArg arg);
 	void BLSMSK(int bits, X64Reg regOp, OpArg arg);
 	void BLSI(int bits, X64Reg regOp, OpArg arg);
 	void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
 	void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
 	void RDTSC();
 	// Utility functions
 	// The difference between this and CALL is that this aligns the stack
 	// where appropriate.
 	void ABI_CallFunction(const void *func);
 	template <typename T>
 	void ABI_CallFunction(T (*func)()) {
 		ABI_CallFunction((const void *)func);
@ -709,10 +840,10 @@ public:
 	void ABI_CallFunction(const u8 *func) {
 		ABI_CallFunction((const void *)func);
 	}
 	void ABI_CallFunctionC16(const void *func, u16 param1);
 	void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2);
 	// These only support u32 parameters, but that's enough for a lot of uses.
 	// These will destroy the 1 or 2 first "parameter regs".
 	void ABI_CallFunctionC(const void *func, u32 param1);
@ -730,8 +861,8 @@ public:
 	void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2);
 	// Pass a register as a parameter.
-	void ABI_CallFunctionR(const void *func, Gen::X64Reg reg1);
+	void ABI_CallFunctionR(const void *func, X64Reg reg1);
-	void ABI_CallFunctionRR(const void *func, Gen::X64Reg reg1, Gen::X64Reg reg2);
+	void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2);
 	template <typename Tr, typename T1>
 	void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) {
@ -789,8 +920,7 @@ public:
 	// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
 	void FreeCodeSpace();
-	bool IsInSpace(const u8 *ptr) const
+	bool IsInSpace(const u8 *ptr) const {
 	{
 		return ptr >= region && ptr < region + region_size;
 	}
@ -798,13 +928,11 @@ public:
 	// Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
 	void WriteProtect();
-	void ResetCodePtr()
+	void ResetCodePtr() {
 	{
 		SetCodePtr(region);
 	}
-	size_t GetSpaceLeft() const
+	size_t GetSpaceLeft() const {
 	{
 		return region_size - (GetCodePtr() - region);
 	}
@ -819,4 +947,4 @@ public:
 }  // namespace
-#endif // _DOLPHIN_INTEL_CODEGEN_
+#endif
--- a/Core/Debugger/DisassemblyManager.cpp
+++ b/Core/Debugger/DisassemblyManager.cpp
@ -772,7 +772,7 @@ bool DisassemblyMacro::disassemble(u32 address, DisassemblyLineInfo& dest, bool
 		dest.params = buffer;
 		dest.info.hasRelevantAddress = true;
-		dest.info.releventAddress = immediate;
+		dest.info.relevantAddress = immediate;
 		break;
 	case MACRO_MEMORYIMM:
 		dest.name = name;
@ -792,7 +792,7 @@ bool DisassemblyMacro::disassemble(u32 address, DisassemblyLineInfo& dest, bool
 		dest.info.dataSize = dataSize;
 		dest.info.hasRelevantAddress = true;
-		dest.info.releventAddress = immediate;
+		dest.info.relevantAddress = immediate;
 		break;
 	default:
 		return false;
--- a/Core/MIPS/MIPS.h
+++ b/Core/MIPS/MIPS.h
@ -26,8 +26,7 @@ class PointerWrap;
 typedef Memory::Opcode MIPSOpcode;
-enum MIPSGPReg
+enum MIPSGPReg {
 {
 	MIPS_REG_ZERO=0,
 	MIPS_REG_COMPILER_SCRATCH=1,
@ -65,17 +64,16 @@ enum MIPSGPReg
 	MIPS_REG_FP=30,
 	MIPS_REG_RA=31,
 	MIPS_REG_INVALID=-1,
 	// Not real regs, just for convenience/jit mapping.
 	MIPS_REG_HI = 32,
 	MIPS_REG_LO = 33,
 	MIPS_REG_FPCOND = 34,
 	MIPS_REG_VFPUCC = 35,
 	MIPS_REG_INVALID=-1,
 };
-enum
+enum {
 {
 	VFPU_CTRL_SPREFIX,
 	VFPU_CTRL_TPREFIX,
 	VFPU_CTRL_DPREFIX,
--- a/Core/MIPS/MIPSAnalyst.cpp
+++ b/Core/MIPS/MIPSAnalyst.cpp
@ -1204,19 +1204,19 @@ skip:
 			case 0x20:	// add
 			case 0x21:	// addu
 				info.hasRelevantAddress = true;
-				info.releventAddress = cpu->GetRegValue(0,MIPS_GET_RS(op))+cpu->GetRegValue(0,MIPS_GET_RT(op));
+				info.relevantAddress = cpu->GetRegValue(0,MIPS_GET_RS(op))+cpu->GetRegValue(0,MIPS_GET_RT(op));
 				break;
 			case 0x22:	// sub
 			case 0x23:	// subu
 				info.hasRelevantAddress = true;
-				info.releventAddress = cpu->GetRegValue(0,MIPS_GET_RS(op))-cpu->GetRegValue(0,MIPS_GET_RT(op));
+				info.relevantAddress = cpu->GetRegValue(0,MIPS_GET_RS(op))-cpu->GetRegValue(0,MIPS_GET_RT(op));
 				break;
 			}
 			break;
 		case 0x08:	// addi
 		case 0x09:	// adiu
 			info.hasRelevantAddress = true;
-			info.releventAddress = cpu->GetRegValue(0,MIPS_GET_RS(op))+((s16)(op & 0xFFFF));
+			info.relevantAddress = cpu->GetRegValue(0,MIPS_GET_RS(op))+((s16)(op & 0xFFFF));
 			break;
 		}
@ -1323,7 +1323,7 @@ skip:
 			info.dataAddress = rs + imm16;
 			info.hasRelevantAddress = true;
-			info.releventAddress = info.dataAddress;
+			info.relevantAddress = info.dataAddress;
 		}
 		return info;
--- a/Core/MIPS/MIPSAnalyst.h
+++ b/Core/MIPS/MIPSAnalyst.h
@ -154,7 +154,7 @@ namespace MIPSAnalyst
 		u32 dataAddress;
 		bool hasRelevantAddress;
-		u32 releventAddress;
+		u32 relevantAddress;
 	} MipsOpcodeInfo;
 	MipsOpcodeInfo GetOpcodeInfo(DebugInterface* cpu, u32 address);
--- a/Core/MIPS/MIPSInt.cpp
+++ b/Core/MIPS/MIPSInt.cpp
@ -74,29 +74,13 @@ int MIPS_SingleStep()
 #else
 	MIPSOpcode op = Memory::Read_Opcode_JIT(mipsr4k.pc);
 #endif
-	/*
+	if (mipsr4k.inDelaySlot) {
 	// Choke on VFPU
 	MIPSInfo info = MIPSGetInfo(op);
 	if (info & IS_VFPU)
 	{
 		if (!Core_IsStepping() && !GetAsyncKeyState(VK_LSHIFT))
 		{
 			Core_EnableStepping(true);
 			return;
 		}
 	}*/
 	if (mipsr4k.inDelaySlot)
 	{
 		MIPSInterpret(op);
-		if (mipsr4k.inDelaySlot)
+		if (mipsr4k.inDelaySlot) {
 		{
 			mipsr4k.pc = mipsr4k.nextPC;
 			mipsr4k.inDelaySlot = false;
 		}
-	}
+	} else {
 	else
 	{
 		MIPSInterpret(op);
 	}
 	return 1;
@ -872,14 +856,12 @@ namespace MIPSInt
 		int pos = _POS;
 		// Don't change $zr.
-		if (rt == 0)
+		if (rt == 0) {
 		{
 			PC += 4;
 			return;
 		}
-		switch (op & 0x3f)
+		switch (op & 0x3f) {
 		{
 		case 0x0: //ext
 			{
 				int size = _SIZE + 1;
@ -1025,10 +1007,10 @@ namespace MIPSInt
 		switch (op & 0x3f)
 		{
-		case 0: F(fd) = F(fs) + F(ft); break; //add
+		case 0: F(fd) = F(fs) + F(ft); break; // add.s
-		case 1: F(fd) = F(fs) - F(ft); break; //sub
+		case 1: F(fd) = F(fs) - F(ft); break; // sub.s
-		case 2: F(fd) = F(fs) * F(ft); break; //mul
+		case 2: F(fd) = F(fs) * F(ft); break; // mul.s
-		case 3: F(fd) = F(fs) / F(ft); break; //div
+		case 3: F(fd) = F(fs) / F(ft); break; // div.s
 		default:
 			_dbg_assert_msg_(CPU,0,"Trying to interpret FPU3Op instruction that can't be interpreted");
 			break;
--- a/Core/MIPS/MIPSTables.cpp
+++ b/Core/MIPS/MIPSTables.cpp
@ -31,8 +31,7 @@
 #include "JitCommon/JitCommon.h"
-enum MipsEncoding
+enum MipsEncoding {
 {
 	Imme,
 	Spec,
 	Spe2,
@ -66,8 +65,7 @@ enum MipsEncoding
 	Inval = -2,
 };
-struct MIPSInstruction
+struct MIPSInstruction {
 {
 	MipsEncoding altEncoding;
 	const char *name;
 	MIPSComp::MIPSCompileFunc compile;
@ -152,7 +150,7 @@ const MIPSInstruction tableImmediate[64] = // xxxxxx ..... ..... ...............
 	INVALID,
 	INVALID,
 	INSTR("swr", &Jit::Comp_ITypeMem, Dis_ITypeMem, Int_ITypeMem, IN_IMM16|IN_RS_ADDR|IN_RT|OUT_MEM|MEMTYPE_WORD),
-	INSTR("cache", &Jit::Comp_Cache, Dis_Cache, Int_Cache, IN_MEM|IN_IMM16|IN_RS_ADDR|IN_OTHER|OUT_OTHER),
+	INSTR("cache", &Jit::Comp_Cache, Dis_Cache, Int_Cache, IN_MEM|IN_IMM16|IN_RS_ADDR),
 	//48
 	INSTR("ll", &Jit::Comp_Generic, Dis_Generic, Int_StoreSync, IN_MEM|IN_IMM16|IN_RS_ADDR|OUT_RT|OUT_OTHER|MEMTYPE_WORD),
 	INSTR("lwc1", &Jit::Comp_FPULS, Dis_FPULS, Int_FPULS, IN_MEM|IN_IMM16|IN_RS_ADDR|OUT_OTHER|MEMTYPE_FLOAT),
@ -198,22 +196,22 @@ const MIPSInstruction tableSpecial[64] = // 000000 ..... ..... ..... ..... xxxxx
 	INSTR("sync",  &Jit::Comp_DoNothing, Dis_Generic, Int_Sync, 0),
 	//16
-	INSTR("mfhi",  &Jit::Comp_MulDivType, Dis_FromHiloTransfer, Int_MulDivType, OUT_RD|IN_OTHER),
+	INSTR("mfhi",  &Jit::Comp_MulDivType, Dis_FromHiloTransfer, Int_MulDivType, OUT_RD|IN_HI),
-	INSTR("mthi",  &Jit::Comp_MulDivType, Dis_ToHiloTransfer,   Int_MulDivType, IN_RS|OUT_OTHER),
+	INSTR("mthi",  &Jit::Comp_MulDivType, Dis_ToHiloTransfer,   Int_MulDivType, IN_RS|OUT_HI),
-	INSTR("mflo",  &Jit::Comp_MulDivType, Dis_FromHiloTransfer, Int_MulDivType, OUT_RD|IN_OTHER),
+	INSTR("mflo",  &Jit::Comp_MulDivType, Dis_FromHiloTransfer, Int_MulDivType, OUT_RD|IN_LO),
-	INSTR("mtlo",  &Jit::Comp_MulDivType, Dis_ToHiloTransfer,   Int_MulDivType, IN_RS|OUT_OTHER),
+	INSTR("mtlo",  &Jit::Comp_MulDivType, Dis_ToHiloTransfer,   Int_MulDivType, IN_RS|OUT_LO),
 	INVALID,
 	INVALID,
 	INSTR("clz",   &Jit::Comp_RType2, Dis_RType2, Int_RType2, OUT_RD|IN_RS),
 	INSTR("clo",   &Jit::Comp_RType2, Dis_RType2, Int_RType2, OUT_RD|IN_RS),
 	//24
-	INSTR("mult",  &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_OTHER),
+	INSTR("mult",  &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_HI|OUT_LO),
-	INSTR("multu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_OTHER),
+	INSTR("multu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_HI|OUT_LO),
-	INSTR("div",   &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_OTHER),
+	INSTR("div",   &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_HI|OUT_LO),
-	INSTR("divu",  &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_OTHER),
+	INSTR("divu",  &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_HI|OUT_LO),
-	INSTR("madd",  &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_OTHER|OUT_OTHER),
+	INSTR("madd",  &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_HI|IN_LO|OUT_HI|OUT_LO),
-	INSTR("maddu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_OTHER|OUT_OTHER),
+	INSTR("maddu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_HI|IN_LO|OUT_HI|OUT_LO),
 	INVALID,
 	INVALID,
@ -234,8 +232,8 @@ const MIPSInstruction tableSpecial[64] = // 000000 ..... ..... ..... ..... xxxxx
 	INSTR("sltu", &Jit::Comp_RType3, Dis_RType3, Int_RType3, IN_RS|IN_RT|OUT_RD),
 	INSTR("max",  &Jit::Comp_RType3, Dis_RType3, Int_RType3, IN_RS|IN_RT|OUT_RD),
 	INSTR("min",  &Jit::Comp_RType3, Dis_RType3, Int_RType3, IN_RS|IN_RT|OUT_RD),
-	INSTR("msub",  &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_OTHER|OUT_OTHER),
+	INSTR("msub",  &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_HI|IN_LO|OUT_HI|OUT_LO),
-	INSTR("msubu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_OTHER|OUT_OTHER),
+	INSTR("msubu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_HI|IN_LO|OUT_HI|OUT_LO),
 	//48
 	INSTR("tge",  &Jit::Comp_Generic, Dis_RType3, 0, 0),
@ -262,9 +260,9 @@ const MIPSInstruction tableSpecial2[64] = // 011100 ..... ..... ..... ..... xxxx
 	INVALID_X_8,
 	//32
 	INVALID, INVALID, INVALID, INVALID,
-	INSTR("mfic", &Jit::Comp_Generic, Dis_Generic, Int_Special2, 0),
+	INSTR("mfic", &Jit::Comp_Generic, Dis_Generic, Int_Special2, OUT_OTHER),
 	INVALID,
-	INSTR("mtic", &Jit::Comp_Generic, Dis_Generic, Int_Special2, 0),
+	INSTR("mtic", &Jit::Comp_Generic, Dis_Generic, Int_Special2, OUT_OTHER),
 	INVALID,
 	//40
 	INVALID_X_8,
@ -369,11 +367,11 @@ const MIPSInstruction tableCop2BC2[4] = // 010010 01000 ...xx ................
 const MIPSInstruction tableCop0[32] = // 010000 xxxxx ..... ................
 {
-	INSTR("mfc0", &Jit::Comp_Generic, Dis_Generic, 0, OUT_RT),
+	INSTR("mfc0", &Jit::Comp_Generic, Dis_Generic, 0, OUT_RT),  // unused
 	INVALID,
 	INVALID,
 	INVALID,
-	INSTR("mtc0", &Jit::Comp_Generic, Dis_Generic, 0, IN_RT),
+	INSTR("mtc0", &Jit::Comp_Generic, Dis_Generic, 0, IN_RT),  // unused
 	INVALID,
 	INVALID,
 	INVALID,
@ -423,11 +421,11 @@ const MIPSInstruction tableCop0CO[64] = // 010000 1.... ..... ..... ..... xxxxxx
 const MIPSInstruction tableCop1[32] = // 010001 xxxxx ..... ..... ...........
 {
-	INSTR("mfc1", &Jit::Comp_mxc1, Dis_mxc1, Int_mxc1, IN_OTHER|OUT_RT),
+	INSTR("mfc1", &Jit::Comp_mxc1, Dis_mxc1, Int_mxc1, IN_FS|OUT_RT),
 	INVALID,
 	INSTR("cfc1", &Jit::Comp_mxc1, Dis_mxc1, Int_mxc1, IN_OTHER|IN_FPUFLAG|OUT_RT),
 	INVALID,
-	INSTR("mtc1", &Jit::Comp_mxc1, Dis_mxc1, Int_mxc1, IN_RT|OUT_OTHER),
+	INSTR("mtc1", &Jit::Comp_mxc1, Dis_mxc1, Int_mxc1, IN_RT|OUT_FS),
 	INVALID,
 	INSTR("ctc1", &Jit::Comp_mxc1, Dis_mxc1, Int_mxc1, IN_RT|OUT_FPUFLAG|OUT_OTHER),
 	INVALID,
@ -455,20 +453,20 @@ const MIPSInstruction tableCop1BC[32] = // 010001 01000 xxxxx ................
 const MIPSInstruction tableCop1S[64] = // 010001 10000 ..... ..... ..... xxxxxx
 {
-	INSTR("add.s",  &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, IN_OTHER|OUT_OTHER),
+	INSTR("add.s",  &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, OUT_FD|IN_FS|IN_FT),
-	INSTR("sub.s",  &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, IN_OTHER|OUT_OTHER),
+	INSTR("sub.s",  &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, OUT_FD|IN_FS|IN_FT),
-	INSTR("mul.s",  &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, IN_OTHER|OUT_OTHER),
+	INSTR("mul.s",  &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, OUT_FD|IN_FS|IN_FT),
-	INSTR("div.s",  &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, IN_OTHER|OUT_OTHER),
+	INSTR("div.s",  &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, OUT_FD|IN_FS|IN_FT),
-	INSTR("sqrt.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER),
+	INSTR("sqrt.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS),
-	INSTR("abs.s",  &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER),
+	INSTR("abs.s",  &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS),
-	INSTR("mov.s",  &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER),
+	INSTR("mov.s",  &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS),
-	INSTR("neg.s",  &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER),
+	INSTR("neg.s",  &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS),
 	//8
 	INVALID, INVALID, INVALID, INVALID,
-	INSTR("round.w.s",  &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER),
+	INSTR("round.w.s",  &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS),
-	INSTR("trunc.w.s",  &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER),
+	INSTR("trunc.w.s",  &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS),
-	INSTR("ceil.w.s",   &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER),
+	INSTR("ceil.w.s",   &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS),
-	INSTR("floor.w.s",  &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER),
+	INSTR("floor.w.s",  &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS),
 	//16
 	INVALID_X_8,
 	//24
@ -476,29 +474,29 @@ const MIPSInstruction tableCop1S[64] = // 010001 10000 ..... ..... ..... xxxxxx
 	//32
 	INVALID, INVALID, INVALID, INVALID,
 	//36
-	INSTR("cvt.w.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER),
+	INSTR("cvt.w.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS),
 	INVALID,
 	INSTR("dis.int", &Jit::Comp_Generic, Dis_Generic, Int_Interrupt, 0),
 	INVALID,
 	//40
 	INVALID_X_8,
 	//48 - 010001 10000 ..... ..... ..... 11xxxx
-	INSTR("c.f",   &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.f",   &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, OUT_FPUFLAG),
-	INSTR("c.un",  &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.un",  &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
-	INSTR("c.eq",  &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.eq",  &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
-	INSTR("c.ueq", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.ueq", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
-	INSTR("c.olt", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.olt", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
-	INSTR("c.ult", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.ult", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
-	INSTR("c.ole", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.ole", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
-	INSTR("c.ule", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.ule", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
-	INSTR("c.sf",  &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.sf",  &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, OUT_FPUFLAG),
-	INSTR("c.ngle",&Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.ngle",&Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
-	INSTR("c.seq", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.seq", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
-	INSTR("c.ngl", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.ngl", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
-	INSTR("c.lt",  &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.lt",  &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
-	INSTR("c.nge", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.nge", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
-	INSTR("c.le",  &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.le",  &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
-	INSTR("c.ngt", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG),
+	INSTR("c.ngt", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG),
 };
 const MIPSInstruction tableCop1W[64] = // 010001 10100 ..... ..... ..... xxxxxx
@ -511,7 +509,7 @@ const MIPSInstruction tableCop1W[64] = // 010001 10100 ..... ..... ..... xxxxxx
 	//24
 	INVALID_X_8,
 	//32
-	INSTR("cvt.s.w", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER),
+	INSTR("cvt.s.w", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS),
 	INVALID, INVALID, INVALID,
 	//36
 	INVALID,
@ -890,8 +888,6 @@ const MIPSInstruction *mipsTables[NumEncodings] =
 	0,
 };
 //arm encoding table
 //const MIPSInstruction mipsinstructions[] = 
 //{
--- a/Core/MIPS/MIPSTables.h
+++ b/Core/MIPS/MIPSTables.h
@ -25,14 +25,14 @@ struct MIPSInfo {
 		value = 0;
 	}
-	explicit MIPSInfo(u32 v) : value(v) {
+	explicit MIPSInfo(u64 v) : value(v) {
 	}
-	u32 operator & (const u32 &arg) const {
+	// Masks the flag word with arg and returns the surviving bits.
+	// arg must be 64 bits wide: flags such as IN_FS or OUT_FD sit at bit 32
+	// and above, and a u32 parameter would truncate them to zero, making
+	// every test against those flags fail.
+	u64 operator & (const u64 &arg) const {
 		return value & arg;
 	}
-	u32 value;
+	u64 value;
 };
 #define CONDTYPE_MASK   0x00000007
@ -49,44 +49,59 @@ struct MIPSInfo {
 // as long as the other flags are checked,
 // there is no way to misinterpret these
 // as CONDTYPE_X
-#define MEMTYPE_MASK    0x00000007
+#define MEMTYPE_MASK    0x00000007ULL
-#define MEMTYPE_BYTE    0x00000001
+#define MEMTYPE_BYTE    0x00000001ULL
-#define MEMTYPE_HWORD   0x00000002
+#define MEMTYPE_HWORD   0x00000002ULL
-#define MEMTYPE_WORD    0x00000003
+#define MEMTYPE_WORD    0x00000003ULL
-#define MEMTYPE_FLOAT   0x00000004
+#define MEMTYPE_FLOAT   0x00000004ULL
-#define MEMTYPE_VQUAD   0x00000005
+#define MEMTYPE_VQUAD   0x00000005ULL
-#define IS_CONDMOVE     0x00000008
+#define IS_CONDMOVE     0x00000008ULL
-#define DELAYSLOT       0x00000010
+#define DELAYSLOT       0x00000010ULL
-#define BAD_INSTRUCTION 0x00000020
+#define BAD_INSTRUCTION 0x00000020ULL
-#define LIKELY          0x00000040
+#define LIKELY          0x00000040ULL
-#define IS_CONDBRANCH   0x00000080
+#define IS_CONDBRANCH   0x00000080ULL
-#define IS_JUMP         0x00000100
+#define IS_JUMP         0x00000100ULL
-#define IN_RS           0x00000200
+#define IN_RS           0x00000200ULL
-#define IN_RS_ADDR      (0x00000400 | IN_RS)
+#define IN_RS_ADDR      (0x00000400ULL | IN_RS)
-#define IN_RS_SHIFT     (0x00000800 | IN_RS)
+#define IN_RS_SHIFT     (0x00000800ULL | IN_RS)
-#define IN_RT           0x00001000
+#define IN_RT           0x00001000ULL
-#define IN_SA           0x00002000
+#define IN_SA           0x00002000ULL
-#define IN_IMM16        0x00004000
+#define IN_IMM16        0x00004000ULL
-#define IN_IMM26        0x00008000
+#define IN_IMM26        0x00008000ULL
-#define IN_MEM          0x00010000
+#define IN_MEM          0x00010000ULL
-#define IN_OTHER        0x00020000
+#define IN_OTHER        0x00020000ULL
-#define IN_FPUFLAG      0x00040000
+#define IN_FPUFLAG      0x00040000ULL
-#define IN_VFPU_CC      0x00080000
+#define IN_VFPU_CC      0x00080000ULL
-#define OUT_RT          0x00100000
+#define OUT_RT          0x00100000ULL
-#define OUT_RD          0x00200000
+#define OUT_RD          0x00200000ULL
-#define OUT_RA          0x00400000
+#define OUT_RA          0x00400000ULL
-#define OUT_MEM         0x00800000
+#define OUT_MEM         0x00800000ULL
-#define OUT_OTHER       0x01000000
+#define OUT_OTHER       0x01000000ULL
-#define OUT_FPUFLAG     0x02000000
+#define OUT_FPUFLAG     0x02000000ULL
-#define OUT_VFPU_CC     0x04000000
+#define OUT_VFPU_CC     0x04000000ULL
-#define OUT_EAT_PREFIX  0x08000000
+#define OUT_EAT_PREFIX  0x08000000ULL
-#define VFPU_NO_PREFIX  0x10000000
+#define VFPU_NO_PREFIX  0x10000000ULL
-#define IS_VFPU         0x20000000
+#define IS_VFPU         0x20000000ULL
-#define IS_FPU          0x40000000
+#define IS_FPU          0x40000000ULL
 // Flags occupying bit 32 and above; they only fit in the 64-bit MIPSInfo::value.
 // In the instruction tables, IN_FS/IN_FT/OUT_FD/OUT_FS annotate the FPU entries
 // (e.g. add.s is OUT_FD|IN_FS|IN_FT, mtc1 is IN_RT|OUT_FS) and
 // IN_LO/IN_HI/OUT_LO/OUT_HI annotate msub/msubu.
 // NOTE(review): IN_VS/IN_VT/OUT_VD are presumably VFPU operands - confirm against
 // the VFPU tables. OUT_VD is at bit 44; bits 42-43 are left unused here.
 #define IN_FS           0x000100000000ULL
 #define IN_FT           0x000200000000ULL
 #define IN_LO           0x000400000000ULL
 #define IN_HI           0x000800000000ULL
 #define OUT_FD          0x001000000000ULL
 #define OUT_FS          0x002000000000ULL
 #define OUT_LO          0x004000000000ULL
 #define OUT_HI          0x008000000000ULL
 #define IN_VS           0x010000000000ULL
 #define IN_VT           0x020000000000ULL
 #define OUT_VD          0x100000000000ULL
 #ifndef CDECL
 #define CDECL
--- a/Windows/Debugger/CtrlDisAsmView.cpp
+++ b/Windows/Debugger/CtrlDisAsmView.cpp
@ -643,7 +643,7 @@ void CtrlDisAsmView::followBranch()
 		} else if (line.info.hasRelevantAddress)
 		{
 			// well, not  exactly a branch, but we can do something anyway
-			SendMessage(GetParent(wnd),WM_DEB_GOTOHEXEDIT,line.info.releventAddress,0);
+			SendMessage(GetParent(wnd),WM_DEB_GOTOHEXEDIT,line.info.relevantAddress,0);
 			SetFocus(wnd);
 		}
 	} else if (line.type == DISTYPE_DATA)