diff --git a/Core/MIPS/ARM/ArmCompVFPU.cpp b/Core/MIPS/ARM/ArmCompVFPU.cpp
index 34bfa08029..ab9a76e330 100644
--- a/Core/MIPS/ARM/ArmCompVFPU.cpp
+++ b/Core/MIPS/ARM/ArmCompVFPU.cpp
@@ -40,6 +40,11 @@ namespace MIPSComp
 		DISABLE;
 	}
 
+	void Jit::Comp_VV2Op(u32 op)
+	{
+		DISABLE;
+	}
+
 	void Jit::Comp_Mftv(u32 op)
 	{
 		DISABLE;
diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h
index 1cb80e2d4f..0acda3f242 100644
--- a/Core/MIPS/ARM/ArmJit.h
+++ b/Core/MIPS/ARM/ArmJit.h
@@ -112,6 +112,7 @@ public:
 	void Comp_VPFX(u32 op);
 	void Comp_VDot(u32 op);
 	void Comp_VecDo3(u32 op);
+	void Comp_VV2Op(u32 op);
 	void Comp_Mftv(u32 op);
 	void Comp_Vmtvc(u32 op);
 
diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp
index 90b3fbface..f4273a10be 100644
--- a/Core/MIPS/MIPSIntVFPU.cpp
+++ b/Core/MIPS/MIPSIntVFPU.cpp
@@ -492,8 +492,8 @@ namespace MIPSInt
 			case 17: d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
 			case 18: d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
 			case 19: d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos
-			case 20: d[i] = powf(2.0f, s[i]); break;
-			case 21: d[i] = logf(s[i])/log(2.0f); break;
+			case 20: d[i] = powf(2.0f, s[i]); break; //vexp2
+			case 21: d[i] = logf(s[i])/log(2.0f); break; //vlog2
 			case 22: d[i] = sqrtf(s[i]); break; //vsqrt
 			case 23: d[i] = asinf(s[i] * (float)M_2_PI); break; //vasin
 			case 24: d[i] = -1.0f / s[i]; break; // vnrcp
diff --git a/Core/MIPS/MIPSTables.cpp b/Core/MIPS/MIPSTables.cpp
index 821fd72c90..c039f37473 100644
--- a/Core/MIPS/MIPSTables.cpp
+++ b/Core/MIPS/MIPSTables.cpp
@@ -582,31 +582,31 @@ const MIPSInstruction tableVFPU7[32] = // 110100 00000 10111 0000000000000000
 const MIPSInstruction tableVFPU4[32] = //110100 00000 xxxxx
 {
-	INSTR("vmov", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vabs", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vneg", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vmov", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vabs", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vneg", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
 	INSTR("vidt", &Jit::Comp_Generic, Dis_VectorSet1, Int_Vidt,IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vsat0", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vsat1", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vsat0", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vsat1", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
 	INSTR("vzero", &Jit::Comp_Generic, Dis_VectorSet1, Int_VVectorInit, IS_VFPU|OUT_EAT_PREFIX),
 	INSTR("vone", &Jit::Comp_Generic, Dis_VectorSet1, Int_VVectorInit, IS_VFPU|OUT_EAT_PREFIX),
 	//8
 	{-2},{-2},{-2},{-2},{-2},{-2},{-2},{-2},
 	//16
-	INSTR("vrcp", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vrsq", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vsin", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vcos", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vexp2", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vlog2", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vsqrt", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
-	INSTR("vasin", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vrcp", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vrsq", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vsin", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vcos", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vexp2", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vlog2", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vsqrt", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vasin", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
 	//24
-	INSTR("vnrcp", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
-	{-2},
-	INSTR("vnsin", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
-	{-2},
-	INSTR("vrexp2",&Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vnrcp", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
+	{-2},
+	INSTR("vnsin", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
+	{-2},
+	INSTR("vrexp2",&Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
 	{-2},{-2},{-2},
 	//32
 };
diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp
index 3343bddac0..a022cb9a8f 100644
--- a/Core/MIPS/x86/CompVFPU.cpp
+++ b/Core/MIPS/x86/CompVFPU.cpp
@@ -28,9 +28,9 @@
 // All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
 // Currently known non working ones should have DISABLE.
 
-// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
+// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocks(); Comp_Generic(op); return; }
 #define CONDITIONAL_DISABLE ;
-#define DISABLE { Comp_Generic(op); return; }
+#define DISABLE { fpr.ReleaseSpillLocks(); Comp_Generic(op); return; }
 
 #define _RS ((op>>21) & 0x1F)
@@ -167,7 +167,7 @@ void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
 
 // Vector regs can overlap in all sorts of swizzled ways.
 // This does allow a single overlap in sregs[i].
-bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn, u8 tregs[])
+bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL)
 {
 	for (int i = 0; i < sn; ++i)
 	{
@@ -184,7 +184,7 @@ bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn, u8 tregs[
 	return true;
 }
 
-bool IsOverlapSafe(int dreg, int di, int sn, u8 sregs[], int tn, u8 tregs[])
+bool IsOverlapSafe(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL)
 {
 	return IsOverlapSafeAllowS(dreg, di, sn, sregs, tn, tregs) && sregs[di] != dreg;
 }
@@ -339,23 +339,18 @@ void Jit::Comp_SVQ(u32 op)
 
 void Jit::Comp_VDot(u32 op) {
 	CONDITIONAL_DISABLE;
-	if (js.HasUnknownPrefix()) {
-		Comp_Generic(op);
-		return;
-	}
+	if (js.HasUnknownPrefix())
+		DISABLE;
 
-	int vd = _VD;
-	int vs = _VS;
-	int vt = _VT;
 	VectorSize sz = GetVecSize(op);
+	int n = GetNumVectorElements(sz);
 
 	// TODO: Force read one of them into regs? probably not.
 	u8 sregs[4], tregs[4], dregs[1];
-	GetVectorRegsPrefixS(sregs, sz, vs);
-	GetVectorRegsPrefixT(tregs, sz, vt);
-	GetVectorRegsPrefixD(dregs, V_Single, vd);
+	GetVectorRegsPrefixS(sregs, sz, _VS);
+	GetVectorRegsPrefixT(tregs, sz, _VT);
+	GetVectorRegsPrefixD(dregs, V_Single, _VD);
 
-	int n = GetNumVectorElements(sz);
 	X64Reg tempxreg = XMM0;
 	if (IsOverlapSafe(dregs[0], 0, n, sregs, n, tregs))
 	{
@@ -387,20 +382,8 @@ void Jit::Comp_VDot(u32 op) {
 
 void Jit::Comp_VecDo3(u32 op) {
 	CONDITIONAL_DISABLE;
-	if (js.HasUnknownPrefix()) {
-		Comp_Generic(op);
-		return;
-	}
-
-	int vd = _VD;
-	int vs = _VS;
-	int vt = _VT;
-
-	VectorSize sz = GetVecSize(op);
-
-	u8 sregs[4], tregs[4], dregs[4];
-	GetVectorRegsPrefixS(sregs, sz, vs);
-	GetVectorRegsPrefixT(tregs, sz, vt);
-	GetVectorRegsPrefixD(dregs, sz, vd);
+	if (js.HasUnknownPrefix())
+		DISABLE;
 
 	void (XEmitter::*xmmop)(X64Reg, OpArg) = NULL;
 	switch (op >> 26)
@@ -430,14 +413,16 @@ void Jit::Comp_VecDo3(u32 op) {
 	}
 
 	if (xmmop == NULL)
-	{
-		fpr.ReleaseSpillLocks();
-		Comp_Generic(op);
-		return;
-	}
+		DISABLE;
 
+	VectorSize sz = GetVecSize(op);
 	int n = GetNumVectorElements(sz);
 
+	u8 sregs[4], tregs[4], dregs[4];
+	GetVectorRegsPrefixS(sregs, sz, _VS);
+	GetVectorRegsPrefixT(tregs, sz, _VT);
+	GetVectorRegsPrefixD(dregs, sz, _VD);
+
 	X64Reg tempxregs[4];
 	for (int i = 0; i < n; ++i)
 	{
@@ -480,6 +465,123 @@ void Jit::Comp_VecDo3(u32 op) {
 	fpr.ReleaseSpillLocks();
 }
 
+void Jit::Comp_VV2Op(u32 op) {
+	CONDITIONAL_DISABLE;
+
+	if (js.HasUnknownPrefix())
+		DISABLE;
+
+	VectorSize sz = GetVecSize(op);
+	int n = GetNumVectorElements(sz);
+
+	u8 sregs[4], dregs[4];
+	GetVectorRegsPrefixS(sregs, sz, _VS);
+	GetVectorRegsPrefixD(dregs, sz, _VD);
+
+	X64Reg tempxregs[4];
+	for (int i = 0; i < n; ++i)
+	{
+		if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs))
+		{
+			int reg = fpr.GetTempV();
+			fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
+			fpr.SpillLockV(reg);
+			tempxregs[i] = fpr.VX(reg);
+		}
+		else
+		{
+			fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);
+			fpr.SpillLockV(dregs[i]);
+			tempxregs[i] = fpr.VX(dregs[i]);
+		}
+	}
+
+	// Warning: sregs[i] and tempxregs[i] may be the same reg.
+	// Helps for vmov, hurts for vrcp, etc.
+	for (int i = 0; i < n; ++i)
+	{
+		switch ((op >> 16) & 0x1f)
+		{
+		case 0: // d[i] = s[i]; break; //vmov
+			// Probably for swizzle.
+			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
+				MOVSS(tempxregs[i], fpr.V(sregs[i]));
+			break;
+		case 1: // d[i] = fabsf(s[i]); break; //vabs
+			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
+				MOVSS(tempxregs[i], fpr.V(sregs[i]));
+			ANDPS(tempxregs[i], M((void *)&noSignMask));
+			break;
+		case 2: // d[i] = -s[i]; break; //vneg
+			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
+				MOVSS(tempxregs[i], fpr.V(sregs[i]));
+			XORPS(tempxregs[i], M((void *)&signBitLower));
+			break;
+		case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat0
+			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
+				MOVSS(tempxregs[i], fpr.V(sregs[i]));
+			MAXSS(tempxregs[i], M((void *)&zero));
+			MINSS(tempxregs[i], M((void *)&one));
+			break;
+		case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat1
+			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
+				MOVSS(tempxregs[i], fpr.V(sregs[i]));
+			MAXSS(tempxregs[i], M((void *)&minus_one));
+			MINSS(tempxregs[i], M((void *)&one));
+			break;
+		case 16: // d[i] = 1.0f / s[i]; break; //vrcp
+			MOVSS(XMM0, M((void *)&one));
+			DIVSS(XMM0, fpr.V(sregs[i]));
+			MOVSS(tempxregs[i], R(XMM0));
+			break;
+		case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
+			SQRTSS(XMM0, fpr.V(sregs[i]));
+			MOVSS(tempxregs[i], M((void *)&one));
+			DIVSS(tempxregs[i], R(XMM0));
+			break;
+		case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
+			DISABLE;
+			break;
+		case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos
+			DISABLE;
+			break;
+		case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2
+			DISABLE;
+			break;
+		case 21: // d[i] = logf(s[i])/log(2.0f); break; //vlog2
+			DISABLE;
+			break;
+		case 22: // d[i] = sqrtf(s[i]); break; //vsqrt
+			SQRTSS(tempxregs[i], fpr.V(sregs[i]));
+			break;
+		case 23: // d[i] = asinf(s[i] * (float)M_2_PI); break; //vasin
+			DISABLE;
+			break;
+		case 24: // d[i] = -1.0f / s[i]; break; // vnrcp
+			MOVSS(XMM0, M((void *)&one));
+			DIVSS(XMM0, fpr.V(sregs[i]));
+			XORPS(XMM0, M((void *)&signBitLower));
+			MOVSS(tempxregs[i], R(XMM0));
+			break;
+		case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin
+			DISABLE;
+			break;
+		case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2
+			DISABLE;
+			break;
+		}
+	}
+
+	for (int i = 0; i < n; ++i)
+	{
+		if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))
+			MOVSS(fpr.V(dregs[i]), tempxregs[i]);
+	}
+
+	ApplyPrefixD(dregs, sz);
+
+	fpr.ReleaseSpillLocks();
+}
+
 void Jit::Comp_Mftv(u32 op)
 {
 	CONDITIONAL_DISABLE;
diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h
index cf86376393..b4803e202b 100644
--- a/Core/MIPS/x86/Jit.h
+++ b/Core/MIPS/x86/Jit.h
@@ -187,6 +187,7 @@ public:
 	void Comp_VPFX(u32 op);
 	void Comp_VDot(u32 op);
 	void Comp_VecDo3(u32 op);
+	void Comp_VV2Op(u32 op);
 	void Comp_Mftv(u32 op);
 	void Comp_Vmtvc(u32 op);
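Note (not part of the patch): the vabs/vneg/vsat0 cases above rely on constant masks and scalar min/max rather than libm calls. Below is a minimal standalone C++ sketch of the same idea; the mask values and helper names are assumptions for illustration only and are not taken from the PPSSPP sources.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Scalar equivalents of what the emitted ANDPS/XORPS/MAXSS/MINSS do per lane.
static const uint32_t kNoSignMask = 0x7FFFFFFF;  // assumed analogue of noSignMask[0]
static const uint32_t kSignBit    = 0x80000000;  // assumed analogue of signBitLower[0]

static float AbsViaMask(float x) {
	uint32_t bits;
	memcpy(&bits, &x, sizeof(bits));
	bits &= kNoSignMask;              // clear the IEEE-754 sign bit (vabs)
	memcpy(&x, &bits, sizeof(x));
	return x;
}

static float NegViaMask(float x) {
	uint32_t bits;
	memcpy(&bits, &x, sizeof(bits));
	bits ^= kSignBit;                 // flip the sign bit (vneg)
	memcpy(&x, &bits, sizeof(x));
	return x;
}

static float Sat0(float x) {
	// MAXSS with 0.0f then MINSS with 1.0f, i.e. clamp to [0, 1] (vsat0).
	if (x < 0.0f) x = 0.0f;
	if (x > 1.0f) x = 1.0f;
	return x;
}

int main() {
	printf("vabs(-2.5)  = %f\n", AbsViaMask(-2.5f));
	printf("vneg(3.0)   = %f\n", NegViaMask(3.0f));
	printf("vsat0(1.75) = %f\n", Sat0(1.75f));
	return 0;
}

The appeal of the bit-mask form is that clearing or flipping the sign bit never touches the exponent or mantissa, which is exactly what ANDPS/XORPS give the JIT in a single instruction per lane.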
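A second note on the overlap handling: IsOverlapSafeAllowS gains default arguments so Comp_VV2Op can check only its source registers, and the temp-register path exists because a destination lane may alias a source lane that has not been read yet. The sketch below reproduces that hazard with plain arrays; the function names are hypothetical and exist only for this example.

#include <cstdio>

// "Negate a 2-lane vector" where the destination reads the source in reversed
// order, similar to what a swizzled, overlapping VFPU register pair can do.
static void NegateInPlaceUnsafe(float *src, float *dst) {
	for (int i = 0; i < 2; ++i)
		dst[i] = -src[1 - i];   // may read a lane that dst already clobbered
}

static void NegateViaTemp(const float *src, float *dst) {
	float tmp[2];               // the analogue of tempxregs[] in Comp_VV2Op
	for (int i = 0; i < 2; ++i)
		tmp[i] = -src[1 - i];
	for (int i = 0; i < 2; ++i)
		dst[i] = tmp[i];        // write back only after all reads are done
}

int main() {
	float a[2] = {1.0f, 2.0f};
	float b[2] = {1.0f, 2.0f};
	NegateInPlaceUnsafe(a, a);  // aliased: a becomes {-2, 2}, not the intended {-2, -1}
	NegateViaTemp(b, b);        // temp copy: b becomes {-2, -1}
	printf("unsafe: %g %g\n", a[0], a[1]);
	printf("safe:   %g %g\n", b[0], b[1]);
	return 0;
}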