diff --git a/Core/MIPS/ARM/ArmCompVFPU.cpp b/Core/MIPS/ARM/ArmCompVFPU.cpp index 432874e9cc..34bfa08029 100644 --- a/Core/MIPS/ARM/ArmCompVFPU.cpp +++ b/Core/MIPS/ARM/ArmCompVFPU.cpp @@ -35,6 +35,11 @@ namespace MIPSComp DISABLE; } + void Jit::Comp_VecDo3(u32 op) + { + DISABLE; + } + void Jit::Comp_Mftv(u32 op) { DISABLE; diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h index 26988388b9..730a6938ce 100644 --- a/Core/MIPS/ARM/ArmJit.h +++ b/Core/MIPS/ARM/ArmJit.h @@ -110,6 +110,7 @@ public: void Comp_SVQ(u32 op); void Comp_VPFX(u32 op); void Comp_VDot(u32 op); + void Comp_VecDo3(u32 op); void Comp_Mftv(u32 op); void Comp_Vmtvc(u32 op); diff --git a/Core/MIPS/MIPSTables.cpp b/Core/MIPS/MIPSTables.cpp index d38882a0e3..ea58c8de4c 100644 --- a/Core/MIPS/MIPSTables.cpp +++ b/Core/MIPS/MIPSTables.cpp @@ -363,11 +363,11 @@ const MIPSInstruction tableCop2[32] = INSTR("mfc2", &Jit::Comp_Generic, Dis_Generic, 0, OUT_RT), {-2}, INSTR("cfc2", &Jit::Comp_Generic, Dis_Generic, 0, 0), - INSTR("mfv", &Jit::Comp_Mftv, Dis_Mftv, Int_Mftv, 0), + INSTR("mfv", &Jit::Comp_Mftv, Dis_Mftv, Int_Mftv, IS_VFPU), INSTR("mtc2", &Jit::Comp_Generic, Dis_Generic, 0, IN_RT), {-2}, INSTR("ctc2", &Jit::Comp_Generic, Dis_Generic, 0, 0), - INSTR("mtv", &Jit::Comp_Mftv, Dis_Mftv, Int_Mftv, 0), + INSTR("mtv", &Jit::Comp_Mftv, Dis_Mftv, Int_Mftv, IS_VFPU), {Cop2BC2}, INSTR("??", &Jit::Comp_Generic, Dis_Generic, 0, 0), @@ -478,17 +478,17 @@ const MIPSInstruction tableCop1BC[32] = const MIPSInstruction tableVFPU0[8] = { - INSTR("vadd",&Jit::Comp_Generic, Dis_VectorSet3, Int_VecDo3, IS_VFPU), - INSTR("vsub",&Jit::Comp_Generic, Dis_VectorSet3, Int_VecDo3, IS_VFPU), + INSTR("vadd",&Jit::Comp_VecDo3, Dis_VectorSet3, Int_VecDo3, IS_VFPU), + INSTR("vsub",&Jit::Comp_VecDo3, Dis_VectorSet3, Int_VecDo3, IS_VFPU), INSTR("vsbn",&Jit::Comp_Generic, Dis_VectorSet3, Int_Vsbn, IS_VFPU), {-2}, {-2}, {-2}, {-2}, - INSTR("vdiv",&Jit::Comp_Generic, Dis_VectorSet3, Int_VecDo3, IS_VFPU), + 
INSTR("vdiv",&Jit::Comp_VecDo3, Dis_VectorSet3, Int_VecDo3, IS_VFPU), }; const MIPSInstruction tableVFPU1[8] = { - INSTR("vmul",&Jit::Comp_Generic, Dis_VectorSet3, Int_VecDo3, IS_VFPU), + INSTR("vmul",&Jit::Comp_VecDo3, Dis_VectorSet3, Int_VecDo3, IS_VFPU), INSTR("vdot",&Jit::Comp_VDot, Dis_VectorDot, Int_VDot, IS_VFPU), INSTR("vscl",&Jit::Comp_Generic, Dis_VScl, Int_VScl, IS_VFPU), {-2}, diff --git a/Core/MIPS/x86/CompALU.cpp b/Core/MIPS/x86/CompALU.cpp index 1f80c714d7..e7da034134 100644 --- a/Core/MIPS/x86/CompALU.cpp +++ b/Core/MIPS/x86/CompALU.cpp @@ -32,9 +32,9 @@ using namespace MIPSAnalyst; // All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. // Currently known non working ones should have DISABLE. -//#define CONDITIONAL_DISABLE Comp_Generic(op); return; +//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; } #define CONDITIONAL_DISABLE ; -#define DISABLE Comp_Generic(op); return; +#define DISABLE { Comp_Generic(op); return; } namespace MIPSComp { diff --git a/Core/MIPS/x86/CompFPU.cpp b/Core/MIPS/x86/CompFPU.cpp index 1e6b01d465..f5ac3226ea 100644 --- a/Core/MIPS/x86/CompFPU.cpp +++ b/Core/MIPS/x86/CompFPU.cpp @@ -34,9 +34,9 @@ // All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. // Currently known non working ones should have DISABLE. -// #define CONDITIONAL_DISABLE Comp_Generic(op); return; +// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } #define CONDITIONAL_DISABLE ; -#define DISABLE Comp_Generic(op); return; +#define DISABLE { Comp_Generic(op); return; } namespace MIPSComp { diff --git a/Core/MIPS/x86/CompLoadStore.cpp b/Core/MIPS/x86/CompLoadStore.cpp index eb37eb9666..f0dc88e1d2 100644 --- a/Core/MIPS/x86/CompLoadStore.cpp +++ b/Core/MIPS/x86/CompLoadStore.cpp @@ -35,9 +35,9 @@ // All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. // Currently known non working ones should have DISABLE. 
-// #define CONDITIONAL_DISABLE Comp_Generic(op); return; +// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } #define CONDITIONAL_DISABLE ; -#define DISABLE Comp_Generic(op); return; +#define DISABLE { Comp_Generic(op); return; } namespace MIPSComp { diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp index c4b0e28072..e693afbba1 100644 --- a/Core/MIPS/x86/CompVFPU.cpp +++ b/Core/MIPS/x86/CompVFPU.cpp @@ -28,9 +28,9 @@ // All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. // Currently known non working ones should have DISABLE. -// #define CONDITIONAL_DISABLE Comp_Generic(op); return; +// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; } #define CONDITIONAL_DISABLE ; -#define DISABLE Comp_Generic(op); return; +#define DISABLE { Comp_Generic(op); return; } #define _RS ((op>>21) & 0x1F) @@ -62,19 +62,17 @@ void Jit::Comp_VPFX(u32 op) switch (regnum) { case 0: // S js.prefixS = data; - js.prefixSKnown = true; + js.prefixSFlag = JitState::PREFIX_KNOWN_DIRTY; break; case 1: // T js.prefixT = data; - js.prefixTKnown = true; + js.prefixTFlag = JitState::PREFIX_KNOWN_DIRTY; break; case 2: // D js.prefixD = data; - js.prefixDKnown = true; + js.prefixDFlag = JitState::PREFIX_KNOWN_DIRTY; break; } - // TODO: Defer this to end of block - MOV(32, M((void *)&mips_->vfpuCtrl[VFPU_CTRL_SPREFIX + regnum]), Imm32(data)); } @@ -114,7 +112,7 @@ void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) { } void Jit::ApplyPrefixD(const u8 *vregs, u32 prefix, VectorSize sz, bool onlyWriteMask) { - _assert_(js.prefixDKnown); + _assert_(js.prefixDFlag & JitState::PREFIX_KNOWN); if (!prefix) return; int n = GetNumVectorElements(sz); @@ -202,8 +200,7 @@ void Jit::Comp_SV(u32 op) { break; default: - _dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted"); - break; + DISABLE; } } @@ -288,10 +285,15 @@ void Jit::Comp_SVQ(u32 op) } } - void Jit::Comp_VDot(u32 op) { DISABLE; + // 
WARNING: No prefix support! + if (js.MayHavePrefix()) { + Comp_Generic(op); + js.EatPrefix(); + return; + } int vd = _VD; int vs = _VS; @@ -309,7 +311,6 @@ void Jit::Comp_VDot(u32 op) { MOVSS(XMM0, fpr.V(sregs[0])); MULSS(XMM0, fpr.V(tregs[0])); - float sum = 0.0f; int n = GetNumVectorElements(sz); for (int i = 1; i < n; i++) { @@ -331,6 +332,78 @@ void Jit::Comp_VDot(u32 op) { js.EatPrefix(); } +void Jit::Comp_VecDo3(u32 op) { + DISABLE; + + // WARNING: No prefix support! + if (js.MayHavePrefix()) + { + Comp_Generic(op); + js.EatPrefix(); + return; + } + + int vd = _VD; + int vs = _VS; + int vt = _VT; + VectorSize sz = GetVecSize(op); + + u8 sregs[4], tregs[4], dregs[4]; + GetVectorRegs(sregs, sz, vs); + GetVectorRegs(tregs, sz, vt); + GetVectorRegs(dregs, sz, vd); + + void (XEmitter::*xmmop)(X64Reg, OpArg) = NULL; + switch (op >> 26) + { + case 24: //VFPU0 + switch ((op >> 23)&7) + { + case 0: // d[i] = s[i] + t[i]; break; //vadd + xmmop = &XEmitter::ADDSS; + break; + case 1: // d[i] = s[i] - t[i]; break; //vsub + xmmop = &XEmitter::SUBSS; + break; + case 7: // d[i] = s[i] / t[i]; break; //vdiv + xmmop = &XEmitter::DIVSS; + break; + } + break; + case 25: //VFPU1 + switch ((op >> 23)&7) + { + case 0: // d[i] = s[i] * t[i]; break; //vmul + xmmop = &XEmitter::MULSS; + break; + } + break; + } + + if (xmmop == NULL) + { + Comp_Generic(op); + js.EatPrefix(); + return; + } + + int n = GetNumVectorElements(sz); + // We need at least n temporaries... 
+ if (n > 2) + fpr.Flush(); + + for (int i = 0; i < n; ++i) + MOVSS((X64Reg) (XMM0 + i), fpr.V(sregs[i])); + for (int i = 0; i < n; ++i) + (this->*xmmop)((X64Reg) (XMM0 + i), fpr.V(tregs[i])); + for (int i = 0; i < n; ++i) + MOVSS(fpr.V(dregs[i]), (X64Reg) (XMM0 + i)); + + fpr.ReleaseSpillLocks(); + + js.EatPrefix(); +} + void Jit::Comp_Mftv(u32 op) { CONDITIONAL_DISABLE; @@ -346,6 +419,8 @@ void Jit::Comp_Mftv(u32 op) { gpr.BindToRegister(rt, false, true); MOV(32, gpr.R(rt), fpr.V(imm)); } else if (imm < 128 + VFPU_CTRL_MAX) { //mtvc + // In case we have a saved prefix. + FlushPrefixV(); gpr.BindToRegister(rt, false, true); MOV(32, gpr.R(rt), M(&currentMIPS->vfpuCtrl[imm - 128])); } else { @@ -367,11 +442,11 @@ void Jit::Comp_Mftv(u32 op) { // TODO: Optimization if rt is Imm? if (imm - 128 == VFPU_CTRL_SPREFIX) { - js.prefixSKnown = false; + js.prefixSFlag = JitState::PREFIX_UNKNOWN; } else if (imm - 128 == VFPU_CTRL_TPREFIX) { - js.prefixTKnown = false; + js.prefixTFlag = JitState::PREFIX_UNKNOWN; } else if (imm - 128 == VFPU_CTRL_DPREFIX) { - js.prefixDKnown = false; + js.prefixDFlag = JitState::PREFIX_UNKNOWN; } } else { //ERROR @@ -381,8 +456,6 @@ void Jit::Comp_Mftv(u32 op) { default: DISABLE; - _dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted"); - break; } } @@ -396,11 +469,11 @@ void Jit::Comp_Vmtvc(u32 op) { fpr.ReleaseSpillLocks(); if (imm - 128 == VFPU_CTRL_SPREFIX) { - js.prefixSKnown = false; + js.prefixSFlag = JitState::PREFIX_UNKNOWN; } else if (imm - 128 == VFPU_CTRL_TPREFIX) { - js.prefixTKnown = false; + js.prefixTFlag = JitState::PREFIX_UNKNOWN; } else if (imm - 128 == VFPU_CTRL_DPREFIX) { - js.prefixDKnown = false; + js.prefixDFlag = JitState::PREFIX_UNKNOWN; } } } diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index dc6a789830..0a859ca2ae 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -109,6 +109,34 @@ void Jit::FlushAll() { gpr.Flush(); fpr.Flush(); + FlushPrefixV(); +} + +void
Jit::FlushPrefixV() +{ + if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) + { + MOV(32, M((void *)&mips_->vfpuCtrl[VFPU_CTRL_SPREFIX]), Imm32(js.prefixS)); + js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY); + } + + if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) + { + MOV(32, M((void *)&mips_->vfpuCtrl[VFPU_CTRL_TPREFIX]), Imm32(js.prefixT)); + js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY); + } + + if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) + { + MOV(32, M((void *)&mips_->vfpuCtrl[VFPU_CTRL_DPREFIX]), Imm32(js.prefixD)); + + _dbg_assert_msg_(JIT, sizeof(bool) <= 4, "Bools shouldn't be that big?"); + const size_t bool_stride = 4 / sizeof(bool); + for (size_t i = 0; i < ARRAY_SIZE(mips_->vfpuWriteMask); i += bool_stride) + MOV(32, M((void *)&mips_->vfpuWriteMask[i]), Imm32(*(u32 *)&js.writeMask[i])); + + js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY); + } } void Jit::WriteDowncount(int offset) @@ -259,6 +287,10 @@ void Jit::Comp_Generic(u32 op) } else _dbg_assert_msg_(JIT, 0, "Trying to compile instruction that can't be interpreted"); + + // Might have eaten prefixes, hard to tell... 
+ if ((MIPSGetInfo(op) & IS_VFPU) != 0) + js.PrefixStart(); } void Jit::WriteExit(u32 destination, int exit_num) diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h index e39b93868e..2d5f9ef3c1 100644 --- a/Core/MIPS/x86/Jit.h +++ b/Core/MIPS/x86/Jit.h @@ -48,6 +48,14 @@ struct JitOptions struct JitState { + enum PrefixState + { + PREFIX_UNKNOWN = 0x00, + PREFIX_KNOWN = 0x01, + PREFIX_DIRTY = 0x10, + PREFIX_KNOWN_DIRTY = 0x11, + }; + u32 compilerPC; u32 blockStart; bool cancel; @@ -62,22 +70,39 @@ struct JitState u32 prefixT; u32 prefixD; bool writeMask[4]; - bool prefixSKnown; - bool prefixTKnown; - bool prefixDKnown; + PrefixState prefixSFlag; + PrefixState prefixTFlag; + PrefixState prefixDFlag; void PrefixStart() { - prefixSKnown = false; - prefixTKnown = false; - prefixDKnown = false; + prefixSFlag = PREFIX_UNKNOWN; + prefixTFlag = PREFIX_UNKNOWN; + prefixDFlag = PREFIX_UNKNOWN; + } + bool MayHavePrefix() const { + if (!(prefixSFlag & PREFIX_KNOWN) || !(prefixTFlag & PREFIX_KNOWN) || !(prefixDFlag & PREFIX_KNOWN)) { + return true; + } else if (prefixS != 0xE4 || prefixT != 0xE4 || prefixD != 0) { + return true; + } else if (writeMask[0] || writeMask[1] || writeMask[2] || writeMask[3]) { + return true; + } + + return false; } void EatPrefix() { - prefixSKnown = true; - prefixTKnown = true; - prefixDKnown = true; - prefixS = 0xE4; - prefixT = 0xE4; - prefixD = 0x0; - writeMask[0] = writeMask[1] = writeMask[2] = writeMask[3] = false; + if ((prefixSFlag & PREFIX_KNOWN) == 0 || prefixS != 0xE4) { + prefixSFlag = PREFIX_KNOWN_DIRTY; + prefixS = 0xE4; + } + if ((prefixTFlag & PREFIX_KNOWN) == 0 || prefixT != 0xE4) { + prefixTFlag = PREFIX_KNOWN_DIRTY; + prefixT = 0xE4; + } + if ((prefixDFlag & PREFIX_KNOWN) == 0 || prefixD != 0x0 || writeMask[0] || writeMask[1] || writeMask[2] || writeMask[3]) { + prefixDFlag = PREFIX_KNOWN_DIRTY; + prefixD = 0x0; + writeMask[0] = writeMask[1] = writeMask[2] = writeMask[3] = false; + } } }; @@ -140,6 +165,7 @@ public: void 
Comp_SVQ(u32 op); void Comp_VPFX(u32 op); void Comp_VDot(u32 op); + void Comp_VecDo3(u32 op); void Comp_Mftv(u32 op); void Comp_Vmtvc(u32 op); @@ -155,6 +181,7 @@ public: void ClearCacheAt(u32 em_address); private: void FlushAll(); + void FlushPrefixV(); void WriteDowncount(int offset = 0); // See CompileDelaySlotFlags for flags.