From e5024135a608b1897f7c69bf5f13bf299c7152d8 Mon Sep 17 00:00:00 2001 From: Ced2911 Date: Tue, 3 Sep 2013 12:16:49 +0200 Subject: [PATCH] more on vfpu --- Core/MIPS/PPC/PpcCompVFPU.cpp | 867 ++++++++++++++++++++++++++++++- Core/MIPS/PPC/PpcJit.h | 15 + Core/MIPS/PPC/PpcRegCacheVPU.cpp | 313 +++++++++++ Core/MIPS/PPC/PpcRegCacheVPU.h | 106 ++++ 4 files changed, 1275 insertions(+), 26 deletions(-) create mode 100644 Core/MIPS/PPC/PpcRegCacheVPU.cpp create mode 100644 Core/MIPS/PPC/PpcRegCacheVPU.h diff --git a/Core/MIPS/PPC/PpcCompVFPU.cpp b/Core/MIPS/PPC/PpcCompVFPU.cpp index 3d47af2bba..064e04bbf4 100644 --- a/Core/MIPS/PPC/PpcCompVFPU.cpp +++ b/Core/MIPS/PPC/PpcCompVFPU.cpp @@ -1,3 +1,6 @@ + +#include "math/math_util.h" + #include "Common/ChunkFile.h" #include "Core/Core.h" #include "Core/CoreTiming.h" @@ -19,18 +22,23 @@ const bool disablePrefixes = false; #define CONDITIONAL_DISABLE ; #define DISABLE { fpr.ReleaseSpillLocksAndDiscardTemps(); Comp_Generic(op); return; } -#define _RS ((op>>21) & 0x1F) -#define _RT ((op>>16) & 0x1F) -#define _RD ((op>>11) & 0x1F) -#define _FS ((op>>11) & 0x1F) -#define _FT ((op>>16) & 0x1F) -#define _FD ((op>>6 ) & 0x1F) -#define _POS ((op>>6 ) & 0x1F) -#define _SIZE ((op>>11 ) & 0x1F) +#define _RS MIPS_GET_RS(op) +#define _RT MIPS_GET_RT(op) +#define _RD MIPS_GET_RD(op) +#define _FS MIPS_GET_FS(op) +#define _FT MIPS_GET_FT(op) +#define _FD MIPS_GET_FD(op) +#define _SA MIPS_GET_SA(op) +#define _POS ((op>> 6) & 0x1F) +#define _SIZE ((op>>11) & 0x1F) +#define _IMM16 (signed short)(op & 0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) using namespace PpcGen; +//#define USE_VMX128 + namespace MIPSComp { // Vector regs can overlap in all sorts of swizzled ways. 
@@ -57,6 +65,113 @@ namespace MIPSComp
 		return IsOverlapSafeAllowS(dreg, di, sn, sregs, tn, tregs) && sregs[di] != dreg;
 	}
 
+	// Applies an S/T operand prefix (swizzle, abs, negate, constant) to vregs.
+	// Every lane the prefix modifies is redirected to a temp V register that
+	// receives the transformed value, so the source register itself is never
+	// overwritten. prefix == 0xE4 (lanes 0,1,2,3 with no flags) is a no-op.
+	void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
+		if (prefix == 0xE4) return;
+
+		int n = GetNumVectorElements(sz);
+		u8 origV[4];
+		static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};
+
+		for (int i = 0; i < n; i++)
+			origV[i] = vregs[i];
+
+		for (int i = 0; i < n; i++)
+		{
+			int regnum = (prefix >> (i*2)) & 3;
+			int abs = (prefix >> (8+i)) & 1;
+			int negate = (prefix >> (16+i)) & 1;
+			int constants = (prefix >> (12+i)) & 1;
+
+			// Unchanged, hurray.
+			if (!constants && regnum == i && !abs && !negate)
+				continue;
+
+			// This puts the value into a temp reg, so we won't write the modified value back.
+			vregs[i] = fpr.GetTempV();
+			if (!constants) {
+				// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
+				// TODO: But some ops seem to use const 0 instead?
+				// Clamp before the first use of origV[regnum]: only origV[0..n-1] is filled in.
+				if (regnum >= n) {
+					WARN_LOG(CPU, "JIT: Invalid VFPU swizzle: %08x : %d / %d at PC = %08x (%s)", prefix, regnum, n, js.compilerPC, currentMIPS->DisasmAt(js.compilerPC));
+					regnum = 0;
+				}
+
+				fpr.MapDirtyInV(vregs[i], origV[regnum]);
+				fpr.SpillLockV(vregs[i]);
+
+				if (abs) {
+					FABS(fpr.V(vregs[i]), fpr.V(origV[regnum]));
+					if (negate)
+						FNEG(fpr.V(vregs[i]), fpr.V(vregs[i]));
+				} else {
+					if (negate)
+						FNEG(fpr.V(vregs[i]), fpr.V(origV[regnum]));
+					else
+						FMR(fpr.V(vregs[i]), fpr.V(origV[regnum]));
+				}
+			} else {
+				fpr.MapRegV(vregs[i], MAP_DIRTY | MAP_NOINIT);
+				fpr.SpillLockV(vregs[i]);
+				MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs<<2)], negate);
+			}
+		}
+	}
+
+	void Jit::GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg) {
+		_assert_(js.prefixDFlag & PpcJitState::PREFIX_KNOWN);
+
+		GetVectorRegs(regs, sz, vectorReg);
+		if (js.prefixD == 0)
+			return;
+
+		int n = GetNumVectorElements(sz);
+		for (int i = 0; i < n; i++) {
+			// Hopefully this is rare, we'll just write it into a reg we drop.
+ if (js.VfpuWriteMask(i)) + regs[i] = fpr.GetTempV(); + } + } + + void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) { + _assert_(js.prefixDFlag & PpcJitState::PREFIX_KNOWN); + if (!js.prefixD) return; + + int n = GetNumVectorElements(sz); + for (int i = 0; i < n; i++) { + if (js.VfpuWriteMask(i)) + continue; + + // TODO: These clampers are wrong - put this into google + // and look at the plot: abs(x) - abs(x-0.5) + 0.5 + // It's too steep. + + // Also, they mishandle NaN and Inf. + int sat = (js.prefixD >> (i * 2)) & 3; + if (sat == 1) { + fpr.MapRegV(vregs[i], MAP_DIRTY); + + MOVI2F(FPR6, 0.0f); + MOVI2F(FPR7, 1.0f); + + FMAX(fpr.V(vregs[i]), fpr.V(vregs[i]), FPR6); + FMIN(fpr.V(vregs[i]), fpr.V(vregs[i]), FPR7); + } else if (sat == 3) { + fpr.MapRegV(vregs[i], MAP_DIRTY); + + MOVI2F(FPR6, -1.0f); + MOVI2F(FPR7, 1.0f); + + FMAX(fpr.V(vregs[i]), fpr.V(vregs[i]), FPR6); + FMIN(fpr.V(vregs[i]), fpr.V(vregs[i]), FPR7); + } + } + } + void Jit::Comp_SV(MIPSOpcode op) { CONDITIONAL_DISABLE; @@ -168,47 +278,573 @@ namespace MIPSComp } void Jit::Comp_VPFX(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE; + int data = op & 0xFFFFF; + int regnum = (op >> 24) & 3; + switch (regnum) { + case 0: // S + js.prefixS = data; + js.prefixSFlag = PpcJitState::PREFIX_KNOWN_DIRTY; + break; + case 1: // T + js.prefixT = data; + js.prefixTFlag = PpcJitState::PREFIX_KNOWN_DIRTY; + break; + case 2: // D + js.prefixD = data; + js.prefixDFlag = PpcJitState::PREFIX_KNOWN_DIRTY; + break; + default: + ERROR_LOG(CPU, "VPFX - bad regnum %i : data=%08x", regnum, data); + break; + } } void Jit::Comp_VVectorInit(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE; + + // WARNING: No prefix support! 
+ if (js.HasUnknownPrefix() || disablePrefixes) { + DISABLE; + } + + switch ((op >> 16) & 0xF) + { + case 6: // v=zeros; break; //vzero + MOVI2F(FPR5, 0.0f); + break; + case 7: // v=ones; break; //vone + MOVI2F(FPR5, 1.0f); + break; + default: + DISABLE; + break; + } + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + u8 dregs[4]; + GetVectorRegsPrefixD(dregs, sz, _VD); + fpr.MapRegsAndSpillLockV(dregs, sz, MAP_NOINIT | MAP_DIRTY); + + for (int i = 0; i < n; ++i) + FMR(fpr.V(dregs[i]), FPR5); + + ApplyPrefixD(dregs, sz); + + fpr.ReleaseSpillLocksAndDiscardTemps(); } void Jit::Comp_VMatrixInit(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE; + + if (js.HasUnknownPrefix() || disablePrefixes) { + DISABLE; + } + + MatrixSize sz = GetMtxSize(op); + int n = GetMatrixSide(sz); + + u8 dregs[16]; + GetMatrixRegs(dregs, sz, _VD); + + switch ((op >> 16) & 0xF) { + case 3: // vmidt + MOVI2F(FPR6, 0.0f); + MOVI2F(FPR7, 1.0f); + for (int a = 0; a < n; a++) { + for (int b = 0; b < n; b++) { + fpr.MapRegV(dregs[a * 4 + b], MAP_DIRTY | MAP_NOINIT); + FMR(fpr.V(dregs[a * 4 + b]), a == b ? FPR7 : FPR6); + } + } + break; + case 6: // vmzero + MOVI2F(FPR6, 0.0f); + for (int a = 0; a < n; a++) { + for (int b = 0; b < n; b++) { + fpr.MapRegV(dregs[a * 4 + b], MAP_DIRTY | MAP_NOINIT); + FMR(fpr.V(dregs[a * 4 + b]), FPR6); + } + } + break; + case 7: // vmone + MOVI2F(FPR7, 1.0f); + for (int a = 0; a < n; a++) { + for (int b = 0; b < n; b++) { + fpr.MapRegV(dregs[a * 4 + b], MAP_DIRTY | MAP_NOINIT); + FMR(fpr.V(dregs[a * 4 + b]), FPR7); + } + } + break; + } + + fpr.ReleaseSpillLocksAndDiscardTemps(); } void Jit::Comp_VDot(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE; + if (js.HasUnknownPrefix() || disablePrefixes) { + DISABLE; + } + + int vd = _VD; + int vs = _VS; + int vt = _VT; + VectorSize sz = GetVecSize(op); + + // TODO: Force read one of them into regs? probably not. 
+ u8 sregs[4], tregs[4], dregs[1]; + GetVectorRegsPrefixS(sregs, sz, vs); + GetVectorRegsPrefixT(tregs, sz, vt); + GetVectorRegsPrefixD(dregs, V_Single, vd); + + // TODO: applyprefixST here somehow (shuffle, etc...) + fpr.MapRegsAndSpillLockV(sregs, sz, 0); + fpr.MapRegsAndSpillLockV(tregs, sz, 0); + FMULS(FPR6, fpr.V(sregs[0]), fpr.V(tregs[0])); + + int n = GetNumVectorElements(sz); + for (int i = 1; i < n; i++) { + // sum += s[i]*t[i]; + FMADDS(FPR6, fpr.V(sregs[i]), fpr.V(tregs[i]), FPR6); + } + fpr.ReleaseSpillLocksAndDiscardTemps(); + + fpr.MapRegV(dregs[0], MAP_NOINIT | MAP_DIRTY); + + // TODO: applyprefixD here somehow (write mask etc..) + FMR(fpr.V(dregs[0]), FPR6); + ApplyPrefixD(dregs, V_Single); + fpr.ReleaseSpillLocksAndDiscardTemps(); } void Jit::Comp_VecDo3(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE; + + if (js.HasUnknownPrefix() || disablePrefixes) { + DISABLE; + } + + int vd = _VD; + int vs = _VS; + int vt = _VT; + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + u8 sregs[4], tregs[4], dregs[4]; + GetVectorRegsPrefixS(sregs, sz, _VS); + GetVectorRegsPrefixT(tregs, sz, _VT); + GetVectorRegsPrefixD(dregs, sz, _VD); + + MIPSReg tempregs[4]; + for (int i = 0; i < n; i++) { + if (!IsOverlapSafe(dregs[i], i, n, sregs, n, tregs)) { + tempregs[i] = fpr.GetTempV(); + } else { + tempregs[i] = dregs[i]; + } + } + + for (int i = 0; i < n; i++) { + fpr.MapDirtyInInV(tempregs[i], sregs[i], tregs[i]); + switch (op >> 26) { + case 24: //VFPU0 + switch ((op >> 23)&7) { + case 0: // d[i] = s[i] + t[i]; break; //vadd + FADDS(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i])); + break; + case 1: // d[i] = s[i] - t[i]; break; //vsub + FSUBS(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i])); + break; + case 7: // d[i] = s[i] / t[i]; break; //vdiv + FDIVS(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i])); + break; + default: + DISABLE; + } + break; + case 25: //VFPU1 + switch ((op >> 23) & 7) { + case 0: // d[i] = s[i] * 
t[i]; break; //vmul + FMULS(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i])); + break; + default: + DISABLE; + } + break; + case 27: //VFPU3 + switch ((op >> 23) & 7) { + case 2: // vmin + FMIN(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i])); + break; + case 3: // vmax + FMAX(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i])); + break; + case 6: // vsge + DISABLE; // pending testing + break; + case 7: // vslt + DISABLE; // pending testing + break; + } + break; + + default: + DISABLE; + } + } + + for (int i = 0; i < n; i++) { + if (dregs[i] != tempregs[i]) { + fpr.MapDirtyInV(dregs[i], tempregs[i]); + FMR(fpr.V(dregs[i]), fpr.V(tempregs[i])); + } + } + ApplyPrefixD(dregs, sz); + + fpr.ReleaseSpillLocksAndDiscardTemps(); } void Jit::Comp_VV2Op(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE; + + if (js.HasUnknownPrefix() || disablePrefixes) { + DISABLE; + } + + // Pre-processing: Eliminate silly no-op VMOVs, common in Wipeout Pure + if (((op >> 16) & 0x1f) == 0 && _VS == _VD && js.HasNoPrefix()) { + return; + } + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + u8 sregs[4], dregs[4]; + GetVectorRegsPrefixS(sregs, sz, _VS); + GetVectorRegsPrefixD(dregs, sz, _VD); + + MIPSReg tempregs[4]; + for (int i = 0; i < n; ++i) { + if (!IsOverlapSafe(dregs[i], i, n, sregs)) { + tempregs[i] = fpr.GetTempV(); + } else { + tempregs[i] = dregs[i]; + } + } + + // Warning: sregs[i] and tempxregs[i] may be the same reg. + // Helps for vmov, hurts for vrcp, etc. + for (int i = 0; i < n; ++i) { + switch ((op >> 16) & 0x1f) { + case 0: // d[i] = s[i]; break; //vmov + // Probably for swizzle. 
+ fpr.MapDirtyInV(tempregs[i], sregs[i]); + FMR(fpr.V(tempregs[i]), fpr.V(sregs[i])); + break; + case 1: // d[i] = fabsf(s[i]); break; //vabs + fpr.MapDirtyInV(tempregs[i], sregs[i]); + FABS(fpr.V(tempregs[i]), fpr.V(sregs[i])); + break; + case 2: // d[i] = -s[i]; break; //vneg + fpr.MapDirtyInV(tempregs[i], sregs[i]); + FNEG(fpr.V(tempregs[i]), fpr.V(sregs[i])); + break; + + /* These are probably just as broken as the prefix. + case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat0 + fpr.MapDirtyInV(tempregs[i], sregs[i]); + MOVI2F(S0, 0.5f, R0); + VABS(S1, fpr.V(sregs[i])); // S1 = fabs(x) + VSUB(fpr.V(tempregs[i]), fpr.V(sregs[i]), S0); // S2 = fabs(x-0.5f) {VABD} + VABS(fpr.V(tempregs[i]), fpr.V(tempregs[i])); + VSUB(fpr.V(tempregs[i]), S1, fpr.V(tempregs[i])); // v[i] = S1 - S2 + 0.5f + VADD(fpr.V(tempregs[i]), fpr.V(tempregs[i]), S0); + break; + case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat1 + fpr.MapDirtyInV(tempregs[i], sregs[i]); + MOVI2F(S0, 1.0f, R0); + VABS(S1, fpr.V(sregs[i])); // S1 = fabs(x) + VSUB(fpr.V(tempregs[i]), fpr.V(sregs[i]), S0); // S2 = fabs(x-1.0f) {VABD} + VABS(fpr.V(tempregs[i]), fpr.V(tempregs[i])); + VSUB(fpr.V(tempregs[i]), S1, fpr.V(tempregs[i])); // v[i] = S1 - S2 + break; + */ + + case 16: // d[i] = 1.0f / s[i]; break; //vrcp + fpr.MapDirtyInV(tempregs[i], sregs[i]); + MOVI2F(FPR6, 1.0f); + FDIVS(fpr.V(tempregs[i]), FPR6, fpr.V(sregs[i])); + break; + case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq + fpr.MapDirtyInV(tempregs[i], sregs[i]); + MOVI2F(FPR6, 1.0f); + FSQRTS(FPR7, fpr.V(sregs[i])); + FDIVS(fpr.V(tempregs[i]), FPR6, FPR7); + break; + case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin + DISABLE; + break; + case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos + DISABLE; + break; + case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2 + DISABLE; + break; + case 21: // d[i] = 
logf(s[i])/log(2.0f); break; //vlog2 + DISABLE; + break; + case 22: // d[i] = sqrtf(s[i]); break; //vsqrt + fpr.MapDirtyInV(tempregs[i], sregs[i]); + FSQRTS(fpr.V(tempregs[i]), fpr.V(sregs[i])); + FABS(fpr.V(tempregs[i]), fpr.V(tempregs[i])); + break; + case 23: // d[i] = asinf(s[i] * (float)M_2_PI); break; //vasin + DISABLE; + break; + case 24: // d[i] = -1.0f / s[i]; break; // vnrcp + fpr.MapDirtyInV(tempregs[i], sregs[i]); + MOVI2F(FPR6, -1.0f); + FDIVS(fpr.V(tempregs[i]), FPR6, fpr.V(sregs[i])); + break; + case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin + DISABLE; + break; + case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2 + DISABLE; + break; + default: + DISABLE; + break; + } + } + + for (int i = 0; i < n; ++i) { + if (dregs[i] != tempregs[i]) { + fpr.MapDirtyInV(dregs[i], tempregs[i]); + FMR(fpr.V(dregs[i]), fpr.V(tempregs[i])); + } + } + + ApplyPrefixD(dregs, sz); + + fpr.ReleaseSpillLocksAndDiscardTemps(); } void Jit::Comp_Mftv(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE; + + int imm = op & 0xFF; + MIPSGPReg rt = _RT; + switch ((op >> 21) & 0x1f) + { + case 3: //mfv / mfvc + // rt = 0, imm = 255 appears to be used as a CPU interlock by some games. + if (rt != 0) { + if (imm < 128) { //R(rt) = VI(imm); + fpr.FlushV(imm); + gpr.MapReg(rt, MAP_NOINIT | MAP_DIRTY); + LWZ(gpr.R(rt), CTXREG, fpr.GetMipsRegOffsetV(imm)); + } else if (imm < 128 + VFPU_CTRL_MAX) { //mfvc + DISABLE; + // In case we have a saved prefix. + //FlushPrefixV(); + //gpr.BindToRegister(rt, false, true); + //MOV(32, gpr.R(rt), M(&currentMIPS->vfpuCtrl[imm - 128])); + } else { + //ERROR - maybe need to make this value too an "interlock" value? 
+ ERROR_LOG(CPU, "mfv - invalid register %i", imm); + } + } + break; + + case 7: // mtv + if (imm < 128) { + gpr.FlushR(rt); + fpr.MapRegV(imm, MAP_DIRTY | MAP_NOINIT); + LFS(fpr.V(imm), CTXREG, gpr.GetMipsRegOffset(rt)); + } else if (imm < 128 + VFPU_CTRL_MAX) { //mtvc //currentMIPS->vfpuCtrl[imm - 128] = R(rt); + gpr.MapReg(rt); + STW(gpr.R(rt), CTXREG, offsetof(MIPSState, vfpuCtrl) + 4 * (imm - 128)); + //gpr.BindToRegister(rt, true, false); + //MOV(32, M(&currentMIPS->vfpuCtrl[imm - 128]), gpr.R(rt)); + + // TODO: Optimization if rt is Imm? + // Set these BEFORE disable! + if (imm - 128 == VFPU_CTRL_SPREFIX) { + js.prefixSFlag = PpcJitState::PREFIX_UNKNOWN; + } else if (imm - 128 == VFPU_CTRL_TPREFIX) { + js.prefixTFlag = PpcJitState::PREFIX_UNKNOWN; + } else if (imm - 128 == VFPU_CTRL_DPREFIX) { + js.prefixDFlag = PpcJitState::PREFIX_UNKNOWN; + } + } else { + //ERROR + _dbg_assert_msg_(CPU,0,"mtv - invalid register"); + } + break; + + default: + DISABLE; + } + + fpr.ReleaseSpillLocksAndDiscardTemps(); } void Jit::Comp_Vmtvc(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE; + + int vs = _VS; + int imm = op & 0xFF; + if (imm >= 128 && imm < 128 + VFPU_CTRL_MAX) { + fpr.MapRegV(vs); + ADDI(SREG, CTXREG, offsetof(MIPSState, vfpuCtrl[0]) + (imm - 128) * 4); + SFS(fpr.V(vs), SREG, 0); + fpr.ReleaseSpillLocksAndDiscardTemps(); + + if (imm - 128 == VFPU_CTRL_SPREFIX) { + js.prefixSFlag = PpcJitState::PREFIX_UNKNOWN; + } else if (imm - 128 == VFPU_CTRL_TPREFIX) { + js.prefixTFlag = PpcJitState::PREFIX_UNKNOWN; + } else if (imm - 128 == VFPU_CTRL_DPREFIX) { + js.prefixDFlag = PpcJitState::PREFIX_UNKNOWN; + } + } } void Jit::Comp_Vmmov(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE; + + // TODO: This probably ignores prefixes? + //if (js.MayHavePrefix()) { + // DISABLE; + //} + + if (_VS == _VD) { + // A lot of these in Wipeout... Just drop the instruction entirely. 
+ return; + } + + MatrixSize sz = GetMtxSize(op); + int n = GetMatrixSide(sz); + + u8 sregs[16], dregs[16]; + GetMatrixRegs(sregs, sz, _VS); + GetMatrixRegs(dregs, sz, _VD); + + // Rough overlap check. + bool overlap = false; + if (GetMtx(_VS) == GetMtx(_VD)) { + // Potential overlap (guaranteed for 3x3 or more). + overlap = true; + } + + if (overlap) { + // Not so common, fallback. + DISABLE; + } else { + for (int a = 0; a < n; a++) { + for (int b = 0; b < n; b++) { + fpr.MapDirtyInV(dregs[a * 4 + b], sregs[a * 4 + b]); + FMR(fpr.V(dregs[a * 4 + b]), fpr.V(sregs[a * 4 + b])); + } + } + fpr.ReleaseSpillLocksAndDiscardTemps(); + } } void Jit::Comp_VScl(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE; + + if (js.HasUnknownPrefix() || disablePrefixes) { + DISABLE; + } + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + u8 sregs[4], dregs[4], treg; + GetVectorRegsPrefixS(sregs, sz, _VS); + GetVectorRegs(&treg, V_Single, _VT); + GetVectorRegsPrefixD(dregs, sz, _VD); + + // Move to S0 early, so we don't have to worry about overlap with scale. + fpr.LoadToRegV(FPR6, treg); + + // For prefixes to work, we just have to ensure that none of the output registers spill + // and that there's no overlap. + MIPSReg tempregs[4]; + for (int i = 0; i < n; ++i) { + if (!IsOverlapSafe(dregs[i], i, n, sregs)) { + // Need to use temp regs + tempregs[i] = fpr.GetTempV(); + } else { + tempregs[i] = dregs[i]; + } + } + + // The meat of the function! + for (int i = 0; i < n; i++) { + fpr.MapDirtyInV(tempregs[i], sregs[i]); + FMULS(fpr.V(tempregs[i]), fpr.V(sregs[i]), FPR6); + } + + for (int i = 0; i < n; i++) { + // All must be mapped for prefixes to work. 
+ if (dregs[i] != tempregs[i]) { + fpr.MapDirtyInV(dregs[i], tempregs[i]); + FMR(fpr.V(dregs[i]), fpr.V(tempregs[i])); + } + } + + ApplyPrefixD(dregs, sz); + + fpr.ReleaseSpillLocksAndDiscardTemps(); } void Jit::Comp_Vmmul(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE; + + // TODO: This probably ignores prefixes? + if (js.MayHavePrefix() || disablePrefixes) { + DISABLE; + } + + MatrixSize sz = GetMtxSize(op); + int n = GetMatrixSide(sz); + + u8 sregs[16], tregs[16], dregs[16]; + GetMatrixRegs(sregs, sz, _VS); + GetMatrixRegs(tregs, sz, _VT); + GetMatrixRegs(dregs, sz, _VD); + + // Rough overlap check. + bool overlap = false; + if (GetMtx(_VS) == GetMtx(_VD) || GetMtx(_VT) == GetMtx(_VD)) { + // Potential overlap (guaranteed for 3x3 or more). + overlap = true; + } + + if (overlap) { + DISABLE; + } else { + for (int a = 0; a < n; a++) { + for (int b = 0; b < n; b++) { + fpr.MapInInV(sregs[b * 4], tregs[a * 4]); + FMULS(FPR6, fpr.V(sregs[b * 4]), fpr.V(tregs[a * 4])); + for (int c = 1; c < n; c++) { + fpr.MapInInV(sregs[b * 4 + c], tregs[a * 4 + c]); + FMADDS(FPR6, fpr.V(sregs[b * 4 + c]), fpr.V(tregs[a * 4 + c]), FPR6); + } + fpr.MapRegV(dregs[a * 4 + b], MAP_DIRTY | MAP_NOINIT); + FMR(fpr.V(dregs[a * 4 + b]), FPR6); + } + } + fpr.ReleaseSpillLocksAndDiscardTemps(); + } } void Jit::Comp_Vmscl(MIPSOpcode op) { @@ -216,7 +852,63 @@ namespace MIPSComp } void Jit::Comp_Vtfm(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE; + + // TODO: This probably ignores prefixes? Or maybe uses D? + if (js.MayHavePrefix() || disablePrefixes) { + DISABLE; + } + + VectorSize sz = GetVecSize(op); + MatrixSize msz = GetMtxSize(op); + int n = GetNumVectorElements(sz); + int ins = (op >> 23) & 7; + + bool homogenous = false; + if (n == ins) { + n++; + sz = (VectorSize)((int)(sz) + 1); + msz = (MatrixSize)((int)(msz) + 1); + homogenous = true; + } + // Otherwise, n should already be ins + 1. 
+ else if (n != ins + 1) { + DISABLE; + } + + u8 sregs[16], dregs[4], tregs[4]; + GetMatrixRegs(sregs, msz, _VS); + GetVectorRegs(tregs, sz, _VT); + GetVectorRegs(dregs, sz, _VD); + + // TODO: test overlap, optimize. + int tempregs[4]; + for (int i = 0; i < n; i++) { + fpr.MapInInV(sregs[i * 4], tregs[0]); + FMULS(FPR6, fpr.V(sregs[i * 4]), fpr.V(tregs[0])); + for (int k = 1; k < n; k++) { + if (!homogenous || k != n - 1) { + fpr.MapInInV(sregs[i * 4 + k], tregs[k]); + FMADDS(FPR6, fpr.V(sregs[i * 4 + k]), fpr.V(tregs[k]), FPR6); + } else { + fpr.MapRegV(sregs[i * 4 + k]); + FADDS(FPR6, FPR6, fpr.V(sregs[i * 4 + k])); + } + } + + int temp = fpr.GetTempV(); + fpr.MapRegV(temp, MAP_NOINIT | MAP_DIRTY); + fpr.SpillLockV(temp); + FMR(fpr.V(temp), FPR6); + tempregs[i] = temp; + } + for (int i = 0; i < n; i++) { + u8 temp = tempregs[i]; + fpr.MapRegV(dregs[i], MAP_NOINIT | MAP_DIRTY); + FMR(fpr.V(dregs[i]), fpr.V(temp)); + } + + fpr.ReleaseSpillLocksAndDiscardTemps(); } void Jit::Comp_VHdp(MIPSOpcode op) { @@ -240,7 +932,7 @@ namespace MIPSComp } void Jit::Comp_Vf2i(MIPSOpcode op) { - Comp_Generic(op); + DISABLE; } void Jit::Comp_Vi2f(MIPSOpcode op) { @@ -248,7 +940,29 @@ namespace MIPSComp } void Jit::Comp_Vcst(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE; + + if (js.HasUnknownPrefix() || disablePrefixes) { + DISABLE; + } + + int conNum = (op >> 16) & 0x1f; + int vd = _VD; + + VectorSize sz = GetVecSize(op); + int n = GetNumVectorElements(sz); + + u8 dregs[4]; + GetVectorRegsPrefixD(dregs, sz, _VD); + fpr.MapRegsAndSpillLockV(dregs, sz, MAP_NOINIT | MAP_DIRTY); + + MOVI2R(SREG, (u32)(void *)&cst_constants[conNum]); + LFS(FPR6, SREG, 0); + for (int i = 0; i < n; ++i) + FMR(fpr.V(dregs[i]), FPR6); + + ApplyPrefixD(dregs, sz); + fpr.ReleaseSpillLocksAndDiscardTemps(); } void Jit::Comp_Vhoriz(MIPSOpcode op) { @@ -260,7 +974,40 @@ namespace MIPSComp } void Jit::Comp_VIdt(MIPSOpcode op) { - Comp_Generic(op); + CONDITIONAL_DISABLE + + if (js.HasUnknownPrefix() || 
disablePrefixes) {
+			DISABLE;
+		}
+
+		int vd = _VD;
+		VectorSize sz = GetVecSize(op);
+		int n = GetNumVectorElements(sz);
+		MOVI2F(FPR6, 0.0f);
+		MOVI2F(FPR7, 1.0f);
+		u8 dregs[4];
+		GetVectorRegsPrefixD(dregs, sz, _VD);
+		fpr.MapRegsAndSpillLockV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
+		switch (sz)
+		{
+		case V_Pair:
+			FMR(fpr.V(dregs[0]), (vd&1)==0 ? FPR7 : FPR6);
+			FMR(fpr.V(dregs[1]), (vd&1)==1 ? FPR7 : FPR6);
+			break;
+		case V_Quad:
+			FMR(fpr.V(dregs[0]), (vd&3)==0 ? FPR7 : FPR6);
+			FMR(fpr.V(dregs[1]), (vd&3)==1 ? FPR7 : FPR6);
+			FMR(fpr.V(dregs[2]), (vd&3)==2 ? FPR7 : FPR6);
+			FMR(fpr.V(dregs[3]), (vd&3)==3 ? FPR7 : FPR6);
+			break;
+		default:
+			_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
+			break;
+		}
+
+		ApplyPrefixD(dregs, sz);
+
+		fpr.ReleaseSpillLocksAndDiscardTemps();
 	}
 
 	void Jit::Comp_Vcmp(MIPSOpcode op) {
@@ -272,15 +1019,89 @@ namespace MIPSComp
 	}
 
 	void Jit::Comp_Viim(MIPSOpcode op) {
-		Comp_Generic(op);
+		// viim: writes the sign-extended 16-bit immediate, converted to float, into one V register.
+		CONDITIONAL_DISABLE;
+
+		// ApplyPrefixD below asserts that the D prefix is known, so fall back
+		// to the generic path otherwise, exactly as Comp_Vfim does.
+		if (js.HasUnknownPrefix() || disablePrefixes) {
+			DISABLE;
+		}
+
+		u8 dreg;
+		GetVectorRegs(&dreg, V_Single, _VT);
+
+		s32 imm = (s32)(s16)(u16)(op & 0xFFFF);
+		fpr.MapRegV(dreg, MAP_DIRTY | MAP_NOINIT);
+		MOVI2F(fpr.V(dreg), (float)imm);
+
+		ApplyPrefixD(&dreg, V_Single);
+		fpr.ReleaseSpillLocksAndDiscardTemps();
 	}
 
 	void Jit::Comp_Vfim(MIPSOpcode op) {
-		Comp_Generic(op);
+		CONDITIONAL_DISABLE;
+
+		if (js.HasUnknownPrefix() || disablePrefixes) {
+			DISABLE;
+		}
+
+		u8 dreg;
+		GetVectorRegs(&dreg, V_Single, _VT);
+
+		FP16 half;
+		half.u = op & 0xFFFF;
+		FP32 fval = half_to_float_fast5(half);
+		fpr.MapRegV(dreg, MAP_DIRTY | MAP_NOINIT);
+		MOVI2F(fpr.V(dreg), fval.f);
+
+		ApplyPrefixD(&dreg, V_Single);
+		fpr.ReleaseSpillLocksAndDiscardTemps();
 	}
 
 	void Jit::Comp_VCrossQuat(MIPSOpcode op) {
-		Comp_Generic(op);
+		// This op does not support prefixes.
+		if (js.HasUnknownPrefix() || disablePrefixes)
+			DISABLE;
+
+		VectorSize sz = GetVecSize(op);
+		int n = GetNumVectorElements(sz);
+
+		u8 sregs[4], tregs[4], dregs[4];
+		GetVectorRegs(sregs, sz, _VS);
+		GetVectorRegs(tregs, sz, _VT);
+		GetVectorRegs(dregs, sz, _VD);
+
+		// Map everything into registers.
+		fpr.MapRegsAndSpillLockV(sregs, sz, 0);
+		fpr.MapRegsAndSpillLockV(tregs, sz, 0);
+
+		if (sz == V_Triple) {
+			int temp3 = fpr.GetTempV();
+			fpr.MapRegV(temp3, MAP_DIRTY | MAP_NOINIT);
+			// Cross product vcrsp.t: each component is one multiply followed by a fused multiply-subtract on the same register.
+
+			// Compute X from s1*t2 and s2*t1. NOTE(review): confirm the emitter's FMSUBS operand order yields the intended sign.
+			FMULS(FPR6, fpr.V(sregs[1]), fpr.V(tregs[2]));
+			FMSUBS(FPR6, fpr.V(sregs[2]), fpr.V(tregs[1]), FPR6);
+
+			// Compute Y from s2*t0 and s0*t2; the accumulator has to be FPR7 (this component), not FPR6 (X).
+			FMULS(FPR7, fpr.V(sregs[2]), fpr.V(tregs[0]));
+			FMSUBS(FPR7, fpr.V(sregs[0]), fpr.V(tregs[2]), FPR7);
+
+			// Compute Z from s0*t1 and s1*t0; the accumulator has to be temp3 (this component), not FPR6 (X).
+			FMULS(fpr.V(temp3), fpr.V(sregs[0]), fpr.V(tregs[1]));
+			FMSUBS(fpr.V(temp3), fpr.V(sregs[1]), fpr.V(tregs[0]), fpr.V(temp3));
+
+			fpr.MapRegsAndSpillLockV(dregs, V_Triple, MAP_DIRTY | MAP_NOINIT);
+			FMR(fpr.V(dregs[0]), FPR6);
+			FMR(fpr.V(dregs[1]), FPR7);
+			FMR(fpr.V(dregs[2]), fpr.V(temp3));
+		} else if (sz == V_Quad) {
+			// Quaternion product vqmul.q untested
+			DISABLE;
+		}
+
+		fpr.ReleaseSpillLocksAndDiscardTemps();
 	}
 
 	void Jit::Comp_Vsge(MIPSOpcode op) {
 		Comp_Generic(op);
diff --git a/Core/MIPS/PPC/PpcJit.h b/Core/MIPS/PPC/PpcJit.h
index d2a96e2bba..2305fd84b5 100644
--- a/Core/MIPS/PPC/PpcJit.h
+++ b/Core/MIPS/PPC/PpcJit.h
@@ -236,6 +236,21 @@ namespace MIPSComp
 		// Utilities to reduce duplicated code
 		void CompImmLogic(int rs, int rt, u32 uimm, void (PPCXEmitter::*arith)(PPCReg Rd, PPCReg Ra, unsigned short imm), u32 (*eval)(u32 a, u32 b));
 		void CompType3(int rd, int rs, int rt, void (PPCXEmitter::*arithOp2)(PPCReg Rd, PPCReg Ra, PPCReg Rb), u32 (*eval)(u32 a, u32 b), bool isSub = false);
+
+
+		void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz);
+		void ApplyPrefixD(const u8 *vregs, VectorSize sz);
+		void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) {
+			_assert_(js.prefixSFlag &
PpcJitState::PREFIX_KNOWN); + GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixS, sz); + } + void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) { + _assert_(js.prefixTFlag & PpcJitState::PREFIX_KNOWN); + GetVectorRegs(regs, sz, vectorReg); + ApplyPrefixST(regs, js.prefixT, sz); + } + void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg); // flush regs void FlushAll(); diff --git a/Core/MIPS/PPC/PpcRegCacheVPU.cpp b/Core/MIPS/PPC/PpcRegCacheVPU.cpp new file mode 100644 index 0000000000..905ce47db4 --- /dev/null +++ b/Core/MIPS/PPC/PpcRegCacheVPU.cpp @@ -0,0 +1,313 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ +#include +#include "PpcRegCacheVPU.h" +#include "PpcJit.h" + +using namespace PpcGen; + +PpcRegCacheVPU::PpcRegCacheVPU(MIPSState *mips, MIPSComp::PpcJitOptions *options) : mips_(mips), options_(options) { +} + +void PpcRegCacheVPU::Init(PPCXEmitter *emitter) { + emit_ = emitter; +} + +void PpcRegCacheVPU::Start(MIPSAnalyst::AnalysisResults &stats) { + for (int i = 0; i < NUM_PPCVPUREG; i++) { + ar[i].mipsReg = -1; + ar[i].isDirty = false; + } + for (int i = 0; i < NUM_MIPSVPUREG; i++) { + mr[i].loc = ML_MEM; + mr[i].reg = INVALID_REG; + mr[i].imm = -1; + mr[i].spillLock = false; + } +} + +const PPCReg *PpcRegCacheVPU::GetMIPSAllocationOrder(int &count) { + // Note that R0 is reserved as scratch for now. + // R1 could be used as it's only used for scratch outside "regalloc space" now. + // R12 is also potentially usable. + // R4-R7 are registers we could use for static allocation or downcount. + // R8 is used to preserve flags in nasty branches. + // R9 and upwards are reserved for jit basics. + if (options_->downcountInRegister) { + static const PPCReg allocationOrder[] = { + /*R14, R15, R16, R17, R18, R19,*/ + R20, R21, R22, R23, R24, R25, + R26, R27, R28, R29, R30, R31, + }; + count = sizeof(allocationOrder) / sizeof(const int); + return allocationOrder; + } else { + static const PPCReg allocationOrder2[] = { + /*R14, R15, R16, R17, R18, R19,*/ + R20, R21, R22, R23, R24, R25, + R26, R27, R28, R29, R30, R31, + }; + count = sizeof(allocationOrder2) / sizeof(const int); + return allocationOrder2; + } +} + +void PpcRegCacheVPU::FlushBeforeCall() { + // R4-R11 are preserved. Others need flushing. + /* + FlushPpcReg(R2); + FlushPpcReg(R3); + FlushPpcReg(R12); + */ +} + +// TODO: Somewhat smarter spilling - currently simply spills the first available, should do +// round robin or FIFO or something. +PPCReg PpcRegCacheVPU::MapReg(MIPSReg mipsReg, int mapFlags) { + // Let's see if it's already mapped. If so we just need to update the dirty flag. 
+ // We don't need to check for ML_NOINIT because we assume that anyone who maps + // with that flag immediately writes a "known" value to the register. + if (mr[mipsReg].loc == ML_PPCREG) { + if (ar[mr[mipsReg].reg].mipsReg != mipsReg) { + ERROR_LOG(HLE, "Register mapping out of sync! %i", mipsReg); + } + if (mapFlags & MAP_DIRTY) { + ar[mr[mipsReg].reg].isDirty = true; + } + return (PPCReg)mr[mipsReg].reg; + } + + // Okay, not mapped, so we need to allocate an ARM register. + + int allocCount; + const PPCReg *allocOrder = GetMIPSAllocationOrder(allocCount); + +allocate: + for (int i = 0; i < allocCount; i++) { + int reg = allocOrder[i]; + + if (ar[reg].mipsReg == -1) { + // That means it's free. Grab it, and load the value into it (if requested). + ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false; + if (!(mapFlags & MAP_NOINIT)) { + if (mr[mipsReg].loc == ML_MEM) { + if (mipsReg != 0) { + emit_->LWZ((PPCReg)reg, CTXREG, GetMipsRegOffset(mipsReg)); + } else { + // If we get a request to load the zero register, at least we won't spend + // time on a memory access... + emit_->MOVI2R((PPCReg)reg, 0); + } + } else if (mr[mipsReg].loc == ML_IMM) { + emit_->MOVI2R((PPCReg)reg, mr[mipsReg].imm); + ar[reg].isDirty = true; // IMM is always dirty. + } + } + ar[reg].mipsReg = mipsReg; + mr[mipsReg].loc = ML_PPCREG; + mr[mipsReg].reg = (PPCReg)reg; + return (PPCReg)reg; + } + } + + // Still nothing. Let's spill a reg and goto 10. + // TODO: Use age or something to choose which register to spill? + // TODO: Spill dirty regs first? or opposite? + int bestToSpill = -1; + for (int i = 0; i < allocCount; i++) { + int reg = allocOrder[i]; + if (ar[reg].mipsReg != -1 && mr[ar[reg].mipsReg].spillLock) + continue; + bestToSpill = reg; + break; + } + + if (bestToSpill != -1) { + // ERROR_LOG(JIT, "Out of registers at PC %08x - spills register %i.", mips_->pc, bestToSpill); + FlushPpcReg((PPCReg)bestToSpill); + goto allocate; + } + + // Uh oh, we have all them spilllocked.... 
+ ERROR_LOG(JIT, "Out of spillable registers at PC %08x!!!", mips_->pc); + return INVALID_REG; +} + +void PpcRegCacheVPU::MapInIn(MIPSReg rd, MIPSReg rs) { + SpillLock(rd, rs); + MapReg(rd); + MapReg(rs); + ReleaseSpillLocks(); +} + +void PpcRegCacheVPU::MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad) { + SpillLock(rd, rs); + bool load = !avoidLoad || rd == rs; + MapReg(rd, MAP_DIRTY | (load ? 0 : MAP_NOINIT)); + MapReg(rs); + ReleaseSpillLocks(); +} + +void PpcRegCacheVPU::MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad) { + SpillLock(rd, rs, rt); + bool load = !avoidLoad || (rd == rs || rd == rt); + MapReg(rd, MAP_DIRTY | (load ? 0 : MAP_NOINIT)); + MapReg(rt); + MapReg(rs); + ReleaseSpillLocks(); +} + +void PpcRegCacheVPU::MapDirtyDirtyInIn(MIPSReg rd1, MIPSReg rd2, MIPSReg rs, MIPSReg rt, bool avoidLoad) { + SpillLock(rd1, rd2, rs, rt); + bool load1 = !avoidLoad || (rd1 == rs || rd1 == rt); + bool load2 = !avoidLoad || (rd2 == rs || rd2 == rt); + MapReg(rd1, MAP_DIRTY | (load1 ? 0 : MAP_NOINIT)); + MapReg(rd2, MAP_DIRTY | (load2 ? 0 : MAP_NOINIT)); + MapReg(rt); + MapReg(rs); + ReleaseSpillLocks(); +} + +void PpcRegCacheVPU::FlushPpcReg(PPCReg r) { + if (ar[r].mipsReg == -1) { + // Nothing to do, reg not mapped. + return; + } + if (ar[r].mipsReg != -1) { + if (ar[r].isDirty && mr[ar[r].mipsReg].loc == ML_PPCREG) + emit_->STW(r, CTXREG, GetMipsRegOffset(ar[r].mipsReg)); + // IMMs won't be in an ARM reg. + mr[ar[r].mipsReg].loc = ML_MEM; + mr[ar[r].mipsReg].reg = INVALID_REG; + mr[ar[r].mipsReg].imm = 0; + } else { + ERROR_LOG(HLE, "Dirty but no mipsreg?"); + } + ar[r].isDirty = false; + ar[r].mipsReg = -1; +} + +void PpcRegCacheVPU::FlushR(MIPSReg r) { + switch (mr[r].loc) { + case ML_IMM: + // IMM is always "dirty". 
+		// Materialize the immediate in the scratch register and store it.
+		emit_->MOVI2R(SREG, mr[r].imm);
+		emit_->STW(SREG, CTXREG, GetMipsRegOffset(r));
+		break;
+
+	case ML_PPCREG:
+		if (mr[r].reg == INVALID_REG) {
+			ERROR_LOG(HLE, "FlushMipsReg: MipsReg had bad PpcReg");
+			// INVALID_REG is a sentinel, not a usable ar[] index. Skip the
+			// write-back and just reset r's bookkeeping below.
+			break;
+		}
+		if (ar[mr[r].reg].isDirty) {
+			emit_->STW((PPCReg)mr[r].reg, CTXREG, GetMipsRegOffset(r));
+			ar[mr[r].reg].isDirty = false;
+		}
+		ar[mr[r].reg].mipsReg = -1;
+		break;
+
+	case ML_MEM:
+		// Already there, nothing to do.
+		break;
+
+	default:
+		//BAD
+		break;
+	}
+	// Whatever the previous location was, r now lives in memory.
+	mr[r].loc = ML_MEM;
+	mr[r].reg = INVALID_REG;
+	mr[r].imm = 0;
+}
+
+// Flushes every MIPS register, then verifies that no PPC register is still
+// recorded as holding one (logging any that are).
+void PpcRegCacheVPU::FlushAll() {
+	for (int i = 0; i < NUM_MIPSVPUREG; i++) {
+		FlushR(i);
+	}
+	// Sanity check
+	for (int i = 0; i < NUM_PPCVPUREG; i++) {
+		if (ar[i].mipsReg != -1) {
+			ERROR_LOG(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
+		}
+	}
+}
+
+// Records immVal as the known value of MIPS register r. If r was cached in a
+// PPC register, that mapping is dropped without writing the old value back.
+// Setting register 0 is logged as an error but still performed.
+void PpcRegCacheVPU::SetImm(MIPSReg r, u32 immVal) {
+	if (r == 0)
+		ERROR_LOG(JIT, "Trying to set immediate %08x to r0", immVal);
+
+	// Zap existing value if cached in a reg
+	if (mr[r].loc == ML_PPCREG) {
+		ar[mr[r].reg].mipsReg = -1;
+		ar[mr[r].reg].isDirty = false;
+	}
+	mr[r].loc = ML_IMM;
+	mr[r].imm = immVal;
+	mr[r].reg = INVALID_REG;
+}
+
+// Returns true when r currently holds a known immediate. Register 0 is always
+// reported as an immediate.
+// NOTE(review): the r == 0 special case looks inherited from the GPR cache
+// (hard-wired zero register) - confirm it is intended for VPU registers.
+bool PpcRegCacheVPU::IsImm(MIPSReg r) const {
+	if (r == 0) return true;
+	return mr[r].loc == ML_IMM;
+}
+
+// Returns the immediate recorded for r (0 for register 0). Asking for a
+// register that is not an immediate is logged, and the stored imm field is
+// returned anyway.
+u32 PpcRegCacheVPU::GetImm(MIPSReg r) const {
+	if (r == 0) return 0;
+	if (mr[r].loc != ML_IMM) {
+		ERROR_LOG(JIT, "Trying to get imm from non-imm register %i", r);
+	}
+	return mr[r].imm;
+}
+
+// Returns the byte offset, relative to CTXREG, used to load/store MIPS
+// register r. Unknown registers are logged and yield offset 0.
+// NOTE(review): r * 4 and the HI/LO cases look copied from the GPR cache -
+// confirm these are the intended context offsets for VPU registers.
+int PpcRegCacheVPU::GetMipsRegOffset(MIPSReg r) {
+	if (r < 32)
+		return r * 4;
+	switch (r) {
+	case MIPSREG_HI:
+		return offsetof(MIPSState, hi);
+	case MIPSREG_LO:
+		return offsetof(MIPSState, lo);
+	}
+	ERROR_LOG(JIT, "bad mips register %i", r);
+	return 0;  // or what?
+}
+
+// Spill-locks up to four MIPS registers. r1 is always locked; r2..r4 are
+// optional and skipped when passed as -1.
+void PpcRegCacheVPU::SpillLock(MIPSReg r1, MIPSReg r2, MIPSReg r3, MIPSReg r4) {
+	const MIPSReg requested[4] = { r1, r2, r3, r4 };
+	for (int slot = 0; slot < 4; slot++) {
+		// Only the optional slots (1..3) honour the -1 "unused" marker.
+		if (slot != 0 && requested[slot] == -1)
+			continue;
+		mr[requested[slot]].spillLock = true;
+	}
+}
+
+// Clears the spill lock on every MIPS register tracked by this cache.
+void PpcRegCacheVPU::ReleaseSpillLocks() {
+	for (VPURegMIPS *entry = mr; entry != mr + NUM_MIPSVPUREG; ++entry)
+		entry->spillLock = false;
+}
+
+// Clears the spill lock on a single MIPS register.
+void PpcRegCacheVPU::ReleaseSpillLock(MIPSReg reg) {
+	VPURegMIPS &entry = mr[reg];
+	entry.spillLock = false;
+}
+
+// Looks up the PPC register that currently holds mipsReg. When the register
+// is not mapped, logs an error and returns INVALID_REG.
+PPCReg PpcRegCacheVPU::R(int mipsReg) {
+	const VPURegMIPS &entry = mr[mipsReg];
+	if (entry.loc != ML_PPCREG) {
+		ERROR_LOG(JIT, "Reg %i not in ppc reg. compilerPC = %08x", mipsReg, compilerPC_);
+		return INVALID_REG;  // BAAAD
+	}
+	return (PPCReg)entry.reg;
+}
diff --git a/Core/MIPS/PPC/PpcRegCacheVPU.h b/Core/MIPS/PPC/PpcRegCacheVPU.h
new file mode 100644
index 0000000000..6ebf8356a6
--- /dev/null
+++ b/Core/MIPS/PPC/PpcRegCacheVPU.h
@@ -0,0 +1,106 @@
+// Copyright (c) 2012- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+
+/**
+PPC reg cache based on arm version
+**/
+
+#pragma once
+
+#include "../MIPS.h"
+#include "../MIPSAnalyst.h"
+#include "ppcEmitter.h"
+#include "Core/MIPS/PPC/PpcRegCache.h"
+
+using namespace PpcGen;
+
+typedef int MIPSReg;
+
+struct VPURegPPC {
+	int mipsReg;  // if -1, no mipsreg attached.
+	bool isDirty;  // Should the register be written back?
+};
+
+// Tracking state for one MIPS-side register: where its current value lives
+// and whether it may be spilled.
+struct VPURegMIPS {
+	// Where is this MIPS register?
+	RegMIPSLoc loc;
+	// Data (only one of these is used, depending on loc. Could make a union).
+	u32 imm;
+	PPCReg reg;  // reg index
+	bool spillLock;  // if true, this register cannot be spilled.
+	// If loc == ML_MEM, it's back in its location in the CPU context struct.
+};
+namespace MIPSComp {
+	struct PpcJitOptions;
+}
+
+// Register cache that maps MIPS registers onto PPC registers for the JIT.
+// It keeps two tables in sync: ar[] (indexed by PPC register) and mr[]
+// (indexed by MIPS register).
+class PpcRegCacheVPU
+{
+public:
+	PpcRegCacheVPU(MIPSState *mips, MIPSComp::PpcJitOptions *options);
+	~PpcRegCacheVPU() {}
+
+	// Init/Start are defined outside this view.
+	// NOTE(review): presumably they reset the cache state - confirm in the .cpp.
+	void Init(PPCXEmitter *emitter);
+	void Start(MIPSAnalyst::AnalysisResults &stats);
+
+	// Protect the PPC register containing a MIPS register from spilling, to ensure that
+	// it's being kept allocated. reg2..reg4 are optional; -1 means "not used".
+	void SpillLock(MIPSReg reg, MIPSReg reg2 = -1, MIPSReg reg3 = -1, MIPSReg reg4 = -1);
+	// Clears the spill lock on one register.
+	void ReleaseSpillLock(MIPSReg reg);
+	// Clears the spill lock on every register.
+	void ReleaseSpillLocks();
+
+	// Immediate tracking: SetImm records a known constant for reg (dropping any
+	// cached PPC register without write-back), IsImm/GetImm query it.
+	void SetImm(MIPSReg reg, u32 immVal);
+	bool IsImm(MIPSReg reg) const;
+	u32 GetImm(MIPSReg reg) const;
+
+	// Returns a PPC register containing the requested MIPS register.
+	// mapFlags is a combination of MAP_DIRTY / MAP_NOINIT. Returns INVALID_REG
+	// when every candidate register is spill-locked.
+	PPCReg MapReg(MIPSReg reg, int mapFlags = 0);
+	// Convenience mappers: "Dirty" arguments are destinations, "In" arguments
+	// are sources. With avoidLoad, a destination's old value is not loaded
+	// unless it aliases one of the sources.
+	void MapInIn(MIPSReg rd, MIPSReg rs);
+	void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true);
+	void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
+	void MapDirtyDirtyInIn(MIPSReg rd1, MIPSReg rd2, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
+	// Frees PPC register r, storing its MIPS register back first if dirty.
+	void FlushPpcReg(PPCReg r);
+	// Writes MIPS register r back to the context and marks it as in memory.
+	void FlushR(MIPSReg r);
+	// NOTE(review): no definition of FlushBeforeCall is visible in this patch
+	// section - confirm it is implemented.
+	void FlushBeforeCall();
+	// Flushes every MIPS register and sanity-checks that nothing stays mapped.
+	void FlushAll();
+
+	// Returns the PPC register currently holding MIPS register preg. Logs an
+	// error and returns INVALID_REG when it is not mapped.
+	PPCReg R(int preg);  // Returns a cached register
+
+	void SetEmitter(PPCXEmitter *emitter) { emit_ = emitter; }
+
+	// For better log output only.
+	void SetCompilerPC(u32 compilerPC) { compilerPC_ = compilerPC; }
+
+	// Byte offset (relative to CTXREG) used to load/store MIPS register r.
+	int GetMipsRegOffset(MIPSReg r);
+
+private:
+	// Returns the ordered list of PPC registers MapReg may allocate; count
+	// receives the number of entries.
+	const PPCReg *GetMIPSAllocationOrder(int &count);
+
+	MIPSState *mips_;
+	MIPSComp::PpcJitOptions *options_;
+	PPCXEmitter *emit_;  // emitter used for the generated loads/stores
+	u32 compilerPC_;     // only used in log messages
+
+	// Table sizes for ar[] and mr[].
+	// NOTE(review): confirm 32 is the intended number of MIPS-side VPU
+	// registers to track.
+	enum {
+		NUM_PPCVPUREG = 128,
+		NUM_MIPSVPUREG = 32,
+	};
+
+	VPURegPPC ar[NUM_PPCVPUREG];    // indexed by PPC register
+	VPURegMIPS mr[NUM_MIPSVPUREG];  // indexed by MIPS register
+};