From bbeb5758b79c70622feb30e685a9a6c2385f1437 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 27 Nov 2014 00:07:17 -0800 Subject: [PATCH] x86jit: Simplify VS() / VSX() usage. --- Core/MIPS/x86/CompVFPU.cpp | 121 +++++++++++++++++----------------- Core/MIPS/x86/RegCacheFPU.cpp | 13 ++-- Core/MIPS/x86/RegCacheFPU.h | 24 +++---- 3 files changed, 79 insertions(+), 79 deletions(-) diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp index 8258569139..8073424094 100644 --- a/Core/MIPS/x86/CompVFPU.cpp +++ b/Core/MIPS/x86/CompVFPU.cpp @@ -384,7 +384,7 @@ void Jit::Comp_SVQ(MIPSOpcode op) safe.SetFar(); OpArg src; if (safe.PrepareRead(src, 16)) { - MOVAPS(fpr.VSX(vregs[0]), safe.NextFastAddress(0)); + MOVAPS(fpr.VSX(vregs), safe.NextFastAddress(0)); } else { // Hmm... probably never happens. } @@ -433,7 +433,7 @@ void Jit::Comp_SVQ(MIPSOpcode op) safe.SetFar(); OpArg dest; if (safe.PrepareWrite(dest, 16)) { - MOVAPS(safe.NextFastAddress(0), fpr.VSX(vregs[0])); + MOVAPS(safe.NextFastAddress(0), fpr.VSX(vregs)); } else { // Hmm... probably never happens. } @@ -488,9 +488,9 @@ void Jit::Comp_VVectorInit(MIPSOpcode op) { if (fpr.TryMapRegsVS(dregs, sz, MAP_NOINIT | MAP_DIRTY)) { if (type == 6) { - XORPS(fpr.VSX(dregs[0]), fpr.VS(dregs[0])); + XORPS(fpr.VSX(dregs), fpr.VS(dregs)); } else if (type == 7) { - MOVAPS(fpr.VSX(dregs[0]), M(&oneOneOneOne)); + MOVAPS(fpr.VSX(dregs), M(&oneOneOneOne)); } else { DISABLE; } @@ -531,9 +531,9 @@ void Jit::Comp_VIdt(MIPSOpcode op) { u8 dregs[4]; GetVectorRegsPrefixD(dregs, sz, _VD); - if (sz == V_Quad && fpr.TryMapRegsVS(dregs, sz, MAP_NOINIT | MAP_DIRTY)) { - int n = vd & 3; - MOVAPD(fpr.VSX(dregs[0]), M(identityMatrix[n])); + if (fpr.TryMapRegsVS(dregs, sz, MAP_NOINIT | MAP_DIRTY)) { + int row = vd & (n - 1); + MOVAPD(fpr.VSX(dregs), M(identityMatrix[row])); ApplyPrefixD(dregs, sz); fpr.ReleaseSpillLocks(); return; @@ -586,42 +586,42 @@ void Jit::Comp_VDot(MIPSOpcode op) { switch (sz) { case V_Pair: if (cpu_info.bSSE4_1) { - MOVAPD(XMM0, fpr.VS(sregs[0])); - DPPS(XMM0, fpr.VS(tregs[0]), 0x31); - MOVAPD(fpr.VSX(dregs[0]), R(XMM0)); + MOVAPD(XMM0, fpr.VS(sregs)); + DPPS(XMM0, fpr.VS(tregs), 0x31); + MOVAPD(fpr.VSX(dregs), R(XMM0)); } else { - MOVAPD(XMM0, fpr.VS(sregs[0])); - MULPS(XMM0, fpr.VS(tregs[0])); + MOVAPD(XMM0, fpr.VS(sregs)); + MULPS(XMM0, fpr.VS(tregs)); MOVAPD(R(XMM1), XMM0); SHUFPS(XMM1, R(XMM0), _MM_SHUFFLE(1, 1, 1, 1)); ADDPS(XMM1, R(XMM0)); - MOVAPD(fpr.VS(dregs[0]), XMM1); + MOVAPD(fpr.VS(dregs), XMM1); } break; case V_Triple: if (cpu_info.bSSE4_1) { - MOVAPD(XMM0, fpr.VS(sregs[0])); - DPPS(XMM0, fpr.VS(tregs[0]), 0x71); - MOVAPD(fpr.VSX(dregs[0]), R(XMM0)); + MOVAPD(XMM0, fpr.VS(sregs)); + DPPS(XMM0, fpr.VS(tregs), 0x71); + MOVAPD(fpr.VSX(dregs), R(XMM0)); } else { - MOVAPD(XMM0, fpr.VS(sregs[0])); - MULPS(XMM0, fpr.VS(tregs[0])); + MOVAPD(XMM0, fpr.VS(sregs)); + MULPS(XMM0, fpr.VS(tregs)); MOVAPD(R(XMM1), XMM0); SHUFPS(XMM1, R(XMM0), _MM_SHUFFLE(3, 2, 1, 1)); ADDSS(XMM1, R(XMM0)); SHUFPS(XMM0, R(XMM1), _MM_SHUFFLE(3, 2, 2, 2)); ADDSS(XMM1, R(XMM0)); - MOVAPD(fpr.VS(dregs[0]), XMM1); + MOVAPD(fpr.VS(dregs), XMM1); } break; case V_Quad: if (cpu_info.bSSE4_1) { - MOVAPD(XMM0, fpr.VS(sregs[0])); - DPPS(XMM0, fpr.VS(tregs[0]), 0xF1); - MOVAPD(fpr.VSX(dregs[0]), R(XMM0)); + MOVAPD(XMM0, fpr.VS(sregs)); + DPPS(XMM0, fpr.VS(tregs), 0xF1); + MOVAPD(fpr.VSX(dregs), R(XMM0)); } else { - MOVAPD(XMM0, fpr.VS(sregs[0])); - MOVAPD(XMM1, fpr.VS(tregs[0])); + MOVAPD(XMM0, fpr.VS(sregs)); + MOVAPD(XMM1, fpr.VS(tregs)); MULPS(XMM0, R(XMM1)); MOVAPD(XMM1, R(XMM0)); SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(2, 3, 0, 1)); @@ -629,7 +629,7 @@ void Jit::Comp_VDot(MIPSOpcode op) { MOVAPD(XMM1, R(XMM0)); SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(0, 1, 2, 3)); ADDSS(XMM0, R(XMM1)); - MOVAPD(fpr.VSX(dregs[0]), R(XMM0)); + MOVAPD(fpr.VSX(dregs), R(XMM0)); } } ApplyPrefixD(dregs, V_Single); @@ -751,17 +751,16 @@ void Jit::Comp_VCrossQuat(MIPSOpcode op) { ); return _mm_shuffle_ps(result, result, _MM_SHUFFLE(3, 0, 2, 1)); */ - MOVAPS(XMM0, fpr.VS(tregs[0])); - MOVAPS(XMM1, fpr.VS(sregs[0])); + MOVAPS(XMM0, fpr.VS(tregs)); + MOVAPS(XMM1, fpr.VS(sregs)); SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 0, 2, 1)); SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(3, 0, 2, 1)); - MULPS(XMM0, fpr.VS(sregs[0])); - MULPS(XMM1, fpr.VS(tregs[0])); + MULPS(XMM0, fpr.VS(sregs)); + MULPS(XMM1, fpr.VS(tregs)); SUBPS(XMM0, R(XMM1)); SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 0, 2, 1)); - MOVAPS(fpr.VS(dregs[0]), XMM0); + MOVAPS(fpr.VS(dregs), XMM0); fpr.ReleaseSpillLocks(); - NOTICE_LOG(JIT, "Crossprod %08x", js.blockStart); return; } @@ -993,46 +992,46 @@ void Jit::Comp_VecDo3(MIPSOpcode op) { { case 2: // vmin // TODO: Mishandles NaN. - MOVAPS(XMM1, fpr.VS(sregs[0])); - MINPS(XMM1, fpr.VS(tregs[0])); - MOVAPS(fpr.VSX(dregs[0]), R(XMM1)); + MOVAPS(XMM1, fpr.VS(sregs)); + MINPS(XMM1, fpr.VS(tregs)); + MOVAPS(fpr.VSX(dregs), R(XMM1)); break; case 3: // vmax // TODO: Mishandles NaN. - MOVAPS(XMM1, fpr.VS(sregs[0])); - MAXPS(XMM1, fpr.VS(tregs[0])); - MOVAPS(fpr.VSX(dregs[0]), R(XMM1)); + MOVAPS(XMM1, fpr.VS(sregs)); + MAXPS(XMM1, fpr.VS(tregs)); + MOVAPS(fpr.VSX(dregs), R(XMM1)); break; case 6: // vsge // TODO: Mishandles NaN. - MOVAPS(XMM1, fpr.VS(sregs[0])); - CMPPS(XMM1, fpr.VS(tregs[0]), CMP_NLT); + MOVAPS(XMM1, fpr.VS(sregs)); + CMPPS(XMM1, fpr.VS(tregs), CMP_NLT); ANDPS(XMM1, M(&oneOneOneOne)); - MOVAPS(fpr.VSX(dregs[0]), R(XMM1)); + MOVAPS(fpr.VSX(dregs), R(XMM1)); break; case 7: // vslt - MOVAPS(XMM1, fpr.VS(sregs[0])); - CMPPS(XMM1, fpr.VS(tregs[0]), CMP_LT); + MOVAPS(XMM1, fpr.VS(sregs)); + CMPPS(XMM1, fpr.VS(tregs), CMP_LT); ANDPS(XMM1, M(&oneOneOneOne)); - MOVAPS(fpr.VSX(dregs[0]), R(XMM1)); + MOVAPS(fpr.VSX(dregs), R(XMM1)); break; } break; } if (opFunc != nullptr) { - if (fpr.VSX(dregs[0]) != fpr.VSX(tregs[0])) { - if (fpr.VSX(dregs[0]) != fpr.VSX(sregs[0])) { - MOVAPS(fpr.VSX(dregs[0]), fpr.VS(sregs[0])); + if (fpr.VSX(dregs) != fpr.VSX(tregs)) { + if (fpr.VSX(dregs) != fpr.VSX(sregs)) { + MOVAPS(fpr.VSX(dregs), fpr.VS(sregs)); } - (this->*opFunc)(fpr.VSX(dregs[0]), fpr.VS(tregs[0])); + (this->*opFunc)(fpr.VSX(dregs), fpr.VS(tregs)); } else if (symmetric) { // We already know d = t. - (this->*opFunc)(fpr.VSX(dregs[0]), fpr.VS(sregs[0])); + (this->*opFunc)(fpr.VSX(dregs), fpr.VS(sregs)); } else { - MOVAPS(XMM1, fpr.VS(sregs[0])); - (this->*opFunc)(XMM1, fpr.VS(tregs[0])); - MOVAPS(fpr.VSX(dregs[0]), R(XMM1)); + MOVAPS(XMM1, fpr.VS(sregs)); + (this->*opFunc)(XMM1, fpr.VS(tregs)); + MOVAPS(fpr.VSX(dregs), R(XMM1)); } } @@ -1633,7 +1632,7 @@ void Jit::Comp_Vx2i(MIPSOpcode op) { } if (fpr.TryMapRegsVS(dregs, outsize, MAP_NOINIT | MAP_DIRTY)) { - MOVAPS(fpr.VSX(dregs[0]), R(XMM0)); + MOVAPS(fpr.VSX(dregs), R(XMM0)); } else { // Done! TODO: The rest of this should be possible to extract into a function. fpr.MapRegsV(dregs, outsize, MAP_NOINIT | MAP_DIRTY); @@ -1793,7 +1792,7 @@ void Jit::Comp_Vcst(MIPSOpcode op) { if (fpr.TryMapRegsVS(dregs, sz, MAP_NOINIT | MAP_DIRTY)) { SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0,0,0,0)); - MOVAPS(fpr.VS(dregs[0]), XMM0); + MOVAPS(fpr.VS(dregs), XMM0); fpr.ReleaseSpillLocks(); return; } @@ -2010,17 +2009,17 @@ void Jit::Comp_VV2Op(MIPSOpcode op) { if (canSIMD && fpr.TryMapDirtyInVS(dregs, sz, sregs, sz)) { switch ((op >> 16) & 0x1f) { case 0: // vmov - MOVAPS(fpr.VSX(dregs[0]), fpr.VS(sregs[0])); + MOVAPS(fpr.VSX(dregs), fpr.VS(sregs)); break; case 1: // vabs if (dregs[0] != sregs[0]) - MOVAPS(fpr.VSX(dregs[0]), fpr.VS(sregs[0])); - ANDPS(fpr.VSX(dregs[0]), M(&noSignMask)); + MOVAPS(fpr.VSX(dregs), fpr.VS(sregs)); + ANDPS(fpr.VSX(dregs), M(&noSignMask)); break; case 2: // vneg if (dregs[0] != sregs[0]) - MOVAPS(fpr.VSX(dregs[0]), fpr.VS(sregs[0])); - XORPS(fpr.VSX(dregs[0]), M(&signBitAll)); + MOVAPS(fpr.VSX(dregs), fpr.VS(sregs)); + XORPS(fpr.VSX(dregs), M(&signBitAll)); break; } ApplyPrefixD(dregs, sz); @@ -2396,13 +2395,13 @@ void Jit::Comp_VScl(MIPSOpcode op) { GetVectorRegsPrefixD(dregs, sz, _VD); if (fpr.TryMapDirtyInInVS(dregs, sz, sregs, sz, &scale, V_Single, true)) { - MOVSS(XMM0, fpr.VS(scale)); + MOVSS(XMM0, fpr.VS(&scale)); if (sz != V_Single) SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); if (dregs[0] != sregs[0]) { - MOVAPS(fpr.VSX(dregs[0]), fpr.VS(sregs[0])); + MOVAPS(fpr.VSX(dregs), fpr.VS(sregs)); } - MULPS(fpr.VSX(dregs[0]), R(XMM0)); + MULPS(fpr.VSX(dregs), R(XMM0)); ApplyPrefixD(dregs, sz); fpr.ReleaseSpillLocks(); return; diff --git a/Core/MIPS/x86/RegCacheFPU.cpp b/Core/MIPS/x86/RegCacheFPU.cpp index aca5e75244..0852d2649c 100644 --- a/Core/MIPS/x86/RegCacheFPU.cpp +++ b/Core/MIPS/x86/RegCacheFPU.cpp @@ -122,11 +122,12 @@ bool FPURegCache::IsMappedVS(const u8 *v, VectorSize vsz) { return false; // And make sure the rest are mapped to the same reg in the right positions. - X64Reg xr = VSX(v[0]); + X64Reg xr = VSX(v); for (int i = 1; i < n; ++i) { - if (!IsMappedVS(v[i]) || VSX(v[i]) != xr) + u8 vi = v[i]; + if (!IsMappedVS(vi) || VSX(&vi) != xr) return false; - if (vregs[v[i]].lane != i + 1) + if (vregs[vi].lane != i + 1) return false; } // TODO: Optimize this case? It happens. @@ -203,7 +204,7 @@ bool FPURegCache::TryMapRegsVS(const u8 *v, VectorSize vsz, int flags) { if (IsMappedVS(v, vsz)) { // Already mapped then, perfect. Just mark dirty. if ((flags & MAP_DIRTY) != 0) - xregs[VSX(v[0])].dirty = true; + xregs[VSX(v)].dirty = true; return true; } @@ -215,7 +216,7 @@ bool FPURegCache::TryMapRegsVS(const u8 *v, VectorSize vsz, int flags) { MapRegV(v[0], flags); vregs[v[0]].lane = 1; if ((flags & MAP_DIRTY) != 0) - xregs[VSX(v[0])].dirty = true; + xregs[VSX(v)].dirty = true; Invariant(); return true; } @@ -433,7 +434,7 @@ void FPURegCache::SimpleRegsV(const u8 *v, MatrixSize msz, int flags) { void FPURegCache::SimpleRegV(const u8 v, int flags) { MIPSCachedFPReg &vr = vregs[v]; // Special optimization: if it's in a single simd, we can keep it there. - if (vr.lane == 1 && xregs[VSX(v)].mipsRegs[1] == -1) { + if (vr.lane == 1 && xregs[VSX(&v)].mipsRegs[1] == -1) { // Just change the lane to 0. vr.lane = 0; } else if (vr.lane != 0) { diff --git a/Core/MIPS/x86/RegCacheFPU.h b/Core/MIPS/x86/RegCacheFPU.h index d6bec04d69..36b398b3ef 100644 --- a/Core/MIPS/x86/RegCacheFPU.h +++ b/Core/MIPS/x86/RegCacheFPU.h @@ -128,10 +128,10 @@ public: PanicAlert("SIMD reg %d used as V reg (use VS instead)", vreg); return vregs[vreg].location; } - const OpArg &VS(int vreg) const { - if (vregs[vreg].lane == 0) - PanicAlert("V reg %d used as VS reg (use V instead)", vreg); - return vregs[vreg].location; + const OpArg &VS(const u8 *vs) const { + if (vregs[vs[0]].lane == 0) + PanicAlert("V reg %d used as VS reg (use V instead)", vs[0]); + return vregs[vs[0]].location; } X64Reg RX(int freg) const { @@ -150,12 +150,12 @@ public: return (X64Reg)-1; } - X64Reg VSX(int vreg) const { - if (vregs[vreg].lane == 0) - PanicAlert("V reg %d used as VS reg (use VX instead)", vreg); - if (vregs[vreg].away && vregs[vreg].location.IsSimpleReg()) - return vregs[vreg].location.GetSimpleReg(); - PanicAlert("Not so simple - v%i", vreg); + X64Reg VSX(const u8 *vs) const { + if (vregs[vs[0]].lane == 0) + PanicAlert("V reg %d used as VS reg (use VX instead)", vs[0]); + if (vregs[vs[0]].away && vregs[vs[0]].location.IsSimpleReg()) + return vregs[vs[0]].location.GetSimpleReg(); + PanicAlert("Not so simple - v%i", vs[0]); return (X64Reg)-1; } @@ -173,8 +173,8 @@ public: bool IsMappedV(int v) { return vregs[v].lane == 0 && V(v).IsSimpleReg(); } - bool IsMappedVS(int v) { - return vregs[v].lane != 0 && VS(v).IsSimpleReg(); + bool IsMappedVS(u8 v) { + return vregs[v].lane != 0 && VS(&v).IsSimpleReg(); } bool IsMappedVS(const u8 *v, VectorSize vsz); bool CanMapVS(const u8 *v, VectorSize vsz);