From 185d4db0810b1ca46e840aad5c62c206bda63177 Mon Sep 17 00:00:00 2001 From: Bovine Date: Sat, 3 Jan 2015 14:48:54 -0700 Subject: [PATCH 1/3] Fix simd vmmul transpose optimizations. Need to ensure S has been written back before transposing it or we'll end up writing back S'. --- Core/MIPS/x86/CompVFPU.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp index 3122ecbfd9..62284e2d63 100644 --- a/Core/MIPS/x86/CompVFPU.cpp +++ b/Core/MIPS/x86/CompVFPU.cpp @@ -2638,18 +2638,15 @@ void Jit::Comp_Vmmul(MIPSOpcode op) { bool transposeDest = false; bool transposeS = false; - // Apparently not reliable enough yet... monster hunter hd breaks - if (false) { - if ((vd & 0x20) && sz == M_4x4) { - vd ^= 0x20; - transposeDest = true; - } + if ((vd & 0x20) && sz == M_4x4) { + vd ^= 0x20; + transposeDest = true; + } - // Our algorithm needs a transposed S (which is the usual). - if (!(vs & 0x20) && sz == M_4x4) { - vs ^= 0x20; - transposeS = true; - } + // Our algorithm needs a transposed S (which is the usual). + if (!(vs & 0x20) && sz == M_4x4) { + vs ^= 0x20; + transposeS = true; } // The T matrix we will address individually. @@ -2666,6 +2663,9 @@ void Jit::Comp_Vmmul(MIPSOpcode op) { // Map all of S's columns into registers. for (int i = 0; i < n; i++) { + if (transposeS){ + fpr.StoreFromRegisterV(scols[i]); + } GetVectorRegs(scol[i], vsz, scols[i]); fpr.MapRegsVS(scol[i], vsz, 0); fpr.SpillLockV(scols[i], vsz); From 0fdebdc1caab4d91a34c785c6031b9f0ce0a6c12 Mon Sep 17 00:00:00 2001 From: Bovine Date: Sat, 3 Jan 2015 16:54:31 -0700 Subject: [PATCH 2/3] Fix discardVS. It's not valid for non-away values to have a lane. --- Core/MIPS/x86/RegCacheFPU.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Core/MIPS/x86/RegCacheFPU.cpp b/Core/MIPS/x86/RegCacheFPU.cpp index cf8b2a49cc..f07ec98fd1 100644 --- a/Core/MIPS/x86/RegCacheFPU.cpp +++ b/Core/MIPS/x86/RegCacheFPU.cpp @@ -753,6 +753,7 @@ void FPURegCache::DiscardVS(int vreg) { regs[mr].location = GetDefaultLocation(mr); regs[mr].away = false; regs[mr].tempLocked = false; + regs[mr].lane = 0; } xregs[xr].mipsRegs[i] = -1; } From 54c7a123065a514fd4169f4c5fa2c8c306ddc70f Mon Sep 17 00:00:00 2001 From: Bovine Date: Sat, 3 Jan 2015 16:58:03 -0700 Subject: [PATCH 3/3] Fix simd vmmul transpose optimizations. Yep, gotta discard the S matrix when we're done with it. --- Core/MIPS/x86/CompVFPU.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp index 62284e2d63..c066846f11 100644 --- a/Core/MIPS/x86/CompVFPU.cpp +++ b/Core/MIPS/x86/CompVFPU.cpp @@ -2726,6 +2726,11 @@ void Jit::Comp_Vmmul(MIPSOpcode op) { #endif MOVAPS(fpr.VS(dcol), XMM1); } + if (transposeS){ + for (int i = 0; i < n; i++){ + fpr.DiscardVS(scols[i]); + } + } #ifndef _M_X64 fpr.ReleaseSpillLocks();