Merge pull request #7261 from hilesaz/master

Fix SIMD vmmul transpose optimizations.
This commit is contained in:
Henrik Rydgård 2015-01-06 11:59:14 +01:00
commit bb1d571493
2 changed files with 17 additions and 11 deletions

View file

@@ -2638,18 +2638,15 @@ void Jit::Comp_Vmmul(MIPSOpcode op) {
bool transposeDest = false;
bool transposeS = false;
// Apparently not reliable enough yet... monster hunter hd breaks
if (false) {
if ((vd & 0x20) && sz == M_4x4) {
vd ^= 0x20;
transposeDest = true;
}
if ((vd & 0x20) && sz == M_4x4) {
vd ^= 0x20;
transposeDest = true;
}
// Our algorithm needs a transposed S (which is the usual).
if (!(vs & 0x20) && sz == M_4x4) {
vs ^= 0x20;
transposeS = true;
}
// Our algorithm needs a transposed S (which is the usual).
if (!(vs & 0x20) && sz == M_4x4) {
vs ^= 0x20;
transposeS = true;
}
// The T matrix we will address individually.
@@ -2666,6 +2663,9 @@ void Jit::Comp_Vmmul(MIPSOpcode op) {
// Map all of S's columns into registers.
for (int i = 0; i < n; i++) {
if (transposeS){
fpr.StoreFromRegisterV(scols[i]);
}
GetVectorRegs(scol[i], vsz, scols[i]);
fpr.MapRegsVS(scol[i], vsz, 0);
fpr.SpillLockV(scols[i], vsz);
@@ -2726,6 +2726,11 @@ void Jit::Comp_Vmmul(MIPSOpcode op) {
#endif
MOVAPS(fpr.VS(dcol), XMM1);
}
if (transposeS){
for (int i = 0; i < n; i++){
fpr.DiscardVS(scols[i]);
}
}
#ifndef _M_X64
fpr.ReleaseSpillLocks();

View file

@@ -757,6 +757,7 @@ void FPURegCache::DiscardVS(int vreg) {
regs[mr].location = GetDefaultLocation(mr);
regs[mr].away = false;
regs[mr].tempLocked = false;
regs[mr].lane = 0;
}
xregs[xr].mipsRegs[i] = -1;
}