diff --git a/Core/MIPS/ARM/ArmCompBranch.cpp b/Core/MIPS/ARM/ArmCompBranch.cpp index 77270651a6..89fadae1e9 100644 --- a/Core/MIPS/ARM/ArmCompBranch.cpp +++ b/Core/MIPS/ARM/ArmCompBranch.cpp @@ -538,6 +538,7 @@ void Jit::Comp_Syscall(MIPSOpcode op) // If we're in a delay slot, this is off by one. const int offset = js.inDelaySlot ? -1 : 0; WriteDownCount(offset); + ClearRoundingMode(); js.downcountAmount = -offset; // TODO: Maybe discard v0, v1, and some temps? Definitely at? diff --git a/Core/MIPS/ARM/ArmCompFPU.cpp b/Core/MIPS/ARM/ArmCompFPU.cpp index 58b777f356..f1efb1d4ee 100644 --- a/Core/MIPS/ARM/ArmCompFPU.cpp +++ b/Core/MIPS/ARM/ArmCompFPU.cpp @@ -50,6 +50,7 @@ namespace MIPSComp void Jit::Comp_FPU3op(MIPSOpcode op) { CONDITIONAL_DISABLE; + SetRoundingMode(); int ft = _FT; int fs = _FS; @@ -190,6 +191,9 @@ void Jit::Comp_FPULS(MIPSOpcode op) void Jit::Comp_FPUComp(MIPSOpcode op) { CONDITIONAL_DISABLE; + // TODO: Does this matter here? + SetRoundingMode(); + int opc = op & 0xF; if (opc >= 8) opc -= 8; // alias if (opc == 0) { // f, sf (signalling false) @@ -279,6 +283,7 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { VNEG(fpr.R(fd), fpr.R(fs)); break; case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s + ClearRoundingMode(); fpr.MapDirtyIn(fd, fs); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); break; @@ -293,9 +298,10 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { break; case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s { + ClearRoundingMode(); fpr.MapDirtyIn(fd, fs); VMRS(SCRATCHREG2); - // Assume we're always in round-to-zero mode. + // Assume we're always in round-to-nearest mode. ORR(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(1 << 22)); VMSR(SCRATCHREG1); VCMP(fpr.R(fs), fpr.R(fs)); @@ -310,9 +316,10 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { } case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s { + ClearRoundingMode(); fpr.MapDirtyIn(fd, fs); VMRS(SCRATCHREG2); - // Assume we're always in round-to-zero mode. 
+ // Assume we're always in round-to-nearest mode. ORR(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(2 << 22)); VMSR(SCRATCHREG1); VCMP(fpr.R(fs), fpr.R(fs)); @@ -331,30 +338,13 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { break; case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s fpr.MapDirtyIn(fd, fs); - LDR(SCRATCHREG1, CTXREG, offsetof(MIPSState, fcr31)); - AND(SCRATCHREG1, SCRATCHREG1, Operand2(3)); - // MIPS Rounding Mode: ARM Rounding Mode - // 0: Round nearest 0 - // 1: Round to zero 3 - // 2: Round up (ceil) 1 - // 3: Round down (floor) 2 - CMP(SCRATCHREG1, Operand2(1)); - SetCC(CC_EQ); ADD(SCRATCHREG1, SCRATCHREG1, Operand2(2)); - SetCC(CC_GT); SUB(SCRATCHREG1, SCRATCHREG1, Operand2(1)); - SetCC(CC_AL); - - VMRS(SCRATCHREG2); - // Assume we're always in round-to-zero mode beforehand. - ORR(SCRATCHREG1, SCRATCHREG2, Operand2(SCRATCHREG1, ST_LSL, 22)); - VMSR(SCRATCHREG1); + SetRoundingMode(); VCMP(fpr.R(fs), fpr.R(fs)); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_VS); MOVIU2F(fpr.R(fd), 0x7FFFFFFF, SCRATCHREG1); SetCC(CC_AL); - // Set the rounding mode back. TODO: Keep it? Dirty? - VMSR(SCRATCHREG2); break; default: DISABLE; diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index 219c3c7147..94671e74ab 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -18,6 +18,7 @@ #include "base/logging.h" #include "Common/ChunkFile.h" #include "Core/Reporting.h" +#include "Core/Config.h" #include "Core/Core.h" #include "Core/CoreTiming.h" #include "Core/MemMap.h" @@ -541,6 +542,44 @@ void Jit::WriteDownCountR(ARMReg reg) } } +void Jit::ClearRoundingMode() // Emits code that resets the FPSCR rounding mode, only if SetRoundingMode() emitted a change earlier (js.roundingModeSet). +{ + if (js.roundingModeSet && g_Config.bSetRoundingMode) + { + VMRS(SCRATCHREG2); + // Clear the FPSCR rounding mode bits (22-23) unconditionally, i.e. go back to round-to-nearest (ARM mode 0). 
+ BIC(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(3 << 22)); + VMSR(SCRATCHREG1); + + js.roundingModeSet = false; + } +} + +void Jit::SetRoundingMode() +{ + if (!js.roundingModeSet && g_Config.bSetRoundingMode) + { + LDR(SCRATCHREG1, CTXREG, offsetof(MIPSState, fcr31)); + AND(SCRATCHREG1, SCRATCHREG1, Operand2(3)); + // MIPS Rounding Mode: ARM Rounding Mode + // 0: Round nearest 0 + // 1: Round to zero 3 + // 2: Round up (ceil) 1 + // 3: Round down (floor) 2 + CMP(SCRATCHREG1, Operand2(1)); + SetCC(CC_EQ); ADD(SCRATCHREG1, SCRATCHREG1, Operand2(2)); + SetCC(CC_GT); SUB(SCRATCHREG1, SCRATCHREG1, Operand2(1)); + SetCC(CC_AL); + + VMRS(SCRATCHREG2); + // Assume the rounding mode bits are still zero (round-to-nearest) beforehand, so ORR alone sets the new mode. + ORR(SCRATCHREG1, SCRATCHREG2, Operand2(SCRATCHREG1, ST_LSL, 22)); + VMSR(SCRATCHREG1); + + js.roundingModeSet = true; + } +} + // IDEA - could have a WriteDualExit that takes two destinations and two condition flags, // and just have conditional that set PC "twice". This only works when we fall back to dispatcher // though, as we need to have the SUBS flag set in the end. So with block linking in the mix, @@ -548,6 +587,7 @@ void Jit::WriteDownCountR(ARMReg reg) void Jit::WriteExit(u32 destination, int exit_num) { WriteDownCount(); + ClearRoundingMode(); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; b->exitAddress[exit_num] = destination; @@ -569,6 +609,7 @@ void Jit::WriteExitDestInR(ARMReg Reg) { MovToPC(Reg); WriteDownCount(); + ClearRoundingMode(); // TODO: shouldn't need an indirect branch here... 
B((const void *)dispatcher); } @@ -576,6 +617,7 @@ void Jit::WriteExitDestInR(ARMReg Reg) void Jit::WriteSyscallExit() { WriteDownCount(); + ClearRoundingMode(); B((const void *)dispatcherCheckCoreState); } diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h index 35fc3b83a0..5fd96a4eff 100644 --- a/Core/MIPS/ARM/ArmJit.h +++ b/Core/MIPS/ARM/ArmJit.h @@ -190,6 +190,8 @@ private: void WriteDownCount(int offset = 0); void WriteDownCountR(ARMReg reg); + void ClearRoundingMode(); + void SetRoundingMode(); void MovFromPC(ARMReg r); void MovToPC(ARMReg r);