From dc91dc1ce83c9be8224a2502107dadf72a44b2b9 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Thu, 21 Aug 2014 23:37:04 -0700 Subject: [PATCH 1/6] x86jit: Support fpu rounding modes for mul, etc. Fixes Gods Eater Burst loading PSP savedata, but can no longer load old savedata. --- Core/Config.cpp | 1 + Core/Config.h | 1 + Core/MIPS/JitCommon/JitState.h | 5 +++- Core/MIPS/x86/CompBranch.cpp | 1 + Core/MIPS/x86/CompFPU.cpp | 4 +++ Core/MIPS/x86/Jit.cpp | 47 ++++++++++++++++++++++++++++++++++ Core/MIPS/x86/Jit.h | 2 ++ headless/Headless.cpp | 1 + 8 files changed, 61 insertions(+), 1 deletion(-) diff --git a/Core/Config.cpp b/Core/Config.cpp index 73135f1a82..7782dfadac 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -324,6 +324,7 @@ static ConfigSetting cpuSettings[] = { ConfigSetting("FastMemoryAccess", &g_Config.bFastMemory, true), ReportedConfigSetting("FuncReplacements", &g_Config.bFuncReplacements, true), ReportedConfigSetting("CPUSpeed", &g_Config.iLockedCPUSpeed, 0), + ReportedConfigSetting("SetRoundingMode", &g_Config.bSetRoundingMode, true), ConfigSetting(false), }; diff --git a/Core/Config.h b/Core/Config.h index 9a87a7c952..3afc93ff13 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -86,6 +86,7 @@ public: bool bCheckForNewVersion; bool bForceLagSync; bool bFuncReplacements; + bool bSetRoundingMode; // Definitely cannot be changed while game is running. 
bool bSeparateCPUThread; diff --git a/Core/MIPS/JitCommon/JitState.h b/Core/MIPS/JitCommon/JitState.h index ab50648775..d51450addb 100644 --- a/Core/MIPS/JitCommon/JitState.h +++ b/Core/MIPS/JitCommon/JitState.h @@ -58,7 +58,8 @@ namespace MIPSComp { : startDefaultPrefix(true), prefixSFlag(PREFIX_UNKNOWN), prefixTFlag(PREFIX_UNKNOWN), - prefixDFlag(PREFIX_UNKNOWN) {} + prefixDFlag(PREFIX_UNKNOWN), + roundingModeSet(false) {} u32 compilerPC; u32 blockStart; @@ -81,6 +82,8 @@ namespace MIPSComp { PrefixState prefixTFlag; PrefixState prefixDFlag; + bool roundingModeSet; + void PrefixStart() { if (startDefaultPrefix) { EatPrefix(); diff --git a/Core/MIPS/x86/CompBranch.cpp b/Core/MIPS/x86/CompBranch.cpp index 2a2db6a442..24fcd3dcfe 100644 --- a/Core/MIPS/x86/CompBranch.cpp +++ b/Core/MIPS/x86/CompBranch.cpp @@ -681,6 +681,7 @@ void Jit::Comp_Syscall(MIPSOpcode op) // If we're in a delay slot, this is off by one. const int offset = js.inDelaySlot ? -1 : 0; WriteDowncount(offset); + ClearRoundingMode(); js.downcountAmount = -offset; // Skip the CallSyscall where possible. diff --git a/Core/MIPS/x86/CompFPU.cpp b/Core/MIPS/x86/CompFPU.cpp index 54bff6728a..e9dc0dec9a 100644 --- a/Core/MIPS/x86/CompFPU.cpp +++ b/Core/MIPS/x86/CompFPU.cpp @@ -79,6 +79,7 @@ void Jit::CompFPTriArith(MIPSOpcode op, void (XEmitter::*arith)(X64Reg reg, OpAr void Jit::Comp_FPU3op(MIPSOpcode op) { CONDITIONAL_DISABLE; + SetRoundingMode(); switch (op & 0x3f) { case 0: CompFPTriArith(op, &XEmitter::ADDSS, false); break; //F(fd) = F(fs) + F(ft); //add @@ -173,6 +174,8 @@ void Jit::CompFPComp(int lhs, int rhs, u8 compare, bool allowNaN) void Jit::Comp_FPUComp(MIPSOpcode op) { CONDITIONAL_DISABLE; + // TODO: Does this matter here? 
+ SetRoundingMode(); int fs = _FS; int ft = _FT; @@ -357,6 +360,7 @@ void Jit::Comp_mxc1(MIPSOpcode op) case 6: //currentMIPS->WriteFCR(fs, R(rt)); break; //ctc1 if (fs == 31) { + ClearRoundingMode(); if (gpr.IsImm(rt)) { gpr.SetImm(MIPS_REG_FPCOND, (gpr.GetImm(rt) >> 23) & 1); MOV(32, M(&mips_->fcr31), Imm32(gpr.GetImm(rt) & 0x0181FFFF)); diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index 74cfd97a6a..38b7c6c338 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -211,6 +211,49 @@ void Jit::WriteDowncount(int offset) SUB(32, M(&currentMIPS->downcount), downcount > 127 ? Imm32(downcount) : Imm8(downcount)); } +void Jit::ClearRoundingMode() +{ + if (js.roundingModeSet && g_Config.bSetRoundingMode) + { + STMXCSR(M(&currentMIPS->temp)); + // Clear the rounding mode bits back to 0. + AND(32, M(&currentMIPS->temp), Imm32(~(3 << 13))); + LDMXCSR(M(&currentMIPS->temp)); + + js.roundingModeSet = false; + } +} + +void Jit::SetRoundingMode() +{ + if (!js.roundingModeSet && g_Config.bSetRoundingMode) + { + MOV(32, R(EAX), M(&mips_->fcr31)); + AND(32, R(EAX), Imm8(3)); + + // If it's 0, we don't actually bother setting. This is the most common. + // We always use nearest as the default rounding mode. + FixupBranch skip = J_CC(CC_Z); + + STMXCSR(M(&currentMIPS->temp)); + + // The MIPS bits don't correspond exactly, so we have to adjust. 
+ // 0 -> 0 (skip), 1 -> 3, 2 -> 2 (skip2), 3 -> 1 + CMP(32, R(EAX), Imm8(2)); + FixupBranch skip2 = J_CC(CC_Z); + XOR(32, R(EAX), Imm8(2)); + SetJumpTarget(skip2); + + SHL(32, R(EAX), Imm8(13)); + OR(32, M(&currentMIPS->temp), R(EAX)); + LDMXCSR(M(&currentMIPS->temp)); + + SetJumpTarget(skip); + + js.roundingModeSet = true; + } +} + void Jit::ClearCache() { blocks.Clear(); @@ -553,6 +596,7 @@ void Jit::WriteExit(u32 destination, int exit_num) } WriteDowncount(); + ClearRoundingMode(); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; @@ -589,6 +633,7 @@ void Jit::WriteExitDestInReg(X64Reg reg) } WriteDowncount(); + ClearRoundingMode(); // Validate the jump to avoid a crash? if (!g_Config.bFastMemory) @@ -627,6 +672,7 @@ void Jit::WriteExitDestInReg(X64Reg reg) void Jit::WriteSyscallExit() { WriteDowncount(); + ClearRoundingMode(); if (js.afterOp & JitState::AFTER_MEMCHECK_CLEANUP) { ABI_CallFunction(&JitMemCheckCleanup); } @@ -646,6 +692,7 @@ bool Jit::CheckJitBreakpoint(u32 addr, int downcountOffset) CMP(32, R(EAX), Imm32(0)); FixupBranch skip = J_CC(CC_Z); WriteDowncount(downcountOffset); + ClearRoundingMode(); // Just to fix the stack. LOAD_FLAGS; JMP(asm_.dispatcherCheckCoreState, true); diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h index 605015703a..0f83c0e87f 100644 --- a/Core/MIPS/x86/Jit.h +++ b/Core/MIPS/x86/Jit.h @@ -181,6 +181,8 @@ private: void FlushAll(); void FlushPrefixV(); void WriteDowncount(int offset = 0); + void ClearRoundingMode(); + void SetRoundingMode(); bool ReplaceJalTo(u32 dest); // See CompileDelaySlotFlags for flags. 
void CompileDelaySlot(int flags, RegCacheState *state = NULL); diff --git a/headless/Headless.cpp b/headless/Headless.cpp index db2b4c6914..58ba4d6ac2 100644 --- a/headless/Headless.cpp +++ b/headless/Headless.cpp @@ -346,6 +346,7 @@ int main(int argc, const char* argv[]) g_Config.bSoftwareSkinning = true; g_Config.bVertexDecoderJit = true; g_Config.bBlockTransferGPU = true; + g_Config.bSetRoundingMode = true; #ifdef _WIN32 InitSysDirectories(); From ab13b3648419380df83793c15c22796545f657ba Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 22 Aug 2014 00:01:06 -0700 Subject: [PATCH 2/6] x86jit: Implement cvt.w.s. Not really used that often, anyway, but easy enough and good for testing that we set the rounding mode correctly. --- Core/MIPS/x86/CompFPU.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/Core/MIPS/x86/CompFPU.cpp b/Core/MIPS/x86/CompFPU.cpp index e9dc0dec9a..d50f26b671 100644 --- a/Core/MIPS/x86/CompFPU.cpp +++ b/Core/MIPS/x86/CompFPU.cpp @@ -294,10 +294,33 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { MOVSS(fpr.R(fd), XMM0); break; + case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s + { + SetRoundingMode(); + fpr.SpillLock(fs, fd); + fpr.StoreFromRegister(fd); + CVTSS2SI(EAX, fpr.R(fs)); + + // Did we get an indefinite integer value? + CMP(32, R(EAX), Imm32(0x80000000)); + FixupBranch skip = J_CC(CC_NE); + MOVSS(XMM0, fpr.R(fs)); + XORPS(XMM1, R(XMM1)); + CMPSS(XMM0, R(XMM1), CMP_LT); + + // At this point, -inf = 0xffffffff, inf/nan = 0x00000000. + // We want -inf to be 0x80000000 inf/nan to be 0x7fffffff, so we flip those bits. 
+ MOVD_xmm(R(EAX), XMM0); + XOR(32, R(EAX), Imm32(0x7fffffff)); + + SetJumpTarget(skip); + MOV(32, fpr.R(fd), R(EAX)); + } + break; + case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s - case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s default: DISABLE; return; From 1fcbb7bbd45b81aecac684f2a7176a17d3cea5c2 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 22 Aug 2014 00:32:01 -0700 Subject: [PATCH 3/6] armjit: Respect the rounding mode for mul/etc. --- Core/MIPS/ARM/ArmCompBranch.cpp | 1 + Core/MIPS/ARM/ArmCompFPU.cpp | 30 ++++++++--------------- Core/MIPS/ARM/ArmJit.cpp | 42 +++++++++++++++++++++++++++++++++ Core/MIPS/ARM/ArmJit.h | 2 ++ 4 files changed, 55 insertions(+), 20 deletions(-) diff --git a/Core/MIPS/ARM/ArmCompBranch.cpp b/Core/MIPS/ARM/ArmCompBranch.cpp index 77270651a6..89fadae1e9 100644 --- a/Core/MIPS/ARM/ArmCompBranch.cpp +++ b/Core/MIPS/ARM/ArmCompBranch.cpp @@ -538,6 +538,7 @@ void Jit::Comp_Syscall(MIPSOpcode op) // If we're in a delay slot, this is off by one. const int offset = js.inDelaySlot ? -1 : 0; WriteDownCount(offset); + ClearRoundingMode(); js.downcountAmount = -offset; // TODO: Maybe discard v0, v1, and some temps? Definitely at? diff --git a/Core/MIPS/ARM/ArmCompFPU.cpp b/Core/MIPS/ARM/ArmCompFPU.cpp index 58b777f356..f1efb1d4ee 100644 --- a/Core/MIPS/ARM/ArmCompFPU.cpp +++ b/Core/MIPS/ARM/ArmCompFPU.cpp @@ -50,6 +50,7 @@ namespace MIPSComp void Jit::Comp_FPU3op(MIPSOpcode op) { CONDITIONAL_DISABLE; + SetRoundingMode(); int ft = _FT; int fs = _FS; @@ -190,6 +191,9 @@ void Jit::Comp_FPULS(MIPSOpcode op) void Jit::Comp_FPUComp(MIPSOpcode op) { CONDITIONAL_DISABLE; + // TODO: Does this matter here? 
+ SetRoundingMode(); + int opc = op & 0xF; if (opc >= 8) opc -= 8; // alias if (opc == 0) { // f, sf (signalling false) @@ -279,6 +283,7 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { VNEG(fpr.R(fd), fpr.R(fs)); break; case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s + ClearRoundingMode(); fpr.MapDirtyIn(fd, fs); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); break; @@ -293,9 +298,10 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { break; case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s { + ClearRoundingMode(); fpr.MapDirtyIn(fd, fs); VMRS(SCRATCHREG2); - // Assume we're always in round-to-zero mode. + // Assume we're always in round-to-nearest mode. ORR(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(1 << 22)); VMSR(SCRATCHREG1); VCMP(fpr.R(fs), fpr.R(fs)); @@ -310,9 +316,10 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { } case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s { + ClearRoundingMode(); fpr.MapDirtyIn(fd, fs); VMRS(SCRATCHREG2); - // Assume we're always in round-to-zero mode. + // Assume we're always in round-to-nearest mode. ORR(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(2 << 22)); VMSR(SCRATCHREG1); VCMP(fpr.R(fs), fpr.R(fs)); @@ -331,30 +338,13 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { break; case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s fpr.MapDirtyIn(fd, fs); - LDR(SCRATCHREG1, CTXREG, offsetof(MIPSState, fcr31)); - AND(SCRATCHREG1, SCRATCHREG1, Operand2(3)); - // MIPS Rounding Mode: ARM Rounding Mode - // 0: Round nearest 0 - // 1: Round to zero 3 - // 2: Round up (ceil) 1 - // 3: Round down (floor) 2 - CMP(SCRATCHREG1, Operand2(1)); - SetCC(CC_EQ); ADD(SCRATCHREG1, SCRATCHREG1, Operand2(2)); - SetCC(CC_GT); SUB(SCRATCHREG1, SCRATCHREG1, Operand2(1)); - SetCC(CC_AL); - - VMRS(SCRATCHREG2); - // Assume we're always in round-to-zero mode beforehand. 
- ORR(SCRATCHREG1, SCRATCHREG2, Operand2(SCRATCHREG1, ST_LSL, 22)); - VMSR(SCRATCHREG1); + SetRoundingMode(); VCMP(fpr.R(fs), fpr.R(fs)); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_VS); MOVIU2F(fpr.R(fd), 0x7FFFFFFF, SCRATCHREG1); SetCC(CC_AL); - // Set the rounding mode back. TODO: Keep it? Dirty? - VMSR(SCRATCHREG2); break; default: DISABLE; diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index 219c3c7147..94671e74ab 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -18,6 +18,7 @@ #include "base/logging.h" #include "Common/ChunkFile.h" #include "Core/Reporting.h" +#include "Core/Config.h" #include "Core/Core.h" #include "Core/CoreTiming.h" #include "Core/MemMap.h" @@ -541,6 +542,44 @@ void Jit::WriteDownCountR(ARMReg reg) } } +void Jit::ClearRoundingMode() +{ + if (js.roundingModeSet && g_Config.bSetRoundingMode) + { + VMRS(SCRATCHREG2); + // Assume we're always in round-to-nearest mode beforehand. + BIC(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(3 << 22)); + VMSR(SCRATCHREG1); + + js.roundingModeSet = false; + } +} + +void Jit::SetRoundingMode() +{ + if (!js.roundingModeSet && g_Config.bSetRoundingMode) + { + LDR(SCRATCHREG1, CTXREG, offsetof(MIPSState, fcr31)); + AND(SCRATCHREG1, SCRATCHREG1, Operand2(3)); + // MIPS Rounding Mode: ARM Rounding Mode + // 0: Round nearest 0 + // 1: Round to zero 3 + // 2: Round up (ceil) 1 + // 3: Round down (floor) 2 + CMP(SCRATCHREG1, Operand2(1)); + SetCC(CC_EQ); ADD(SCRATCHREG1, SCRATCHREG1, Operand2(2)); + SetCC(CC_GT); SUB(SCRATCHREG1, SCRATCHREG1, Operand2(1)); + SetCC(CC_AL); + + VMRS(SCRATCHREG2); + // Assume we're always in round-to-nearest mode beforehand. 
+ ORR(SCRATCHREG1, SCRATCHREG2, Operand2(SCRATCHREG1, ST_LSL, 22)); + VMSR(SCRATCHREG1); + + js.roundingModeSet = true; + } +} + // IDEA - could have a WriteDualExit that takes two destinations and two condition flags, // and just have conditional that set PC "twice". This only works when we fall back to dispatcher // though, as we need to have the SUBS flag set in the end. So with block linking in the mix, @@ -548,6 +587,7 @@ void Jit::WriteDownCountR(ARMReg reg) void Jit::WriteExit(u32 destination, int exit_num) { WriteDownCount(); + ClearRoundingMode(); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; b->exitAddress[exit_num] = destination; @@ -569,6 +609,7 @@ void Jit::WriteExitDestInR(ARMReg Reg) { MovToPC(Reg); WriteDownCount(); + ClearRoundingMode(); // TODO: shouldn't need an indirect branch here... B((const void *)dispatcher); } @@ -576,6 +617,7 @@ void Jit::WriteExitDestInR(ARMReg Reg) void Jit::WriteSyscallExit() { WriteDownCount(); + ClearRoundingMode(); B((const void *)dispatcherCheckCoreState); } diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h index 35fc3b83a0..5fd96a4eff 100644 --- a/Core/MIPS/ARM/ArmJit.h +++ b/Core/MIPS/ARM/ArmJit.h @@ -190,6 +190,8 @@ private: void WriteDownCount(int offset = 0); void WriteDownCountR(ARMReg reg); + void ClearRoundingMode(); + void SetRoundingMode(); void MovFromPC(ARMReg r); void MovToPC(ARMReg r); From fb6b3f94bb545530110d573e4723ed5771c6f56f Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 22 Aug 2014 00:35:02 -0700 Subject: [PATCH 4/6] Add a frontend option for GEB saves. May also affect performance, so can be disabled for that. 
--- UI/GameSettingsScreen.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index d0ad767b3a..7ee4e2537c 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ -375,6 +375,7 @@ void GameSettingsScreen::CreateViews() { #ifndef MOBILE_DEVICE systemSettings->Add(new PopupSliderChoice(&g_Config.iRewindFlipFrequency, 0, 1800, s->T("Rewind Snapshot Frequency", "Rewind Snapshot Frequency (0 = off, mem hog)"), screenManager())); #endif + systemSettings->Add(new CheckBox(&g_Config.bSetRoundingMode, s->T("Respect FPU rounding (disable for old GEB saves)")))->OnClick.Handle(this, &GameSettingsScreen::OnJitAffectingSetting); systemSettings->Add(new CheckBox(&g_Config.bAtomicAudioLocks, s->T("Atomic Audio locks (experimental)")))->SetEnabled(!PSP_IsInited()); #if defined(USING_WIN_UI) From 925557ed470e6cac30834dc579af1c4260972ca3 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 22 Aug 2014 09:48:00 -0700 Subject: [PATCH 5/6] x86jit: Maintain the rounding mode always. This should be less often than doing it per block that uses fpu, unless the game doesn't use fpu much at all. 
--- Core/MIPS/x86/Asm.cpp | 6 ++++ Core/MIPS/x86/CompBranch.cpp | 1 + Core/MIPS/x86/CompFPU.cpp | 4 --- Core/MIPS/x86/Jit.cpp | 61 ++++++++++++++++++++---------------- Core/MIPS/x86/Jit.h | 5 +-- 5 files changed, 44 insertions(+), 33 deletions(-) diff --git a/Core/MIPS/x86/Asm.cpp b/Core/MIPS/x86/Asm.cpp index 0fdf464313..59c4533890 100644 --- a/Core/MIPS/x86/Asm.cpp +++ b/Core/MIPS/x86/Asm.cpp @@ -77,7 +77,9 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit) #endif outerLoop = GetCodePtr(); + jit->ClearRoundingMode(this); ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance)); + jit->SetRoundingMode(this); FixupBranch skipToRealDispatch = J(); //skip the sync and compare first time dispatcherCheckCoreState = GetCodePtr(); @@ -129,7 +131,9 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit) SetJumpTarget(notfound); //Ok, no block, let's jit + jit->ClearRoundingMode(this); ABI_CallFunction(&Jit); + jit->SetRoundingMode(this); JMP(dispatcherNoCheck); // Let's just dispatch again, we'll enter the block since we know it's there. 
SetJumpTarget(bail); @@ -139,10 +143,12 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit) J_CC(CC_Z, outerLoop, true); SetJumpTarget(badCoreState); + jit->ClearRoundingMode(this); ABI_PopAllCalleeSavedRegsAndAdjustStack(); RET(); breakpointBailout = GetCodePtr(); + jit->ClearRoundingMode(this); ABI_PopAllCalleeSavedRegsAndAdjustStack(); RET(); } \ No newline at end of file diff --git a/Core/MIPS/x86/CompBranch.cpp b/Core/MIPS/x86/CompBranch.cpp index 24fcd3dcfe..83ab2b298a 100644 --- a/Core/MIPS/x86/CompBranch.cpp +++ b/Core/MIPS/x86/CompBranch.cpp @@ -691,6 +691,7 @@ void Jit::Comp_Syscall(MIPSOpcode op) else ABI_CallFunctionC(&CallSyscall, op.encoding); + SetRoundingMode(); WriteSyscallExit(); js.compiling = false; } diff --git a/Core/MIPS/x86/CompFPU.cpp b/Core/MIPS/x86/CompFPU.cpp index d50f26b671..48fdca49b9 100644 --- a/Core/MIPS/x86/CompFPU.cpp +++ b/Core/MIPS/x86/CompFPU.cpp @@ -79,7 +79,6 @@ void Jit::CompFPTriArith(MIPSOpcode op, void (XEmitter::*arith)(X64Reg reg, OpAr void Jit::Comp_FPU3op(MIPSOpcode op) { CONDITIONAL_DISABLE; - SetRoundingMode(); switch (op & 0x3f) { case 0: CompFPTriArith(op, &XEmitter::ADDSS, false); break; //F(fd) = F(fs) + F(ft); //add @@ -174,8 +173,6 @@ void Jit::CompFPComp(int lhs, int rhs, u8 compare, bool allowNaN) void Jit::Comp_FPUComp(MIPSOpcode op) { CONDITIONAL_DISABLE; - // TODO: Does this matter here? - SetRoundingMode(); int fs = _FS; int ft = _FT; @@ -296,7 +293,6 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s { - SetRoundingMode(); fpr.SpillLock(fs, fd); fpr.StoreFromRegister(fd); CVTSS2SI(EAX, fpr.R(fs)); diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index 38b7c6c338..4f5cf98204 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -211,46 +211,46 @@ void Jit::WriteDowncount(int offset) SUB(32, M(&currentMIPS->downcount), downcount > 127 ? 
Imm32(downcount) : Imm8(downcount)); } -void Jit::ClearRoundingMode() +void Jit::ClearRoundingMode(XEmitter *emitter) { - if (js.roundingModeSet && g_Config.bSetRoundingMode) + if (g_Config.bSetRoundingMode) { - STMXCSR(M(&currentMIPS->temp)); + if (emitter == NULL) + emitter = this; + emitter->STMXCSR(M(&currentMIPS->temp)); // Clear the rounding mode bits back to 0. - AND(32, M(&currentMIPS->temp), Imm32(~(3 << 13))); - LDMXCSR(M(&currentMIPS->temp)); - - js.roundingModeSet = false; + emitter->AND(32, M(&currentMIPS->temp), Imm32(~(3 << 13))); + emitter->LDMXCSR(M(&currentMIPS->temp)); } } -void Jit::SetRoundingMode() +void Jit::SetRoundingMode(XEmitter *emitter) { - if (!js.roundingModeSet && g_Config.bSetRoundingMode) + if (g_Config.bSetRoundingMode) { - MOV(32, R(EAX), M(&mips_->fcr31)); - AND(32, R(EAX), Imm8(3)); + if (emitter == NULL) + emitter = this; + emitter->MOV(32, R(EAX), M(&mips_->fcr31)); + emitter->AND(32, R(EAX), Imm8(3)); // If it's 0, we don't actually bother setting. This is the most common. // We always use nearest as the default rounding mode. - FixupBranch skip = J_CC(CC_Z); + FixupBranch skip = emitter->J_CC(CC_Z); - STMXCSR(M(&currentMIPS->temp)); + emitter->STMXCSR(M(&currentMIPS->temp)); // The MIPS bits don't correspond exactly, so we have to adjust. 
// 0 -> 0 (skip), 1 -> 3, 2 -> 2 (skip2), 3 -> 1 - CMP(32, R(EAX), Imm8(2)); - FixupBranch skip2 = J_CC(CC_Z); - XOR(32, R(EAX), Imm8(2)); - SetJumpTarget(skip2); + emitter->CMP(32, R(EAX), Imm8(2)); + FixupBranch skip2 = emitter->J_CC(CC_Z); + emitter->XOR(32, R(EAX), Imm8(2)); + emitter->SetJumpTarget(skip2); - SHL(32, R(EAX), Imm8(13)); - OR(32, M(&currentMIPS->temp), R(EAX)); - LDMXCSR(M(&currentMIPS->temp)); + emitter->SHL(32, R(EAX), Imm8(13)); + emitter->OR(32, M(&currentMIPS->temp), R(EAX)); + emitter->LDMXCSR(M(&currentMIPS->temp)); - SetJumpTarget(skip); - - js.roundingModeSet = true; + emitter->SetJumpTarget(skip); } } @@ -485,7 +485,9 @@ bool Jit::ReplaceJalTo(u32 dest) { CompileDelaySlot(DELAYSLOT_NICE); FlushAll(); MOV(32, M(&mips_->pc), Imm32(js.compilerPC)); + ClearRoundingMode(); ABI_CallFunction(entry->replaceFunc); + SetRoundingMode(); SUB(32, M(&currentMIPS->downcount), R(EAX)); } @@ -535,7 +537,9 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) // Standard function call, nothing fancy. // The function returns the number of cycles it took in EAX. MOV(32, M(&mips_->pc), Imm32(js.compilerPC)); + ClearRoundingMode(); ABI_CallFunction(entry->replaceFunc); + SetRoundingMode(); if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) { // Compile the original instruction at this address. We ignore cycles for hooks. @@ -559,11 +563,14 @@ void Jit::Comp_Generic(MIPSOpcode op) if (func) { + // TODO: Maybe we'd be better off keeping the rounding mode within interp? 
+ ClearRoundingMode(); MOV(32, M(&mips_->pc), Imm32(js.compilerPC)); if (USE_JIT_MISSMAP) ABI_CallFunctionC(&JitLogMiss, op.encoding); else ABI_CallFunctionC(func, op.encoding); + SetRoundingMode(); } else ERROR_LOG_REPORT(JIT, "Trying to compile instruction %08x that can't be interpreted", op.encoding); @@ -596,7 +603,6 @@ void Jit::WriteExit(u32 destination, int exit_num) } WriteDowncount(); - ClearRoundingMode(); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; @@ -633,7 +639,6 @@ void Jit::WriteExitDestInReg(X64Reg reg) } WriteDowncount(); - ClearRoundingMode(); // Validate the jump to avoid a crash? if (!g_Config.bFastMemory) @@ -672,9 +677,10 @@ void Jit::WriteExitDestInReg(X64Reg reg) void Jit::WriteSyscallExit() { WriteDowncount(); - ClearRoundingMode(); if (js.afterOp & JitState::AFTER_MEMCHECK_CLEANUP) { + ClearRoundingMode(); ABI_CallFunction(&JitMemCheckCleanup); + SetRoundingMode(); } JMP(asm_.dispatcherCheckCoreState, true); } @@ -686,13 +692,14 @@ bool Jit::CheckJitBreakpoint(u32 addr, int downcountOffset) SAVE_FLAGS; FlushAll(); MOV(32, M(&mips_->pc), Imm32(js.compilerPC)); + ClearRoundingMode(); ABI_CallFunction(&JitBreakpoint); + SetRoundingMode(); // If 0, the conditional breakpoint wasn't taken. CMP(32, R(EAX), Imm32(0)); FixupBranch skip = J_CC(CC_Z); WriteDowncount(downcountOffset); - ClearRoundingMode(); // Just to fix the stack. 
LOAD_FLAGS; JMP(asm_.dispatcherCheckCoreState, true); diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h index 0f83c0e87f..44ee34f921 100644 --- a/Core/MIPS/x86/Jit.h +++ b/Core/MIPS/x86/Jit.h @@ -164,6 +164,9 @@ public: void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg); void EatPrefix() { js.EatPrefix(); } + void ClearRoundingMode(XEmitter *emitter = NULL); + void SetRoundingMode(XEmitter *emitter = NULL); + JitBlockCache *GetBlockCache() { return &blocks; } AsmRoutineManager &Asm() { return asm_; } @@ -181,8 +184,6 @@ private: void FlushAll(); void FlushPrefixV(); void WriteDowncount(int offset = 0); - void ClearRoundingMode(); - void SetRoundingMode(); bool ReplaceJalTo(u32 dest); // See CompileDelaySlotFlags for flags. void CompileDelaySlot(int flags, RegCacheState *state = NULL); From e9b5e6f277d77f1c3ed1c757b5298f2b36ec89de Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 22 Aug 2014 19:57:50 -0700 Subject: [PATCH 6/6] armjit: Maintain rounding mode throughout jit. 
--- Core/MIPS/ARM/ArmAsm.cpp | 5 +++++ Core/MIPS/ARM/ArmCompBranch.cpp | 1 + Core/MIPS/ARM/ArmJit.cpp | 14 +++++--------- Core/MIPS/JitCommon/JitState.h | 5 +---- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/Core/MIPS/ARM/ArmAsm.cpp b/Core/MIPS/ARM/ArmAsm.cpp index cc24c85961..d3c15afd14 100644 --- a/Core/MIPS/ARM/ArmAsm.cpp +++ b/Core/MIPS/ARM/ArmAsm.cpp @@ -114,7 +114,9 @@ void Jit::GenerateFixedCode() MovToPC(R0); outerLoop = GetCodePtr(); SaveDowncount(); + ClearRoundingMode(); QuickCallFunction(R0, &CoreTiming::Advance); + SetRoundingMode(); RestoreDowncount(); FixupBranch skipToRealDispatch = B(); //skip the sync and compare first time @@ -173,7 +175,9 @@ void Jit::GenerateFixedCode() // No block found, let's jit SaveDowncount(); + ClearRoundingMode(); QuickCallFunction(R2, (void *)&JitAt); + SetRoundingMode(); RestoreDowncount(); B(dispatcherNoCheck); // no point in special casing this @@ -195,6 +199,7 @@ void Jit::GenerateFixedCode() } SaveDowncount(); + ClearRoundingMode(); ADD(R_SP, R_SP, 4); diff --git a/Core/MIPS/ARM/ArmCompBranch.cpp b/Core/MIPS/ARM/ArmCompBranch.cpp index 89fadae1e9..2bdcc151dd 100644 --- a/Core/MIPS/ARM/ArmCompBranch.cpp +++ b/Core/MIPS/ARM/ArmCompBranch.cpp @@ -559,6 +559,7 @@ void Jit::Comp_Syscall(MIPSOpcode op) QuickCallFunction(R1, (void *)&CallSyscall); } RestoreDowncount(); + SetRoundingMode(); WriteSyscallExit(); js.compiling = false; diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index 94671e74ab..fbb2d24e3e 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -467,10 +467,13 @@ void Jit::Comp_Generic(MIPSOpcode op) if (func) { SaveDowncount(); + // TODO: Perhaps keep the rounding mode for interp? 
+ ClearRoundingMode(); gpr.SetRegImm(SCRATCHREG1, js.compilerPC); MovToPC(SCRATCHREG1); gpr.SetRegImm(R0, op.encoding); QuickCallFunction(R1, (void *)func); + SetRoundingMode(); RestoreDowncount(); } @@ -544,20 +547,18 @@ void Jit::WriteDownCountR(ARMReg reg) void Jit::ClearRoundingMode() { - if (js.roundingModeSet && g_Config.bSetRoundingMode) + if (g_Config.bSetRoundingMode) { VMRS(SCRATCHREG2); // Assume we're always in round-to-nearest mode beforehand. BIC(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(3 << 22)); VMSR(SCRATCHREG1); - - js.roundingModeSet = false; } } void Jit::SetRoundingMode() { - if (!js.roundingModeSet && g_Config.bSetRoundingMode) + if (g_Config.bSetRoundingMode) { LDR(SCRATCHREG1, CTXREG, offsetof(MIPSState, fcr31)); AND(SCRATCHREG1, SCRATCHREG1, Operand2(3)); @@ -575,8 +576,6 @@ void Jit::SetRoundingMode() // Assume we're always in round-to-nearest mode beforehand. ORR(SCRATCHREG1, SCRATCHREG2, Operand2(SCRATCHREG1, ST_LSL, 22)); VMSR(SCRATCHREG1); - - js.roundingModeSet = true; } } @@ -587,7 +586,6 @@ void Jit::SetRoundingMode() void Jit::WriteExit(u32 destination, int exit_num) { WriteDownCount(); - ClearRoundingMode(); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; b->exitAddress[exit_num] = destination; @@ -609,7 +607,6 @@ void Jit::WriteExitDestInR(ARMReg Reg) { MovToPC(Reg); WriteDownCount(); - ClearRoundingMode(); // TODO: shouldn't need an indirect branch here... 
B((const void *)dispatcher); } @@ -617,7 +614,6 @@ void Jit::WriteExitDestInR(ARMReg Reg) void Jit::WriteSyscallExit() { WriteDownCount(); - ClearRoundingMode(); B((const void *)dispatcherCheckCoreState); } diff --git a/Core/MIPS/JitCommon/JitState.h b/Core/MIPS/JitCommon/JitState.h index d51450addb..ab50648775 100644 --- a/Core/MIPS/JitCommon/JitState.h +++ b/Core/MIPS/JitCommon/JitState.h @@ -58,8 +58,7 @@ namespace MIPSComp { : startDefaultPrefix(true), prefixSFlag(PREFIX_UNKNOWN), prefixTFlag(PREFIX_UNKNOWN), - prefixDFlag(PREFIX_UNKNOWN), - roundingModeSet(false) {} + prefixDFlag(PREFIX_UNKNOWN) {} u32 compilerPC; u32 blockStart; @@ -82,8 +81,6 @@ namespace MIPSComp { PrefixState prefixTFlag; PrefixState prefixDFlag; - bool roundingModeSet; - void PrefixStart() { if (startDefaultPrefix) { EatPrefix();