diff --git a/Core/Config.cpp b/Core/Config.cpp index 3e215b285d..e090cc7092 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -313,6 +313,14 @@ static ConfigSetting generalSettings[] = { ConfigSetting(false), }; +static bool DefaultForceFlushToZero() { +#ifdef ARM + return true; +#else + return false; +#endif +} + static ConfigSetting cpuSettings[] = { ReportedConfigSetting("Jit", &g_Config.bJit, &DefaultJit), ReportedConfigSetting("SeparateCPUThread", &g_Config.bSeparateCPUThread, false), @@ -323,6 +331,7 @@ static ConfigSetting cpuSettings[] = { ReportedConfigSetting("FuncReplacements", &g_Config.bFuncReplacements, true), ReportedConfigSetting("CPUSpeed", &g_Config.iLockedCPUSpeed, 0), ReportedConfigSetting("SetRoundingMode", &g_Config.bSetRoundingMode, true), + ReportedConfigSetting("ForceFlushToZero", &g_Config.bForceFlushToZero, &DefaultForceFlushToZero), ConfigSetting(false), }; diff --git a/Core/Config.h b/Core/Config.h index b6a543669a..d05bfbdfd1 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -98,6 +98,7 @@ public: bool bForceLagSync; bool bFuncReplacements; bool bSetRoundingMode; + bool bForceFlushToZero; // Definitely cannot be changed while game is running. bool bSeparateCPUThread; diff --git a/Core/MIPS/ARM/ArmCompFPU.cpp b/Core/MIPS/ARM/ArmCompFPU.cpp index 4fe72292f8..2a52d9c8f4 100644 --- a/Core/MIPS/ARM/ArmCompFPU.cpp +++ b/Core/MIPS/ARM/ArmCompFPU.cpp @@ -399,6 +399,8 @@ void Jit::Comp_mxc1(MIPSOpcode op) case 6: //ctc1 if (fs == 31) { + // Must clear before setting, since SetRoundingMode() assumes it was cleared. + ClearRoundingMode(); bool wasImm = gpr.IsImm(rt); if (wasImm) { gpr.SetImm(MIPS_REG_FPCOND, (gpr.GetImm(rt) >> 23) & 1); @@ -417,15 +419,8 @@ void Jit::Comp_mxc1(MIPSOpcode op) MOV(SCRATCHREG1, Operand2(gpr.R(rt), ST_LSR, 23)); AND(gpr.R(MIPS_REG_FPCOND), SCRATCHREG1, Operand2(1)); #endif - SetRoundingMode(); - } else { - if ((gpr.GetImm(rt) & 3) == 0) { - // Default nearest mode, let's do this the fast way. 
- ClearRoundingMode(); - } else { - SetRoundingMode(); - } } + SetRoundingMode(); } else { Comp_Generic(op); } diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index ee2fe436a3..212ad15d62 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -508,8 +508,7 @@ void Jit::RestoreDowncount() { LDR(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); } -void Jit::WriteDownCount(int offset) -{ +void Jit::WriteDownCount(int offset) { if (jo.downcountInRegister) { int theDowncount = js.downcountAmount + offset; Operand2 op2; @@ -538,8 +537,7 @@ void Jit::WriteDownCount(int offset) } // Abuses R2 -void Jit::WriteDownCountR(ARMReg reg) -{ +void Jit::WriteDownCountR(ARMReg reg) { if (jo.downcountInRegister) { SUBS(DOWNCOUNTREG, DOWNCOUNTREG, reg); } else { @@ -549,42 +547,64 @@ void Jit::WriteDownCountR(ARMReg reg) } } -void Jit::ClearRoundingMode() -{ - if (g_Config.bSetRoundingMode) - { +void Jit::ClearRoundingMode() { + if (g_Config.bSetRoundingMode) { VMRS(SCRATCHREG2); // Assume we're always in round-to-nearest mode beforehand. + // Also on ARM, we're always in flush-to-zero in C++, so stay that way. + if (!g_Config.bForceFlushToZero) { + ORR(SCRATCHREG2, SCRATCHREG2, AssumeMakeOperand2(4 << 22)); + } BIC(SCRATCHREG2, SCRATCHREG2, AssumeMakeOperand2(3 << 22)); VMSR(SCRATCHREG2); } } -void Jit::SetRoundingMode() -{ +void Jit::SetRoundingMode() { // NOTE: Must not destory R0. - if (g_Config.bSetRoundingMode) - { + if (g_Config.bSetRoundingMode) { LDR(SCRATCHREG2, CTXREG, offsetof(MIPSState, fcr31)); - AND(SCRATCHREG2, SCRATCHREG2, Operand2(3)); + if (!g_Config.bForceFlushToZero) { + TST(SCRATCHREG2, AssumeMakeOperand2(1 << 24)); + AND(SCRATCHREG2, SCRATCHREG2, Operand2(3)); + SetCC(CC_NEQ); + ADD(SCRATCHREG2, SCRATCHREG2, Operand2(4)); + SetCC(CC_AL); + // We can only skip if the rounding mode is zero and flush is set. 
+ CMP(SCRATCHREG2, Operand2(4)); + } else { + ANDS(SCRATCHREG2, SCRATCHREG2, Operand2(3)); + } + // At this point, if it was zero, we can skip the rest. + FixupBranch skip = B_CC(CC_EQ); + PUSH(1, SCRATCHREG1); + // MIPS Rounding Mode: ARM Rounding Mode // 0: Round nearest 0 // 1: Round to zero 3 // 2: Round up (ceil) 1 // 3: Round down (floor) 2 - CMP(SCRATCHREG2, Operand2(1)); - FixupBranch skip = B_CC(CC_LT); + if (!g_Config.bForceFlushToZero) { + AND(SCRATCHREG1, SCRATCHREG2, Operand2(3)); + CMP(SCRATCHREG1, Operand2(1)); + } else { + CMP(SCRATCHREG2, Operand2(1)); + } + SetCC(CC_EQ); ADD(SCRATCHREG2, SCRATCHREG2, Operand2(2)); SetCC(CC_GT); SUB(SCRATCHREG2, SCRATCHREG2, Operand2(1)); SetCC(CC_AL); - PUSH(1, SCRATCHREG1); VMRS(SCRATCHREG1); // Assume we're always in round-to-nearest mode beforehand. - ORR(SCRATCHREG2, SCRATCHREG2, Operand2(SCRATCHREG1, ST_LSL, 22)); - VMSR(SCRATCHREG2); - POP(1, SCRATCHREG1); + if (!g_Config.bForceFlushToZero) { + // But we need to clear flush to zero in this case anyway. + BIC(SCRATCHREG1, SCRATCHREG1, AssumeMakeOperand2(7 << 22)); + } + ORR(SCRATCHREG1, SCRATCHREG1, Operand2(SCRATCHREG2, ST_LSL, 22)); + VMSR(SCRATCHREG1); + POP(1, SCRATCHREG1); SetJumpTarget(skip); } } diff --git a/Core/MIPS/x86/CompFPU.cpp b/Core/MIPS/x86/CompFPU.cpp index e38d71021f..e0d9bb8046 100644 --- a/Core/MIPS/x86/CompFPU.cpp +++ b/Core/MIPS/x86/CompFPU.cpp @@ -379,6 +379,7 @@ void Jit::Comp_mxc1(MIPSOpcode op) case 6: //currentMIPS->WriteFCR(fs, R(rt)); break; //ctc1 if (fs == 31) { + // Must clear before setting, since SetRoundingMode() assumes it was cleared. 
ClearRoundingMode(); if (gpr.IsImm(rt)) { gpr.SetImm(MIPS_REG_FPCOND, (gpr.GetImm(rt) >> 23) & 1); diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index 4715f12f16..dd1699a608 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -250,10 +250,14 @@ void Jit::SetRoundingMode(XEmitter *emitter) emitter->SHL(32, R(EAX), Imm8(13)); emitter->OR(32, M(&currentMIPS->temp), R(EAX)); - emitter->TEST(32, M(&mips_->fcr31), Imm32(1 << 24)); - FixupBranch skip3 = emitter->J_CC(CC_Z); - emitter->OR(32, M(&currentMIPS->temp), Imm32(1 << 15)); - emitter->SetJumpTarget(skip3); + if (g_Config.bForceFlushToZero) { + emitter->OR(32, M(&currentMIPS->temp), Imm32(1 << 15)); + } else { + emitter->TEST(32, M(&mips_->fcr31), Imm32(1 << 24)); + FixupBranch skip3 = emitter->J_CC(CC_Z); + emitter->OR(32, M(&currentMIPS->temp), Imm32(1 << 15)); + emitter->SetJumpTarget(skip3); + } emitter->LDMXCSR(M(&currentMIPS->temp));