From 928e2adfc97886f95386eb78471d088ba3cc9634 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 12 Oct 2014 11:34:26 -0700 Subject: [PATCH] jit: Avoid applying/restoring the rounding mode. If the game never sets it, we can skip around syscalls, interpreter, replacements, etc. --- Core/MIPS/ARM/ArmAsm.cpp | 10 +++--- Core/MIPS/ARM/ArmCompFPU.cpp | 1 + Core/MIPS/ARM/ArmJit.cpp | 65 +++++++++++++++++++++++++++++----- Core/MIPS/ARM/ArmJit.h | 5 +-- Core/MIPS/JitCommon/JitState.h | 7 +++- Core/MIPS/x86/Asm.cpp | 12 +++---- Core/MIPS/x86/CompFPU.cpp | 2 ++ Core/MIPS/x86/Jit.cpp | 62 ++++++++++++++++++++++++++------ Core/MIPS/x86/Jit.h | 5 +-- 9 files changed, 135 insertions(+), 34 deletions(-) diff --git a/Core/MIPS/ARM/ArmAsm.cpp b/Core/MIPS/ARM/ArmAsm.cpp index 129564e232..773c851ab9 100644 --- a/Core/MIPS/ARM/ArmAsm.cpp +++ b/Core/MIPS/ARM/ArmAsm.cpp @@ -114,9 +114,9 @@ void Jit::GenerateFixedCode() MovToPC(R0); outerLoop = GetCodePtr(); SaveDowncount(); - RestoreRoundingMode(); + RestoreRoundingMode(true); QuickCallFunction(R0, &CoreTiming::Advance); - ApplyRoundingMode(); + ApplyRoundingMode(true); RestoreDowncount(); FixupBranch skipToRealDispatch = B(); //skip the sync and compare first time @@ -175,9 +175,9 @@ void Jit::GenerateFixedCode() // No block found, let's jit SaveDowncount(); - RestoreRoundingMode(); + RestoreRoundingMode(true); QuickCallFunction(R2, (void *)&JitAt); - ApplyRoundingMode(); + ApplyRoundingMode(true); RestoreDowncount(); B(dispatcherNoCheck); // no point in special casing this @@ -199,7 +199,7 @@ void Jit::GenerateFixedCode() } SaveDowncount(); - RestoreRoundingMode(); + RestoreRoundingMode(true); ADD(R_SP, R_SP, 4); diff --git a/Core/MIPS/ARM/ArmCompFPU.cpp b/Core/MIPS/ARM/ArmCompFPU.cpp index 770fe95ea3..279f4ea5b7 100644 --- a/Core/MIPS/ARM/ArmCompFPU.cpp +++ b/Core/MIPS/ARM/ArmCompFPU.cpp @@ -420,6 +420,7 @@ void Jit::Comp_mxc1(MIPSOpcode op) AND(gpr.R(MIPS_REG_FPCOND), SCRATCHREG1, Operand2(1)); #endif } + 
UpdateRoundingMode(); ApplyRoundingMode(); } else { Comp_Generic(op); diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index 7a422cf5f6..68f347edbd 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -94,22 +94,32 @@ Jit::Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &jo), fpr(mips), mips_ void Jit::DoState(PointerWrap &p) { - auto s = p.Section("Jit", 1); + auto s = p.Section("Jit", 1, 2); if (!s) return; p.Do(js.startDefaultPrefix); + if (s >= 2) { + p.Do(js.hasSetRounding); + js.lastSetRounding = 0; + } else { + js.hasSetRounding = 1; + } } // This is here so the savestate matches between jit and non-jit. void Jit::DoDummyState(PointerWrap &p) { - auto s = p.Section("Jit", 1); + auto s = p.Section("Jit", 1, 2); if (!s) return; bool dummy = false; p.Do(dummy); + if (s >= 2) { + dummy = true; + p.Do(dummy); + } } void Jit::FlushAll() @@ -201,17 +211,28 @@ void Jit::Compile(u32 em_address) { DoJit(em_address, b); blocks.FinalizeBlock(block_num, jo.enableBlocklink); + bool cleanSlate = false; + + if (js.hasSetRounding && !js.lastSetRounding) { + WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks"); + // Won't loop, since hasSetRounding is only ever set to 1. + js.lastSetRounding = js.hasSetRounding; + cleanSlate = true; + } + // Drat. The VFPU hit an uneaten prefix at the end of a block. if (js.startDefaultPrefix && js.MayHavePrefix()) { WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", js.compilerPC - 4); js.LogPrefix(); + // Let's try that one more time. We won't get back here because we toggled the value. js.startDefaultPrefix = false; + cleanSlate = true; + } + if (cleanSlate) { // Our assumptions are all wrong so it's clean-slate time. ClearCache(); - - // Let's try that one more time. We won't get back here because we toggled the value. 
Compile(em_address); } } @@ -547,8 +568,9 @@ void Jit::WriteDownCountR(ARMReg reg) { } } -void Jit::RestoreRoundingMode() { - if (g_Config.bSetRoundingMode) { +void Jit::RestoreRoundingMode(bool force) { + // If the game has never set an interesting rounding mode, we can safely skip this. + if (g_Config.bSetRoundingMode && (force || !g_Config.bForceFlushToZero || js.hasSetRounding)) { VMRS(SCRATCHREG2); // Assume we're always in round-to-nearest mode beforehand. // Also on ARM, we're always in flush-to-zero in C++, so stay that way. @@ -560,9 +582,10 @@ void Jit::RestoreRoundingMode() { } } -void Jit::ApplyRoundingMode() { +void Jit::ApplyRoundingMode(bool force) { // NOTE: Must not destory R0. - if (g_Config.bSetRoundingMode) { + // If the game has never set an interesting rounding mode, we can safely skip this. + if (g_Config.bSetRoundingMode && (force || !g_Config.bForceFlushToZero || js.hasSetRounding)) { LDR(SCRATCHREG2, CTXREG, offsetof(MIPSState, fcr31)); if (!g_Config.bForceFlushToZero) { TST(SCRATCHREG2, AssumeMakeOperand2(1 << 24)); @@ -609,6 +632,32 @@ void Jit::ApplyRoundingMode() { } } +void Jit::UpdateRoundingMode() { + // NOTE: Must not destory R0. + if (g_Config.bSetRoundingMode) { + LDR(SCRATCHREG2, CTXREG, offsetof(MIPSState, fcr31)); + if (!g_Config.bForceFlushToZero) { + TST(SCRATCHREG2, AssumeMakeOperand2(1 << 24)); + AND(SCRATCHREG2, SCRATCHREG2, Operand2(3)); + SetCC(CC_NEQ); + ADD(SCRATCHREG2, SCRATCHREG2, Operand2(4)); + SetCC(CC_AL); + // We can only skip if the rounding mode is zero and flush is set. 
+ CMP(SCRATCHREG2, Operand2(4)); + } else { + ANDS(SCRATCHREG2, SCRATCHREG2, Operand2(3)); + } + + FixupBranch skip = B_CC(CC_EQ); + PUSH(1, SCRATCHREG1); + MOVI2R(SCRATCHREG2, 1); + MOVP2R(SCRATCHREG1, &js.hasSetRounding); + STRB(SCRATCHREG2, SCRATCHREG1, 0); + POP(1, SCRATCHREG1); + SetJumpTarget(skip); + } +} + // IDEA - could have a WriteDualExit that takes two destinations and two condition flags, // and just have conditional that set PC "twice". This only works when we fall back to dispatcher // though, as we need to have the SUBS flag set in the end. So with block linking in the mix, diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h index d5cb448e35..95ba4ec867 100644 --- a/Core/MIPS/ARM/ArmJit.h +++ b/Core/MIPS/ARM/ArmJit.h @@ -192,8 +192,9 @@ private: void WriteDownCount(int offset = 0); void WriteDownCountR(ARMReg reg); - void RestoreRoundingMode(); - void ApplyRoundingMode(); + void RestoreRoundingMode(bool force = false); + void ApplyRoundingMode(bool force = false); + void UpdateRoundingMode(); void MovFromPC(ARMReg r); void MovToPC(ARMReg r); diff --git a/Core/MIPS/JitCommon/JitState.h b/Core/MIPS/JitCommon/JitState.h index ab50648775..c77481499d 100644 --- a/Core/MIPS/JitCommon/JitState.h +++ b/Core/MIPS/JitCommon/JitState.h @@ -55,7 +55,9 @@ namespace MIPSComp { }; JitState() - : startDefaultPrefix(true), + : hasSetRounding(0), + lastSetRounding(0), + startDefaultPrefix(true), prefixSFlag(PREFIX_UNKNOWN), prefixTFlag(PREFIX_UNKNOWN), prefixDFlag(PREFIX_UNKNOWN) {} @@ -72,6 +74,9 @@ namespace MIPSComp { bool compiling; // TODO: get rid of this in favor of using analysis results to determine end of block JitBlock *curBlock; + u8 hasSetRounding; + u8 lastSetRounding; + // VFPU prefix magic bool startDefaultPrefix; u32 prefixS; diff --git a/Core/MIPS/x86/Asm.cpp b/Core/MIPS/x86/Asm.cpp index dc25034e34..12fd1da530 100644 --- a/Core/MIPS/x86/Asm.cpp +++ b/Core/MIPS/x86/Asm.cpp @@ -77,9 +77,9 @@ void AsmRoutineManager::Generate(MIPSState *mips, 
MIPSComp::Jit *jit) #endif outerLoop = GetCodePtr(); - jit->RestoreRoundingMode(this); + jit->RestoreRoundingMode(true, this); ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance)); - jit->ApplyRoundingMode(this); + jit->ApplyRoundingMode(true, this); FixupBranch skipToRealDispatch = J(); //skip the sync and compare first time dispatcherCheckCoreState = GetCodePtr(); @@ -134,9 +134,9 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit) SetJumpTarget(notfound); //Ok, no block, let's jit - jit->RestoreRoundingMode(this); + jit->RestoreRoundingMode(true, this); ABI_CallFunction(&Jit); - jit->ApplyRoundingMode(this); + jit->ApplyRoundingMode(true, this); JMP(dispatcherNoCheck, true); // Let's just dispatch again, we'll enter the block since we know it's there. SetJumpTarget(bail); @@ -146,12 +146,12 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit) J_CC(CC_Z, outerLoop, true); SetJumpTarget(badCoreState); - jit->RestoreRoundingMode(this); + jit->RestoreRoundingMode(true, this); ABI_PopAllCalleeSavedRegsAndAdjustStack(); RET(); breakpointBailout = GetCodePtr(); - jit->RestoreRoundingMode(this); + jit->RestoreRoundingMode(true, this); ABI_PopAllCalleeSavedRegsAndAdjustStack(); RET(); } diff --git a/Core/MIPS/x86/CompFPU.cpp b/Core/MIPS/x86/CompFPU.cpp index 01561d12af..4cb788cf31 100644 --- a/Core/MIPS/x86/CompFPU.cpp +++ b/Core/MIPS/x86/CompFPU.cpp @@ -387,6 +387,7 @@ void Jit::Comp_mxc1(MIPSOpcode op) if ((gpr.GetImm(rt) & 0x1000003) == 0) { // Default nearest / no-flush mode, just leave it cleared. 
} else { + UpdateRoundingMode(); ApplyRoundingMode(); } } else { @@ -399,6 +400,7 @@ void Jit::Comp_mxc1(MIPSOpcode op) MOV(32, M(&mips_->fcr31), gpr.R(rt)); AND(32, M(&mips_->fcr31), Imm32(0x0181FFFF)); gpr.UnlockAll(); + UpdateRoundingMode(); ApplyRoundingMode(); } } else { diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index 34835962a1..4b3ecf8124 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -145,22 +145,32 @@ Jit::~Jit() { void Jit::DoState(PointerWrap &p) { - auto s = p.Section("Jit", 1); + auto s = p.Section("Jit", 1, 2); if (!s) return; p.Do(js.startDefaultPrefix); + if (s >= 2) { + p.Do(js.hasSetRounding); + js.lastSetRounding = 0; + } else { + js.hasSetRounding = 1; + } } // This is here so the savestate matches between jit and non-jit. void Jit::DoDummyState(PointerWrap &p) { - auto s = p.Section("Jit", 1); + auto s = p.Section("Jit", 1, 2); if (!s) return; bool dummy = false; p.Do(dummy); + if (s >= 2) { + dummy = true; + p.Do(dummy); + } } @@ -211,9 +221,10 @@ void Jit::WriteDowncount(int offset) SUB(32, M(&currentMIPS->downcount), downcount > 127 ? Imm32(downcount) : Imm8(downcount)); } -void Jit::RestoreRoundingMode(XEmitter *emitter) +void Jit::RestoreRoundingMode(bool force, XEmitter *emitter) { - if (g_Config.bSetRoundingMode) + // If the game has never set an interesting rounding mode, we can safely skip this. + if (g_Config.bSetRoundingMode && (force || g_Config.bForceFlushToZero || js.hasSetRounding)) { if (emitter == NULL) emitter = this; @@ -224,9 +235,10 @@ void Jit::RestoreRoundingMode(XEmitter *emitter) } } -void Jit::ApplyRoundingMode(XEmitter *emitter) +void Jit::ApplyRoundingMode(bool force, XEmitter *emitter) { - if (g_Config.bSetRoundingMode) + // If the game has never set an interesting rounding mode, we can safely skip this. 
+ if (g_Config.bSetRoundingMode && (force || g_Config.bForceFlushToZero || js.hasSetRounding)) { if (emitter == NULL) emitter = this; @@ -265,6 +277,22 @@ void Jit::ApplyRoundingMode(XEmitter *emitter) } } +void Jit::UpdateRoundingMode(XEmitter *emitter) +{ + if (g_Config.bSetRoundingMode) + { + if (emitter == NULL) + emitter = this; + + // If it's only ever 0, we don't actually bother applying or restoring it. + // This is the most common situation. + emitter->TEST(32, M(&mips_->fcr31), Imm32(0x01000003)); + FixupBranch skip = emitter->J_CC(CC_Z); + emitter->MOV(8, M(&js.hasSetRounding), Imm8(1)); + emitter->SetJumpTarget(skip); + } +} + void Jit::ClearCache() { blocks.Clear(); @@ -330,14 +358,28 @@ void Jit::Compile(u32 em_address) DoJit(em_address, b); blocks.FinalizeBlock(block_num, jo.enableBlocklink); + bool cleanSlate = false; + + if (js.hasSetRounding && !js.lastSetRounding) { + WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks"); + // Won't loop, since hasSetRounding is only ever set to 1. + js.lastSetRounding = js.hasSetRounding; + cleanSlate = true; + } + // Drat. The VFPU hit an uneaten prefix at the end of a block. if (js.startDefaultPrefix && js.MayHavePrefix()) { - WARN_LOG(JIT, "Uneaten prefix at end of block: %08x", js.compilerPC - 4); - js.startDefaultPrefix = false; - // Our assumptions are all wrong so it's clean-slate time. - ClearCache(); + WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", js.compilerPC - 4); + js.LogPrefix(); // Let's try that one more time. We won't get back here because we toggled the value. + js.startDefaultPrefix = false; + cleanSlate = true; + } + + if (cleanSlate) { + // Our assumptions are all wrong so it's clean-slate time. 
+ ClearCache(); Compile(em_address); } } diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h index 9e58ca6580..c1b088e9b8 100644 --- a/Core/MIPS/x86/Jit.h +++ b/Core/MIPS/x86/Jit.h @@ -165,8 +165,9 @@ public: void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg); void EatPrefix() { js.EatPrefix(); } - void RestoreRoundingMode(XEmitter *emitter = NULL); - void ApplyRoundingMode(XEmitter *emitter = NULL); + void RestoreRoundingMode(bool force = false, XEmitter *emitter = NULL); + void ApplyRoundingMode(bool force = false, XEmitter *emitter = NULL); + void UpdateRoundingMode(XEmitter *emitter = NULL); JitBlockCache *GetBlockCache() { return &blocks; } AsmRoutineManager &Asm() { return asm_; }