From aacb31bc180038f55ed4b4fd11e158d72a6b0ec5 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 10 Nov 2013 19:38:42 -0800 Subject: [PATCH] armjit: Copy over (disabled) immbranch optim. This does a little loop unrolling. Costs a bit more cache space, but avoids flushing regs for longer. Not enabled. --- Core/MIPS/ARM/ArmCompBranch.cpp | 60 +++++++++++++++++++++++++++++++++ Core/MIPS/ARM/ArmJit.cpp | 5 +++ Core/MIPS/ARM/ArmJit.h | 3 ++ Core/MIPS/x86/CompBranch.cpp | 20 +++++------ Core/MIPS/x86/Jit.cpp | 10 ++++++ Core/MIPS/x86/Jit.h | 10 +----- UI/DevScreens.cpp | 1 + 7 files changed, 88 insertions(+), 21 deletions(-) diff --git a/Core/MIPS/ARM/ArmCompBranch.cpp b/Core/MIPS/ARM/ArmCompBranch.cpp index 46f922f1fb..6aeef5084e 100644 --- a/Core/MIPS/ARM/ArmCompBranch.cpp +++ b/Core/MIPS/ARM/ArmCompBranch.cpp @@ -65,6 +65,34 @@ void Jit::BranchRSRTComp(MIPSOpcode op, ArmGen::CCFlags cc, bool likely) MIPSGPReg rs = _RS; u32 targetAddr = js.compilerPC + offset + 4; + if (jo.immBranches && gpr.IsImm(rs) && gpr.IsImm(rt) && js.numInstructions < jo.continueMaxInstructions) { + // The cc flags are opposites: when NOT to take the branch. + bool skipBranch; + s32 rsImm = (s32)gpr.GetImm(rs); + s32 rtImm = (s32)gpr.GetImm(rt); + + switch (cc) { + case CC_EQ: skipBranch = rsImm == rtImm; break; + case CC_NEQ: skipBranch = rsImm != rtImm; break; + default: skipBranch = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp()."); + } + + if (skipBranch) { + // Skip the delay slot if likely, otherwise it'll be the next instruction. + if (likely) + js.compilerPC += 4; + return; + } + + // Branch taken. Always compile the delay slot, and then go to dest. + CompileDelaySlot(DELAYSLOT_NICE); + // Account for the increment in the loop. + js.compilerPC = targetAddr - 4; + // In case the delay slot was a break or something. + js.compiling = true; + return; + } + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4); bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); CONDITIONAL_NICE_DELAYSLOT; @@ -129,6 +157,38 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, ArmGen::CCFlags cc, bool andLink, bool MIPSGPReg rs = _RS; u32 targetAddr = js.compilerPC + offset + 4; + if (jo.immBranches && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) { + // The cc flags are opposites: when NOT to take the branch. + bool skipBranch; + s32 imm = (s32)gpr.GetImm(rs); + + switch (cc) { + case CC_GT: skipBranch = imm > 0; break; + case CC_GE: skipBranch = imm >= 0; break; + case CC_LT: skipBranch = imm < 0; break; + case CC_LE: skipBranch = imm <= 0; break; + default: skipBranch = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp()."); + } + + if (skipBranch) { + // Skip the delay slot if likely, otherwise it'll be the next instruction. + if (likely) + js.compilerPC += 4; + return; + } + + // Branch taken. Always compile the delay slot, and then go to dest. + CompileDelaySlot(DELAYSLOT_NICE); + if (andLink) + gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8); + + // Account for the increment in the loop. + js.compilerPC = targetAddr - 4; + // In case the delay slot was a break or something. + js.compiling = true; + return; + } + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); CONDITIONAL_NICE_DELAYSLOT; diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index ac9b97e5eb..91a062ba5d 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -64,6 +64,11 @@ ArmJitOptions::ArmJitOptions() useBackJump = false; useForwardJump = false; cachePointers = true; + // WARNING: These options don't work properly with cache clearing or jit compare. + // Need to find a smart way to handle before enabling. + immBranches = false; + continueBranches = false; + continueMaxInstructions = 300; } Jit::Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &jo), fpr(mips), mips_(mips) diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h index 189865153e..ceb2c2c4b4 100644 --- a/Core/MIPS/ARM/ArmJit.h +++ b/Core/MIPS/ARM/ArmJit.h @@ -41,6 +41,9 @@ struct ArmJitOptions bool useBackJump; bool useForwardJump; bool cachePointers; + bool immBranches; + bool continueBranches; + int continueMaxInstructions; }; class Jit : public ArmGen::ARMXCodeBlock diff --git a/Core/MIPS/x86/CompBranch.cpp b/Core/MIPS/x86/CompBranch.cpp index ae5a60506b..4b4919509f 100644 --- a/Core/MIPS/x86/CompBranch.cpp +++ b/Core/MIPS/x86/CompBranch.cpp @@ -141,10 +141,6 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely) MIPSGPReg rs = _RS; u32 targetAddr = js.compilerPC + offset + 4; - MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4); - bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); - CONDITIONAL_NICE_DELAYSLOT; - if (jo.immBranches && gpr.IsImmediate(rs) && gpr.IsImmediate(rt) && js.numInstructions < jo.continueMaxInstructions) { // The cc flags are opposites: when NOT to take the branch. @@ -176,6 +172,9 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely) return; } + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4); + bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); + CONDITIONAL_NICE_DELAYSLOT; if (!likely && delaySlotIsNice) CompileDelaySlot(DELAYSLOT_NICE); @@ -240,10 +239,6 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li MIPSGPReg rs = _RS; u32 targetAddr = js.compilerPC + offset + 4; - MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); - bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); - CONDITIONAL_NICE_DELAYSLOT; - if (jo.immBranches && gpr.IsImmediate(rs) && js.numInstructions < jo.continueMaxInstructions) { // The cc flags are opposites: when NOT to take the branch. @@ -270,10 +265,8 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li // Branch taken. Always compile the delay slot, and then go to dest. CompileDelaySlot(DELAYSLOT_NICE); if (andLink) - { - gpr.MapReg(MIPS_REG_RA, false, true); - MOV(32, gpr.R(MIPS_REG_RA), Imm32(js.compilerPC + 8)); - } + gpr.SetImmediate32(MIPS_REG_RA, js.compilerPC + 8); + // Account for the increment in the loop. js.compilerPC = targetAddr - 4; // In case the delay slot was a break or something. @@ -281,6 +274,9 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li return; } + MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); + bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); + CONDITIONAL_NICE_DELAYSLOT; if (!likely && delaySlotIsNice) CompileDelaySlot(DELAYSLOT_NICE); diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index a6a5567079..c402364534 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -109,6 +109,16 @@ static void JitLogMiss(MIPSOpcode op) func(op); } +JitOptions::JitOptions() +{ + enableBlocklink = true; + // WARNING: These options don't work properly with cache clearing. + // Need to find a smart way to handle before enabling. + immBranches = false; + continueBranches = false; + continueMaxInstructions = 300; +} + #ifdef _MSC_VER // JitBlockCache doesn't use this, just stores it. #pragma warning(disable:4355) diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h index 692f472755..8bdf82b3f4 100644 --- a/Core/MIPS/x86/Jit.h +++ b/Core/MIPS/x86/Jit.h @@ -39,15 +39,7 @@ u32 JitBreakpoint(); struct JitOptions { - JitOptions() - { - enableBlocklink = true; - // WARNING: These options don't work properly with cache clearing. - // Need to find a smart way to handle before enabling. - immBranches = false; - continueBranches = false; - continueMaxInstructions = 300; - } + JitOptions(); bool enableBlocklink; bool immBranches; diff --git a/UI/DevScreens.cpp b/UI/DevScreens.cpp index 5e3528c965..4879f252ea 100644 --- a/UI/DevScreens.cpp +++ b/UI/DevScreens.cpp @@ -322,6 +322,7 @@ void JitCompareScreen::UpdateDisasm() { // Alright. First generate the MIPS disassembly. + // TODO: Need a way to communicate branch continuing. for (u32 addr = block->originalAddress; addr <= block->originalAddress + block->originalSize * 4; addr += 4) { char temp[256]; MIPSDisAsm(Memory::Read_Instruction(addr), addr, temp, true);