diff --git a/Core/MIPS/ARM/ArmCompBranch.cpp b/Core/MIPS/ARM/ArmCompBranch.cpp index ba9c126deb..69e6252283 100644 --- a/Core/MIPS/ARM/ArmCompBranch.cpp +++ b/Core/MIPS/ARM/ArmCompBranch.cpp @@ -39,6 +39,8 @@ #define _FD ((op>>6 ) & 0x1F) #define _POS ((op>>6 ) & 0x1F) #define _SIZE ((op>>11 ) & 0x1F) +#define _IMM16 (signed short)(op&0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) #define LOOPOPTIMIZATION 0 @@ -57,7 +59,7 @@ void Jit::BranchRSRTComp(u32 op, ArmGen::CCFlags cc, bool likely) ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); return; } - int offset = (signed short)(op&0xFFFF)<<2; + int offset = _IMM16 << 2; int rt = _RT; int rs = _RS; u32 targetAddr = js.compilerPC + offset + 4; @@ -101,11 +103,11 @@ void Jit::BranchRSRTComp(u32 op, ArmGen::CCFlags cc, bool likely) } // Take the branch - WriteExit(targetAddr, 0); + WriteExit(targetAddr, js.nextExit++); SetJumpTarget(ptr); // Not taken - WriteExit(js.compilerPC+8, 1); + WriteExit(js.compilerPC+8, js.nextExit++); js.compiling = false; } @@ -117,7 +119,7 @@ void Jit::BranchRSZeroComp(u32 op, ArmGen::CCFlags cc, bool andLink, bool likely ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); return; } - int offset = (signed short)(op&0xFFFF)<<2; + int offset = _IMM16 << 2; int rs = _RS; u32 targetAddr = js.compilerPC + offset + 4; @@ -153,11 +155,11 @@ void Jit::BranchRSZeroComp(u32 op, ArmGen::CCFlags cc, bool andLink, bool likely STR(R0, CTXREG, MIPS_REG_RA * 4); } - WriteExit(targetAddr, 0); + WriteExit(targetAddr, js.nextExit++); SetJumpTarget(ptr); // Not taken - WriteExit(js.compilerPC + 8, 1); + WriteExit(js.compilerPC + 8, js.nextExit++); js.compiling = false; } @@ -183,7 +185,6 @@ void Jit::Comp_RelBranch(u32 op) _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); break; } - js.compiling = false; } void Jit::Comp_RelBranchRI(u32 op) @@ -202,7 +203,6 @@ void Jit::Comp_RelBranchRI(u32 op) _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); break; } - js.compiling = false; } // If likely is set, discard the branch slot if NOT taken. @@ -212,7 +212,7 @@ void Jit::BranchFPFlag(u32 op, ArmGen::CCFlags cc, bool likely) ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); return; } - int offset = (signed short)(op & 0xFFFF) << 2; + int offset = _IMM16 << 2; u32 targetAddr = js.compilerPC + offset + 4; u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4); @@ -221,8 +221,6 @@ void Jit::BranchFPFlag(u32 op, ArmGen::CCFlags cc, bool likely) if (!likely && delaySlotIsNice) CompileDelaySlot(DELAYSLOT_NICE); - FlushAll(); - LDR(R0, CTXREG, offsetof(MIPSState, fpcond)); TST(R0, Operand2(1, TYPE_IMM)); @@ -231,20 +229,23 @@ void Jit::BranchFPFlag(u32 op, ArmGen::CCFlags cc, bool likely) { if (!delaySlotIsNice) CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + else + FlushAll(); ptr = B_CC(cc); } else { + FlushAll(); ptr = B_CC(cc); CompileDelaySlot(DELAYSLOT_FLUSH); } // Take the branch - WriteExit(targetAddr, 0); + WriteExit(targetAddr, js.nextExit++); SetJumpTarget(ptr); // Not taken - WriteExit(js.compilerPC + 8, 1); + WriteExit(js.compilerPC + 8, js.nextExit++); js.compiling = false; } @@ -260,7 +261,6 @@ void Jit::Comp_FPUBranch(u32 op) _dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted"); break; } - js.compiling = false; } // If likely is set, discard the branch slot if NOT taken. @@ -270,7 +270,7 @@ void Jit::BranchVFPUFlag(u32 op, ArmGen::CCFlags cc, bool likely) ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); return; } - int offset = (signed short)(op & 0xFFFF) << 2; + int offset = _IMM16 << 2; u32 targetAddr = js.compilerPC + offset + 4; u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4); @@ -286,8 +286,6 @@ void Jit::BranchVFPUFlag(u32 op, ArmGen::CCFlags cc, bool likely) if (delaySlotIsBranch && (delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1) ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", js.compilerPC); - FlushAll(); - int imm3 = (op >> 18) & 7; MOVI2R(R0, (u32)&(mips_->vfpuCtrl[VFPU_CTRL_CC])); @@ -300,10 +298,13 @@ void Jit::BranchVFPUFlag(u32 op, ArmGen::CCFlags cc, bool likely) { if (!delaySlotIsNice && !delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + else + FlushAll(); ptr = B_CC(cc); } else { + FlushAll(); ptr = B_CC(cc); if (!delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_FLUSH); @@ -311,12 +312,12 @@ void Jit::BranchVFPUFlag(u32 op, ArmGen::CCFlags cc, bool likely) js.inDelaySlot = false; // Take the branch - WriteExit(targetAddr, 0); + WriteExit(targetAddr, js.nextExit++); SetJumpTarget(ptr); // Not taken u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8); - WriteExit(notTakenTarget, 1); + WriteExit(notTakenTarget, js.nextExit++); js.compiling = false; } @@ -329,7 +330,6 @@ void Jit::Comp_VBranch(u32 op) case 2: BranchVFPUFlag(op, CC_NEQ, true); break; // bvfl case 3: BranchVFPUFlag(op, CC_EQ, true); break; // bvtl } - js.compiling = false; } void Jit::Comp_Jump(u32 op) @@ -338,7 +338,7 @@ void Jit::Comp_Jump(u32 op) ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); return; } - u32 off = ((op & 0x03FFFFFF) << 2); + u32 off = _IMM26 << 2; u32 targetAddr = (js.compilerPC & 0xF0000000) | off; switch (op >> 26) @@ -346,7 +346,7 @@ void Jit::Comp_Jump(u32 op) case 2: //j CompileDelaySlot(DELAYSLOT_NICE); FlushAll(); - WriteExit(targetAddr, 0); + WriteExit(targetAddr, js.nextExit++); break; case 3: //jal @@ -354,7 +354,7 @@ void Jit::Comp_Jump(u32 op) MOVI2R(gpr.R(MIPS_REG_RA), js.compilerPC + 8); CompileDelaySlot(DELAYSLOT_NICE); FlushAll(); - WriteExit(targetAddr, 0); + WriteExit(targetAddr, js.nextExit++); break; default: diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index 32dc5375d9..9ff285102a 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -193,6 +193,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b) { js.cancel = false; js.blockStart = js.compilerPC = mips_->pc; + js.nextExit = 0; js.downcountAmount = 0; js.curBlock = b; js.compiling = true; diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h index b356015889..73bca7f25a 100644 --- a/Core/MIPS/ARM/ArmJit.h +++ b/Core/MIPS/ARM/ArmJit.h @@ -55,6 +55,7 @@ struct ArmJitState u32 compilerPC; u32 blockStart; + int nextExit; bool cancel; bool inDelaySlot; int downcountAmount; diff --git a/Core/MIPS/JitCommon/JitBlockCache.cpp b/Core/MIPS/JitCommon/JitBlockCache.cpp index cb210a1f6c..7ebdd93834 100644 --- a/Core/MIPS/JitCommon/JitBlockCache.cpp +++ b/Core/MIPS/JitCommon/JitBlockCache.cpp @@ -137,12 +137,12 @@ int JitBlockCache::AllocateBlock(u32 em_address) JitBlock &b = blocks[num_blocks]; b.invalid = false; b.originalAddress = em_address; - b.exitAddress[0] = INVALID_EXIT; - b.exitAddress[1] = INVALID_EXIT; - b.exitPtrs[0] = 0; - b.exitPtrs[1] = 0; - b.linkStatus[0] = false; - b.linkStatus[1] = false; + for (int i = 0; i < MAX_JIT_BLOCK_EXITS; ++i) + { + b.exitAddress[i] = INVALID_EXIT; + b.exitPtrs[i] = 0; + b.linkStatus[i] = false; + } b.blockNum = num_blocks; num_blocks++; //commit the current block return num_blocks - 1; @@ -163,7 +163,7 @@ void JitBlockCache::FinalizeBlock(int block_num, bool block_link) block_map[std::make_pair(pAddr + 4 * b.originalSize - 1, pAddr)] = block_num; if (block_link) { - for (int i = 0; i < 2; i++) + for (int i = 0; i < MAX_JIT_BLOCK_EXITS; i++) { if (b.exitAddress[i] != INVALID_EXIT) links_to.insert(std::pair(b.exitAddress[i], block_num)); @@ -228,7 +228,7 @@ u32 JitBlockCache::GetEmuHackOpForBlock(int blockNum) const { int JitBlockCache::GetBlockNumberFromStartAddress(u32 addr) { if (!blocks) - return -1; + return -1; u32 inst = Memory::Read_U32(addr); int bl = GetBlockNumberFromEmuHackOp(inst); if (bl < 0) @@ -262,7 +262,7 @@ void JitBlockCache::LinkBlockExits(int i) // This block is dead. Don't relink it. return; } - for (int e = 0; e < 2; e++) { + for (int e = 0; e < MAX_JIT_BLOCK_EXITS; e++) { if (b.exitAddress[e] != INVALID_EXIT && !b.linkStatus[e]) { int destinationBlock = GetBlockNumberFromStartAddress(b.exitAddress[e]); if (destinationBlock != -1) { @@ -308,7 +308,7 @@ void JitBlockCache::UnlinkBlock(int i) return; for (multimap::iterator iter = ppp.first; iter != ppp.second; ++iter) { JitBlock &sourceBlock = blocks[iter->second]; - for (int e = 0; e < 2; e++) + for (int e = 0; e < MAX_JIT_BLOCK_EXITS; e++) { if (sourceBlock.exitAddress[e] == b.originalAddress) sourceBlock.linkStatus[e] = false; diff --git a/Core/MIPS/JitCommon/JitBlockCache.h b/Core/MIPS/JitCommon/JitBlockCache.h index 2f6afd83d8..5f11fcc493 100644 --- a/Core/MIPS/JitCommon/JitBlockCache.h +++ b/Core/MIPS/JitCommon/JitBlockCache.h @@ -40,6 +40,12 @@ typedef Gen::XCodeBlock CodeBlock; #error "Unsupported arch!" #endif +#if defined(ARM) +const int MAX_JIT_BLOCK_EXITS = 2; +#else +const int MAX_JIT_BLOCK_EXITS = 8; +#endif + // Define this in order to get VTune profile support for the Jit generated code. // Add the VTune include/lib directories to the project directories to get this to build. // #define USE_VTUNE @@ -50,8 +56,8 @@ struct JitBlock { const u8 *checkedEntry; const u8 *normalEntry; - u8 *exitPtrs[2]; // to be able to rewrite the exit jump - u32 exitAddress[2]; // 0xFFFFFFFF == unknown + u8 *exitPtrs[MAX_JIT_BLOCK_EXITS]; // to be able to rewrite the exit jump + u32 exitAddress[MAX_JIT_BLOCK_EXITS]; // 0xFFFFFFFF == unknown u32 originalAddress; u32 originalFirstOpcode; //to be able to restore @@ -60,7 +66,7 @@ struct JitBlock { u16 blockNum; bool invalid; - bool linkStatus[2]; + bool linkStatus[MAX_JIT_BLOCK_EXITS]; #ifdef USE_VTUNE char blockName[32]; diff --git a/Core/MIPS/x86/CompBranch.cpp b/Core/MIPS/x86/CompBranch.cpp index 46979a790b..0669236ee6 100644 --- a/Core/MIPS/x86/CompBranch.cpp +++ b/Core/MIPS/x86/CompBranch.cpp @@ -36,8 +36,10 @@ #define _FS ((op>>11) & 0x1F) #define _FT ((op>>16) & 0x1F) #define _FD ((op>>6 ) & 0x1F) -#define _POS ((op>>6 ) & 0x1F) +#define _POS ((op>>6 ) & 0x1F) #define _SIZE ((op>>11 ) & 0x1F) +#define _IMM16 (signed short)(op&0xFFFF) +#define _IMM26 (op & 0x03FFFFFF) #define LOOPOPTIMIZATION 0 @@ -133,7 +135,7 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely) ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); return; } - int offset = (signed short)(op&0xFFFF)<<2; + int offset = _IMM16 << 2; int rt = _RT; int rs = _RS; u32 targetAddr = js.compilerPC + offset + 4; @@ -141,6 +143,36 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely) u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC+4); bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs); CONDITIONAL_NICE_DELAYSLOT; + + if (jo.immBranches && gpr.IsImmediate(rs) && gpr.IsImmediate(rt)) + { + // The cc flags are opposites: when NOT to take the branch. + bool skipBranch; + s32 rsImm = (s32)gpr.GetImmediate32(rs); + s32 rtImm = (s32)gpr.GetImmediate32(rt); + + switch (cc) + { + case CC_E: skipBranch = rsImm == rtImm; break; + case CC_NE: skipBranch = rsImm != rtImm; break; + default: _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp()."); + } + + if (skipBranch) + { + // Skip the delay slot if likely, otherwise it'll be the next instruction. + if (likely) + js.compilerPC += 4; + return; + } + + // Branch taken. Always compile the delay slot, and then go to dest. + CompileDelaySlot(DELAYSLOT_NICE); + // Account for the increment in the loop. + js.compilerPC = targetAddr - 4; + return; + } + if (!likely && delaySlotIsNice) CompileDelaySlot(DELAYSLOT_NICE); @@ -156,30 +188,40 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely) } Gen::FixupBranch ptr; + RegCacheState state; if (!likely) { if (!delaySlotIsNice) - CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state); else - FlushAll(); + GetStateAndFlushAll(state); ptr = J_CC(cc, true); } else { - FlushAll(); + GetStateAndFlushAll(state); ptr = J_CC(cc, true); CompileDelaySlot(DELAYSLOT_FLUSH); } // Take the branch CONDITIONAL_LOG_EXIT(targetAddr); - WriteExit(targetAddr, 0); + WriteExit(targetAddr, js.nextExit++); SetJumpTarget(ptr); // Not taken CONDITIONAL_LOG_EXIT(js.compilerPC + 8); - WriteExit(js.compilerPC + 8, 1); - js.compiling = false; + if (CanContinueBranch()) + { + // Account for the delay slot. + js.compilerPC += 4; + RestoreState(state); + } + else + { + WriteExit(js.compilerPC + 8, js.nextExit++); + js.compiling = false; + } } void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely) @@ -189,13 +231,49 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely) ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); return; } - int offset = (signed short)(op&0xFFFF)<<2; + int offset = _IMM16 << 2; int rs = _RS; u32 targetAddr = js.compilerPC + offset + 4; u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); CONDITIONAL_NICE_DELAYSLOT; + + if (jo.immBranches && gpr.IsImmediate(rs)) + { + // The cc flags are opposites: when NOT to take the branch. + bool skipBranch; + s32 imm = (s32)gpr.GetImmediate32(rs); + + switch (cc) + { + case CC_G: skipBranch = imm > 0; break; + case CC_GE: skipBranch = imm >= 0; break; + case CC_L: skipBranch = imm < 0; break; + case CC_LE: skipBranch = imm <= 0; break; + default: _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp()."); + } + + if (skipBranch) + { + // Skip the delay slot if likely, otherwise it'll be the next instruction. + if (likely) + js.compilerPC += 4; + return; + } + + // Branch taken. Always compile the delay slot, and then go to dest. + CompileDelaySlot(DELAYSLOT_NICE); + if (andLink) + { + gpr.BindToRegister(MIPS_REG_RA, false, true); + MOV(32, gpr.R(MIPS_REG_RA), Imm32(js.compilerPC + 8)); + } + // Account for the increment in the loop. + js.compilerPC = targetAddr - 4; + return; + } + if (!likely && delaySlotIsNice) CompileDelaySlot(DELAYSLOT_NICE); @@ -203,17 +281,18 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely) CMP(32, gpr.R(rs), Imm32(0)); Gen::FixupBranch ptr; + RegCacheState state; if (!likely) { if (!delaySlotIsNice) - CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state); else - FlushAll(); + GetStateAndFlushAll(state); ptr = J_CC(cc, true); } else { - FlushAll(); + GetStateAndFlushAll(state); ptr = J_CC(cc, true); CompileDelaySlot(DELAYSLOT_FLUSH); } @@ -222,14 +301,23 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely) if (andLink) MOV(32, M(&mips_->r[MIPS_REG_RA]), Imm32(js.compilerPC + 8)); CONDITIONAL_LOG_EXIT(targetAddr); - WriteExit(targetAddr, 0); + WriteExit(targetAddr, js.nextExit++); SetJumpTarget(ptr); // Not taken CONDITIONAL_LOG_EXIT(js.compilerPC + 8); - WriteExit(js.compilerPC + 8, 1); - js.compiling = false; + if (CanContinueBranch()) + { + // Account for the delay slot. + js.compilerPC += 4; + RestoreState(state); + } + else + { + WriteExit(js.compilerPC + 8, js.nextExit++); + js.compiling = false; + } } @@ -253,7 +341,6 @@ void Jit::Comp_RelBranch(u32 op) _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); break; } - js.compiling = false; } void Jit::Comp_RelBranchRI(u32 op) @@ -272,7 +359,6 @@ void Jit::Comp_RelBranchRI(u32 op) _dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled"); break; } - js.compiling = false; } @@ -284,7 +370,7 @@ void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely) ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); return; } - int offset = (signed short)(op & 0xFFFF) << 2; + int offset = _IMM16 << 2; u32 targetAddr = js.compilerPC + offset + 4; u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); @@ -293,32 +379,43 @@ void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely) if (!likely && delaySlotIsNice) CompileDelaySlot(DELAYSLOT_NICE); - FlushAll(); - TEST(32, M((void *)&(mips_->fpcond)), Imm32(1)); Gen::FixupBranch ptr; + RegCacheState state; if (!likely) { if (!delaySlotIsNice) - CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state); + else + GetStateAndFlushAll(state); ptr = J_CC(cc, true); } else { + GetStateAndFlushAll(state); ptr = J_CC(cc, true); CompileDelaySlot(DELAYSLOT_FLUSH); } // Take the branch CONDITIONAL_LOG_EXIT(targetAddr); - WriteExit(targetAddr, 0); + WriteExit(targetAddr, js.nextExit++); SetJumpTarget(ptr); // Not taken CONDITIONAL_LOG_EXIT(js.compilerPC + 8); - WriteExit(js.compilerPC + 8, 1); - js.compiling = false; + if (CanContinueBranch()) + { + // Account for the delay slot. + js.compilerPC += 4; + RestoreState(state); + } + else + { + WriteExit(js.compilerPC + 8, js.nextExit++); + js.compiling = false; + } } @@ -334,7 +431,6 @@ void Jit::Comp_FPUBranch(u32 op) _dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted"); break; } - js.compiling = false; } // If likely is set, discard the branch slot if NOT taken. @@ -345,7 +441,7 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely) ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); return; } - int offset = (signed short)(op & 0xFFFF) << 2; + int offset = _IMM16 << 2; u32 targetAddr = js.compilerPC + offset + 4; u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4); @@ -361,22 +457,24 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely) if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1) ERROR_LOG(JIT, "VFPU branch in VFPU delay slot at %08x with different target %d / %d", js.compilerPC, (signed short)(delaySlotOp & 0xFFFF), (signed short)(op & 0xFFFF) - 1); - FlushAll(); - // THE CONDITION int imm3 = (op >> 18) & 7; //int val = (mips_->vfpuCtrl[VFPU_CTRL_CC] >> imm3) & 1; TEST(32, M((void *)&(mips_->vfpuCtrl[VFPU_CTRL_CC])), Imm32(1 << imm3)); Gen::FixupBranch ptr; + RegCacheState state; if (!likely) { if (!delaySlotIsNice && !delaySlotIsBranch) - CompileDelaySlot(DELAYSLOT_SAFE_FLUSH); + CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state); + else + GetStateAndFlushAll(state); ptr = J_CC(cc, true); } else { + GetStateAndFlushAll(state); ptr = J_CC(cc, true); if (!delaySlotIsBranch) CompileDelaySlot(DELAYSLOT_FLUSH); @@ -384,15 +482,24 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely) // Take the branch CONDITIONAL_LOG_EXIT(targetAddr); - WriteExit(targetAddr, 0); + WriteExit(targetAddr, js.nextExit++); SetJumpTarget(ptr); // Not taken u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8); CONDITIONAL_LOG_EXIT(notTakenTarget); - WriteExit(notTakenTarget, 1); - js.compiling = false; + if (CanContinueBranch() && !delaySlotIsBranch) + { + // Account for the delay slot. + js.compilerPC += 4; + RestoreState(state); + } + else + { + WriteExit(notTakenTarget, js.nextExit++); + js.compiling = false; + } } @@ -408,7 +515,6 @@ void Jit::Comp_VBranch(u32 op) _dbg_assert_msg_(CPU,0,"Comp_VBranch: Invalid instruction"); break; } - js.compiling = false; } void Jit::Comp_Jump(u32 op) @@ -418,7 +524,7 @@ void Jit::Comp_Jump(u32 op) ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart); return; } - u32 off = ((op & 0x3FFFFFF) << 2); + u32 off = _IMM26 << 2; u32 targetAddr = (js.compilerPC & 0xF0000000) | off; switch (op >> 26) @@ -427,7 +533,7 @@ void Jit::Comp_Jump(u32 op) CompileDelaySlot(DELAYSLOT_NICE); FlushAll(); CONDITIONAL_LOG_EXIT(targetAddr); - WriteExit(targetAddr, 0); + WriteExit(targetAddr, js.nextExit++); break; case 3: //jal @@ -436,7 +542,7 @@ void Jit::Comp_Jump(u32 op) CompileDelaySlot(DELAYSLOT_NICE); FlushAll(); CONDITIONAL_LOG_EXIT(targetAddr); - WriteExit(targetAddr, 0); + WriteExit(targetAddr, js.nextExit++); break; default: diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index 94dc089670..c73e6a5691 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -134,6 +134,20 @@ void Jit::DoDummyState(PointerWrap &p) p.DoMarker("Jit"); } + +void Jit::GetStateAndFlushAll(RegCacheState &state) +{ + gpr.GetState(state.gpr); + fpr.GetState(state.fpr); + FlushAll(); +} + +void Jit::RestoreState(const RegCacheState state) +{ + gpr.RestoreState(state.gpr); + fpr.RestoreState(state.fpr); +} + void Jit::FlushAll() { gpr.Flush(); @@ -180,7 +194,7 @@ void Jit::ClearCacheAt(u32 em_address) ClearCache(); } -void Jit::CompileDelaySlot(int flags) +void Jit::CompileDelaySlot(int flags, RegCacheState *state) { const u32 addr = js.compilerPC + 4; @@ -196,7 +210,12 @@ void Jit::CompileDelaySlot(int flags) js.inDelaySlot = false; if (flags & DELAYSLOT_FLUSH) - FlushAll(); + { + if (state != NULL) + GetStateAndFlushAll(*state); + else + FlushAll(); + } if (flags & DELAYSLOT_SAFE) LOAD_FLAGS; // restore flag! } @@ -255,6 +274,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b) { js.cancel = false; js.blockStart = js.compilerPC = mips_->pc; + js.nextExit = 0; js.downcountAmount = 0; js.curBlock = b; js.compiling = true; @@ -352,6 +372,8 @@ void Jit::Comp_Generic(u32 op) void Jit::WriteExit(u32 destination, int exit_num) { + _dbg_assert_msg_(JIT, exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num"); + if (!Memory::IsValidAddress(destination)) { ERROR_LOG_REPORT(JIT, "Trying to write block exit to illegal destination %08x: pc = %08x", destination, currentMIPS->pc); } diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h index dc63c1e966..64546ecc68 100644 --- a/Core/MIPS/x86/Jit.h +++ b/Core/MIPS/x86/Jit.h @@ -41,9 +41,17 @@ struct JitOptions JitOptions() { enableBlocklink = true; + // Seems to hurt performance? + immBranches = false; + // Seems to hurt performance also? + continueBranches = false; + continueMaxInstructions = 100; } bool enableBlocklink; + bool immBranches; + bool continueBranches; + int continueMaxInstructions; }; struct JitState @@ -65,6 +73,7 @@ struct JitState u32 compilerPC; u32 blockStart; + int nextExit; bool cancel; bool inDelaySlot; // See JitState::AfterOp for values. @@ -151,6 +160,12 @@ enum CompileDelaySlotFlags DELAYSLOT_SAFE_FLUSH = DELAYSLOT_FLUSH | DELAYSLOT_SAFE, }; +// TODO: Hmm, humongous. +struct RegCacheState { + GPRRegCacheState gpr; + FPURegCacheState fpr; +}; + class Jit : public Gen::XCodeBlock { public: @@ -255,12 +270,17 @@ public: void ClearCache(); void ClearCacheAt(u32 em_address); private: + void GetStateAndFlushAll(RegCacheState &state); + void RestoreState(const RegCacheState state); void FlushAll(); void FlushPrefixV(); void WriteDowncount(int offset = 0); // See CompileDelaySlotFlags for flags. - void CompileDelaySlot(int flags); + void CompileDelaySlot(int flags, RegCacheState *state = NULL); + void CompileDelaySlot(int flags, RegCacheState &state) { + CompileDelaySlot(flags, &state); + } void EatInstruction(u32 op); void WriteExit(u32 destination, int exit_num); @@ -295,6 +315,17 @@ private: void CallProtectedFunction(void *func, const u32 arg1, const u32 arg2, const u32 arg3); void CallProtectedFunction(void *func, const OpArg &arg1, const u32 arg2, const u32 arg3); + bool CanContinueBranch() { + if (!jo.continueBranches || js.numInstructions >= jo.continueMaxInstructions) { + return false; + } + // Need at least 2 exits left over. + if (js.nextExit >= MAX_JIT_BLOCK_EXITS - 1) { + return false; + } + return true; + } + JitBlockCache blocks; JitOptions jo; JitState js; diff --git a/Core/MIPS/x86/RegCache.cpp b/Core/MIPS/x86/RegCache.cpp index 14932abd98..aa96e2f002 100644 --- a/Core/MIPS/x86/RegCache.cpp +++ b/Core/MIPS/x86/RegCache.cpp @@ -282,8 +282,7 @@ void GPRRegCache::StoreFromRegister(int i) { } } -void GPRRegCache::Flush() -{ +void GPRRegCache::Flush() { for (int i = 0; i < NUM_X_REGS; i++) { if (xregs[i].allocLocked) PanicAlert("Someone forgot to unlock X64 reg %i.", i); @@ -305,4 +304,14 @@ void GPRRegCache::Flush() } } } -} \ No newline at end of file +} + +void GPRRegCache::GetState(GPRRegCacheState &state) const { + memcpy(state.regs, regs, sizeof(regs)); + memcpy(state.xregs, xregs, sizeof(xregs)); +} + +void GPRRegCache::RestoreState(const GPRRegCacheState state) { + memcpy(regs, state.regs, sizeof(regs)); + memcpy(xregs, state.xregs, sizeof(xregs)); +} diff --git a/Core/MIPS/x86/RegCache.h b/Core/MIPS/x86/RegCache.h index 0e786fffcc..5bea7d12d3 100644 --- a/Core/MIPS/x86/RegCache.h +++ b/Core/MIPS/x86/RegCache.h @@ -22,6 +22,15 @@ using namespace Gen; +#ifdef _M_X64 +#define NUM_X_REGS 16 +#elif _M_IX86 +#define NUM_X_REGS 8 +#endif + +// TODO: Add more cachable regs, like HI, LO +#define NUM_MIPS_GPRS 32 + struct MIPSCachedReg { OpArg location; bool away; // value not in source register @@ -35,14 +44,10 @@ struct X64CachedReg { bool allocLocked; }; -#ifdef _M_X64 -#define NUM_X_REGS 16 -#elif _M_IX86 -#define NUM_X_REGS 8 -#endif - -// TODO: Add more cachable regs, like HI, LO -#define NUM_MIPS_GPRS 32 +struct GPRRegCacheState { + MIPSCachedReg regs[NUM_MIPS_GPRS]; + X64CachedReg xregs[NUM_X_REGS]; +}; class GPRRegCache { @@ -91,6 +96,9 @@ public: bool IsImmediate(int preg) const; u32 GetImmediate32(int preg) const; + void GetState(GPRRegCacheState &state) const; + void RestoreState(const GPRRegCacheState state); + MIPSState *mips; private: diff --git a/Core/MIPS/x86/RegCacheFPU.cpp b/Core/MIPS/x86/RegCacheFPU.cpp index 0973863d63..e52c59f4f2 100644 --- a/Core/MIPS/x86/RegCacheFPU.cpp +++ b/Core/MIPS/x86/RegCacheFPU.cpp @@ -255,3 +255,13 @@ void FPURegCache::FlushX(X64Reg reg) { StoreFromRegister(xregs[reg].mipsReg); } } + +void FPURegCache::GetState(FPURegCacheState &state) const { + memcpy(state.regs, regs, sizeof(regs)); + memcpy(state.xregs, xregs, sizeof(xregs)); +} + +void FPURegCache::RestoreState(const FPURegCacheState state) { + memcpy(regs, state.regs, sizeof(regs)); + memcpy(xregs, state.xregs, sizeof(xregs)); +} diff --git a/Core/MIPS/x86/RegCacheFPU.h b/Core/MIPS/x86/RegCacheFPU.h index 37d7c4c2e6..cd8b669f2f 100644 --- a/Core/MIPS/x86/RegCacheFPU.h +++ b/Core/MIPS/x86/RegCacheFPU.h @@ -57,6 +57,11 @@ struct MIPSCachedFPReg { bool tempLocked; }; +struct FPURegCacheState { + MIPSCachedFPReg regs[NUM_MIPS_FPRS]; + X64CachedFPReg xregs[NUM_X_FPREGS]; +}; + enum { MAP_DIRTY = 1, MAP_NOINIT = 2, @@ -129,6 +134,9 @@ public: ReleaseSpillLock(vreg + 32); } + void GetState(FPURegCacheState &state) const; + void RestoreState(const FPURegCacheState state); + MIPSState *mips; private: