Merge pull request #3189 from unknownbrackets/jit-branch

Attempts at continuing jit blocks (disabled)
This commit is contained in:
Henrik Rydgård 2013-08-16 02:11:06 -07:00
commit e36444c1c2
12 changed files with 288 additions and 86 deletions

View file

@ -39,6 +39,8 @@
#define _FD ((op>>6 ) & 0x1F)
#define _POS ((op>>6 ) & 0x1F)
#define _SIZE ((op>>11 ) & 0x1F)
#define _IMM16 (signed short)(op&0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)
#define LOOPOPTIMIZATION 0
@ -57,7 +59,7 @@ void Jit::BranchRSRTComp(u32 op, ArmGen::CCFlags cc, bool likely)
ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op&0xFFFF)<<2;
int offset = _IMM16 << 2;
int rt = _RT;
int rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
@ -101,11 +103,11 @@ void Jit::BranchRSRTComp(u32 op, ArmGen::CCFlags cc, bool likely)
}
// Take the branch
WriteExit(targetAddr, 0);
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC+8, 1);
WriteExit(js.compilerPC+8, js.nextExit++);
js.compiling = false;
}
@ -117,7 +119,7 @@ void Jit::BranchRSZeroComp(u32 op, ArmGen::CCFlags cc, bool andLink, bool likely
ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op&0xFFFF)<<2;
int offset = _IMM16 << 2;
int rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
@ -153,11 +155,11 @@ void Jit::BranchRSZeroComp(u32 op, ArmGen::CCFlags cc, bool andLink, bool likely
STR(R0, CTXREG, MIPS_REG_RA * 4);
}
WriteExit(targetAddr, 0);
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, 1);
WriteExit(js.compilerPC + 8, js.nextExit++);
js.compiling = false;
}
@ -183,7 +185,6 @@ void Jit::Comp_RelBranch(u32 op)
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
void Jit::Comp_RelBranchRI(u32 op)
@ -202,7 +203,6 @@ void Jit::Comp_RelBranchRI(u32 op)
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
// If likely is set, discard the branch slot if NOT taken.
@ -212,7 +212,7 @@ void Jit::BranchFPFlag(u32 op, ArmGen::CCFlags cc, bool likely)
ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op & 0xFFFF) << 2;
int offset = _IMM16 << 2;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
@ -221,8 +221,6 @@ void Jit::BranchFPFlag(u32 op, ArmGen::CCFlags cc, bool likely)
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
LDR(R0, CTXREG, offsetof(MIPSState, fpcond));
TST(R0, Operand2(1, TYPE_IMM));
@ -231,20 +229,23 @@ void Jit::BranchFPFlag(u32 op, ArmGen::CCFlags cc, bool likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_CC(cc);
}
else
{
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
WriteExit(targetAddr, 0);
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, 1);
WriteExit(js.compilerPC + 8, js.nextExit++);
js.compiling = false;
}
@ -260,7 +261,6 @@ void Jit::Comp_FPUBranch(u32 op)
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
break;
}
js.compiling = false;
}
// If likely is set, discard the branch slot if NOT taken.
@ -270,7 +270,7 @@ void Jit::BranchVFPUFlag(u32 op, ArmGen::CCFlags cc, bool likely)
ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op & 0xFFFF) << 2;
int offset = _IMM16 << 2;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
@ -286,8 +286,6 @@ void Jit::BranchVFPUFlag(u32 op, ArmGen::CCFlags cc, bool likely)
if (delaySlotIsBranch && (delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", js.compilerPC);
FlushAll();
int imm3 = (op >> 18) & 7;
MOVI2R(R0, (u32)&(mips_->vfpuCtrl[VFPU_CTRL_CC]));
@ -300,10 +298,13 @@ void Jit::BranchVFPUFlag(u32 op, ArmGen::CCFlags cc, bool likely)
{
if (!delaySlotIsNice && !delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_CC(cc);
}
else
{
FlushAll();
ptr = B_CC(cc);
if (!delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
@ -311,12 +312,12 @@ void Jit::BranchVFPUFlag(u32 op, ArmGen::CCFlags cc, bool likely)
js.inDelaySlot = false;
// Take the branch
WriteExit(targetAddr, 0);
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8);
WriteExit(notTakenTarget, 1);
WriteExit(notTakenTarget, js.nextExit++);
js.compiling = false;
}
@ -329,7 +330,6 @@ void Jit::Comp_VBranch(u32 op)
case 2: BranchVFPUFlag(op, CC_NEQ, true); break; // bvfl
case 3: BranchVFPUFlag(op, CC_EQ, true); break; // bvtl
}
js.compiling = false;
}
void Jit::Comp_Jump(u32 op)
@ -338,7 +338,7 @@ void Jit::Comp_Jump(u32 op)
ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
u32 off = ((op & 0x03FFFFFF) << 2);
u32 off = _IMM26 << 2;
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
switch (op >> 26)
@ -346,7 +346,7 @@ void Jit::Comp_Jump(u32 op)
case 2: //j
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
WriteExit(targetAddr, 0);
WriteExit(targetAddr, js.nextExit++);
break;
case 3: //jal
@ -354,7 +354,7 @@ void Jit::Comp_Jump(u32 op)
MOVI2R(gpr.R(MIPS_REG_RA), js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
WriteExit(targetAddr, 0);
WriteExit(targetAddr, js.nextExit++);
break;
default:

View file

@ -193,6 +193,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
{
js.cancel = false;
js.blockStart = js.compilerPC = mips_->pc;
js.nextExit = 0;
js.downcountAmount = 0;
js.curBlock = b;
js.compiling = true;

View file

@ -55,6 +55,7 @@ struct ArmJitState
u32 compilerPC;
u32 blockStart;
int nextExit;
bool cancel;
bool inDelaySlot;
int downcountAmount;

View file

@ -137,12 +137,12 @@ int JitBlockCache::AllocateBlock(u32 em_address)
JitBlock &b = blocks[num_blocks];
b.invalid = false;
b.originalAddress = em_address;
b.exitAddress[0] = INVALID_EXIT;
b.exitAddress[1] = INVALID_EXIT;
b.exitPtrs[0] = 0;
b.exitPtrs[1] = 0;
b.linkStatus[0] = false;
b.linkStatus[1] = false;
for (int i = 0; i < MAX_JIT_BLOCK_EXITS; ++i)
{
b.exitAddress[i] = INVALID_EXIT;
b.exitPtrs[i] = 0;
b.linkStatus[i] = false;
}
b.blockNum = num_blocks;
num_blocks++; //commit the current block
return num_blocks - 1;
@ -163,7 +163,7 @@ void JitBlockCache::FinalizeBlock(int block_num, bool block_link)
block_map[std::make_pair(pAddr + 4 * b.originalSize - 1, pAddr)] = block_num;
if (block_link)
{
for (int i = 0; i < 2; i++)
for (int i = 0; i < MAX_JIT_BLOCK_EXITS; i++)
{
if (b.exitAddress[i] != INVALID_EXIT)
links_to.insert(std::pair<u32, int>(b.exitAddress[i], block_num));
@ -228,7 +228,7 @@ u32 JitBlockCache::GetEmuHackOpForBlock(int blockNum) const {
int JitBlockCache::GetBlockNumberFromStartAddress(u32 addr)
{
if (!blocks)
return -1;
return -1;
u32 inst = Memory::Read_U32(addr);
int bl = GetBlockNumberFromEmuHackOp(inst);
if (bl < 0)
@ -262,7 +262,7 @@ void JitBlockCache::LinkBlockExits(int i)
// This block is dead. Don't relink it.
return;
}
for (int e = 0; e < 2; e++) {
for (int e = 0; e < MAX_JIT_BLOCK_EXITS; e++) {
if (b.exitAddress[e] != INVALID_EXIT && !b.linkStatus[e]) {
int destinationBlock = GetBlockNumberFromStartAddress(b.exitAddress[e]);
if (destinationBlock != -1) {
@ -308,7 +308,7 @@ void JitBlockCache::UnlinkBlock(int i)
return;
for (multimap<u32, int>::iterator iter = ppp.first; iter != ppp.second; ++iter) {
JitBlock &sourceBlock = blocks[iter->second];
for (int e = 0; e < 2; e++)
for (int e = 0; e < MAX_JIT_BLOCK_EXITS; e++)
{
if (sourceBlock.exitAddress[e] == b.originalAddress)
sourceBlock.linkStatus[e] = false;

View file

@ -40,6 +40,12 @@ typedef Gen::XCodeBlock CodeBlock;
#error "Unsupported arch!"
#endif
// Number of exit slots tracked per JitBlock; it sizes the per-block exit
// arrays (exitPtrs, exitAddress, linkStatus) and bounds the loops over them.
// ARM builds use 2 slots, every other build uses 8.
#if defined(ARM)
const int MAX_JIT_BLOCK_EXITS = 2;
#else
const int MAX_JIT_BLOCK_EXITS = 8;
#endif
// Define this in order to get VTune profile support for the Jit generated code.
// Add the VTune include/lib directories to the project directories to get this to build.
// #define USE_VTUNE
@ -50,8 +56,8 @@ struct JitBlock {
const u8 *checkedEntry;
const u8 *normalEntry;
u8 *exitPtrs[2]; // to be able to rewrite the exit jump
u32 exitAddress[2]; // 0xFFFFFFFF == unknown
u8 *exitPtrs[MAX_JIT_BLOCK_EXITS]; // to be able to rewrite the exit jump
u32 exitAddress[MAX_JIT_BLOCK_EXITS]; // 0xFFFFFFFF == unknown
u32 originalAddress;
u32 originalFirstOpcode; //to be able to restore
@ -60,7 +66,7 @@ struct JitBlock {
u16 blockNum;
bool invalid;
bool linkStatus[2];
bool linkStatus[MAX_JIT_BLOCK_EXITS];
#ifdef USE_VTUNE
char blockName[32];

View file

@ -36,8 +36,10 @@
#define _FS ((op>>11) & 0x1F)
#define _FT ((op>>16) & 0x1F)
#define _FD ((op>>6 ) & 0x1F)
#define _POS ((op>>6 ) & 0x1F)
#define _POS ((op>>6 ) & 0x1F)
#define _SIZE ((op>>11 ) & 0x1F)
#define _IMM16 (signed short)(op&0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)
#define LOOPOPTIMIZATION 0
@ -133,7 +135,7 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely)
ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op&0xFFFF)<<2;
int offset = _IMM16 << 2;
int rt = _RT;
int rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
@ -141,6 +143,36 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely)
u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (jo.immBranches && gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
{
// The cc flags are opposites: when NOT to take the branch.
bool skipBranch;
s32 rsImm = (s32)gpr.GetImmediate32(rs);
s32 rtImm = (s32)gpr.GetImmediate32(rt);
switch (cc)
{
case CC_E: skipBranch = rsImm == rtImm; break;
case CC_NE: skipBranch = rsImm != rtImm; break;
default: _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp().");
}
if (skipBranch)
{
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
return;
}
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
@ -156,30 +188,40 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely)
}
Gen::FixupBranch ptr;
RegCacheState state;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state);
else
FlushAll();
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
}
else
{
FlushAll();
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, 0);
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
WriteExit(js.compilerPC + 8, 1);
js.compiling = false;
if (CanContinueBranch())
{
// Account for the delay slot.
js.compilerPC += 4;
RestoreState(state);
}
else
{
WriteExit(js.compilerPC + 8, js.nextExit++);
js.compiling = false;
}
}
void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely)
@ -189,13 +231,49 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely)
ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op&0xFFFF)<<2;
int offset = _IMM16 << 2;
int rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (jo.immBranches && gpr.IsImmediate(rs))
{
// The cc flags are opposites: when NOT to take the branch.
bool skipBranch;
s32 imm = (s32)gpr.GetImmediate32(rs);
switch (cc)
{
case CC_G: skipBranch = imm > 0; break;
case CC_GE: skipBranch = imm >= 0; break;
case CC_L: skipBranch = imm < 0; break;
case CC_LE: skipBranch = imm <= 0; break;
default: _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp().");
}
if (skipBranch)
{
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
if (andLink)
{
gpr.BindToRegister(MIPS_REG_RA, false, true);
MOV(32, gpr.R(MIPS_REG_RA), Imm32(js.compilerPC + 8));
}
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
return;
}
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
@ -203,17 +281,18 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely)
CMP(32, gpr.R(rs), Imm32(0));
Gen::FixupBranch ptr;
RegCacheState state;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state);
else
FlushAll();
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
}
else
{
FlushAll();
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
@ -222,14 +301,23 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely)
if (andLink)
MOV(32, M(&mips_->r[MIPS_REG_RA]), Imm32(js.compilerPC + 8));
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, 0);
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
WriteExit(js.compilerPC + 8, 1);
js.compiling = false;
if (CanContinueBranch())
{
// Account for the delay slot.
js.compilerPC += 4;
RestoreState(state);
}
else
{
WriteExit(js.compilerPC + 8, js.nextExit++);
js.compiling = false;
}
}
@ -253,7 +341,6 @@ void Jit::Comp_RelBranch(u32 op)
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
void Jit::Comp_RelBranchRI(u32 op)
@ -272,7 +359,6 @@ void Jit::Comp_RelBranchRI(u32 op)
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
@ -284,7 +370,7 @@ void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely)
ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op & 0xFFFF) << 2;
int offset = _IMM16 << 2;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
@ -293,32 +379,43 @@ void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely)
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
TEST(32, M((void *)&(mips_->fpcond)), Imm32(1));
Gen::FixupBranch ptr;
RegCacheState state;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state);
else
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
}
else
{
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, 0);
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
WriteExit(js.compilerPC + 8, 1);
js.compiling = false;
if (CanContinueBranch())
{
// Account for the delay slot.
js.compilerPC += 4;
RestoreState(state);
}
else
{
WriteExit(js.compilerPC + 8, js.nextExit++);
js.compiling = false;
}
}
@ -334,7 +431,6 @@ void Jit::Comp_FPUBranch(u32 op)
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
break;
}
js.compiling = false;
}
// If likely is set, discard the branch slot if NOT taken.
@ -345,7 +441,7 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely)
ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op & 0xFFFF) << 2;
int offset = _IMM16 << 2;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
@ -361,22 +457,24 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely)
if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
ERROR_LOG(JIT, "VFPU branch in VFPU delay slot at %08x with different target %d / %d", js.compilerPC, (signed short)(delaySlotOp & 0xFFFF), (signed short)(op & 0xFFFF) - 1);
FlushAll();
// THE CONDITION
int imm3 = (op >> 18) & 7;
//int val = (mips_->vfpuCtrl[VFPU_CTRL_CC] >> imm3) & 1;
TEST(32, M((void *)&(mips_->vfpuCtrl[VFPU_CTRL_CC])), Imm32(1 << imm3));
Gen::FixupBranch ptr;
RegCacheState state;
if (!likely)
{
if (!delaySlotIsNice && !delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state);
else
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
}
else
{
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
if (!delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
@ -384,15 +482,24 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely)
// Take the branch
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, 0);
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8);
CONDITIONAL_LOG_EXIT(notTakenTarget);
WriteExit(notTakenTarget, 1);
js.compiling = false;
if (CanContinueBranch() && !delaySlotIsBranch)
{
// Account for the delay slot.
js.compilerPC += 4;
RestoreState(state);
}
else
{
WriteExit(notTakenTarget, js.nextExit++);
js.compiling = false;
}
}
@ -408,7 +515,6 @@ void Jit::Comp_VBranch(u32 op)
_dbg_assert_msg_(CPU,0,"Comp_VBranch: Invalid instruction");
break;
}
js.compiling = false;
}
void Jit::Comp_Jump(u32 op)
@ -418,7 +524,7 @@ void Jit::Comp_Jump(u32 op)
ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
u32 off = ((op & 0x3FFFFFF) << 2);
u32 off = _IMM26 << 2;
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
switch (op >> 26)
@ -427,7 +533,7 @@ void Jit::Comp_Jump(u32 op)
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, 0);
WriteExit(targetAddr, js.nextExit++);
break;
case 3: //jal
@ -436,7 +542,7 @@ void Jit::Comp_Jump(u32 op)
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, 0);
WriteExit(targetAddr, js.nextExit++);
break;
default:

View file

@ -134,6 +134,20 @@ void Jit::DoDummyState(PointerWrap &p)
p.DoMarker("Jit");
}
// Records the current contents of both register caches into `state` and then
// flushes everything. The snapshot is taken before FlushAll() runs, so `state`
// describes the caches as they were prior to the flush.
void Jit::GetStateAndFlushAll(RegCacheState &state)
{
	// The two snapshots write to separate members of `state`.
	fpr.GetState(state.fpr);
	gpr.GetState(state.gpr);

	// Must come last: the snapshot has to capture the pre-flush state.
	FlushAll();
}
// Puts both register caches back into the state previously captured in
// `state` (see GetStateAndFlushAll()).
// NOTE(review): `state` is taken by value, so the whole struct is copied on
// each call; a const reference would avoid that, but the declaration would
// have to change together with this definition.
void Jit::RestoreState(const RegCacheState state)
{
	fpr.RestoreState(state.fpr);
	gpr.RestoreState(state.gpr);
}
void Jit::FlushAll()
{
gpr.Flush();
@ -180,7 +194,7 @@ void Jit::ClearCacheAt(u32 em_address)
ClearCache();
}
void Jit::CompileDelaySlot(int flags)
void Jit::CompileDelaySlot(int flags, RegCacheState *state)
{
const u32 addr = js.compilerPC + 4;
@ -196,7 +210,12 @@ void Jit::CompileDelaySlot(int flags)
js.inDelaySlot = false;
if (flags & DELAYSLOT_FLUSH)
FlushAll();
{
if (state != NULL)
GetStateAndFlushAll(*state);
else
FlushAll();
}
if (flags & DELAYSLOT_SAFE)
LOAD_FLAGS; // restore flag!
}
@ -255,6 +274,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
{
js.cancel = false;
js.blockStart = js.compilerPC = mips_->pc;
js.nextExit = 0;
js.downcountAmount = 0;
js.curBlock = b;
js.compiling = true;
@ -352,6 +372,8 @@ void Jit::Comp_Generic(u32 op)
void Jit::WriteExit(u32 destination, int exit_num)
{
_dbg_assert_msg_(JIT, exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num");
if (!Memory::IsValidAddress(destination)) {
ERROR_LOG_REPORT(JIT, "Trying to write block exit to illegal destination %08x: pc = %08x", destination, currentMIPS->pc);
}

View file

@ -41,9 +41,17 @@ struct JitOptions
JitOptions()
{
enableBlocklink = true;
// Seems to hurt performance?
immBranches = false;
// Seems to hurt performance also?
continueBranches = false;
continueMaxInstructions = 100;
}
bool enableBlocklink;
bool immBranches;
bool continueBranches;
int continueMaxInstructions;
};
struct JitState
@ -65,6 +73,7 @@ struct JitState
u32 compilerPC;
u32 blockStart;
int nextExit;
bool cancel;
bool inDelaySlot;
// See JitState::AfterOp for values.
@ -151,6 +160,12 @@ enum CompileDelaySlotFlags
DELAYSLOT_SAFE_FLUSH = DELAYSLOT_FLUSH | DELAYSLOT_SAFE,
};
// Combined snapshot of the GPR and FPU register caches, filled by
// Jit::GetStateAndFlushAll() and consumed by Jit::RestoreState().
// TODO: Hmm, humongous.
struct RegCacheState {
GPRRegCacheState gpr;  // snapshot of the general purpose register cache
FPURegCacheState fpr;  // snapshot of the FPU register cache
};
class Jit : public Gen::XCodeBlock
{
public:
@ -255,12 +270,17 @@ public:
void ClearCache();
void ClearCacheAt(u32 em_address);
private:
void GetStateAndFlushAll(RegCacheState &state);
void RestoreState(const RegCacheState state);
void FlushAll();
void FlushPrefixV();
void WriteDowncount(int offset = 0);
// See CompileDelaySlotFlags for flags.
void CompileDelaySlot(int flags);
void CompileDelaySlot(int flags, RegCacheState *state = NULL);
// Convenience overload: takes the state by reference and forwards its address
// to the pointer-taking CompileDelaySlot().
void CompileDelaySlot(int flags, RegCacheState &state) {
CompileDelaySlot(flags, &state);
}
void EatInstruction(u32 op);
void WriteExit(u32 destination, int exit_num);
@ -295,6 +315,17 @@ private:
void CallProtectedFunction(void *func, const u32 arg1, const u32 arg2, const u32 arg3);
void CallProtectedFunction(void *func, const OpArg &arg1, const u32 arg2, const u32 arg3);
// Decides whether compilation may carry on past the not-taken side of a
// conditional branch instead of ending the block there.
// Returns true only when the option is enabled, the block is still below the
// configured instruction limit, and enough exit slots remain.
bool CanContinueBranch() {
	// Feature switch plus the instruction budget for the current block.
	const bool withinBudget = jo.continueBranches && js.numInstructions < jo.continueMaxInstructions;
	// Need at least 2 exits left over.
	const bool enoughExits = js.nextExit < MAX_JIT_BLOCK_EXITS - 1;
	return withinBudget && enoughExits;
}
JitBlockCache blocks;
JitOptions jo;
JitState js;

View file

@ -282,8 +282,7 @@ void GPRRegCache::StoreFromRegister(int i) {
}
}
void GPRRegCache::Flush()
{
void GPRRegCache::Flush() {
for (int i = 0; i < NUM_X_REGS; i++) {
if (xregs[i].allocLocked)
PanicAlert("Someone forgot to unlock X64 reg %i.", i);
@ -305,4 +304,14 @@ void GPRRegCache::Flush()
}
}
}
}
}
// Copies this cache's bookkeeping arrays (regs and xregs) into `state`.
// The cache itself is not modified.
void GPRRegCache::GetState(GPRRegCacheState &state) const {
	// Raw byte copies; the two arrays are independent of each other.
	memcpy(state.xregs, xregs, sizeof(xregs));
	memcpy(state.regs, regs, sizeof(regs));
}
// Overwrites this cache's bookkeeping arrays (regs and xregs) with the
// contents previously saved in `state` by GetState().
void GPRRegCache::RestoreState(const GPRRegCacheState state) {
	// Raw byte copies; the two arrays are independent of each other.
	memcpy(xregs, state.xregs, sizeof(xregs));
	memcpy(regs, state.regs, sizeof(regs));
}

View file

@ -22,6 +22,15 @@
using namespace Gen;
#ifdef _M_X64
#define NUM_X_REGS 16
#elif _M_IX86
#define NUM_X_REGS 8
#endif
// TODO: Add more cachable regs, like HI, LO
#define NUM_MIPS_GPRS 32
struct MIPSCachedReg {
OpArg location;
bool away; // value not in source register
@ -35,14 +44,10 @@ struct X64CachedReg {
bool allocLocked;
};
#ifdef _M_X64
#define NUM_X_REGS 16
#elif _M_IX86
#define NUM_X_REGS 8
#endif
// TODO: Add more cachable regs, like HI, LO
#define NUM_MIPS_GPRS 32
// Saved copy of the GPR register cache's bookkeeping: one entry per MIPS GPR
// and one entry per host register.
struct GPRRegCacheState {
MIPSCachedReg regs[NUM_MIPS_GPRS];  // per-MIPS-register entries
X64CachedReg xregs[NUM_X_REGS];     // per-host-register entries
};
class GPRRegCache
{
@ -91,6 +96,9 @@ public:
bool IsImmediate(int preg) const;
u32 GetImmediate32(int preg) const;
void GetState(GPRRegCacheState &state) const;
void RestoreState(const GPRRegCacheState state);
MIPSState *mips;
private:

View file

@ -255,3 +255,13 @@ void FPURegCache::FlushX(X64Reg reg) {
StoreFromRegister(xregs[reg].mipsReg);
}
}
// Copies this cache's bookkeeping arrays (regs and xregs) into `state`.
// The cache itself is not modified.
void FPURegCache::GetState(FPURegCacheState &state) const {
	// Raw byte copies; the two arrays are independent of each other.
	memcpy(state.xregs, xregs, sizeof(xregs));
	memcpy(state.regs, regs, sizeof(regs));
}
// Overwrites this cache's bookkeeping arrays (regs and xregs) with the
// contents previously saved in `state` by GetState().
void FPURegCache::RestoreState(const FPURegCacheState state) {
	// Raw byte copies; the two arrays are independent of each other.
	memcpy(xregs, state.xregs, sizeof(xregs));
	memcpy(regs, state.regs, sizeof(regs));
}

View file

@ -57,6 +57,11 @@ struct MIPSCachedFPReg {
bool tempLocked;
};
// Saved copy of the FPU register cache's bookkeeping: one entry per MIPS FPU
// register and one entry per host FP register.
struct FPURegCacheState {
MIPSCachedFPReg regs[NUM_MIPS_FPRS];  // per-MIPS-FPU-register entries
X64CachedFPReg xregs[NUM_X_FPREGS];   // per-host-FP-register entries
};
enum {
MAP_DIRTY = 1,
MAP_NOINIT = 2,
@ -129,6 +134,9 @@ public:
ReleaseSpillLock(vreg + 32);
}
void GetState(FPURegCacheState &state) const;
void RestoreState(const FPURegCacheState state);
MIPSState *mips;
private: