x86jit: Refactor and skip flushes in branch cont.

Still not faster, but at least the code isn't as messy.
This commit is contained in:
Unknown W. Brackets 2013-11-12 00:45:28 -08:00
parent 32504ed46e
commit a334aaf6ca
2 changed files with 105 additions and 266 deletions

View file

@ -165,6 +165,106 @@ bool Jit::PredictTakeBranch(u32 targetAddr, bool likely) {
return targetAddr > js.compilerPC;
}
// Emits the taken / not-taken exits of a conditional branch, after the caller has
// already emitted the instruction that sets the host flags.
//
//   cc              - condition code passed to J_CC.  Going by the "Take the branch" /
//                     "Not taken" layout below, the emitted jump skips the taken-branch
//                     code unless cc gets flipped here.
//   targetAddr      - address reached when the branch is taken.
//   notTakenAddr    - address reached when the branch is not taken.
//   delaySlotIsNice - for non-likely branches: when set, the delay slot is not compiled
//                     here (presumably the caller already did so - confirm at call sites).
//   likely          - "likely" branch: the delay slot is only compiled on the taken path.
//   andLink         - also store js.compilerPC + 8 into RA on the taken path.
//
// When CanContinueBranch() allows it, only one exit is written and compilation continues
// inline along the predicted path (js.compiling stays true).  Otherwise both exits are
// written and js.compiling is cleared.
void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink) {
	// We may want to try to continue along this branch a little while, to reduce reg flushing.
	if (CanContinueBranch())
	{
		bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
		// Flipping cc makes the jump land on the taken path, so the not taken exit comes first.
		if (predictTakeBranch)
			cc = FlipCCFlag(cc);

		Gen::FixupBranch ptr;
		RegCacheState state;
		if (!likely)
		{
			if (!delaySlotIsNice)
				CompileDelaySlot(DELAYSLOT_SAFE);
			ptr = J_CC(cc, true);
			// The flush is emitted after the jump, so only the fall-through path pays for it.
			GetStateAndFlushAll(state);
		}
		else
		{
			ptr = J_CC(cc, true);
			// Both continuation paths below call RestoreState(state), so the snapshot must
			// always be taken, and it must be taken before the delay slot is compiled: the
			// jump target never runs the delay slot in the not-predicted-taken case.
			GetStateAndFlushAll(state);
			if (!predictTakeBranch)
				CompileDelaySlot(DELAYSLOT_FLUSH);
		}

		if (predictTakeBranch)
		{
			// We flipped the cc, the not taken case is first.
			CONDITIONAL_LOG_EXIT(notTakenAddr);
			WriteExit(notTakenAddr, js.nextExit++);

			// Now our taken path.  Bring the regs back, we didn't flush 'em after all.
			SetJumpTarget(ptr);
			RestoreState(state);
			CONDITIONAL_LOG_EXIT(targetAddr);

			if (andLink)
				gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);

			// Don't forget to run the delay slot if likely.
			if (likely)
				CompileDelaySlot(DELAYSLOT_NICE);

			// Account for the increment in the loop.
			js.compilerPC = targetAddr - 4;
			// In case the delay slot was a break or something.
			js.compiling = true;
		}
		else
		{
			// Take the branch
			if (andLink)
				MOV(32, M(&mips_->r[MIPS_REG_RA]), Imm32(js.compilerPC + 8));
			CONDITIONAL_LOG_EXIT(targetAddr);
			WriteExit(targetAddr, js.nextExit++);

			// Not taken
			SetJumpTarget(ptr);
			RestoreState(state);
			CONDITIONAL_LOG_EXIT(notTakenAddr);

			// Account for the delay slot.
			js.compilerPC += 4;
			// In case the delay slot was a break or something.
			js.compiling = true;
		}
	}
	else
	{
		// Not continuing: everything is flushed before the jump and both paths leave the block.
		Gen::FixupBranch ptr;
		if (!likely)
		{
			if (!delaySlotIsNice)
				CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
			else
				FlushAll();
			ptr = J_CC(cc, true);
		}
		else
		{
			FlushAll();
			ptr = J_CC(cc, true);
			CompileDelaySlot(DELAYSLOT_FLUSH);
		}

		// Take the branch
		if (andLink)
			MOV(32, M(&mips_->r[MIPS_REG_RA]), Imm32(js.compilerPC + 8));
		CONDITIONAL_LOG_EXIT(targetAddr);
		WriteExit(targetAddr, js.nextExit++);

		// Not taken
		SetJumpTarget(ptr);
		CONDITIONAL_LOG_EXIT(notTakenAddr);
		WriteExit(notTakenAddr, js.nextExit++);
		js.compiling = false;
	}
}
void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
{
CONDITIONAL_LOG;
@ -225,71 +325,7 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
CMP(32, gpr.R(rs), gpr.R(rt));
}
// We may want to try to continue along this branch a little while, to reduce reg flushing.
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
bool continueBranch = false;
// Likely delay slots may change regs, can't take those branches inline safely.
if (CanContinueBranch() && (!likely || !predictTakeBranch))
{
continueBranch = true;
if (predictTakeBranch)
cc = FlipCCFlag(cc);
}
Gen::FixupBranch ptr;
RegCacheState state;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state);
else
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
}
else
{
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
if (continueBranch && predictTakeBranch)
{
// We flipped the cc, the not taken case is first.
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
WriteExit(js.compilerPC + 8, js.nextExit++);
// Now our taken path.
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
// Take the branch
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
// Not taken
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
if (continueBranch && !predictTakeBranch)
{
// Account for the delay slot.
js.compilerPC += 4;
RestoreState(state);
// In case the delay slot was a break or something.
js.compiling = true;
}
else
{
WriteExit(js.compilerPC + 8, js.nextExit++);
js.compiling = false;
}
CompBranchExits(cc, targetAddr, js.compilerPC + 8, delaySlotIsNice, likely, false);
}
void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool likely)
@ -347,75 +383,7 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
gpr.MapReg(rs, true, false);
CMP(32, gpr.R(rs), Imm32(0));
// We may want to try to continue along this branch a little while, to reduce reg flushing.
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
bool continueBranch = false;
// Likely delay slots may change regs, can't take those branches inline safely.
if (CanContinueBranch() && (!likely || !predictTakeBranch))
{
continueBranch = true;
if (predictTakeBranch)
cc = FlipCCFlag(cc);
}
Gen::FixupBranch ptr;
RegCacheState state;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state);
else
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
}
else
{
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
if (continueBranch && predictTakeBranch)
{
// We flipped the cc, the not taken case is first.
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
WriteExit(js.compilerPC + 8, js.nextExit++);
// Now our taken path.
SetJumpTarget(ptr);
if (andLink)
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
CONDITIONAL_LOG_EXIT(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
// Take the branch
if (andLink)
MOV(32, M(&mips_->r[MIPS_REG_RA]), Imm32(js.compilerPC + 8));
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
// Not taken
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
if (continueBranch && !predictTakeBranch)
{
// Account for the delay slot.
js.compilerPC += 4;
RestoreState(state);
// In case the delay slot was a break or something.
js.compiling = true;
}
else
{
WriteExit(js.compilerPC + 8, js.nextExit++);
js.compiling = false;
}
CompBranchExits(cc, targetAddr, js.compilerPC + 8, delaySlotIsNice, likely, andLink);
}
@ -477,72 +445,9 @@ void Jit::BranchFPFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely)
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
// We may want to try to continue along this branch a little while, to reduce reg flushing.
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
bool continueBranch = false;
// Likely delay slots may change regs, can't take those branches inline safely.
if (CanContinueBranch() && (!likely || !predictTakeBranch))
{
continueBranch = true;
if (predictTakeBranch)
cc = FlipCCFlag(cc);
}
TEST(32, M((void *)&(mips_->fpcond)), Imm32(1));
Gen::FixupBranch ptr;
RegCacheState state;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state);
else
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
}
else
{
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
if (continueBranch && predictTakeBranch)
{
// We flipped the cc, the not taken case is first.
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
WriteExit(js.compilerPC + 8, js.nextExit++);
// Now our taken path.
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
// Take the branch
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
// Not taken
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
if (continueBranch && !predictTakeBranch)
{
// Account for the delay slot.
js.compilerPC += 4;
RestoreState(state);
// In case the delay slot was a break or something.
js.compiling = true;
}
else
{
WriteExit(js.compilerPC + 8, js.nextExit++);
js.compiling = false;
}
CompBranchExits(cc, targetAddr, js.compilerPC + 8, delaySlotIsNice, likely, false);
}
@ -584,81 +489,14 @@ void Jit::BranchVFPUFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely)
if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target %d / %d", js.compilerPC, (signed short)(delaySlotOp & 0xFFFF), (signed short)(op & 0xFFFF) - 1);
// We may want to try to continue along this branch a little while, to reduce reg flushing.
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
bool continueBranch = false;
// Likely delay slots may change regs, can't take those branches inline safely.
// TODO: Maybe delaySlotIsBranch could work, but let's play it safe.
if (CanContinueBranch() && (!likely || !predictTakeBranch) && !delaySlotIsBranch)
{
continueBranch = true;
if (predictTakeBranch)
cc = FlipCCFlag(cc);
}
// THE CONDITION
int imm3 = (op >> 18) & 7;
//int val = (mips_->vfpuCtrl[VFPU_CTRL_CC] >> imm3) & 1;
TEST(32, M((void *)&(mips_->vfpuCtrl[VFPU_CTRL_CC])), Imm32(1 << imm3));
Gen::FixupBranch ptr;
RegCacheState state;
if (!likely)
{
if (!delaySlotIsNice && !delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH, state);
else
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
}
else
{
GetStateAndFlushAll(state);
ptr = J_CC(cc, true);
if (!delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
}
u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8);
if (continueBranch && predictTakeBranch)
{
// We flipped the cc, the not taken case is first.
CONDITIONAL_LOG_EXIT(notTakenTarget);
WriteExit(notTakenTarget, js.nextExit++);
// Now our taken path.
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
// Take the branch
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
// Not taken
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(notTakenTarget);
if (continueBranch && !predictTakeBranch)
{
// Account for the delay slot.
if (!delaySlotIsBranch)
js.compilerPC += 4;
RestoreState(state);
// In case the delay slot was a break or something.
js.compiling = true;
}
else
{
WriteExit(notTakenTarget, js.nextExit++);
js.compiling = false;
}
CompBranchExits(cc, targetAddr, notTakenTarget, delaySlotIsNice, likely, false);
}

View file

@ -196,6 +196,7 @@ private:
void CompITypeMemWrite(MIPSOpcode op, u32 bits, void *safeFunc);
void CompITypeMemUnpairedLR(MIPSOpcode op, bool isStore);
void CompITypeMemUnpairedLRInner(MIPSOpcode op, X64Reg shiftReg);
// Emits the taken / not-taken exits for a conditional branch; shared by
// BranchRSRTComp, BranchRSZeroComp, BranchFPFlag and BranchVFPUFlag.
void CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink);
void CompFPTriArith(MIPSOpcode op, void (XEmitter::*arith)(X64Reg reg, OpArg), bool orderMatters);
void CompFPComp(int lhs, int rhs, u8 compare, bool allowNaN = false);