armjit: Copy over (disabled) immbranch optim.

This does a little loop unrolling.  It costs a bit more cache space, but
avoids flushing registers for longer.

Not enabled.
This commit is contained in:
Unknown W. Brackets 2013-11-10 19:38:42 -08:00
parent 92ecff4396
commit aacb31bc18
7 changed files with 88 additions and 21 deletions

View file

@ -65,6 +65,34 @@ void Jit::BranchRSRTComp(MIPSOpcode op, ArmGen::CCFlags cc, bool likely)
MIPSGPReg rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
if (jo.immBranches && gpr.IsImm(rs) && gpr.IsImm(rt) && js.numInstructions < jo.continueMaxInstructions) {
// The cc flags are opposites: when NOT to take the branch.
bool skipBranch;
s32 rsImm = (s32)gpr.GetImm(rs);
s32 rtImm = (s32)gpr.GetImm(rt);
switch (cc) {
case CC_EQ: skipBranch = rsImm == rtImm; break;
case CC_NEQ: skipBranch = rsImm != rtImm; break;
default: skipBranch = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp().");
}
if (skipBranch) {
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
@ -129,6 +157,38 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, ArmGen::CCFlags cc, bool andLink, bool
MIPSGPReg rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
if (jo.immBranches && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) {
// The cc flags are opposites: when NOT to take the branch.
bool skipBranch;
s32 imm = (s32)gpr.GetImm(rs);
switch (cc) {
case CC_GT: skipBranch = imm > 0; break;
case CC_GE: skipBranch = imm >= 0; break;
case CC_LT: skipBranch = imm < 0; break;
case CC_LE: skipBranch = imm <= 0; break;
default: skipBranch = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp().");
}
if (skipBranch) {
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
if (andLink)
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;

View file

@ -64,6 +64,11 @@ ArmJitOptions::ArmJitOptions()
useBackJump = false;
useForwardJump = false;
cachePointers = true;
// WARNING: These options don't work properly with cache clearing or jit compare.
// Need to find a smart way to handle before enabling.
immBranches = false;
continueBranches = false;
continueMaxInstructions = 300;
}
Jit::Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &jo), fpr(mips), mips_(mips)

View file

@ -41,6 +41,9 @@ struct ArmJitOptions
bool useBackJump;
bool useForwardJump;
bool cachePointers;
bool immBranches;
bool continueBranches;
int continueMaxInstructions;
};
class Jit : public ArmGen::ARMXCodeBlock

View file

@ -141,10 +141,6 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
MIPSGPReg rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (jo.immBranches && gpr.IsImmediate(rs) && gpr.IsImmediate(rt) && js.numInstructions < jo.continueMaxInstructions)
{
// The cc flags are opposites: when NOT to take the branch.
@ -176,6 +172,9 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
return;
}
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
@ -240,10 +239,6 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
MIPSGPReg rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (jo.immBranches && gpr.IsImmediate(rs) && js.numInstructions < jo.continueMaxInstructions)
{
// The cc flags are opposites: when NOT to take the branch.
@ -270,10 +265,8 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
if (andLink)
{
gpr.MapReg(MIPS_REG_RA, false, true);
MOV(32, gpr.R(MIPS_REG_RA), Imm32(js.compilerPC + 8));
}
gpr.SetImmediate32(MIPS_REG_RA, js.compilerPC + 8);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
@ -281,6 +274,9 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
return;
}
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);

View file

@ -109,6 +109,16 @@ static void JitLogMiss(MIPSOpcode op)
func(op);
}
// Default JIT options: block linking is turned on, while the immediate-branch
// and branch-continuing optimizations start out turned off.
JitOptions::JitOptions()
	: enableBlocklink(true),
	  // WARNING: These options don't work properly with cache clearing.
	  // Need to find a smart way to handle before enabling.
	  immBranches(false),
	  continueBranches(false),
	  continueMaxInstructions(300)
{
}
#ifdef _MSC_VER
// JitBlockCache doesn't use this, just stores it.
#pragma warning(disable:4355)

View file

@ -39,15 +39,7 @@ u32 JitBreakpoint();
struct JitOptions
{
JitOptions()
{
enableBlocklink = true;
// WARNING: These options don't work properly with cache clearing.
// Need to find a smart way to handle before enabling.
immBranches = false;
continueBranches = false;
continueMaxInstructions = 300;
}
JitOptions();
bool enableBlocklink;
bool immBranches;

View file

@ -322,6 +322,7 @@ void JitCompareScreen::UpdateDisasm() {
// Alright. First generate the MIPS disassembly.
// TODO: Need a way to communicate branch continuing.
for (u32 addr = block->originalAddress; addr <= block->originalAddress + block->originalSize * 4; addr += 4) {
char temp[256];
MIPSDisAsm(Memory::Read_Instruction(addr), addr, temp, true);