mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
armjit: Copy over the (disabled) immediate-branch optimization.
This does a little loop unrolling. It costs a bit more cache space, but goes longer without flushing registers. Not enabled.
This commit is contained in:
parent
92ecff4396
commit
aacb31bc18
7 changed files with 88 additions and 21 deletions
|
@ -65,6 +65,34 @@ void Jit::BranchRSRTComp(MIPSOpcode op, ArmGen::CCFlags cc, bool likely)
|
||||||
MIPSGPReg rs = _RS;
|
MIPSGPReg rs = _RS;
|
||||||
u32 targetAddr = js.compilerPC + offset + 4;
|
u32 targetAddr = js.compilerPC + offset + 4;
|
||||||
|
|
||||||
|
if (jo.immBranches && gpr.IsImm(rs) && gpr.IsImm(rt) && js.numInstructions < jo.continueMaxInstructions) {
|
||||||
|
// The cc flags are opposites: when NOT to take the branch.
|
||||||
|
bool skipBranch;
|
||||||
|
s32 rsImm = (s32)gpr.GetImm(rs);
|
||||||
|
s32 rtImm = (s32)gpr.GetImm(rt);
|
||||||
|
|
||||||
|
switch (cc) {
|
||||||
|
case CC_EQ: skipBranch = rsImm == rtImm; break;
|
||||||
|
case CC_NEQ: skipBranch = rsImm != rtImm; break;
|
||||||
|
default: skipBranch = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp().");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (skipBranch) {
|
||||||
|
// Skip the delay slot if likely, otherwise it'll be the next instruction.
|
||||||
|
if (likely)
|
||||||
|
js.compilerPC += 4;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Branch taken. Always compile the delay slot, and then go to dest.
|
||||||
|
CompileDelaySlot(DELAYSLOT_NICE);
|
||||||
|
// Account for the increment in the loop.
|
||||||
|
js.compilerPC = targetAddr - 4;
|
||||||
|
// In case the delay slot was a break or something.
|
||||||
|
js.compiling = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
|
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
|
||||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
|
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
|
||||||
CONDITIONAL_NICE_DELAYSLOT;
|
CONDITIONAL_NICE_DELAYSLOT;
|
||||||
|
@ -129,6 +157,38 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, ArmGen::CCFlags cc, bool andLink, bool
|
||||||
MIPSGPReg rs = _RS;
|
MIPSGPReg rs = _RS;
|
||||||
u32 targetAddr = js.compilerPC + offset + 4;
|
u32 targetAddr = js.compilerPC + offset + 4;
|
||||||
|
|
||||||
|
if (jo.immBranches && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) {
|
||||||
|
// The cc flags are opposites: when NOT to take the branch.
|
||||||
|
bool skipBranch;
|
||||||
|
s32 imm = (s32)gpr.GetImm(rs);
|
||||||
|
|
||||||
|
switch (cc) {
|
||||||
|
case CC_GT: skipBranch = imm > 0; break;
|
||||||
|
case CC_GE: skipBranch = imm >= 0; break;
|
||||||
|
case CC_LT: skipBranch = imm < 0; break;
|
||||||
|
case CC_LE: skipBranch = imm <= 0; break;
|
||||||
|
default: skipBranch = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp().");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (skipBranch) {
|
||||||
|
// Skip the delay slot if likely, otherwise it'll be the next instruction.
|
||||||
|
if (likely)
|
||||||
|
js.compilerPC += 4;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Branch taken. Always compile the delay slot, and then go to dest.
|
||||||
|
CompileDelaySlot(DELAYSLOT_NICE);
|
||||||
|
if (andLink)
|
||||||
|
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
|
||||||
|
|
||||||
|
// Account for the increment in the loop.
|
||||||
|
js.compilerPC = targetAddr - 4;
|
||||||
|
// In case the delay slot was a break or something.
|
||||||
|
js.compiling = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
|
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
|
||||||
CONDITIONAL_NICE_DELAYSLOT;
|
CONDITIONAL_NICE_DELAYSLOT;
|
||||||
|
|
|
@ -64,6 +64,11 @@ ArmJitOptions::ArmJitOptions()
|
||||||
useBackJump = false;
|
useBackJump = false;
|
||||||
useForwardJump = false;
|
useForwardJump = false;
|
||||||
cachePointers = true;
|
cachePointers = true;
|
||||||
|
// WARNING: These options don't work properly with cache clearing or jit compare.
|
||||||
|
// Need to find a smart way to handle before enabling.
|
||||||
|
immBranches = false;
|
||||||
|
continueBranches = false;
|
||||||
|
continueMaxInstructions = 300;
|
||||||
}
|
}
|
||||||
|
|
||||||
Jit::Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &jo), fpr(mips), mips_(mips)
|
Jit::Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &jo), fpr(mips), mips_(mips)
|
||||||
|
|
|
@ -41,6 +41,9 @@ struct ArmJitOptions
|
||||||
bool useBackJump;
|
bool useBackJump;
|
||||||
bool useForwardJump;
|
bool useForwardJump;
|
||||||
bool cachePointers;
|
bool cachePointers;
|
||||||
|
bool immBranches;
|
||||||
|
bool continueBranches;
|
||||||
|
int continueMaxInstructions;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Jit : public ArmGen::ARMXCodeBlock
|
class Jit : public ArmGen::ARMXCodeBlock
|
||||||
|
|
|
@ -141,10 +141,6 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
|
||||||
MIPSGPReg rs = _RS;
|
MIPSGPReg rs = _RS;
|
||||||
u32 targetAddr = js.compilerPC + offset + 4;
|
u32 targetAddr = js.compilerPC + offset + 4;
|
||||||
|
|
||||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
|
|
||||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
|
|
||||||
CONDITIONAL_NICE_DELAYSLOT;
|
|
||||||
|
|
||||||
if (jo.immBranches && gpr.IsImmediate(rs) && gpr.IsImmediate(rt) && js.numInstructions < jo.continueMaxInstructions)
|
if (jo.immBranches && gpr.IsImmediate(rs) && gpr.IsImmediate(rt) && js.numInstructions < jo.continueMaxInstructions)
|
||||||
{
|
{
|
||||||
// The cc flags are opposites: when NOT to take the branch.
|
// The cc flags are opposites: when NOT to take the branch.
|
||||||
|
@ -176,6 +172,9 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
|
||||||
|
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
|
||||||
|
CONDITIONAL_NICE_DELAYSLOT;
|
||||||
if (!likely && delaySlotIsNice)
|
if (!likely && delaySlotIsNice)
|
||||||
CompileDelaySlot(DELAYSLOT_NICE);
|
CompileDelaySlot(DELAYSLOT_NICE);
|
||||||
|
|
||||||
|
@ -240,10 +239,6 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
|
||||||
MIPSGPReg rs = _RS;
|
MIPSGPReg rs = _RS;
|
||||||
u32 targetAddr = js.compilerPC + offset + 4;
|
u32 targetAddr = js.compilerPC + offset + 4;
|
||||||
|
|
||||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
|
||||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
|
|
||||||
CONDITIONAL_NICE_DELAYSLOT;
|
|
||||||
|
|
||||||
if (jo.immBranches && gpr.IsImmediate(rs) && js.numInstructions < jo.continueMaxInstructions)
|
if (jo.immBranches && gpr.IsImmediate(rs) && js.numInstructions < jo.continueMaxInstructions)
|
||||||
{
|
{
|
||||||
// The cc flags are opposites: when NOT to take the branch.
|
// The cc flags are opposites: when NOT to take the branch.
|
||||||
|
@ -270,10 +265,8 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
|
||||||
// Branch taken. Always compile the delay slot, and then go to dest.
|
// Branch taken. Always compile the delay slot, and then go to dest.
|
||||||
CompileDelaySlot(DELAYSLOT_NICE);
|
CompileDelaySlot(DELAYSLOT_NICE);
|
||||||
if (andLink)
|
if (andLink)
|
||||||
{
|
gpr.SetImmediate32(MIPS_REG_RA, js.compilerPC + 8);
|
||||||
gpr.MapReg(MIPS_REG_RA, false, true);
|
|
||||||
MOV(32, gpr.R(MIPS_REG_RA), Imm32(js.compilerPC + 8));
|
|
||||||
}
|
|
||||||
// Account for the increment in the loop.
|
// Account for the increment in the loop.
|
||||||
js.compilerPC = targetAddr - 4;
|
js.compilerPC = targetAddr - 4;
|
||||||
// In case the delay slot was a break or something.
|
// In case the delay slot was a break or something.
|
||||||
|
@ -281,6 +274,9 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||||
|
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
|
||||||
|
CONDITIONAL_NICE_DELAYSLOT;
|
||||||
if (!likely && delaySlotIsNice)
|
if (!likely && delaySlotIsNice)
|
||||||
CompileDelaySlot(DELAYSLOT_NICE);
|
CompileDelaySlot(DELAYSLOT_NICE);
|
||||||
|
|
||||||
|
|
|
@ -109,6 +109,16 @@ static void JitLogMiss(MIPSOpcode op)
|
||||||
func(op);
|
func(op);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
JitOptions::JitOptions()
|
||||||
|
{
|
||||||
|
enableBlocklink = true;
|
||||||
|
// WARNING: These options don't work properly with cache clearing.
|
||||||
|
// Need to find a smart way to handle before enabling.
|
||||||
|
immBranches = false;
|
||||||
|
continueBranches = false;
|
||||||
|
continueMaxInstructions = 300;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
// JitBlockCache doesn't use this, just stores it.
|
// JitBlockCache doesn't use this, just stores it.
|
||||||
#pragma warning(disable:4355)
|
#pragma warning(disable:4355)
|
||||||
|
|
|
@ -39,15 +39,7 @@ u32 JitBreakpoint();
|
||||||
|
|
||||||
struct JitOptions
|
struct JitOptions
|
||||||
{
|
{
|
||||||
JitOptions()
|
JitOptions();
|
||||||
{
|
|
||||||
enableBlocklink = true;
|
|
||||||
// WARNING: These options don't work properly with cache clearing.
|
|
||||||
// Need to find a smart way to handle before enabling.
|
|
||||||
immBranches = false;
|
|
||||||
continueBranches = false;
|
|
||||||
continueMaxInstructions = 300;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool enableBlocklink;
|
bool enableBlocklink;
|
||||||
bool immBranches;
|
bool immBranches;
|
||||||
|
|
|
@ -322,6 +322,7 @@ void JitCompareScreen::UpdateDisasm() {
|
||||||
|
|
||||||
// Alright. First generate the MIPS disassembly.
|
// Alright. First generate the MIPS disassembly.
|
||||||
|
|
||||||
|
// TODO: Need a way to communicate branch continuing.
|
||||||
for (u32 addr = block->originalAddress; addr <= block->originalAddress + block->originalSize * 4; addr += 4) {
|
for (u32 addr = block->originalAddress; addr <= block->originalAddress + block->originalSize * 4; addr += 4) {
|
||||||
char temp[256];
|
char temp[256];
|
||||||
MIPSDisAsm(Memory::Read_Instruction(addr), addr, temp, true);
|
MIPSDisAsm(Memory::Read_Instruction(addr), addr, temp, true);
|
||||||
|
|
Loading…
Add table
Reference in a new issue