mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Armjit: Optimise swl+swr and lwl+lwr cases that can be combined to a single sw or lw. Add shift flags to STR/LDR. Add EatInstruction to ArmJit.
This commit is contained in:
parent
d42293033f
commit
9152d2f2bb
6 changed files with 44 additions and 46 deletions
|
@ -609,9 +609,9 @@ void ARMXEmitter::STRH (ARMReg dest, ARMReg src, Operand2 op)
|
|||
Write32(condition | (0x04 << 20) | (src << 16) | (dest << 12) | ((Imm >> 4) << 8) | (0xB << 4) | (Imm & 0x0F));
|
||||
}
|
||||
// STRB with an Operand2 offset: delegates to the shared WriteStoreOp helper using op value 0x44.
void ARMXEmitter::STRB (ARMReg dest, ARMReg src, Operand2 op)
{
	WriteStoreOp(0x44, dest, src, op);
}
|
||||
void ARMXEmitter::STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add)
|
||||
void ARMXEmitter::STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add, int shift, ShiftType type)
|
||||
{
|
||||
Write32(condition | (0x60 << 20) | (Index << 24) | (Add << 23) | (dest << 16) | (base << 12) | offset);
|
||||
Write32(condition | (0x60 << 20) | (Index << 24) | (Add << 23) | (dest << 16) | (base << 12) | (shift << 7) | (type << 5) | offset);
|
||||
}
|
||||
void ARMXEmitter::STRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add)
|
||||
{
|
||||
|
@ -657,9 +657,9 @@ void ARMXEmitter::LDRSB(ARMReg dest, ARMReg src, Operand2 op)
|
|||
Write32(condition | (0x05 << 20) | (src << 16) | (dest << 12) | ((Imm >> 4) << 8) | (0xD << 4) | (Imm & 0x0F));
|
||||
}
|
||||
|
||||
void ARMXEmitter::LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add)
|
||||
void ARMXEmitter::LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add, int shift, ShiftType type)
|
||||
{
|
||||
Write32(condition | (0x61 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | offset);
|
||||
Write32(condition | (0x61 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | (shift << 7) | (type << 5) | offset);
|
||||
}
|
||||
void ARMXEmitter::LDRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add)
|
||||
{
|
||||
|
|
|
@ -500,7 +500,7 @@ public:
|
|||
void LDRB (ARMReg dest, ARMReg src, Operand2 op2 = 0);
|
||||
void LDRSB(ARMReg dest, ARMReg src, Operand2 op2 = 0);
|
||||
// Offset adds to the base register in LDR
|
||||
void LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
|
||||
// Register-offset load. 'shift' and 'type' are encoded into the instruction word next to
// the offset register; the defaults (0, ST_LSL) leave those fields zeroed, assuming
// ST_LSL is 0 as in the ARM encoding -- TODO confirm against the ShiftType enum.
void LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add, int shift = 0, ShiftType type = ST_LSL);
|
||||
void LDRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
|
||||
void LDRSH(ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
|
||||
void LDRB (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
|
||||
|
@ -511,7 +511,7 @@ public:
|
|||
void STRH (ARMReg dest, ARMReg src, Operand2 op2 = 0);
|
||||
void STRB (ARMReg dest, ARMReg src, Operand2 op2 = 0);
|
||||
// Offset adds on to the destination register in STR
|
||||
void STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
|
||||
// Register-offset store. 'shift' and 'type' are encoded into the instruction word next to
// the offset register; the defaults (0, ST_LSL) leave those fields zeroed, assuming
// ST_LSL is 0 as in the ARM encoding -- TODO confirm against the ShiftType enum.
void STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add, int shift = 0, ShiftType type = ST_LSL);
|
||||
void STRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
|
||||
void STRB (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
|
||||
|
||||
|
|
|
@ -177,7 +177,7 @@ namespace MIPSComp
|
|||
default:
|
||||
DISABLE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Jit::Comp_RType3(u32 op)
|
||||
{
|
||||
|
|
|
@ -98,6 +98,32 @@ namespace MIPSComp
|
|||
// Don't load anything into $zr
|
||||
return;
|
||||
}
|
||||
|
||||
// Optimisation: Combine to single unaligned load/store
|
||||
switch(o)
|
||||
{
|
||||
case 34: //lwl
|
||||
case 38: //lwr
|
||||
load = true;
|
||||
case 42: //swl
|
||||
case 46: //swr
|
||||
{
|
||||
int left = (o == 34 || o == 42) ? 1 : -1;
|
||||
u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||
// Find a matching shift in opposite direction with opposite offset.
|
||||
u32 desiredOp = ((op + left* (4 << 26)) & 0xFFFF0000) + (offset - left*3);
|
||||
if (!js.inDelaySlot && nextOp == desiredOp)
|
||||
{
|
||||
EatInstruction(nextOp);
|
||||
nextOp = ((load ? 35 : 43) << 26) | (nextOp & 0x3FFFFFF); //lw, sw
|
||||
Comp_ITypeMem(nextOp);
|
||||
return;
|
||||
}
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (o)
|
||||
{
|
||||
case 32: //lb
|
||||
|
@ -138,45 +164,6 @@ namespace MIPSComp
|
|||
return;
|
||||
}
|
||||
break;
|
||||
/*
|
||||
case 34: //lwl
|
||||
{
|
||||
Crash();
|
||||
//u32 shift = (addr & 3) << 3;
|
||||
//u32 mem = ReadMem32(addr & 0xfffffffc);
|
||||
//R(rt) = ( u32(R(rt)) & (0x00ffffff >> shift) ) | ( mem << (24 - shift) );
|
||||
}
|
||||
break;
|
||||
|
||||
case 38: //lwr
|
||||
{
|
||||
Crash();
|
||||
//u32 shift = (addr & 3) << 3;
|
||||
//u32 mem = ReadMem32(addr & 0xfffffffc);
|
||||
|
||||
//R(rt) = ( u32(rt) & (0xffffff00 << (24 - shift)) ) | ( mem >> shift );
|
||||
}
|
||||
break;
|
||||
|
||||
case 42: //swl
|
||||
{
|
||||
Crash();
|
||||
//u32 shift = (addr & 3) << 3;
|
||||
//u32 mem = ReadMem32(addr & 0xfffffffc);
|
||||
//WriteMem32((addr & 0xfffffffc), ( ( u32(R(rt)) >> (24 - shift) ) ) |
|
||||
// ( mem & (0xffffff00 << shift) ));
|
||||
}
|
||||
break;
|
||||
case 46: //swr
|
||||
{
|
||||
Crash();
|
||||
// u32 shift = (addr & 3) << 3;
|
||||
// u32 mem = ReadMem32(addr & 0xfffffffc);
|
||||
//
|
||||
// WriteMem32((addr & 0xfffffffc), ( ( u32(R(rt)) << shift ) |
|
||||
// (mem & (0x00ffffff >> (24 - shift)) ) ) );
|
||||
}
|
||||
break;*/
|
||||
default:
|
||||
Comp_Generic(op);
|
||||
return ;
|
||||
|
|
|
@ -124,6 +124,16 @@ void Jit::CompileAt(u32 addr)
|
|||
MIPSCompileOp(op);
|
||||
}
|
||||
|
||||
// Consumes 'op' without compiling it: moves js.compilerPC forward by 4 and adds the op's
// cycle estimate to js.downcountAmount. Used when the caller compiles a replacement that
// already covers this instruction.
// Debug builds assert that 'op' is not flagged DELAYSLOT and that compilation is not
// currently inside a delay slot.
void Jit::EatInstruction(u32 op)
{
	const u32 opInfo = MIPSGetInfo(op);
	_dbg_assert_msg_(JIT, (opInfo & DELAYSLOT) == 0, "Never eat a branch op.");
	_dbg_assert_msg_(JIT, js.inDelaySlot == false, "Never eat an instruction inside a delayslot.");

	// Step past the swallowed instruction, then account for its cost.
	js.compilerPC += 4;
	js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
|
||||
|
||||
void Jit::CompileDelaySlot(int flags)
|
||||
{
|
||||
// preserve flag around the delay slot! Maybe this is not always necessary on ARM where
|
||||
|
|
|
@ -147,6 +147,7 @@ public:
|
|||
|
||||
void CompileDelaySlot(int flags);
|
||||
void CompileAt(u32 addr);
|
||||
// Skips 'op' without emitting code for it: advances the compiler PC by one instruction and
// still counts its estimated cycles. Must not be used on ops flagged DELAYSLOT or while
// inside a delay slot (debug-asserted in the definition).
void EatInstruction(u32 op);
|
||||
void Comp_RunBlock(u32 op);
|
||||
|
||||
// Ops
|
||||
|
|
Loading…
Add table
Reference in a new issue