Armjit: Optimise swl+swr and lwl+lwr cases that can be combined to a single sw or lw. Add shift flags to STR/LDR. Add EatInstruction to ArmJit.

This commit is contained in:
Sacha 2013-03-06 00:36:27 +10:00
parent d42293033f
commit 9152d2f2bb
6 changed files with 44 additions and 46 deletions

View file

@ -609,9 +609,9 @@ void ARMXEmitter::STRH (ARMReg dest, ARMReg src, Operand2 op)
Write32(condition | (0x04 << 20) | (src << 16) | (dest << 12) | ((Imm >> 4) << 8) | (0xB << 4) | (Imm & 0x0F));
}
void ARMXEmitter::STRB (ARMReg dest, ARMReg src, Operand2 op) { WriteStoreOp(0x44, dest, src, op);}
// Register-offset STR: emits one 32-bit word through Write32.
// The word ORs together: condition, 0x60 << 20, Index at bit 24, Add at bit 23,
// dest at bit 16, base at bit 12 and the offset register in the low bits.
// The form taking (shift, type) additionally places the shift amount at bit 7
// and the shift type at bit 5.
// NOTE(review): this listing appears to show both the pre-change and post-change
// signature/Write32 lines of the commit back to back - confirm against the real file.
// NOTE(review): shift is not masked; only bits 7..11 are free before the base
// field at bit 12, so a value outside 0..31 would spill into neighbouring fields.
void ARMXEmitter::STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add)
void ARMXEmitter::STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add, int shift, ShiftType type)
{
Write32(condition | (0x60 << 20) | (Index << 24) | (Add << 23) | (dest << 16) | (base << 12) | offset);
Write32(condition | (0x60 << 20) | (Index << 24) | (Add << 23) | (dest << 16) | (base << 12) | (shift << 7) | (type << 5) | offset);
}
void ARMXEmitter::STRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add)
{
@ -657,9 +657,9 @@ void ARMXEmitter::LDRSB(ARMReg dest, ARMReg src, Operand2 op)
Write32(condition | (0x05 << 20) | (src << 16) | (dest << 12) | ((Imm >> 4) << 8) | (0xD << 4) | (Imm & 0x0F));
}
// Register-offset LDR: emits one 32-bit word through Write32.
// The word ORs together: condition, 0x61 << 20, Index at bit 24, Add at bit 23,
// base at bit 16, dest at bit 12 and the offset register in the low bits.
// The form taking (shift, type) additionally places the shift amount at bit 7
// and the shift type at bit 5.
// NOTE(review): this listing appears to show both the pre-change and post-change
// signature/Write32 lines of the commit back to back - confirm against the real file.
// NOTE(review): shift is not masked; only bits 7..11 are free before the dest
// field at bit 12, so a value outside 0..31 would spill into neighbouring fields.
void ARMXEmitter::LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add)
void ARMXEmitter::LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add, int shift, ShiftType type)
{
Write32(condition | (0x61 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | offset);
Write32(condition | (0x61 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | (shift << 7) | (type << 5) | offset);
}
void ARMXEmitter::LDRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add)
{

View file

@ -500,7 +500,7 @@ public:
void LDRB (ARMReg dest, ARMReg src, Operand2 op2 = 0);
void LDRSB(ARMReg dest, ARMReg src, Operand2 op2 = 0);
// Offset adds to the base register in LDR
void LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
void LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add, int shift = 0, ShiftType type = ST_LSL);
void LDRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
void LDRSH(ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
void LDRB (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
@ -511,7 +511,7 @@ public:
void STRH (ARMReg dest, ARMReg src, Operand2 op2 = 0);
void STRB (ARMReg dest, ARMReg src, Operand2 op2 = 0);
// Offset adds on to the destination register in STR
void STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
void STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add, int shift = 0, ShiftType type = ST_LSL);
void STRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);
void STRB (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add);

View file

@ -177,7 +177,7 @@ namespace MIPSComp
default:
DISABLE;
}
}
}
void Jit::Comp_RType3(u32 op)
{

View file

@ -98,6 +98,32 @@ namespace MIPSComp
// Don't load anything into $zr
return;
}
// Optimisation: Combine to single unaligned load/store
// A lwl/lwr (or swl/swr) pair on the same registers whose immediates differ by 3
// is compiled as one lw (or sw) and the second instruction is skipped.
switch(o)
{
case 34: //lwl
case 38: //lwr
	load = true;
	// Intentional fall through: loads and stores share the pairing logic.
case 42: //swl
case 46: //swr
	{
		// +1 when this op is the "left" half (lwl/swl), -1 when it is the "right" half (lwr/swr).
		int left = (o == 34 || o == 42) ? 1 : -1;
		u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
		// Find a matching shift in opposite direction with opposite offset.
		// The partner's opcode is 4 away and its signed 16-bit immediate is 3 away.
		// Work from the immediate encoded in op itself and reject partners whose
		// immediate would not fit in 16 signed bits (it would wrap to an unrelated address).
		int thisImm = static_cast<short>(op & 0xFFFF);
		int partnerImm = thisImm - left * 3;
		bool partnerInRange = partnerImm >= -32768 && partnerImm <= 32767;
		// Mask the immediate to 16 bits so a negative value cannot borrow from the
		// rs/rt fields in the upper half of the encoding.
		u32 desiredOp = ((op + left * (4 << 26)) & 0xFFFF0000) | (static_cast<u32>(partnerImm) & 0xFFFF);
		if (!js.inDelaySlot && partnerInRange && nextOp == desiredOp)
		{
			EatInstruction(nextOp);
			// The combined word access uses the lower of the two immediates, i.e. the
			// one carried by the "right" half, whichever order the pair appeared in.
			u32 rightHalf = (left == 1) ? nextOp : op;
			nextOp = ((load ? 35 : 43) << 26) | (rightHalf & 0x3FFFFFF); //lw, sw
			Comp_ITypeMem(nextOp);
			return;
		}
	}
	// Intentional fall through: no partner found, let the regular handling below run.
default:
	break;
}
switch (o)
{
case 32: //lb
@ -138,45 +164,6 @@ namespace MIPSComp
return;
}
break;
/*
case 34: //lwl
{
Crash();
//u32 shift = (addr & 3) << 3;
//u32 mem = ReadMem32(addr & 0xfffffffc);
//R(rt) = ( u32(R(rt)) & (0x00ffffff >> shift) ) | ( mem << (24 - shift) );
}
break;
case 38: //lwr
{
Crash();
//u32 shift = (addr & 3) << 3;
//u32 mem = ReadMem32(addr & 0xfffffffc);
//R(rt) = ( u32(rt) & (0xffffff00 << (24 - shift)) ) | ( mem >> shift );
}
break;
case 42: //swl
{
Crash();
//u32 shift = (addr & 3) << 3;
//u32 mem = ReadMem32(addr & 0xfffffffc);
//WriteMem32((addr & 0xfffffffc), ( ( u32(R(rt)) >> (24 - shift) ) ) |
// ( mem & (0xffffff00 << shift) ));
}
break;
case 46: //swr
{
Crash();
// u32 shift = (addr & 3) << 3;
// u32 mem = ReadMem32(addr & 0xfffffffc);
//
// WriteMem32((addr & 0xfffffffc), ( ( u32(R(rt)) << shift ) |
// (mem & (0x00ffffff >> (24 - shift)) ) ) );
}
break;*/
default:
Comp_Generic(op);
return ;

View file

@ -124,6 +124,16 @@ void Jit::CompileAt(u32 addr)
MIPSCompileOp(op);
}
// Skips over an instruction without emitting code for it: moves the compiler PC
// past it and adds its cycle estimate to the block's downcount.
// Debug builds assert that op is not flagged DELAYSLOT and that the compiler
// is not currently inside a delay slot.
void Jit::EatInstruction(u32 op)
{
	const u32 opInfo = MIPSGetInfo(op);
	_dbg_assert_msg_(JIT, (opInfo & DELAYSLOT) == 0, "Never eat a branch op.");
	_dbg_assert_msg_(JIT, !js.inDelaySlot, "Never eat an instruction inside a delayslot.");

	// Step past the swallowed instruction, then charge its estimated cycles.
	js.compilerPC += 4;
	js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
void Jit::CompileDelaySlot(int flags)
{
// preserve flag around the delay slot! Maybe this is not always necessary on ARM where

View file

@ -147,6 +147,7 @@ public:
void CompileDelaySlot(int flags);
void CompileAt(u32 addr);
void EatInstruction(u32 op);
void Comp_RunBlock(u32 op);
// Ops