diff --git a/Common/ArmEmitter.cpp b/Common/ArmEmitter.cpp index 152f84259d..70a91e2efa 100644 --- a/Common/ArmEmitter.cpp +++ b/Common/ArmEmitter.cpp @@ -609,9 +609,9 @@ void ARMXEmitter::STRH (ARMReg dest, ARMReg src, Operand2 op) Write32(condition | (0x04 << 20) | (src << 16) | (dest << 12) | ((Imm >> 4) << 8) | (0xB << 4) | (Imm & 0x0F)); } void ARMXEmitter::STRB (ARMReg dest, ARMReg src, Operand2 op) { WriteStoreOp(0x44, dest, src, op);} -void ARMXEmitter::STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add) +void ARMXEmitter::STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add, int shift, ShiftType type) { - Write32(condition | (0x60 << 20) | (Index << 24) | (Add << 23) | (dest << 16) | (base << 12) | offset); + Write32(condition | (0x60 << 20) | (Index << 24) | (Add << 23) | (dest << 16) | (base << 12) | ((shift & 0x1F) << 7) | (type << 5) | offset); } void ARMXEmitter::STRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add) { @@ -657,9 +657,9 @@ void ARMXEmitter::LDRSB(ARMReg dest, ARMReg src, Operand2 op) Write32(condition | (0x05 << 20) | (src << 16) | (dest << 12) | ((Imm >> 4) << 8) | (0xD << 4) | (Imm & 0x0F)); } -void ARMXEmitter::LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add) +void ARMXEmitter::LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add, int shift, ShiftType type) { - Write32(condition | (0x61 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | offset); + Write32(condition | (0x61 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | ((shift & 0x1F) << 7) | (type << 5) | offset); } void ARMXEmitter::LDRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add) { diff --git a/Common/ArmEmitter.h b/Common/ArmEmitter.h index a49a91104c..8f2b4e03bb 100644 --- a/Common/ArmEmitter.h +++ b/Common/ArmEmitter.h @@ -500,7 +500,7 @@ public: void LDRB (ARMReg dest, ARMReg src, Operand2 op2 = 0); void LDRSB(ARMReg dest, ARMReg src, Operand2 op2 = 0); // 
Offset adds to the base register in LDR - void LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); + void LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add, int shift = 0, ShiftType type = ST_LSL); void LDRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); void LDRSH(ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); void LDRB (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); @@ -511,7 +511,7 @@ public: void STRH (ARMReg dest, ARMReg src, Operand2 op2 = 0); void STRB (ARMReg dest, ARMReg src, Operand2 op2 = 0); // Offset adds on to the destination register in STR - void STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); + void STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add, int shift = 0, ShiftType type = ST_LSL); void STRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); void STRB (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); diff --git a/Core/MIPS/ARM/ArmCompALU.cpp b/Core/MIPS/ARM/ArmCompALU.cpp index 4edfd67640..b0020a1a9a 100644 --- a/Core/MIPS/ARM/ArmCompALU.cpp +++ b/Core/MIPS/ARM/ArmCompALU.cpp @@ -177,7 +177,7 @@ namespace MIPSComp default: DISABLE; } - } + } void Jit::Comp_RType3(u32 op) { diff --git a/Core/MIPS/ARM/ArmCompLoadStore.cpp b/Core/MIPS/ARM/ArmCompLoadStore.cpp index 692a78da66..4dbb32de9b 100644 --- a/Core/MIPS/ARM/ArmCompLoadStore.cpp +++ b/Core/MIPS/ARM/ArmCompLoadStore.cpp @@ -98,6 +98,32 @@ namespace MIPSComp // Don't load anything into $zr return; } + + // Optimisation: Combine to single unaligned load/store + switch(o) + { + case 34: //lwl + case 38: //lwr + load = true; // fall through: loads and stores share the pairing logic below + case 42: //swl + case 46: //swr + { + int left = (o == 34 || o == 42) ? 1 : -1; + u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4); + // Find the partner op: opposite direction (opcode +/-4), same rs/rt, offset 3 bytes away (kept within the 16-bit immediate field). 
+ u32 desiredOp = ((op + left* (4 << 26)) & 0xFFFF0000) | ((offset - left*3) & 0xFFFF); + if (!js.inDelaySlot && nextOp == desiredOp) + { + EatInstruction(nextOp); + nextOp = ((load ? 35 : 43) << 26) | ((left > 0 ? nextOp : op) & 0x3FFFFFF); //lw, sw using the fields of the lwr/swr half, whose offset is the word's address + Comp_ITypeMem(nextOp); + return; + } + } + default: + break; + } + switch (o) { case 32: //lb @@ -138,45 +164,6 @@ namespace MIPSComp return; } break; - /* - case 34: //lwl - { - Crash(); - //u32 shift = (addr & 3) << 3; - //u32 mem = ReadMem32(addr & 0xfffffffc); - //R(rt) = ( u32(R(rt)) & (0x00ffffff >> shift) ) | ( mem << (24 - shift) ); - } - break; - - case 38: //lwr - { - Crash(); - //u32 shift = (addr & 3) << 3; - //u32 mem = ReadMem32(addr & 0xfffffffc); - - //R(rt) = ( u32(rt) & (0xffffff00 << (24 - shift)) ) | ( mem >> shift ); - } - break; - - case 42: //swl - { - Crash(); - //u32 shift = (addr & 3) << 3; - //u32 mem = ReadMem32(addr & 0xfffffffc); - //WriteMem32((addr & 0xfffffffc), ( ( u32(R(rt)) >> (24 - shift) ) ) | - // ( mem & (0xffffff00 << shift) )); - } - break; - case 46: //swr - { - Crash(); - // u32 shift = (addr & 3) << 3; - // u32 mem = ReadMem32(addr & 0xfffffffc); -// -// WriteMem32((addr & 0xfffffffc), ( ( u32(R(rt)) << shift ) | -// (mem & (0x00ffffff >> (24 - shift)) ) ) ); - } - break;*/ default: Comp_Generic(op); return ; diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index 2984a5637a..ae80235f82 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -124,6 +124,16 @@ void Jit::CompileAt(u32 addr) MIPSCompileOp(op); } +void Jit::EatInstruction(u32 op) +{ + u32 info = MIPSGetInfo(op); + _dbg_assert_msg_(JIT, !(info & DELAYSLOT), "Never eat a branch op."); + _dbg_assert_msg_(JIT, !js.inDelaySlot, "Never eat an instruction inside a delayslot."); + + js.compilerPC += 4; + js.downcountAmount += MIPSGetInstructionCycleEstimate(op); +} + void Jit::CompileDelaySlot(int flags) { // preserve flag around the delay slot! 
Maybe this is not always necessary on ARM where diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h index c9467dd46d..0d8618c507 100644 --- a/Core/MIPS/ARM/ArmJit.h +++ b/Core/MIPS/ARM/ArmJit.h @@ -147,6 +147,7 @@ public: void CompileDelaySlot(int flags); void CompileAt(u32 addr); + void EatInstruction(u32 op); void Comp_RunBlock(u32 op); // Ops