diff --git a/Core/MIPS/ARM/ArmCompFPU.cpp b/Core/MIPS/ARM/ArmCompFPU.cpp index 1d81bd1489..c163ef16b2 100644 --- a/Core/MIPS/ARM/ArmCompFPU.cpp +++ b/Core/MIPS/ARM/ArmCompFPU.cpp @@ -97,6 +97,13 @@ void Jit::Comp_FPULS(MIPSOpcode op) switch(op >> 26) { case 49: //FI(ft) = Memory::Read_U32(addr); break; //lwc1 + if (!gpr.IsImm(rs) && jo.cachePointers && g_Config.bFastMemory && (offset & 3) == 0 && offset < 0x400 && offset > -0x400) { + gpr.MapRegAsPointer(rs); + fpr.MapReg(ft, MAP_NOINIT | MAP_DIRTY); + VLDR(fpr.R(ft), gpr.RPtr(rs), offset); + break; + } + fpr.SpillLock(ft); fpr.MapReg(ft, MAP_NOINIT | MAP_DIRTY); if (gpr.IsImm(rs)) { @@ -135,6 +142,13 @@ void Jit::Comp_FPULS(MIPSOpcode op) break; case 57: //Memory::Write_U32(FI(ft), addr); break; //swc1 + if (!gpr.IsImm(rs) && jo.cachePointers && g_Config.bFastMemory && (offset & 3) == 0 && offset < 0x400 && offset > -0x400) { + gpr.MapRegAsPointer(rs); + fpr.MapReg(ft, 0); + VSTR(fpr.R(ft), gpr.RPtr(rs), offset); + break; + } + fpr.MapReg(ft); if (gpr.IsImm(rs)) { u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF; diff --git a/Core/MIPS/ARM/ArmCompLoadStore.cpp b/Core/MIPS/ARM/ArmCompLoadStore.cpp index 34b38562fd..5497af18fd 100644 --- a/Core/MIPS/ARM/ArmCompLoadStore.cpp +++ b/Core/MIPS/ARM/ArmCompLoadStore.cpp @@ -152,6 +152,33 @@ namespace MIPSComp case 40: //sb case 41: //sh case 43: //sw + // Map base register as pointer and go from there - if the displacement isn't too big. + // This is faster if there are multiple loads from the same pointer. Need to hook up the MIPS analyzer.. + if (jo.cachePointers && g_Config.bFastMemory) { + // ARM has smaller load/store immediate displacements than MIPS, 12 bits - and some memory ops only have 8 bits. + int offsetRange = 0x3ff; + if (o == 41 || o == 33 || o == 37 || o == 32) + offsetRange = 0xff; // 8 bit offset only + if (!gpr.IsImm(rs) && rs != rt && (offset <= offsetRange) && offset >= -offsetRange) { + gpr.SpillLock(rs, rt); + gpr.MapRegAsPointer(rs); + gpr.MapReg(rt, load ? (MAP_NOINIT | MAP_DIRTY) : 0); + switch (o) { + case 35: LDR (gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break; + case 37: LDRH (gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break; + case 33: LDRSH(gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break; + case 36: LDRB (gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break; + case 32: LDRSB(gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break; + // Store + case 43: STR (gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break; + case 41: STRH (gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break; + case 40: STRB (gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break; + } + gpr.ReleaseSpillLocks(); + break; + } + } + if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) { // TODO: Avoid mapping a register for the "zero" register, use R0 instead. @@ -171,6 +198,7 @@ namespace MIPSComp SetR0ToEffectiveAddress(rs, offset); } } + switch (o) { // Load diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index 6f110fd5bd..7362633942 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -63,6 +63,7 @@ ArmJitOptions::ArmJitOptions() downcountInRegister = true; useBackJump = false; useForwardJump = false; + cachePointers = false; } Jit::Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &jo), fpr(mips), mips_(mips) @@ -218,7 +219,9 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b) js.inDelaySlot = false; js.PrefixStart(); - // We add a check before the block, used when entering from a linked block. + // We add a downcount flag check before the block, used when entering from a linked block. + // The last block decremented downcounter, and the flag should still be available. + // Got three variants here of where we position the code, needs detailed benchmarking. FixupBranch bail; if (jo.useBackJump) { @@ -239,7 +242,6 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b) bail = B(); SetCC(CC_AL); } else { - // Downcount flag check. The last block decremented downcounter, and the flag should still be available. b->checkedEntry = GetCodePtr(); SetCC(CC_LT); MOVI2R(R0, js.blockStart); @@ -247,7 +249,6 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b) SetCC(CC_AL); } - b->normalEntry = GetCodePtr(); // TODO: this needs work MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address); @@ -290,6 +291,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b) char temp[256]; if (logBlocks > 0 && dontLogBlocks == 0) { + INFO_LOG(JIT, "=============== mips ==============="); for (u32 cpc = em_address; cpc != js.compilerPC + 4; cpc += 4) { MIPSDisAsm(Memory::Read_Instruction(cpc), cpc, temp, true); INFO_LOG(JIT, "M: %08x %s", cpc, temp); diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h index 3d315496b0..ddbe48169c 100644 --- a/Core/MIPS/ARM/ArmJit.h +++ b/Core/MIPS/ARM/ArmJit.h @@ -40,6 +40,7 @@ struct ArmJitOptions bool downcountInRegister; bool useBackJump; bool useForwardJump; + bool cachePointers; }; class Jit : public ArmGen::ARMXCodeBlock