diff --git a/Core/Core.vcxproj b/Core/Core.vcxproj index af4b459fb1..e902adf733 100644 --- a/Core/Core.vcxproj +++ b/Core/Core.vcxproj @@ -189,6 +189,7 @@ + @@ -518,6 +519,7 @@ + diff --git a/Core/Core.vcxproj.filters b/Core/Core.vcxproj.filters index 99af2a2696..5905d62de1 100644 --- a/Core/Core.vcxproj.filters +++ b/Core/Core.vcxproj.filters @@ -664,6 +664,9 @@ MIPS\IR + + MIPS\IR + @@ -1218,6 +1221,9 @@ MIPS\IR + + MIPS\IR + diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index 67059e371e..69cf25de56 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -49,15 +49,15 @@ namespace MIPSComp { void IRJit::CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, IROp OP) { if (gpr.IsImm(rs)) { switch (OP) { - case IROp::AddConst: gpr.SetImm(rt, rs + uimm); break; - case IROp::SubConst: gpr.SetImm(rt, rs - uimm); break; - case IROp::AndConst: gpr.SetImm(rt, rs & uimm); break; - case IROp::OrConst: gpr.SetImm(rt, rs | uimm); break; - case IROp::XorConst: gpr.SetImm(rt, rs ^ uimm); break; + case IROp::AddConst: gpr.SetImm(rt, gpr.GetImm(rs) + uimm); break; + case IROp::SubConst: gpr.SetImm(rt, gpr.GetImm(rs) - uimm); break; + case IROp::AndConst: gpr.SetImm(rt, gpr.GetImm(rs) & uimm); break; + case IROp::OrConst: gpr.SetImm(rt, gpr.GetImm(rs) | uimm); break; + case IROp::XorConst: gpr.SetImm(rt, gpr.GetImm(rs) ^ uimm); break; } } else { gpr.MapDirtyIn(rt, rs); - ir.Write(OP, rt, ir.AddConstant(uimm)); + ir.Write(OP, rt, rs, ir.AddConstant(uimm)); } } @@ -95,8 +95,7 @@ void IRJit::Comp_IType(MIPSOpcode op) { break; } gpr.MapDirtyIn(rt, rs); - // Grab the sign bit (< 0) as 1/0. Slightly faster than a shift. - ir.Write(IROp::Slt, rt, rs, ir.AddConstant(simm)); + ir.Write(IROp::SltConst, rt, rs, ir.AddConstant(simm)); break; case 11: // R(rt) = R(rs) < suimm; break; //sltiu @@ -105,7 +104,7 @@ void IRJit::Comp_IType(MIPSOpcode op) { break; } gpr.MapDirtyIn(rt, rs); - ir.Write(IROp::SltU, rt, rs, ir.AddConstant(suimm)); + ir.Write(IROp::SltUConst, rt, rs, ir.AddConstant(suimm)); break; case 15: // R(rt) = uimm << 16; //lui @@ -167,6 +166,7 @@ void IRJit::CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, IROp op, IROp co // Luckily, it was just an imm. gpr.SetImm(rhs, rhsImm); } + return; } // Can't do the RSB optimization on ARM64 - no RSB! @@ -220,10 +220,17 @@ void IRJit::Comp_RType3(MIPSOpcode op) { case 39: // R(rd) = ~(R(rs) | R(rt)); break; //nor if (gpr.IsImm(rs) && gpr.IsImm(rt)) { gpr.SetImm(rd, ~(gpr.GetImm(rs) | gpr.GetImm(rt))); - } - - ir.Write(IROp::Or, IRTEMP_0, rs, rt); - ir.Write(IROp::Not, rd, IRTEMP_0); + } else { + gpr.MapDirtyInIn(rd, rs, rt); + if (rs == 0) { + ir.Write(IROp::Not, rd, rt); + } else if (rt == 0) { + ir.Write(IROp::Not, rd, rs); + } else { + ir.Write(IROp::Or, IRTEMP_0, rs, rt); + ir.Write(IROp::Not, rd, IRTEMP_0); + } + } break; case 42: //R(rd) = (int)R(rs) < (int)R(rt); break; //slt @@ -323,9 +330,9 @@ void IRJit::Comp_ShiftType(MIPSOpcode op) { // WARNING : ROTR switch (op & 0x3f) { - case 0: CompShiftImm(op, IROp::Shl, sa); break; //sll - case 2: CompShiftImm(op, rs == 1 ? IROp::Ror : IROp::Shr, sa); break; //srl - case 3: CompShiftImm(op, IROp::Sar, sa); break; //sra + case 0: CompShiftImm(op, IROp::ShlImm, sa); break; //sll + case 2: CompShiftImm(op, (rs == 1 ? IROp::RorImm : IROp::ShrImm), sa); break; //srl + case 3: CompShiftImm(op, IROp::SarImm, sa); break; //sra case 4: CompShiftVar(op, IROp::Shl, IROp::ShlImm); break; //sllv case 6: CompShiftVar(op, (fd == 1 ? IROp::Ror : IROp::Shr), (fd == 1 ? IROp::RorImm : IROp::ShrImm)); break; //srlv case 7: CompShiftVar(op, IROp::Sar, IROp::SarImm); break; //srav diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index 16c7245b82..7d01d0b685 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -72,22 +72,28 @@ void IRJit::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) MIPSGPReg lhs = rs; MIPSGPReg rhs = rt; - if (!delaySlotIsNice) { - ir.Write(IROp::Mov, IRTEMP_0, rs); - ir.Write(IROp::Mov, IRTEMP_1, rt); - lhs = (MIPSGPReg)IRTEMP_0; - rhs = (MIPSGPReg)IRTEMP_1; + if (!delaySlotIsNice && !likely) { // if likely, we don't need this + if (rs != 0) { + ir.Write(IROp::Mov, IRTEMP_0, rs); + lhs = (MIPSGPReg)IRTEMP_0; + } + if (rt != 0) { + ir.Write(IROp::Mov, IRTEMP_1, rt); + rhs = (MIPSGPReg)IRTEMP_1; + } } if (!likely) CompileDelaySlot(); gpr.MapInIn(lhs, rhs); + FlushAll(); ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs, rhs); // This makes the block "impure" :( if (likely) CompileDelaySlot(); + FlushAll(); ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); js.compiling = false; @@ -105,19 +111,25 @@ void IRJit::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool MIPSOpcode delaySlotOp = GetOffsetInstruction(1); bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs); + ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + if (!likely && delaySlotIsNice) CompileDelaySlot(); int lhs = rs; gpr.MapIn(rs); - if (!delaySlotIsNice) { + if (!delaySlotIsNice && !likely) { // if likely, we don't need this ir.Write(IROp::Mov, IRTEMP_0, rs); lhs = IRTEMP_0; } + if (andLink) + gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8); + FlushAll(); ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs); if (likely) { CompileDelaySlot(); } // Taken + FlushAll(); ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); js.compiling = false; } @@ -173,12 +185,15 @@ void IRJit::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) { if (!likely) CompileDelaySlot(); + ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + FlushAll(); // Not taken ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), IRTEMP_0, 0); // Taken if (likely) CompileDelaySlot(); + FlushAll(); ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); js.compiling = false; } @@ -208,6 +223,8 @@ void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { ir.Write(IROp::VfpCondToReg, IRTEMP_0); + ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + // Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle) // The behavior is undefined - the CPU may take the second branch even if the first one passes. // However, it does consistently try each branch, which these games seem to expect. @@ -223,12 +240,14 @@ void IRJit::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) { u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8); ir.Write(IROp::AndConst, IRTEMP_0, IRTEMP_0, ir.AddConstant(imm3)); + FlushAll(); ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_0, 0); if (likely) CompileDelaySlot(); // Taken + FlushAll(); ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr)); js.compiling = false; } @@ -251,6 +270,8 @@ void IRJit::Comp_Jump(MIPSOpcode op) { u32 off = _IMM26 << 2; u32 targetAddr = (GetCompilerPC() & 0xF0000000) | off; + ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + // Might be a stubbed address or something? if (!Memory::IsValidAddress(targetAddr)) { if (js.nextExit == 0) { @@ -270,8 +291,6 @@ void IRJit::Comp_Jump(MIPSOpcode op) { break; case 3: //jal - if (ReplaceJalTo(targetAddr)) - return; gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8); CompileDelaySlot(); FlushAll(); @@ -299,6 +318,8 @@ void IRJit::Comp_JumpReg(MIPSOpcode op) { if (andLink && rs == rd) delaySlotIsNice = false; + ir.Write(IROp::Downcount, 0, js.downcountAmount & 0xFF, js.downcountAmount >> 8); + int destReg; if (IsSyscall(delaySlotOp)) { gpr.MapDirty(rs); @@ -336,7 +357,7 @@ void IRJit::Comp_JumpReg(MIPSOpcode op) { break; } - ir.Write(IROp::ExitToReg, ir.AddConstant(js.downcountAmount), rs, 0); + ir.Write(IROp::ExitToReg, destReg, 0, 0); js.compiling = false; } @@ -354,8 +375,7 @@ void IRJit::Comp_Syscall(MIPSOpcode op) { js.compiling = false; } -void IRJit::Comp_Break(MIPSOpcode op) -{ +void IRJit::Comp_Break(MIPSOpcode op) { Comp_Generic(op); js.compiling = false; } diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp index 00a8ec6399..86e8d126e7 100644 --- a/Core/MIPS/IR/IRCompFPU.cpp +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -82,7 +82,7 @@ void IRJit::Comp_FPUComp(MIPSOpcode op) { int opc = op & 0xF; if (opc >= 8) opc -= 8; // alias if (opc == 0) { // f, sf (signalling false) - gpr.SetImm(MIPS_REG_FPCOND, 0); + gpr.SetImm((MIPSGPReg)IRREG_FPCOND, 0); return; } diff --git a/Core/MIPS/IR/IRCompLoadStore.cpp b/Core/MIPS/IR/IRCompLoadStore.cpp index 53ea1f866f..fb0a143dd8 100644 --- a/Core/MIPS/IR/IRCompLoadStore.cpp +++ b/Core/MIPS/IR/IRCompLoadStore.cpp @@ -82,7 +82,8 @@ namespace MIPSComp { return; } - u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF; + gpr.MapIn(rs); + gpr.MapDirty(rt); int addrReg = IRTEMP_0; switch (o) { // Load diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index cfcbe43497..1e0cdabf0b 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -1,8 +1,13 @@ #include "Core/MIPS/IR/IRInst.h" +#include "Core/MIPS/IR/IRPassSimplify.h" +#include "Core/MIPS/MIPSDebugInterface.h" +#include "Core/MIPS/MIPSTables.h" #include "Core/MemMap.h" +#include "Core/HLE/HLE.h" IRMeta meta[] = { - { IROp::SetConst, "SetConst", "GC" }, + { IROp::SetConst, "SetConst", "GC_" }, + { IROp::Mov, "Mov", "GG" }, { IROp::Add, "Add", "GGG" }, { IROp::Sub, "Sub", "GGG" }, { IROp::Neg, "Neg", "GG" }, @@ -23,9 +28,9 @@ IRMeta meta[] = { { IROp::ShrImm, "ShrImm", "GGI" }, { IROp::SarImm, "SarImm", "GGI" }, { IROp::RorImm, "RorImm", "GGI" }, - { IROp::Slt, "Slt","GGC" }, - { IROp::SltConst, "SltConst","GGC" }, - { IROp::SltU, "SltU", "GGC" }, + { IROp::Slt, "Slt", "GGG" }, + { IROp::SltConst, "SltConst", "GGC" }, + { IROp::SltU, "SltU", "GGG" }, { IROp::SltUConst, "SltUConst", "GGC" }, { IROp::Clz, "Clz", "GG" }, { IROp::MovZ, "MovZ", "GGG" }, @@ -37,6 +42,14 @@ IRMeta meta[] = { { IROp::Mul, "Mul", "_GG" }, { IROp::Ext8to32, "Ext8to32", "GG" }, { IROp::Ext16to32, "Ext16to32", "GG" }, + { IROp::Load8, "Load8", "GGC" }, + { IROp::Load8Ext, "Load8", "GGC" }, + { IROp::Load16, "Load16", "GGC" }, + { IROp::Load16Ext, "Load16Ext", "GGC" }, + { IROp::Load32, "Load32", "GGC" }, + { IROp::Store8, "Store8", "GGC" }, + { IROp::Store16, "Store16", "GGC" }, + { IROp::Store32, "Store32", "GGC" }, { IROp::FAdd, "FAdd", "FFF" }, { IROp::FSub, "FSub", "FFF" }, { IROp::FMul, "FMul", "FFF" }, @@ -57,8 +70,16 @@ IRMeta meta[] = { { IROp::SetCtrlVFPU, "SetCtrlVFPU", "T" }, { IROp::Interpret, "Interpret", "_C" }, { IROp::Downcount, "Downcount", "_II" }, + { IROp::ExitToConst, "Exit", "C" }, + { IROp::ExitToConstIfEq, "ExitIfEq", "CGG" }, + { IROp::ExitToConstIfNeq, "ExitIfNeq", "CGG" }, + { IROp::ExitToConstIfGtZ, "ExitIfGtZ", "CG" }, + { IROp::ExitToConstIfGeZ, "ExitIfGeZ", "CG" }, + { IROp::ExitToConstIfLeZ, "ExitIfLeZ", "CG" }, + { IROp::ExitToConstIfLtZ, "ExitIfLtZ", "CG" }, + { IROp::ExitToReg, "ExitToReg", "G" }, { IROp::Syscall, "Syscall", "_C"}, - { IROp::SetPC, "SetPC", "_C"}, + { IROp::SetPC, "SetPC", "_G"}, }; const IRMeta *metaIndex[256]; @@ -82,9 +103,39 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c case IROp::Sub: mips->r[inst->dest] = mips->r[inst->src1] - mips->r[inst->src2]; break; + case IROp::And: + mips->r[inst->dest] = mips->r[inst->src1] & mips->r[inst->src2]; + break; + case IROp::Or: + mips->r[inst->dest] = mips->r[inst->src1] | mips->r[inst->src2]; + break; + case IROp::Xor: + mips->r[inst->dest] = mips->r[inst->src1] ^ mips->r[inst->src2]; + break; + case IROp::Mov: + mips->r[inst->dest] = mips->r[inst->src1]; + break; + case IROp::AddConst: + mips->r[inst->dest] = mips->r[inst->src1] + constPool[inst->src2]; + break; + case IROp::SubConst: + mips->r[inst->dest] = mips->r[inst->src1] - constPool[inst->src2]; + break; + case IROp::AndConst: + mips->r[inst->dest] = mips->r[inst->src1] & constPool[inst->src2]; + break; + case IROp::OrConst: + mips->r[inst->dest] = mips->r[inst->src1] | constPool[inst->src2]; + break; + case IROp::XorConst: + mips->r[inst->dest] = mips->r[inst->src1] ^ constPool[inst->src2]; + break; case IROp::Neg: mips->r[inst->dest] = -(s32)mips->r[inst->src1]; break; + case IROp::Not: + mips->r[inst->dest] = ~mips->r[inst->src1]; + break; case IROp::Ext8to32: mips->r[inst->dest] = (s32)(s8)mips->r[inst->src1]; break; @@ -152,6 +203,22 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c } break; + case IROp::Slt: + mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2]; + break; + + case IROp::SltU: + mips->r[inst->dest] = mips->r[inst->src1] < mips->r[inst->src2]; + break; + + case IROp::SltConst: + mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)constPool[inst->src2]; + break; + + case IROp::SltUConst: + mips->r[inst->dest] = mips->r[inst->src1] < constPool[inst->src2]; + break; + case IROp::MovZ: if (mips->r[inst->src1] == 0) mips->r[inst->dest] = mips->r[inst->src2]; @@ -208,10 +275,10 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c break; case IROp::ExitToConst: - return constPool[inst->src1]; + return constPool[inst->dest]; case IROp::ExitToReg: - return mips->r[inst->src1]; + return mips->r[inst->dest]; case IROp::ExitToConstIfEq: if (mips->r[inst->src1] == mips->r[inst->src2]) @@ -238,8 +305,28 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c return constPool[inst->dest]; break; + case IROp::Downcount: + mips->downcount -= (inst->src1) | ((inst->src2) << 8); + break; + case IROp::SetPC: - return mips->pc = mips->r[inst->src1]; + mips->pc = mips->r[inst->src1]; + break; + + case IROp::Syscall: + // SetPC was executed before. + { + MIPSOpcode op(constPool[inst->src1]); + CallSyscall(op); + return mips->pc; + } + + case IROp::Interpret: // SLOW fallback. Can be made faster. + { + MIPSOpcode op(constPool[inst->src1]); + MIPSInterpret(op); + break; + } default: Crash(); @@ -262,14 +349,13 @@ void IRWriter::Write(IROp op, u8 dst, u8 src1, u8 src2) { } void IRWriter::WriteSetConstant(u8 dst, u32 value) { - // TODO: Check for the fixed ones first. - Write(IROp::SetConstImm, AddConstant(value)); + Write(IROp::SetConst, dst, AddConstant(value)); } int IRWriter::AddConstant(u32 value) { for (size_t i = 0; i < constPool_.size(); i++) { if (constPool_[i] == value) - return i; + return (int)i; } constPool_.push_back(value); return (int)constPool_.size() - 1; @@ -281,10 +367,25 @@ int IRWriter::AddConstantFloat(float value) { return AddConstant(val); } +void IRWriter::Simplify() { + SimplifyInPlace(&insts_[0], insts_.size(), constPool_.data()); +} + +const char *GetGPRName(int r) { + if (r < 32) { + return currentDebugMIPS->GetRegName(0, r); + } + switch (r) { + case IRTEMP_0: return "irtemp0"; + case IRTEMP_1: return "irtemp1"; + default: return "(unk)"; + } +} + void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *constPool) { switch (type) { case 'G': - snprintf(buf, bufSize, "r%d", param); + snprintf(buf, bufSize, "%s", GetGPRName(param)); break; case 'F': snprintf(buf, bufSize, "r%d", param); @@ -292,6 +393,13 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co case 'C': snprintf(buf, bufSize, "%08x", constPool[param]); break; + case 'I': + snprintf(buf, bufSize, "%02x", param); + break; + case '_': + case '\0': + buf[0] = 0; + break; default: snprintf(buf, bufSize, "?"); break; @@ -300,17 +408,21 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co void DisassembleIR(char *buf, size_t bufsize, IRInst inst, const u32 *constPool) { const IRMeta *meta = metaIndex[(int)inst.op]; + if (!meta) { + snprintf(buf, bufsize, "Unknown %d", (int)inst.op); + return; + } char bufDst[16]; char bufSrc1[16]; char bufSrc2[16]; DisassembleParam(bufDst, sizeof(bufDst) - 2, inst.dest, meta->types[0], constPool); - DisassembleParam(bufSrc1, sizeof(bufSrc1) - 2, inst.dest, meta->types[1], constPool); - DisassembleParam(bufSrc2, sizeof(bufSrc2), inst.dest, meta->types[2], constPool); - if (meta->types[1]) { + DisassembleParam(bufSrc1, sizeof(bufSrc1) - 2, inst.src1, meta->types[1], constPool); + DisassembleParam(bufSrc2, sizeof(bufSrc2), inst.src2, meta->types[2], constPool); + if (meta->types[1] && meta->types[0] != '_') { strcat(bufDst, ", "); } - if (meta->types[2]) { + if (meta->types[2] && meta->types[1] != '_') { strcat(bufSrc1, ", "); } snprintf(buf, bufsize, "%s %s%s%s", meta->name, bufDst, bufSrc1, bufSrc2); -} \ No newline at end of file +} diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index a1aa75edc7..c3cb6021de 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -17,7 +17,6 @@ enum class IROp : u8 { SetConst, - SetConstImm, FSetConst, Mov, @@ -202,6 +201,8 @@ enum { // Hacky way to get to other state IRREG_LO = 226, // offset of lo in MIPSState / 4 IRREG_HI = 227, + IRREG_FCR31 = 228, + IRREG_FPCOND = 229 }; enum class IRParam { @@ -249,6 +250,8 @@ public: constPool_.clear(); } + void Simplify(); + const std::vector &GetInstructions() { return insts_; } const std::vector &GetConstants() { return constPool_; } @@ -258,3 +261,4 @@ private: }; void DisassembleIR(char *buf, size_t bufsize, IRInst inst, const u32 *constPool); +void InitIR(); diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp index b16706b2d6..7fae3255c3 100644 --- a/Core/MIPS/IR/IRJit.cpp +++ b/Core/MIPS/IR/IRJit.cpp @@ -38,32 +38,18 @@ #include "Core/MIPS/IR/IRJit.h" #include "Core/MIPS/JitCommon/JitCommon.h" -void DisassembleArm64Print(const u8 *data, int size) { - std::vector lines = DisassembleArm64(data, size); - for (auto s : lines) { - ILOG("%s", s.c_str()); - } - /* - ILOG("+++"); - // A format friendly to Online Disassembler which gets endianness wrong - for (size_t i = 0; i < lines.size(); i++) { - uint32_t opcode = ((const uint32_t *)data)[i]; - ILOG("%d/%d: %08x", (int)(i+1), (int)lines.size(), swap32(opcode)); - } - ILOG("==="); - ILOG("===");*/ -} - namespace MIPSComp { IRJit::IRJit(MIPSState *mips) : gpr(), mips_(mips) { logBlocks = 0; dontLogBlocks = 0; - js.startDefaultPrefix = mips_->HasDefaultPrefix(); + js.startDefaultPrefix = true; js.currentRoundingFunc = convertS0ToSCRATCH1[0]; u32 size = 128 * 1024; blTrampolines_ = kernelMemory.Alloc(size, true, "trampoline"); + logBlocks = 100; + InitIR(); } IRJit::~IRJit() { @@ -102,7 +88,8 @@ void IRJit::DoDummyState(PointerWrap &p) { } void IRJit::FlushAll() { - FlushPrefixV(); + gpr.FlushAll(); + // FlushPrefixV(); } void IRJit::FlushPrefixV() { @@ -162,6 +149,7 @@ void IRJit::Compile(u32 em_address) { int block_num = blocks_.AllocateBlock(em_address); IRBlock *b = blocks_.GetBlock(block_num); DoJit(em_address, b); + b->Finalize(block_num); // Overwrites the first instruction bool cleanSlate = false; @@ -192,7 +180,35 @@ void IRJit::Compile(u32 em_address) { void IRJit::RunLoopUntil(u64 globalticks) { PROFILE_THIS_SCOPE("jit"); - ((void (*)())enterDispatcher)(); + + // ApplyRoundingMode(true); + // IR Dispatcher + + while (true) { + // RestoreRoundingMode(true); + CoreTiming::Advance(); + // ApplyRoundingMode(true); + if (coreState != 0) { + break; + } + while (mips_->downcount >= 0) { + u32 inst = Memory::ReadUnchecked_U32(mips_->pc); + u32 opcode = inst >> 24; + u32 data = inst & 0xFFFFFF; + if (opcode == (MIPS_EMUHACK_OPCODE >> 24)) { + IRBlock *block = blocks_.GetBlock(data); + ILOG("Run block at %08x : v1=%08x a0=%08x", mips_->pc, mips_->r[MIPS_REG_V1], mips_->r[MIPS_REG_A0]); + mips_->pc = IRInterpret(mips_, block->GetInstructions(), block->GetConstants(), block->GetNumInstructions()); + } else { + // RestoreRoundingMode(true); + ILOG("Compile block at %08x : v1=%08x a0=%08x", mips_->pc, mips_->r[MIPS_REG_V1], mips_->r[MIPS_REG_A0]); + Compile(mips_->pc); + // ApplyRoundingMode(true); + } + } + } + + // RestoreRoundingMode(true); } u32 IRJit::GetCompilerPC() { @@ -230,24 +246,28 @@ void IRJit::DoJit(u32 em_address, IRBlock *b) { js.numInstructions++; } + ir.Simplify(); + b->SetInstructions(ir.GetInstructions(), ir.GetConstants()); - char temp[256]; if (logBlocks > 0 && dontLogBlocks == 0) { + char temp2[256]; ILOG("=============== mips %d ===============", blocks_.GetNumBlocks()); for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) { - MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, true); - ILOG("M: %08x %s", cpc, temp); + temp2[0] = 0; + MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp2, true); + ILOG("M: %08x %s", cpc, temp2); } } if (logBlocks > 0 && dontLogBlocks == 0) { ILOG("=============== IR (%d instructions) ===============", js.numInstructions); - for (int i = 0; i < js.numInstructions; i++) { + for (int i = 0; i < ir.GetInstructions().size(); i++) { char buf[256]; DisassembleIR(buf, sizeof(buf), ir.GetInstructions()[i], ir.GetConstants().data()); ILOG("%s", buf); } + ILOG("=============== end ================="); } if (logBlocks > 0) @@ -330,4 +350,15 @@ void IRBlockCache::InvalidateICache(u32 addess, u32 length) { // TODO } +void IRBlock::Finalize(int number) { + origFirstOpcode_= Memory::Read_Opcode_JIT(origAddr_); + MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | number); + Memory::Write_Opcode_JIT(origAddr_, opcode); +} + +MIPSOpcode IRJit::GetOriginalOp(MIPSOpcode op) { + IRBlock *b = blocks_.GetBlock(op.encoding & 0xFFFFFF); + return b->GetOriginalFirstOp(); +} + } // namespace MIPSComp \ No newline at end of file diff --git a/Core/MIPS/IR/IRJit.h b/Core/MIPS/IR/IRJit.h index 686eefe6c2..440e96d282 100644 --- a/Core/MIPS/IR/IRJit.h +++ b/Core/MIPS/IR/IRJit.h @@ -34,8 +34,18 @@ namespace MIPSComp { // TODO : Use arena allocators. For now let's just malloc. class IRBlock { public: - IRBlock() {} + IRBlock() : instr_(nullptr), const_(nullptr), numInstructions_(0), numConstants_(0), origAddr_(0) {} IRBlock(u32 emAddr) : instr_(nullptr), const_(nullptr), origAddr_(emAddr), numInstructions_(0) {} + IRBlock(IRBlock &&b) { + instr_ = b.instr_; + const_ = b.const_; + numInstructions_ = b.numInstructions_; + numConstants_ = b.numConstants_; + origAddr_ = b.origAddr_; + b.instr_ = nullptr; + b.const_ = nullptr; + } + ~IRBlock() { delete[] instr_; delete[] const_; @@ -50,12 +60,20 @@ public: memcpy(const_, constants.data(), sizeof(u32) * constants.size()); } + const IRInst *GetInstructions() const { return instr_; } + const u32 *GetConstants() const { return const_; } + int GetNumInstructions() const { return numInstructions_; } + MIPSOpcode GetOriginalFirstOp() const { return origFirstOpcode_; } + + void Finalize(int number); + private: IRInst *instr_; u32 *const_; u16 numInstructions_; u16 numConstants_; u32 origAddr_; + MIPSOpcode origFirstOpcode_; }; class IRBlockCache { @@ -170,7 +188,8 @@ public: int Replace_fabsf(); // Not using a regular block cache. - JitBlockCache *GetBlockCache() { return nullptr; } + JitBlockCache *GetBlockCache() override { return nullptr; } + MIPSOpcode GetOriginalOp(MIPSOpcode op) override; void ClearCache(); void InvalidateCache(); diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp new file mode 100644 index 0000000000..e110b73808 --- /dev/null +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -0,0 +1,14 @@ +#include "Core/MIPS/IR/IRPassSimplify.h" + +void SimplifyInPlace(IRInst *inst, int count, const u32 *constPool) { + for (int i = 0; i < count; i++) { + switch (inst[i].op) { + case IROp::AddConst: + if (constPool[inst[i].src2] == 0) + inst[i].op = IROp::Mov; + break; + default: + break; + } + } +} \ No newline at end of file diff --git a/Core/MIPS/IR/IRPassSimplify.h b/Core/MIPS/IR/IRPassSimplify.h new file mode 100644 index 0000000000..c798d89f92 --- /dev/null +++ b/Core/MIPS/IR/IRPassSimplify.h @@ -0,0 +1,5 @@ +#pragma once + +#include "Core/MIPS/IR/IRInst.h" + +void SimplifyInPlace(IRInst *inst, int count, const u32 *constPool); diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp index 7a31a463e4..808370ce63 100644 --- a/Core/MIPS/IR/IRRegCache.cpp +++ b/Core/MIPS/IR/IRRegCache.cpp @@ -42,5 +42,7 @@ void IRRegCache::Start(IRWriter *ir) { } void IRRegCache::FlushAll() { - + for (int i = 0; i < TOTAL_MAPPABLE_MIPSREGS; i++) { + Dirty((MIPSGPReg)i); + } } diff --git a/Core/MIPS/JitCommon/JitCommon.h b/Core/MIPS/JitCommon/JitCommon.h index 9c440a6cdb..e27707ea35 100644 --- a/Core/MIPS/JitCommon/JitCommon.h +++ b/Core/MIPS/JitCommon/JitCommon.h @@ -57,6 +57,7 @@ namespace MIPSComp { virtual void Compile(u32 em_address) = 0; virtual void ClearCache() = 0; virtual void EatPrefix() = 0; + virtual MIPSOpcode GetOriginalOp(MIPSOpcode op) = 0; // Block linking. This may need to work differently for whole-function JITs and stuff // like that. diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index 3ada3ad123..c2c01a56f4 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -839,4 +839,14 @@ void Jit::CallProtectedFunction(const void *func, const OpArg &arg1, const u32 a void Jit::Comp_DoNothing(MIPSOpcode op) { } +MIPSOpcode Jit::GetOriginalOp(MIPSOpcode op) { + JitBlockCache *bc = GetBlockCache(); + int block_num = bc->GetBlockNumberFromEmuHackOp(op, true); + if (block_num >= 0) { + return bc->GetOriginalFirstOp(block_num); + } else { + return op; + } +} + } // namespace diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h index 68d160a958..a6f4444331 100644 --- a/Core/MIPS/x86/Jit.h +++ b/Core/MIPS/x86/Jit.h @@ -161,6 +161,7 @@ public: void UpdateRoundingMode(); JitBlockCache *GetBlockCache() { return &blocks; } + MIPSOpcode GetOriginalOp(MIPSOpcode op) override; void ClearCache(); void InvalidateCache() override; diff --git a/Core/MemMap.cpp b/Core/MemMap.cpp index 557bc461c4..68e1f3e5a8 100644 --- a/Core/MemMap.cpp +++ b/Core/MemMap.cpp @@ -479,13 +479,7 @@ Opcode Read_Opcode_JIT(u32 address) { Opcode inst = Opcode(Read_U32(address)); if (MIPS_IS_RUNBLOCK(inst.encoding) && MIPSComp::jit) { - JitBlockCache *bc = MIPSComp::jit->GetBlockCache(); - int block_num = bc->GetBlockNumberFromEmuHackOp(inst, true); - if (block_num >= 0) { - return bc->GetOriginalFirstOp(block_num); - } else { - return inst; - } + return MIPSComp::jit->GetOriginalOp(inst); } else { return inst; }