diff --git a/Core/MIPS/IR/IRCompALU.cpp b/Core/MIPS/IR/IRCompALU.cpp index 6500d1b0df..8ffa632aff 100644 --- a/Core/MIPS/IR/IRCompALU.cpp +++ b/Core/MIPS/IR/IRCompALU.cpp @@ -19,11 +19,9 @@ #include "Core/MIPS/MIPS.h" #include "Core/MIPS/MIPSCodeUtils.h" -#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRFrontend.h" #include "Common/CPUDetect.h" -using namespace MIPSAnalyst; - #define _RS MIPS_GET_RS(op) #define _RT MIPS_GET_RT(op) #define _RD MIPS_GET_RD(op) diff --git a/Core/MIPS/IR/IRCompBranch.cpp b/Core/MIPS/IR/IRCompBranch.cpp index 3dda003b56..76833bf329 100644 --- a/Core/MIPS/IR/IRCompBranch.cpp +++ b/Core/MIPS/IR/IRCompBranch.cpp @@ -28,7 +28,7 @@ #include "Core/MIPS/MIPSAnalyst.h" #include "Core/MIPS/MIPSTables.h" -#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRFrontend.h" #include "Core/MIPS/JitCommon/JitBlockCache.h" #include "Common/Arm64Emitter.h" diff --git a/Core/MIPS/IR/IRCompFPU.cpp b/Core/MIPS/IR/IRCompFPU.cpp index 1ca4a08e96..068a58013a 100644 --- a/Core/MIPS/IR/IRCompFPU.cpp +++ b/Core/MIPS/IR/IRCompFPU.cpp @@ -21,7 +21,7 @@ #include "Core/MIPS/MIPSCodeUtils.h" #include "Core/MIPS/MIPSTables.h" -#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRFrontend.h" #include "Core/MIPS/IR/IRRegCache.h" #include "Common/CPUDetect.h" diff --git a/Core/MIPS/IR/IRCompLoadStore.cpp b/Core/MIPS/IR/IRCompLoadStore.cpp index 41c76a1a7d..b890f4ff68 100644 --- a/Core/MIPS/IR/IRCompLoadStore.cpp +++ b/Core/MIPS/IR/IRCompLoadStore.cpp @@ -42,7 +42,7 @@ #include "Core/MIPS/MIPS.h" #include "Core/MIPS/MIPSAnalyst.h" #include "Core/MIPS/MIPSCodeUtils.h" -#include "Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRFrontend.h" #include "Core/MIPS/IR/IRRegCache.h" #define _RS MIPS_GET_RS(op) diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index a2a3295c6e..2bb96e7549 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -27,7 +27,7 @@ #include "Core/Config.h" #include "Core/Reporting.h" -#include 
"Core/MIPS/IR/IRJit.h" +#include "Core/MIPS/IR/IRFrontend.h" #include "Core/MIPS/IR/IRRegCache.h" // All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. @@ -50,6 +50,15 @@ #define _IMM26 (op & 0x03FFFFFF) namespace MIPSComp { + static void ApplyVoffset(u8 regs[4], int count) { + for (int i = 0; i < count; i++) { + regs[i] = voffset[regs[i]]; + } + } + + static bool IsConsecutive4(const u8 regs[4]) { + return (regs[1] == regs[0] + 1 && regs[2] == regs[1] + 1 && regs[3] == regs[2] + 1); + } void IRFrontend::Comp_VPFX(MIPSOpcode op) { CONDITIONAL_DISABLE; @@ -177,7 +186,21 @@ namespace MIPSComp { } void IRFrontend::Comp_SV(MIPSOpcode op) { - DISABLE; + s32 offset = (signed short)(op & 0xFFFC); + int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5); + MIPSGPReg rs = _RS; + switch (op >> 26) { + case 50: //lv.s + ir.Write(IROp::LoadFloatV, voffset[vt], rs, ir.AddConstant(offset)); + break; + + case 58: //sv.s + ir.Write(IROp::StoreFloatV, voffset[vt], rs, ir.AddConstant(offset)); + break; + + default: + DISABLE; + } } void IRFrontend::Comp_SVQ(MIPSOpcode op) { @@ -187,27 +210,32 @@ namespace MIPSComp { u8 vregs[4]; GetVectorRegs(vregs, V_Quad, vt); + ApplyVoffset(vregs, 4); // Translate to memory order switch (op >> 26) { case 54: //lv.q - { - // TODO: Add vector load/store instruction to the IR - ir.Write(IROp::LoadFloatV, voffset[vregs[0]], rs, ir.AddConstant(imm)); - ir.Write(IROp::LoadFloatV, voffset[vregs[1]], rs, ir.AddConstant(imm + 4)); - ir.Write(IROp::LoadFloatV, voffset[vregs[2]], rs, ir.AddConstant(imm + 8)); - ir.Write(IROp::LoadFloatV, voffset[vregs[3]], rs, ir.AddConstant(imm + 12)); - } - break; + if (IsConsecutive4(vregs)) { + ir.Write(IROp::LoadVec4, vregs[0], rs, ir.AddConstant(imm)); + } else { + // Let's not even bother with "vertical" loads for now. 
+ ir.Write(IROp::LoadFloatV, vregs[0], rs, ir.AddConstant(imm)); + ir.Write(IROp::LoadFloatV, vregs[1], rs, ir.AddConstant(imm + 4)); + ir.Write(IROp::LoadFloatV, vregs[2], rs, ir.AddConstant(imm + 8)); + ir.Write(IROp::LoadFloatV, vregs[3], rs, ir.AddConstant(imm + 12)); + } + break; case 62: //sv.q - { - // CC might be set by slow path below, so load regs first. - ir.Write(IROp::StoreFloatV, voffset[vregs[0]], rs, ir.AddConstant(imm)); - ir.Write(IROp::StoreFloatV, voffset[vregs[1]], rs, ir.AddConstant(imm + 4)); - ir.Write(IROp::StoreFloatV, voffset[vregs[2]], rs, ir.AddConstant(imm + 8)); - ir.Write(IROp::StoreFloatV, voffset[vregs[3]], rs, ir.AddConstant(imm + 12)); - } - break; + if (IsConsecutive4(vregs)) { + ir.Write(IROp::StoreVec4, vregs[0], rs, ir.AddConstant(imm)); + } else { + // Let's not even bother with "vertical" stores for now. + ir.Write(IROp::StoreFloatV, vregs[0], rs, ir.AddConstant(imm)); + ir.Write(IROp::StoreFloatV, vregs[1], rs, ir.AddConstant(imm + 4)); + ir.Write(IROp::StoreFloatV, vregs[2], rs, ir.AddConstant(imm + 8)); + ir.Write(IROp::StoreFloatV, vregs[3], rs, ir.AddConstant(imm + 12)); + } + break; default: DISABLE; diff --git a/Core/MIPS/IR/IRFrontend.cpp b/Core/MIPS/IR/IRFrontend.cpp index 95ce9d9c4b..3b13978b43 100644 --- a/Core/MIPS/IR/IRFrontend.cpp +++ b/Core/MIPS/IR/IRFrontend.cpp @@ -236,8 +236,8 @@ void IRFrontend::DoJit(u32 em_address, std::vector &instructions, std::v if (IRApplyPasses(passes, ARRAY_SIZE(passes), ir, simplified)) logBlocks = 1; code = &simplified; - if (ir.GetInstructions().size() >= 24) - logBlocks = 1; + //if (ir.GetInstructions().size() >= 24) + // logBlocks = 1; } instructions = code->GetInstructions(); diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp index 6b3231ce6d..d82e72ccdb 100644 --- a/Core/MIPS/IR/IRInst.cpp +++ b/Core/MIPS/IR/IRInst.cpp @@ -60,11 +60,13 @@ static const IRMeta irMeta[] = { { IROp::Load32, "Load32", "GGC" }, { IROp::LoadFloat, "LoadFloat", "FGC" }, { 
IROp::LoadFloatV, "LoadFloatV", "VGC" }, + { IROp::LoadVec4, "LoadVec4", "VGC" }, { IROp::Store8, "Store8", "GGC" }, { IROp::Store16, "Store16", "GGC" }, { IROp::Store32, "Store32", "GGC" }, { IROp::StoreFloat, "StoreFloat", "FGC" }, { IROp::StoreFloatV, "StoreFloatV", "VGC" }, + { IROp::StoreVec4, "StoreVec4", "VGC" }, { IROp::FAdd, "FAdd", "FFF" }, { IROp::FSub, "FSub", "FFF" }, { IROp::FMul, "FMul", "FFF" }, diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h index 77d71ed915..70f0e0ff6e 100644 --- a/Core/MIPS/IR/IRInst.h +++ b/Core/MIPS/IR/IRInst.h @@ -90,12 +90,14 @@ enum class IROp : u8 { Load32, LoadFloat, LoadFloatV, + LoadVec4, Store8, Store16, Store32, StoreFloat, StoreFloatV, + StoreVec4, Ext8to32, Ext16to32, @@ -212,13 +214,16 @@ enum { IRTEMP_LHS, // Reserved for use in branches IRTEMP_RHS, // Reserved for use in branches + // 16 float temps for vector S and T prefixes and things like that. + // IRVTEMP_0 = 208 - 64, // -64 to be relative to v[0] + // Hacky way to get to other state - IRREG_VPFU_CTRL_BASE = 208, - IRREG_VPFU_CC = 211, + IRREG_VFPU_CTRL_BASE = 208, + IRREG_VFPU_CC = 211, IRREG_LO = 226, // offset of lo in MIPSState / 4 IRREG_HI = 227, IRREG_FCR31 = 228, - IRREG_FPCOND = 229 + IRREG_FPCOND = 229, }; struct IRMeta { diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 54f20edb4a..63e0bd533e 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -1,3 +1,7 @@ +#ifdef _M_SSE +#include <xmmintrin.h> +#endif + #include "Core/MemMap.h" #include "Core/HLE/HLE.h" #include "Core/HLE/ReplaceTables.h" @@ -107,6 +111,29 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c Memory::WriteUnchecked_Float(mips->v[inst->src3], mips->r[inst->src1] + constPool[inst->src2]); break; + case IROp::LoadVec4: + { + u32 base = mips->r[inst->src1] + constPool[inst->src2]; +#if defined(_M_SSE) + _mm_store_ps(&mips->v[inst->dest], _mm_load_ps((const float 
*)Memory::GetPointerUnchecked(base))); +#else + for (int i = 0; i < 4; i++) + mips->v[inst->dest + i] = Memory::ReadUnchecked_Float(base + 4 * i); +#endif + break; + } + case IROp::StoreVec4: + { + u32 base = mips->r[inst->src1] + constPool[inst->src2]; +#if defined(_M_SSE) + _mm_store_ps((float *)Memory::GetPointerUnchecked(base), _mm_load_ps(&mips->v[inst->dest])); +#else + for (int i = 0; i < 4; i++) + Memory::WriteUnchecked_Float(mips->v[inst->dest + i], base + 4 * i); +#endif + break; + } + case IROp::ShlImm: mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2; break; diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 0e5353ff57..50bfca8903 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -291,6 +291,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { break; case IROp::StoreFloat: case IROp::StoreFloatV: + case IROp::StoreVec4: if (gpr.IsImm(inst.src1)) { out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); } else { @@ -314,6 +315,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { break; case IROp::LoadFloat: case IROp::LoadFloatV: + case IROp::LoadVec4: if (gpr.IsImm(inst.src1)) { out.Write(inst.op, inst.dest, 0, out.AddConstant(gpr.GetImm(inst.src1) + constants[inst.src2])); } else { @@ -388,7 +390,7 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) { goto doDefault; case IROp::VfpuCtrlToReg: - gpr.MapDirtyIn(inst.dest, IRREG_VPFU_CTRL_BASE + inst.src1); + gpr.MapDirtyIn(inst.dest, IRREG_VFPU_CTRL_BASE + inst.src1); goto doDefault; case IROp::Syscall: diff --git a/Core/MIPS/MIPS.h b/Core/MIPS/MIPS.h index bbc9952c4d..d3a01f1bde 100644 --- a/Core/MIPS/MIPS.h +++ b/Core/MIPS/MIPS.h @@ -86,6 +86,7 @@ enum MIPSGPReg { MIPS_REG_RA=31, // Not real regs, just for convenience/jit mapping. + // NOTE: These are not the same as the offsets the IR has to use! 
MIPS_REG_HI = 32, MIPS_REG_LO = 33, MIPS_REG_FPCOND = 34, @@ -155,7 +156,7 @@ public: void DoState(PointerWrap &p); - // MUST start with r and be followed by f! + // MUST start with r and be followed by f, v, and t! u32 r[32]; union { float f[32]; @@ -166,23 +167,25 @@ public: float v[128]; u32 vi[128]; }; - // Used for temporary variables by IR Interpreter. - // Can be indexed through r[] using indices 192+. - u32 t[16]; - // Temps don't get flushed so we don't reserve space for them. + // Register-allocated JIT Temps don't get flushed so we don't reserve space for them. + // However, the IR interpreter needs some temps that can stick around between ops. + // Can be indexed through r[] using indices 192+. + u32 t[16]; //192 + // float vt[16]; //208 TODO: VFPU temp + // If vfpuCtrl (prefixes) get mysterious values, check the VFPU regcache code. - u32 vfpuCtrl[16]; + u32 vfpuCtrl[16]; // 208 // ARM64 wants lo/hi to be aligned to 64 bits from the base of this struct. - u32 padLoHi; + u32 padLoHi; // 224 union { struct { - u32 pc; + u32 pc; //225 - u32 lo; // offset 192 + 16 + 16 + 1 + 1 - u32 hi; + u32 lo; //226 + u32 hi; //227 u32 fcr31; //fpu control register u32 fpcond; // cache the cond flag of fcr31 (& 1 << 23)