Review notes (free text placed before the first "diff --git" line).

What this patch does, as visible in the hunks below:
  * IRCompVFPU.cpp: replaces the DISABLE stubs of Comp_VVectorInit, Comp_VIdt,
    Comp_VMatrixInit and Comp_Viim with IR-emitting implementations.
  * IRInst.h / IRInst.cpp: adds the InitVec4 op, the Vec4Init constant
    selector, the single-float ops VSin/VCos/VSqrt/VRSqrt/VRecip/VAsin, and a
    'v' disassembly parameter type that prints the selected Vec4Init constant.
  * IRInterpreter.cpp: interprets the new ops; InitVec4 copies one row of
    vec4InitValues into mips->v.
  * IRPassSimplify.cpp: passes the new ops through PropagateConstants and
    swaps FAdd/FMul sources when src2 == dest.
  * MIPSVFPUUtils.h / x86/CompVFPU.cpp: adds vfpu_asin() and uses it in
    ASinScaled; adds IsMatrixTransposed/IsVectorColumn/TransposeMatrixReg.

Corrections applied in this revision of the patch:
  * IRInterpreter.cpp, InitVec4, non-SSE branch: the memcpy destination was
    &mips->v[inst->dest + i], but no `i` is declared in the visible hunk. It
    now writes to &mips->v[inst->dest], the same destination as the SSE branch.
  * IRCompVFPU.cpp, Comp_VMatrixInit: the switch default used a bare `return`,
    which emits nothing for an unhandled subtype. It now uses DISABLE, like
    every other unsupported case in these functions.

NOTE(review): the `#include` context line in the first hunk has no header name
in this copy of the patch; restore it from the target file before applying.
NOTE(review): indentation was reconstructed with tabs; confirm against the
target files.

diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp
index 2bb96e7549..e67b93cddd 100644
--- a/Core/MIPS/IR/IRCompVFPU.cpp
+++ b/Core/MIPS/IR/IRCompVFPU.cpp
@@ -16,6 +16,7 @@
 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
 
 #include
+
 #include "math/math_util.h"
 
 #include "Core/MemMap.h"
@@ -57,7 +58,9 @@ namespace MIPSComp {
 	}
 
 	static bool IsConsecutive4(const u8 regs[4]) {
-		return (regs[1] == regs[0] + 1 && regs[2] == regs[1] + 1 && regs[3] == regs[2] + 1);
+		return regs[1] == regs[0] + 1 &&
+			regs[2] == regs[1] + 1 &&
+			regs[3] == regs[2] + 1;
 	}
 
 	void IRFrontend::Comp_VPFX(MIPSOpcode op) {
@@ -244,15 +247,79 @@ namespace MIPSComp {
 	}
 
 	void IRFrontend::Comp_VVectorInit(MIPSOpcode op) {
-		DISABLE;
+		if (!js.HasNoPrefix())
+			DISABLE;
+
+		VectorSize sz = GetVecSize(op);
+		int type = (op >> 16) & 0xF;
+		int vd = _VD;
+
+		if (sz == 4 && IsVectorColumn(vd)) {
+			u8 dregs[4];
+			GetVectorRegs(dregs, sz, vd);
+			ir.Write(IROp::InitVec4, voffset[dregs[0]], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE));
+		} else if (sz == 1) {
+			ir.Write(IROp::SetConstV, voffset[vd], ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f));
+		} else {
+			DISABLE;
+		}
 	}
 
 	void IRFrontend::Comp_VIdt(MIPSOpcode op) {
-		DISABLE;
+		if (!js.HasNoPrefix())
+			DISABLE;
+
+		int vd = _VD;
+		VectorSize sz = GetVecSize(op);
+		if (sz != V_Quad)
+			DISABLE;
+
+		if (!IsVectorColumn(vd))
+			DISABLE;
+
+		u8 dregs[4];
+		GetVectorRegs(dregs, sz, vd);
+		int row = vd & 3;
+		Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row);
+		ir.Write(IROp::InitVec4, voffset[dregs[0]], (int)init);
 	}
 
 	void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) {
-		DISABLE;
+		MatrixSize sz = GetMtxSize(op);
+		if (sz != M_4x4) {
+			DISABLE;
+		}
+
+		// Not really about trying here, it will work if enabled.
+		VectorSize vsz = GetVectorSize(sz);
+		u8 vecs[4];
+		int vd = _VD;
+		if (IsMatrixTransposed(vd)) {
+			// All outputs are transpositionally symmetric, so should be fine.
+			vd = TransposeMatrixReg(vd);
+		}
+		GetMatrixColumns(vd, M_4x4, vecs);
+		for (int i = 0; i < 4; i++) {
+			u8 vec[4];
+			GetVectorRegs(vec, vsz, vecs[i]);
+			// As they are columns, they will be nicely consecutive.
+			Vec4Init init;
+			switch ((op >> 16) & 0xF) {
+			case 3:
+				init = Vec4Init((int)Vec4Init::Set_1000 + i);
+				break;
+			case 6:
+				init = Vec4Init::AllZERO;
+				break;
+			case 7:
+				init = Vec4Init::AllONE;
+				break;
+			default:
+				DISABLE;
+			}
+			ir.Write(IROp::InitVec4, voffset[vec[0]], (int)init);
+		}
+		return;
 	}
 
 	void IRFrontend::Comp_VHdp(MIPSOpcode op) {
@@ -275,7 +342,7 @@ namespace MIPSComp {
 	void IRFrontend::Comp_VV2Op(MIPSOpcode op) {
 		CONDITIONAL_DISABLE;
 
-		// Pre-processing: Eliminate silly no-op VMOVs, common in Wipeout Pure
+		// Eliminate silly no-op VMOVs, common in Wipeout Pure
 		if (((op >> 16) & 0x1f) == 0 && _VS == _VD && js.HasNoPrefix()) {
 			return;
 		}
@@ -379,7 +446,12 @@ namespace MIPSComp {
 	}
 
 	void IRFrontend::Comp_Viim(MIPSOpcode op) {
-		DISABLE;
+		if (!js.HasNoPrefix())
+			DISABLE;
+
+		u8 dreg = _VT;
+		s32 imm = (s32)(s16)(u16)(op & 0xFFFF);
+		ir.Write(IROp::SetConstV, voffset[dreg], ir.AddConstantFloat((float)imm));
 	}
 
 	void IRFrontend::Comp_Vfim(MIPSOpcode op) {
diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp
index d82e72ccdb..e9bc55ab78 100644
--- a/Core/MIPS/IR/IRInst.cpp
+++ b/Core/MIPS/IR/IRInst.cpp
@@ -85,9 +85,18 @@ static const IRMeta irMeta[] = {
 	{ IROp::FMovToGPR, "FMovToGPR", "GF" },
 	{ IROp::VMovFromGPR, "VMovFromGPR", "VG" },
 	{ IROp::VMovToGPR, "VMovToGPR", "GV" },
+	{ IROp::InitVec4, "InitVec4", "Vv"},
 	{ IROp::FpCondToReg, "FpCondToReg", "G" },
 	{ IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" },
 	{ IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" },
+
+	{ IROp::VSin, "VSin", "VV" },
+	{ IROp::VCos, "VCos", "VV" },
+	{ IROp::VSqrt, "VSqrt", "VV" },
+	{ IROp::VRSqrt, "VRSqrt", "VV" },
+	{ IROp::VRecip, "VRecip", "VV" },
+	{ IROp::VAsin, "VAsin", "VV" },
+
 	{ IROp::Interpret, "Interpret", "_C" },
 	{ IROp::Downcount, "Downcount", "_II" },
 	{ IROp::ExitToConst, "Exit", "C" },
@@ -177,6 +186,15 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co
 		"RCX6",
 		"RCX7",
 	};
+	static const char *initVec4Names[8] = {
+		"[0 0 0 0]",
+		"[1 1 1 1]",
+		"[-1 -1 -1 -1]",
+		"[1 0 0 0]",
+		"[0 1 0 0]",
+		"[0 0 1 0]",
+		"[0 0 0 1]",
+	};
 
 	switch (type) {
 	case 'G':
@@ -197,6 +215,9 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co
 	case 'T':
 		snprintf(buf, bufSize, "%s", vfpuCtrlNames[param]);
 		break;
+	case 'v':
+		snprintf(buf, bufSize, "%s", initVec4Names[param]);
+		break;
 	case '_':
 	case '\0':
 		buf[0] = 0;
diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h
index 70f0e0ff6e..e2c0f6644a 100644
--- a/Core/MIPS/IR/IRInst.h
+++ b/Core/MIPS/IR/IRInst.h
@@ -144,6 +144,16 @@ enum class IROp : u8 {
 	VMovFromGPR,
 	VMovToGPR,
 
+	InitVec4,
+
+	// Slow special functions. Used on singles.
+	VSin,
+	VCos,
+	VSqrt,
+	VRSqrt,
+	VRecip,
+	VAsin,
+
 	// Fake/System instructions
 	Interpret,
 
@@ -181,6 +191,17 @@ enum IRComparison {
 	Bad,
 };
 
+// Some common vec4 constants.
+enum class Vec4Init {
+	AllZERO,
+	AllONE,
+	AllMinusONE,
+	Set_1000,
+	Set_0100,
+	Set_0010,
+	Set_0001,
+};
+
 // Hm, unused
 inline IRComparison Invert(IRComparison comp) {
 	switch (comp) {
diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp
index 63e0bd533e..2a601bb8f8 100644
--- a/Core/MIPS/IR/IRInterpreter.cpp
+++ b/Core/MIPS/IR/IRInterpreter.cpp
@@ -6,6 +6,7 @@
 #include "Core/HLE/HLE.h"
 #include "Core/HLE/ReplaceTables.h"
 #include "Core/MIPS/MIPSTables.h"
+#include "Core/MIPS/MIPSVFPUUtils.h"
 
 #include "math/math_util.h"
 #include "Common/CommonTypes.h"
@@ -14,6 +15,16 @@
 #include "Core/MIPS/IR/IRInst.h"
 #include "Core/MIPS/IR/IRInterpreter.h"
 
+alignas(16) float vec4InitValues[8][4] = {
+	{ 0.0f, 0.0f, 0.0f, 0.0f },
+	{ 1.0f, 1.0f, 1.0f, 1.0f },
+	{ -1.0f, -1.0f, -1.0f, -1.0f },
+	{ 1.0f, 0.0f, 0.0f, 0.0f },
+	{ 0.0f, 1.0f, 0.0f, 0.0f },
+	{ 0.0f, 0.0f, 1.0f, 0.0f },
+	{ 0.0f, 0.0f, 0.0f, 1.0f },
+};
+
 u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int count) {
 	const IRInst *end = inst + count;
 	while (inst != end) {
@@ -134,6 +145,33 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
 			break;
 		}
 
+		case IROp::InitVec4:
+#if defined(_M_SSE)
+			_mm_store_ps(&mips->v[inst->dest], _mm_load_ps(vec4InitValues[inst->src1]));
+#else
+			memcpy(&mips->v[inst->dest], vec4InitValues[inst->src1], 4 * sizeof(float));
+#endif
+			break;
+
+		case IROp::VSin:
+			mips->v[inst->dest] = vfpu_sin(mips->v[inst->src1]);
+			break;
+		case IROp::VCos:
+			mips->v[inst->dest] = vfpu_cos(mips->v[inst->src1]);
+			break;
+		case IROp::VSqrt:
+			mips->v[inst->dest] = sqrtf(mips->v[inst->src1]);
+			break;
+		case IROp::VRSqrt:
+			mips->v[inst->dest] = 1.0f / sqrtf(mips->v[inst->src1]);
+			break;
+		case IROp::VRecip:
+			mips->v[inst->dest] = 1.0f / mips->v[inst->src1];
+			break;
+		case IROp::VAsin:
+			mips->v[inst->dest] = vfpu_asin(mips->v[inst->src1]);
+			break;
+
 		case IROp::ShlImm:
 			mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2;
 			break;
diff --git a/Core/MIPS/IR/IRJit.cpp b/Core/MIPS/IR/IRJit.cpp
index 661543a9d0..fb49026855 100644
--- a/Core/MIPS/IR/IRJit.cpp
+++ b/Core/MIPS/IR/IRJit.cpp
@@ -114,9 +114,9 @@ void IRJit::RunLoopUntil(u64 globalticks) {
 		}
 		while (mips_->downcount >= 0) {
 			u32 inst = Memory::ReadUnchecked_U32(mips_->pc);
-			u32 opcode = inst >> 24;
-			u32 data = inst & 0xFFFFFF;
-			if (opcode == (MIPS_EMUHACK_OPCODE >> 24)) {
+			u32 opcode = inst & 0xFF000000;
+			if (opcode == MIPS_EMUHACK_OPCODE) {
+				u32 data = inst & 0xFFFFFF;
 				IRBlock *block = blocks_.GetBlock(data);
 				mips_->pc = IRInterpret(mips_, block->GetInstructions(), block->GetConstants(), block->GetNumInstructions());
 			} else {
diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp
index 50bfca8903..d7c93593f9 100644
--- a/Core/MIPS/IR/IRPassSimplify.cpp
+++ b/Core/MIPS/IR/IRPassSimplify.cpp
@@ -340,8 +340,13 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
 		// FP-only instructions don't need to flush immediates.
 		case IROp::FAdd:
 		case IROp::FMul:
-		case IROp::FDiv:
+			// Regularize, to help x86 backends (add.s r0, r1, r0 -> add.s r0, r0, r1)
+			if (inst.src2 == inst.dest && inst.src1 != inst.src2)
+				std::swap(inst.src1, inst.src2);
+			out.Write(inst);
+			break;
 		case IROp::FSub:
+		case IROp::FDiv:
 		case IROp::FNeg:
 		case IROp::FAbs:
 		case IROp::FSqrt:
@@ -373,6 +378,19 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
 			}
 			break;
 
+		case IROp::InitVec4:
+			out.Write(inst);
+			break;
+
+		case IROp::VSin:
+		case IROp::VCos:
+		case IROp::VSqrt:
+		case IROp::VRSqrt:
+		case IROp::VRecip:
+		case IROp::VAsin:
+			out.Write(inst);
+			break;
+
 		case IROp::ZeroFpCond:
 		case IROp::FCmpUnordered:
 		case IROp::FCmpEqual:
diff --git a/Core/MIPS/MIPSVFPUUtils.h b/Core/MIPS/MIPSVFPUUtils.h
index bb8403217f..7f6ada0fa2 100644
--- a/Core/MIPS/MIPSVFPUUtils.h
+++ b/Core/MIPS/MIPSVFPUUtils.h
@@ -45,6 +45,10 @@ inline float vfpu_cos(float angle) {
 	return cosf(angle);
 }
 
+inline float vfpu_asin(float angle) {
+	return asinf(angle) / M_PI_2;
+}
+
 inline void vfpu_sincos(float angle, float &sine, float &cosine) {
 	angle -= floorf(angle * 0.25f) * 4.f;
 	angle *= (float)M_PI_2;
@@ -127,7 +131,15 @@ int GetNumVectorElements(VectorSize sz);
 int GetMatrixSide(MatrixSize sz);
 const char *GetVectorNotation(int reg, VectorSize size);
 const char *GetMatrixNotation(int reg, MatrixSize size);
-
+inline bool IsMatrixTransposed(int matrixReg) {
+	return (matrixReg >> 5) & 1;
+}
+inline bool IsVectorColumn(int vectorReg) {
+	return !((vectorReg >> 5) & 1);
+}
+inline int TransposeMatrixReg(int matrixReg) {
+	return matrixReg ^ 0x20;
+}
 int GetVectorOverlap(int reg1, VectorSize size1, int reg2, VectorSize size2);
 
 float Float16ToFloat32(unsigned short l);
diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp
index fcc51c3646..749967f53a 100644
--- a/Core/MIPS/x86/CompVFPU.cpp
+++ b/Core/MIPS/x86/CompVFPU.cpp
@@ -101,8 +101,7 @@ void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
 	for (int i = 0; i < n; i++)
 		origV[i] = vregs[i];
 
-	for (int i = 0; i < n; i++)
-	{
+	for (int i = 0; i < n; i++) {
 		int regnum = (prefix >> (i*2)) & 3;
 		int abs    = (prefix >> (8+i)) & 1;
 		int negate = (prefix >> (16+i)) & 1;
@@ -2142,7 +2141,7 @@ void CosOnly(SinCosArg angle) {
 }
 
 void ASinScaled(SinCosArg angle) {
-	sincostemp[0] = asinf(angle) / M_PI_2;
+	sincostemp[0] = vfpu_asin(angle);
 }
 
 void SinCosNegSin(SinCosArg angle) {