diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp
index ed2ab8d89b..f4736be521 100644
--- a/Core/MIPS/IR/IRCompVFPU.cpp
+++ b/Core/MIPS/IR/IRCompVFPU.cpp
@@ -358,7 +358,40 @@ namespace MIPSComp {
 	}
 
 	void IRFrontend::Comp_VHdp(MIPSOpcode op) {
-		DISABLE;
+		// Emits a multiply-accumulate chain over S and T into a single D element.
+		// The final term adds t[n-1] on its own; s[n-1] is never multiplied in.
+		CONDITIONAL_DISABLE;
+		if (js.HasUnknownPrefix()) {
+			DISABLE;
+		}
+
+		int vd = _VD;
+		int vs = _VS;
+		int vt = _VT;
+		VectorSize sz = GetVecSize(op);
+
+		// TODO: Force read one of them into regs? probably not.
+		u8 sregs[4], tregs[4], dregs[1];
+		GetVectorRegsPrefixS(sregs, sz, vs);
+		GetVectorRegsPrefixT(tregs, sz, vt);
+		GetVectorRegsPrefixD(dregs, V_Single, vd);
+
+		// TODO: applyprefixST here somehow (shuffle, etc...)
+		ir.Write(IROp::FMul, IRVTEMP_0, sregs[0], tregs[0]);
+
+		int n = GetNumVectorElements(sz);
+		for (int i = 1; i < n; i++) {
+			// sum += s[i]*t[i], except for the last lane: sum += t[n-1].
+			if (i == n - 1) {
+				ir.Write(IROp::FAdd, IRVTEMP_0, IRVTEMP_0, tregs[i]);
+			} else {
+				ir.Write(IROp::FMul, IRVTEMP_0 + 1, sregs[i], tregs[i]);
+				ir.Write(IROp::FAdd, IRVTEMP_0, IRVTEMP_0, IRVTEMP_0 + 1);
+			}
+		}
+
+		ir.Write(IROp::FMov, dregs[0], IRVTEMP_0);
+		ApplyPrefixD(dregs, V_Single);
 	}
 
 	static const float MEMORY_ALIGNED16(vavg_table[4]) = { 1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f };
@@ -840,7 +873,6 @@ namespace MIPSComp {
 
 		MatrixSize sz = GetMtxSize(op);
 		if (sz != M_4x4) {
-			// logBlocks = true;
 			DISABLE;
 		}
 		int n = GetMatrixSide(sz);
@@ -1165,16 +1197,77 @@ namespace MIPSComp {
 	}
 
 	void IRFrontend::Comp_Vcmp(MIPSOpcode op) {
-		// Fiendishly hard...
-		DISABLE;
+		CONDITIONAL_DISABLE;
+		if (js.HasUnknownPrefix())
+			DISABLE;
+
+		VectorSize sz = GetVecSize(op);
+		int n = GetNumVectorElements(sz);
+
+		VCondition cond = (VCondition)(op & 0xF);
+		// IROp::FCmpVfpuBit only implements the conditions below and treats any
+		// other one as false, so leave the rest to the fallback path.
+		switch (cond) {
+		case VC_EQ: case VC_NE: case VC_LT: case VC_LE: case VC_GT:
+		case VC_GE: case VC_EZ: case VC_NZ: case VC_TR: case VC_FL:
+			break;
+		default:
+			DISABLE;
+		}
+
+		u8 sregs[4], tregs[4];
+		GetVectorRegsPrefixS(sregs, sz, _VS);
+		GetVectorRegsPrefixT(tregs, sz, _VT);
+
+		// Each FCmpVfpuBit sets or clears CC bit i; FCmpVfpuAggregate then
+		// refreshes the any (bit 4) and all (bit 5) flags over `mask`.
+		int mask = 0;
+		for (int i = 0; i < n; i++) {
+			ir.Write(IROp::FCmpVfpuBit, cond | (i << 4), sregs[i], tregs[i]);
+			mask |= (1 << i);
+		}
+		ir.Write(IROp::FCmpVfpuAggregate, mask);
 	}
 
 	void IRFrontend::Comp_Vcmov(MIPSOpcode op) {
-		// Fiendishly hard...
-		DISABLE;
+		CONDITIONAL_DISABLE;
+		if (js.HasUnknownPrefix()) {
+			DISABLE;
+		}
+
+		VectorSize sz = GetVecSize(op);
+		int n = GetNumVectorElements(sz);
+
+		u8 sregs[4], dregs[4];
+		GetVectorRegsPrefixS(sregs, sz, _VS);
+		GetVectorRegsPrefixD(dregs, sz, _VD);
+		// FCmovVfpuCC's last operand packs the CC bit index (low 7 bits) and, in
+		// bit 7, the value that CC bit must have for the copy to take place.
+		int tf = (op >> 19) & 1;
+		int imm3 = (op >> 16) & 7;
+
+		for (int i = 0; i < n; ++i) {
+			// Simplification: Disable if overlap unsafe
+			if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs)) {
+				DISABLE;
+			}
+		}
+		if (imm3 < 6) {
+			// Test one bit of CC. This bit decides whether none or all subregisters are copied.
+			for (int i = 0; i < n; i++) {
+				ir.Write(IROp::FCmovVfpuCC, dregs[i], sregs[i], (imm3) | ((!tf) << 7));
+			}
+		} else {
+			// Look at the bottom four bits of CC to individually decide if the subregisters should be copied.
+			for (int i = 0; i < n; i++) {
+				ir.Write(IROp::FCmovVfpuCC, dregs[i], sregs[i], (i) | ((!tf) << 7));
+			}
+		}
+		ApplyPrefixD(dregs, sz);
 	}
 
 	void IRFrontend::Comp_Viim(MIPSOpcode op) {
+		CONDITIONAL_DISABLE;
 		if (js.HasUnknownPrefix())
 			DISABLE;
 
@@ -1186,6 +1279,7 @@ namespace MIPSComp {
 	}
 
 	void IRFrontend::Comp_Vfim(MIPSOpcode op) {
+		CONDITIONAL_DISABLE;
 		if (js.HasUnknownPrefix())
 			DISABLE;
 
diff --git a/Core/MIPS/IR/IRInst.cpp b/Core/MIPS/IR/IRInst.cpp
index 74bf24dc4a..38b8f54e2d 100644
--- a/Core/MIPS/IR/IRInst.cpp
+++ b/Core/MIPS/IR/IRInst.cpp
@@ -97,7 +97,9 @@ static const IRMeta irMeta[] = {
 	{ IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" },
 	{ IROp::SetCtrlVFPUReg, "SetCtrlVFPUReg", "TC" },
 	{ IROp::SetCtrlVFPUFReg, "SetCtrlVFPUFReg", "TF" },
-
+	{ IROp::FCmovVfpuCC, "FCmovVfpuCC", "FFI" },
+	{ IROp::FCmpVfpuBit, "FCmpVfpuBit", "IFF" },
+	{ IROp::FCmpVfpuAggregate, "FCmpVfpuAggregate", "" },  // NOTE(review): emitted with an immediate mask in dest; confirm "" is intended.
 	{ IROp::Vec4Init, "Vec4Init", "Fv" },
 	{ IROp::Vec4Shuffle, "Vec4Shuffle", "FFs" },
 	{ IROp::Vec4Mov, "Vec4Mov", "FF" },
diff --git a/Core/MIPS/IR/IRInst.h b/Core/MIPS/IR/IRInst.h
index 0393eb9d22..df434eb113 100644
--- a/Core/MIPS/IR/IRInst.h
+++ b/Core/MIPS/IR/IRInst.h
@@ -134,6 +134,8 @@ enum class IROp : u8 {
 
 	FCmp,
 	FCmovVfpuCC,
+	FCmpVfpuBit,
+	FCmpVfpuAggregate,
 
 	// Rounding Mode
 	RestoreRoundingMode,
@@ -157,6 +159,12 @@ enum class IROp : u8 {
 	Vec4Scale,
 	Vec4Dot,
 
+	// vx2i
+	Vec4ExpandU16ToU32Hi,
+	Vec4ExpandU8ToU32Hi,
+	Vec4ExpandS16ToS32Hi,
+	Vec4ExpandS8ToS32Hi,
+
 	// Slow special functions. Used on singles.
 	FSin,
 	FCos,
diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp
index 63ac1afb99..1a719046cd 100644
--- a/Core/MIPS/IR/IRInterpreter.cpp
+++ b/Core/MIPS/IR/IRInterpreter.cpp
@@ -32,6 +32,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
 	const IRInst *end = inst + count;
 	while (inst != end) {
 		switch (inst->op) {
+		case IROp::Nop:
+			_assert_(false);
+			break;
 		case IROp::SetConst:
 			mips->r[inst->dest] = constPool[inst->src1];
 			break;
@@ -209,6 +212,53 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
 #endif
 			break;
 
+		case IROp::FCmpVfpuBit:
+		{
+			// dest packs the VCondition in its low 4 bits and the CC bit index above them.
+			int op = inst->dest & 0xF;
+			int bit = inst->dest >> 4;
+			int result = 0;
+			switch (op) {
+			case VC_EQ: result = mips->f[inst->src1] == mips->f[inst->src2]; break;
+			case VC_NE: result = mips->f[inst->src1] != mips->f[inst->src2]; break;
+			case VC_LT: result = mips->f[inst->src1] < mips->f[inst->src2]; break;
+			case VC_LE: result = mips->f[inst->src1] <= mips->f[inst->src2]; break;
+			case VC_GT: result = mips->f[inst->src1] > mips->f[inst->src2]; break;
+			case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break;
+			case VC_EZ: result = mips->f[inst->src1] == 0.0f; break;
+			case VC_NZ: result = mips->f[inst->src1] != 0.0f; break;
+			case VC_TR: result = 1; break;
+			case VC_FL: result = 0; break;
+			default:
+				// Conditions without a case above evaluate as false.
+				result = 0;
+			}
+			if (result != 0) {
+				mips->vfpuCtrl[VFPU_CTRL_CC] |= (1 << bit);
+			} else {
+				mips->vfpuCtrl[VFPU_CTRL_CC] &= ~(1 << bit);
+			}
+		}
+		break;
+
+		case IROp::FCmpVfpuAggregate:
+		{
+			// Over the CC bits selected by dest: bit 4 = any set, bit 5 = all set.
+			int mask = inst->dest;
+			u32 cc = mips->vfpuCtrl[VFPU_CTRL_CC];
+			int anyBit = (cc & mask) ? 0x10 : 0x00;
+			int allBit = (cc & mask) == mask ? 0x20 : 0x00;
+			mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | anyBit | allBit;
+		}
+		break;
+
+		case IROp::FCmovVfpuCC:
+		// Copy src1 to dest when CC bit (src2 & 0x7f) equals src2 >> 7.
+		if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0x7f)) & 1) == (inst->src2 >> 7)) {
+			mips->f[inst->dest] = mips->f[inst->src1];
+		}
+		break;
+
 		// Not quickly implementable on all platforms, unfortunately.
 		case IROp::Vec4Dot:
 		{