IR: vcmp, vcmov, vhdp

This commit is contained in:
Henrik Rydgard 2016-05-12 22:35:31 +02:00
parent 1851458628
commit 7268abec61
4 changed files with 144 additions and 7 deletions

View file

@ -358,7 +358,38 @@ namespace MIPSComp {
}
// Emits IR for vhdp (homogeneous dot product). With n = element count of the
// op's vector size, the generated sequence computes
//   d[0] = s[0]*t[0] + ... + s[n-2]*t[n-2] + t[n-1]
// i.e. for n > 1 the last T lane is added unmultiplied and the last S lane is
// not read. For n == 1 the loop does not run and the result is s[0]*t[0].
// The sum is accumulated in IRVTEMP_0 (IRVTEMP_0 + 1 holds each product) and
// only moved to the destination at the end.
//
// Bails out via DISABLE when the prefix state is unknown.
// NOTE(review): DISABLE / CONDITIONAL_DISABLE are macros defined outside this
// view; presumably they fall back to a non-IR path and return — confirm.
void IRFrontend::Comp_VHdp(MIPSOpcode op) {
	CONDITIONAL_DISABLE;
	if (js.HasUnknownPrefix()) {
		DISABLE;
	}

	int vd = _VD;
	int vs = _VS;
	int vt = _VT;
	VectorSize sz = GetVecSize(op);

	// TODO: Force read one of them into regs? probably not.
	u8 sregs[4], tregs[4], dregs[1];
	GetVectorRegsPrefixS(sregs, sz, vs);
	GetVectorRegsPrefixT(tregs, sz, vt);
	// The result is a single float regardless of the source vector size.
	GetVectorRegsPrefixD(dregs, V_Single, vd);

	// TODO: applyprefixST here somehow (shuffle, etc...)

	// Seed the accumulator with the first product.
	ir.Write(IROp::FMul, IRVTEMP_0, sregs[0], tregs[0]);

	int n = GetNumVectorElements(sz);
	for (int i = 1; i < n; i++) {
		if (i == n - 1) {
			// Last lane: add t[i] directly, without multiplying by s[i].
			ir.Write(IROp::FAdd, IRVTEMP_0, IRVTEMP_0, tregs[i]);
		} else {
			// sum += s[i]*t[i];
			ir.Write(IROp::FMul, IRVTEMP_0 + 1, sregs[i], tregs[i]);
			ir.Write(IROp::FAdd, IRVTEMP_0, IRVTEMP_0, IRVTEMP_0 + 1);
		}
	}

	ir.Write(IROp::FMov, dregs[0], IRVTEMP_0);
	ApplyPrefixD(dregs, V_Single);
}
static const float MEMORY_ALIGNED16(vavg_table[4]) = { 1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f };
@ -840,7 +871,6 @@ namespace MIPSComp {
MatrixSize sz = GetMtxSize(op);
if (sz != M_4x4) {
// logBlocks = true;
DISABLE;
}
int n = GetMatrixSide(sz);
@ -1165,16 +1195,66 @@ namespace MIPSComp {
}
// Emits IR for vcmp. For each of the n lanes of the op's vector size it writes
// one FCmpVfpuBit comparing sregs[i] with tregs[i]; then a single
// FCmpVfpuAggregate carrying a mask with one bit set per lane compared.
//
// FCmpVfpuBit operand packing (first operand): the condition code from the low
// 4 bits of the opcode goes in bits 0-3, the lane index in the bits above.
//
// Bails out via DISABLE when the prefix state is unknown.
// NOTE(review): DISABLE / CONDITIONAL_DISABLE are macros defined outside this
// view; presumably they fall back to a non-IR path and return — confirm.
void IRFrontend::Comp_Vcmp(MIPSOpcode op) {
	CONDITIONAL_DISABLE;
	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	VCondition cond = (VCondition)(op & 0xF);

	u8 sregs[4], tregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixT(tregs, sz, _VT);

	// Collect one mask bit per lane so the aggregate step only considers the
	// lanes that were actually compared.
	int mask = 0;
	for (int i = 0; i < n; i++) {
		ir.Write(IROp::FCmpVfpuBit, cond | (i << 4), sregs[i], tregs[i]);
		mask |= (1 << i);
	}
	ir.Write(IROp::FCmpVfpuAggregate, mask);
}
// Emits IR for vcmov (conditional vector move). One FCmovVfpuCC is written per
// lane, from sregs[i] to dregs[i].
//
// FCmovVfpuCC operand packing (third operand): bits 0-6 select which CC bit to
// test, bit 7 holds !tf, where tf is opcode bit 19.
//   - imm3 (opcode bits 16-18) < 6: every lane tests the same CC bit, imm3.
//   - otherwise: lane i tests CC bit i.
//
// Bails out via DISABLE when the prefix state is unknown or when any
// destination lane fails IsOverlapSafeAllowS against the source lanes.
// NOTE(review): DISABLE / CONDITIONAL_DISABLE are macros defined outside this
// view; presumably they fall back to a non-IR path and return — confirm.
void IRFrontend::Comp_Vcmov(MIPSOpcode op) {
	CONDITIONAL_DISABLE;
	if (js.HasUnknownPrefix()) {
		DISABLE;
	}

	// NOTE(review): looks like leftover debugging (forces block logging for any
	// block containing this op) — confirm whether it should stay.
	logBlocks = 1;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);
	int tf = (op >> 19) & 1;
	int imm3 = (op >> 16) & 7;

	for (int i = 0; i < n; ++i) {
		// Simplification: Disable if overlap unsafe
		if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs)) {
			DISABLE;
		}
	}

	if (imm3 < 6) {
		// Test one bit of CC. This bit decides whether none or all subregisters are copied.
		for (int i = 0; i < n; i++) {
			ir.Write(IROp::FCmovVfpuCC, dregs[i], sregs[i], (imm3) | ((!tf) << 7));
		}
	} else {
		// Look at the bottom four bits of CC to individually decide if the subregisters should be copied.
		for (int i = 0; i < n; i++) {
			ir.Write(IROp::FCmovVfpuCC, dregs[i], sregs[i], (i) | ((!tf) << 7));
		}
	}
	ApplyPrefixD(dregs, sz);
}
void IRFrontend::Comp_Viim(MIPSOpcode op) {
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix())
DISABLE;
@ -1186,6 +1266,7 @@ namespace MIPSComp {
}
void IRFrontend::Comp_Vfim(MIPSOpcode op) {
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix())
DISABLE;

View file

@ -97,7 +97,9 @@ static const IRMeta irMeta[] = {
{ IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" },
{ IROp::SetCtrlVFPUReg, "SetCtrlVFPUReg", "TC" },
{ IROp::SetCtrlVFPUFReg, "SetCtrlVFPUFReg", "TF" },
{ IROp::FCmovVfpuCC, "FCmovVfpuCC", "FFI" },
{ IROp::FCmpVfpuBit, "FCmpVfpuBit", "IFF" },
{ IROp::FCmpVfpuAggregate, "FCmpVfpuAggregate", ""},
{ IROp::Vec4Init, "Vec4Init", "Fv" },
{ IROp::Vec4Shuffle, "Vec4Shuffle", "FFs" },
{ IROp::Vec4Mov, "Vec4Mov", "FF" },

View file

@ -134,6 +134,8 @@ enum class IROp : u8 {
FCmp,
FCmovVfpuCC,
FCmpVfpuBit,
FCmpVfpuAggregate,
// Rounding Mode
RestoreRoundingMode,
@ -157,6 +159,12 @@ enum class IROp : u8 {
Vec4Scale,
Vec4Dot,
// vx2i
Vec4ExpandU16ToU32Hi,
Vec4ExpandU8ToU32Hi,
Vec4ExpandS16ToS32Hi,
Vec4ExpandS8ToS32Hi,
// Slow special functions. Used on singles.
FSin,
FCos,

View file

@ -32,6 +32,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
const IRInst *end = inst + count;
while (inst != end) {
switch (inst->op) {
case IROp::Nop:
_assert_(false);
break;
case IROp::SetConst:
mips->r[inst->dest] = constPool[inst->src1];
break;
@ -209,6 +212,49 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
#endif
break;
case IROp::FCmpVfpuBit:
{
int op = inst->dest & 0xF;
int bit = inst->dest >> 4;
int result = 0;
switch (op) {
case VC_EQ: result = mips->f[inst->src1] == mips->f[inst->src2]; break;
case VC_NE: result = mips->f[inst->src1] != mips->f[inst->src2]; break;
case VC_LT: result = mips->f[inst->src1] < mips->f[inst->src2]; break;
case VC_LE: result = mips->f[inst->src1] <= mips->f[inst->src2]; break;
case VC_GT: result = mips->f[inst->src1] > mips->f[inst->src2]; break;
case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break;
case VC_EZ: result = mips->f[inst->src1] == 0.0f; break;
case VC_NZ: result = mips->f[inst->src1] != 0.0f; break;
case VC_TR: result = 1; break;
case VC_FL: result = 0; break;
default:
result = 0;
}
if (result != 0) {
mips->vfpuCtrl[VFPU_CTRL_CC] |= (1 << bit);
} else {
mips->vfpuCtrl[VFPU_CTRL_CC] &= ~(1 << bit);
}
}
break;
case IROp::FCmpVfpuAggregate:
{
int mask = inst->dest;
u32 cc = mips->vfpuCtrl[VFPU_CTRL_CC];
int a = (cc & mask) ? 0x10 : 0x00;
int b = (cc & mask) == mask ? 0x20 : 0x00;
mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | a | b;;
}
break;
case IROp::FCmovVfpuCC:
if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0x7f)) & 1) == (inst->src2 >> 7)) {
mips->f[inst->dest] = mips->f[inst->src1];
}
break;
// Not quickly implementable on all platforms, unfortunately.
case IROp::Vec4Dot:
{