Optimize some common prefixes

This commit is contained in:
Henrik Rydgard 2016-05-13 20:15:20 +02:00
parent 91a6cf5e44
commit 5b2504120d
5 changed files with 48 additions and 5 deletions

View file

@ -108,6 +108,13 @@ namespace MIPSComp {
}
}
// Fills vregs[0..3] with the four consecutive values reg, reg + 1, reg + 2
// and reg + 3. Each value is converted from int to u8 when it is stored.
static void InitRegs(u8 *vregs, int reg) {
	for (int lane = 0; lane < 4; ++lane) {
		vregs[lane] = reg + lane;
	}
}
void IRFrontend::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz, int tempReg) {
if (prefix == 0xE4)
return;
@ -119,6 +126,27 @@ namespace MIPSComp {
for (int i = 0; i < n; i++)
origV[i] = vregs[i];
// Some common vector prefixes
if (sz == V_Quad && IsConsecutive4(vregs)) {
if (prefix == 0xF00E4 && IsConsecutive4(vregs)) {
InitRegs(vregs, tempReg);
ir.Write(IROp::Vec4Neg, vregs[0], origV[0]);
return;
}
if (prefix == 0x00FE4 && IsConsecutive4(vregs)) {
InitRegs(vregs, tempReg);
ir.Write(IROp::Vec4Abs, vregs[0], origV[0]);
return;
}
// Pure shuffle
if (prefix == (prefix & 0xFF)) {
InitRegs(vregs, tempReg);
ir.Write(IROp::Vec4Shuffle, vregs[0], origV[0], prefix);
return;
}
}
// Alright, fall back to the generic approach.
for (int i = 0; i < n; i++) {
int regnum = (prefix >> (i * 2)) & 3;
int abs = (prefix >> (8 + i)) & 1;
@ -395,7 +423,6 @@ namespace MIPSComp {
GetVectorRegsPrefixT(tregs, sz, vt);
GetVectorRegsPrefixD(dregs, V_Single, vd);
// TODO: applyprefixST here somehow (shuffle, etc...)
ir.Write(IROp::FMul, IRVTEMP_0, sregs[0], tregs[0]);
int n = GetNumVectorElements(sz);
@ -1050,7 +1077,7 @@ namespace MIPSComp {
}
} else if (sz == M_4x4) {
// Tekken 6 has a case here: MEE
logBlocks = 1;
// logBlocks = 1;
}
// Fallback. Expands a LOT
@ -1141,8 +1168,8 @@ namespace MIPSComp {
tempregs[i] = temp;
}
for (int i = 0; i < n; i++) {
u8 temp = tempregs[i];
ir.Write(IROp::FMov, dregs[i], temp);
if (tempregs[i] != dregs[i])
ir.Write(IROp::FMov, dregs[i], tempregs[i]);
}
}

View file

@ -109,6 +109,8 @@ static const IRMeta irMeta[] = {
{ IROp::Vec4Mul, "Vec4Mul", "FFF" },
{ IROp::Vec4Scale, "Vec4Scale", "FFF" },
{ IROp::Vec4Dot, "Vec4Dot", "FFF" },
{ IROp::Vec4Neg, "Vec4Neg", "FF" },
{ IROp::Vec4Abs, "Vec4Abs", "FF" },
{ IROp::Interpret, "Interpret", "_C" },
{ IROp::Downcount, "Downcount", "_II" },

View file

@ -159,6 +159,8 @@ enum class IROp : u8 {
Vec4Div,
Vec4Scale,
Vec4Dot,
Vec4Neg,
Vec4Abs,
// vx2i
Vec4ExpandU16ToU32Hi,

View file

@ -212,6 +212,16 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
#endif
break;
case IROp::Vec4Neg:
for (int i = 0; i < 4; i++)
mips->f[inst->dest + i] = -mips->f[inst->src1 + i];
break;
case IROp::Vec4Abs:
for (int i = 0; i < 4; i++)
mips->f[inst->dest + i] = fabsf(mips->f[inst->src1 + i]);
break;
case IROp::FCmpVfpuBit:
{
int op = inst->dest & 0xF;

View file

@ -114,11 +114,11 @@ bool OptimizeFPMoves(const IRWriter &in, IRWriter &out) {
inst.op = IROp::FMov;
inst.src1 = prev.src1;
out.Write(inst);
logBlocks = true;
} else {
out.Write(inst);
}
break;
default:
// Remap constants to the new reality
const IRMeta *m = GetIRMeta(inst.op);
@ -487,6 +487,8 @@ bool PropagateConstants(const IRWriter &in, IRWriter &out) {
case IROp::Vec4Dot:
case IROp::Vec4Scale:
case IROp::Vec4Shuffle:
case IROp::Vec4Neg:
case IROp::Vec4Abs:
out.Write(inst);
break;