mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
IR: vcmp, vcmov, vhdp
This commit is contained in:
parent
1851458628
commit
7268abec61
4 changed files with 144 additions and 7 deletions
|
@ -358,7 +358,38 @@ namespace MIPSComp {
|
|||
}
|
||||
|
||||
// Emits IR for vhdp: a dot product in which the last S element is not
// multiplied in, i.e.
//   d[0] = s[0]*t[0] + ... + s[n-2]*t[n-2] + t[n-1]
// The result is a single float written to the (prefix-adjusted) D register.
//
// op: the opcode being compiled; register numbers and vector size are
//     decoded from it via _VD/_VS/_VT and GetVecSize().
//
// Bails out through DISABLE when the prefix state is unknown at compile time
// (DISABLE is defined outside this view; presumably it falls back to the
// generic path and returns -- confirm against the macro).
void IRFrontend::Comp_VHdp(MIPSOpcode op) {
	CONDITIONAL_DISABLE;
	if (js.HasUnknownPrefix()) {
		DISABLE;
	}

	int vd = _VD;
	int vs = _VS;
	int vt = _VT;
	VectorSize sz = GetVecSize(op);

	// TODO: Force read one of them into regs? probably not.
	u8 sregs[4], tregs[4], dregs[1];
	GetVectorRegsPrefixS(sregs, sz, vs);
	GetVectorRegsPrefixT(tregs, sz, vt);
	GetVectorRegsPrefixD(dregs, V_Single, vd);

	// TODO: applyprefixST here somehow (shuffle, etc...)
	// Seed the accumulator with the first product.
	ir.Write(IROp::FMul, IRVTEMP_0, sregs[0], tregs[0]);

	// NOTE(review): when n == 1 the loop below never runs, so the result is
	// s[0]*t[0] rather than t[0]. Confirm vhdp is never encoded with a
	// single-element size.
	int n = GetNumVectorElements(sz);
	for (int i = 1; i < n; i++) {
		// sum += s[i]*t[i];
		if (i == n - 1) {
			// Last lane: add t[i] directly, without multiplying by s[i].
			ir.Write(IROp::FAdd, IRVTEMP_0, IRVTEMP_0, tregs[i]);
		} else {
			// Use the next temp as scratch for the product, then accumulate.
			ir.Write(IROp::FMul, IRVTEMP_0 + 1, sregs[i], tregs[i]);
			ir.Write(IROp::FAdd, IRVTEMP_0, IRVTEMP_0, IRVTEMP_0 + 1);
		}
	}

	ir.Write(IROp::FMov, dregs[0], IRVTEMP_0);
	ApplyPrefixD(dregs, V_Single);
}
|
||||
|
||||
static const float MEMORY_ALIGNED16(vavg_table[4]) = { 1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f };
|
||||
|
@ -840,7 +871,6 @@ namespace MIPSComp {
|
|||
|
||||
MatrixSize sz = GetMtxSize(op);
|
||||
if (sz != M_4x4) {
|
||||
// logBlocks = true;
|
||||
DISABLE;
|
||||
}
|
||||
int n = GetMatrixSide(sz);
|
||||
|
@ -1165,16 +1195,66 @@ namespace MIPSComp {
|
|||
}
|
||||
|
||||
// Emits IR for vcmp: compares each element of S against the matching element
// of T and records the outcome in the VFPU condition-code register.
//
// For every lane i one FCmpVfpuBit is written. Its first operand packs the
// condition code in the low 4 bits and the lane index in the bits above
// (cond | (i << 4)); the two source registers follow. A final
// FCmpVfpuAggregate carries a mask with one bit per compared lane, which the
// interpreter uses to derive the "any" (bit 4) and "all" (bit 5) CC bits.
//
// op: the opcode being compiled; the condition is its low 4 bits.
//
// Bails out through DISABLE when the prefix state is unknown at compile time
// (DISABLE is defined outside this view; presumably it falls back to the
// generic path and returns -- confirm against the macro).
void IRFrontend::Comp_Vcmp(MIPSOpcode op) {
	CONDITIONAL_DISABLE;
	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	VCondition cond = (VCondition)(op & 0xF);

	u8 sregs[4], tregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixT(tregs, sz, _VT);

	// Build the lane mask alongside the per-lane compares so the aggregate
	// step only considers lanes that were actually compared.
	int mask = 0;
	for (int i = 0; i < n; i++) {
		ir.Write(IROp::FCmpVfpuBit, cond | (i << 4), sregs[i], tregs[i]);
		mask |= (1 << i);
	}
	ir.Write(IROp::FCmpVfpuAggregate, mask);
}
|
||||
|
||||
// Emits IR for vcmovt/vcmovf: conditionally copies each element of S into D
// depending on bits of the VFPU condition-code register.
//
// One FCmovVfpuCC is written per lane. Its immediate operand packs:
//   bits 0..6 - index of the CC bit to test
//   bit  7    - the value that bit must have for the copy to happen (!tf)
//
// When imm3 < 6, every lane tests the same CC bit (imm3), so either all or
// none of the lanes are copied. Otherwise lane i tests CC bit i.
// NOTE(review): imm3 == 7 takes the same per-lane path as imm3 == 6 here;
// confirm that is the intended handling for that encoding.
//
// op: the opcode being compiled; tf is bit 19 and imm3 is bits 16..18.
//
// Bails out through DISABLE when the prefix state is unknown or when the
// destination overlaps the source unsafely (DISABLE is defined outside this
// view; presumably it falls back to the generic path and returns).
void IRFrontend::Comp_Vcmov(MIPSOpcode op) {
	CONDITIONAL_DISABLE;
	if (js.HasUnknownPrefix()) {
		DISABLE;
	}

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);
	int tf = (op >> 19) & 1;
	int imm3 = (op >> 16) & 7;

	// Run every overlap check before emitting anything, so that bailing out
	// never leaves a partially written sequence behind.
	for (int i = 0; i < n; ++i) {
		// Simplification: Disable if overlap unsafe
		if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs)) {
			DISABLE;
		}
	}

	const int wantedValue = (!tf) << 7;
	for (int i = 0; i < n; i++) {
		// imm3 < 6: one shared CC bit decides whether none or all lanes are copied.
		// Otherwise: the bottom CC bits decide lane by lane.
		const int ccBit = (imm3 < 6) ? imm3 : i;
		ir.Write(IROp::FCmovVfpuCC, dregs[i], sregs[i], ccBit | wantedValue);
	}
	ApplyPrefixD(dregs, sz);
}
|
||||
|
||||
void IRFrontend::Comp_Viim(MIPSOpcode op) {
|
||||
CONDITIONAL_DISABLE;
|
||||
if (js.HasUnknownPrefix())
|
||||
DISABLE;
|
||||
|
||||
|
@ -1186,6 +1266,7 @@ namespace MIPSComp {
|
|||
}
|
||||
|
||||
void IRFrontend::Comp_Vfim(MIPSOpcode op) {
|
||||
CONDITIONAL_DISABLE;
|
||||
if (js.HasUnknownPrefix())
|
||||
DISABLE;
|
||||
|
||||
|
|
|
@ -97,7 +97,9 @@ static const IRMeta irMeta[] = {
|
|||
{ IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" },
|
||||
{ IROp::SetCtrlVFPUReg, "SetCtrlVFPUReg", "TC" },
|
||||
{ IROp::SetCtrlVFPUFReg, "SetCtrlVFPUFReg", "TF" },
|
||||
|
||||
{ IROp::FCmovVfpuCC, "FCmovVfpuCC", "FFI" },
|
||||
{ IROp::FCmpVfpuBit, "FCmpVfpuBit", "IFF" },
|
||||
{ IROp::FCmpVfpuAggregate, "FCmpVfpuAggregate", ""},
|
||||
{ IROp::Vec4Init, "Vec4Init", "Fv" },
|
||||
{ IROp::Vec4Shuffle, "Vec4Shuffle", "FFs" },
|
||||
{ IROp::Vec4Mov, "Vec4Mov", "FF" },
|
||||
|
|
|
@ -134,6 +134,8 @@ enum class IROp : u8 {
|
|||
FCmp,
|
||||
|
||||
FCmovVfpuCC,
|
||||
FCmpVfpuBit,
|
||||
FCmpVfpuAggregate,
|
||||
|
||||
// Rounding Mode
|
||||
RestoreRoundingMode,
|
||||
|
@ -157,6 +159,12 @@ enum class IROp : u8 {
|
|||
Vec4Scale,
|
||||
Vec4Dot,
|
||||
|
||||
// vx2i
|
||||
Vec4ExpandU16ToU32Hi,
|
||||
Vec4ExpandU8ToU32Hi,
|
||||
Vec4ExpandS16ToS32Hi,
|
||||
Vec4ExpandS8ToS32Hi,
|
||||
|
||||
// Slow special functions. Used on singles.
|
||||
FSin,
|
||||
FCos,
|
||||
|
|
|
@ -32,6 +32,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
|
|||
const IRInst *end = inst + count;
|
||||
while (inst != end) {
|
||||
switch (inst->op) {
|
||||
case IROp::Nop:
|
||||
_assert_(false);
|
||||
break;
|
||||
case IROp::SetConst:
|
||||
mips->r[inst->dest] = constPool[inst->src1];
|
||||
break;
|
||||
|
@ -209,6 +212,49 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
|
|||
#endif
|
||||
break;
|
||||
|
||||
case IROp::FCmpVfpuBit:
|
||||
{
|
||||
int op = inst->dest & 0xF;
|
||||
int bit = inst->dest >> 4;
|
||||
int result = 0;
|
||||
switch (op) {
|
||||
case VC_EQ: result = mips->f[inst->src1] == mips->f[inst->src2]; break;
|
||||
case VC_NE: result = mips->f[inst->src1] != mips->f[inst->src2]; break;
|
||||
case VC_LT: result = mips->f[inst->src1] < mips->f[inst->src2]; break;
|
||||
case VC_LE: result = mips->f[inst->src1] <= mips->f[inst->src2]; break;
|
||||
case VC_GT: result = mips->f[inst->src1] > mips->f[inst->src2]; break;
|
||||
case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break;
|
||||
case VC_EZ: result = mips->f[inst->src1] == 0.0f; break;
|
||||
case VC_NZ: result = mips->f[inst->src1] != 0.0f; break;
|
||||
case VC_TR: result = 1; break;
|
||||
case VC_FL: result = 0; break;
|
||||
default:
|
||||
result = 0;
|
||||
}
|
||||
if (result != 0) {
|
||||
mips->vfpuCtrl[VFPU_CTRL_CC] |= (1 << bit);
|
||||
} else {
|
||||
mips->vfpuCtrl[VFPU_CTRL_CC] &= ~(1 << bit);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case IROp::FCmpVfpuAggregate:
|
||||
{
|
||||
int mask = inst->dest;
|
||||
u32 cc = mips->vfpuCtrl[VFPU_CTRL_CC];
|
||||
int a = (cc & mask) ? 0x10 : 0x00;
|
||||
int b = (cc & mask) == mask ? 0x20 : 0x00;
|
||||
mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | a | b;;
|
||||
}
|
||||
break;
|
||||
|
||||
case IROp::FCmovVfpuCC:
|
||||
if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0x7f)) & 1) == (inst->src2 >> 7)) {
|
||||
mips->f[inst->dest] = mips->f[inst->src1];
|
||||
}
|
||||
break;
|
||||
|
||||
// Not quickly implementable on all platforms, unfortunately.
|
||||
case IROp::Vec4Dot:
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue