Implement Comp_VV2Op vfpu ops in the x86 jit.

Also, some cleanup.  No need for this extra boilerplate, simplify...

This makes the Bink video issue slightly better, in jit only.
This commit is contained in:
Unknown W. Brackets 2013-02-18 20:02:58 -08:00
parent 65b2e7ca4b
commit 2dfdf3ffeb
6 changed files with 161 additions and 52 deletions

View file

@ -40,6 +40,11 @@ namespace MIPSComp
DISABLE;
}
void Jit::Comp_VV2Op(u32 op)
{
DISABLE;
}
void Jit::Comp_Mftv(u32 op)
{
DISABLE;

View file

@ -112,6 +112,7 @@ public:
void Comp_VPFX(u32 op);
void Comp_VDot(u32 op);
void Comp_VecDo3(u32 op);
void Comp_VV2Op(u32 op);
void Comp_Mftv(u32 op);
void Comp_Vmtvc(u32 op);

View file

@ -492,8 +492,8 @@ namespace MIPSInt
case 17: d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
case 18: d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
case 19: d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos
case 20: d[i] = powf(2.0f, s[i]); break;
case 21: d[i] = logf(s[i])/log(2.0f); break;
case 20: d[i] = powf(2.0f, s[i]); break; //vexp2
case 21: d[i] = logf(s[i])/log(2.0f); break; //vlog2
case 22: d[i] = sqrtf(s[i]); break; //vsqrt
case 23: d[i] = asinf(s[i] * (float)M_2_PI); break; //vasin
case 24: d[i] = -1.0f / s[i]; break; // vnrcp

View file

@ -582,31 +582,31 @@ const MIPSInstruction tableVFPU7[32] =
// 110100 00000 10111 0000000000000000
const MIPSInstruction tableVFPU4[32] = //110100 00000 xxxxx
{
INSTR("vmov", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
INSTR("vabs", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
INSTR("vneg", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
INSTR("vmov", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
INSTR("vabs", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
INSTR("vneg", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
INSTR("vidt", &Jit::Comp_Generic, Dis_VectorSet1, Int_Vidt,IS_VFPU|OUT_EAT_PREFIX),
INSTR("vsat0", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vsat1", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vsat0", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vsat1", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vzero", &Jit::Comp_Generic, Dis_VectorSet1, Int_VVectorInit, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vone", &Jit::Comp_Generic, Dis_VectorSet1, Int_VVectorInit, IS_VFPU|OUT_EAT_PREFIX),
//8
{-2},{-2},{-2},{-2},{-2},{-2},{-2},{-2},
//16
INSTR("vrcp", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vrsq", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vsin", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vcos", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vexp2", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vlog2", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vsqrt", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vasin", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vrcp", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vrsq", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vsin", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vcos", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vexp2", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vlog2", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vsqrt", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vasin", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
//24
INSTR("vnrcp", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
INSTR("vnrcp", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
{-2},
INSTR("vnsin", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
INSTR("vnsin", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
{-2},
INSTR("vrexp2",&Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
INSTR("vrexp2",&Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
{-2},{-2},{-2},
//32
};

View file

@ -28,9 +28,9 @@
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocks(); Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }
#define DISABLE { fpr.ReleaseSpillLocks(); Comp_Generic(op); return; }
#define _RS ((op>>21) & 0x1F)
@ -167,7 +167,7 @@ void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
// Vector regs can overlap in all sorts of swizzled ways.
// This does allow a single overlap in sregs[i].
bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn, u8 tregs[])
bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL)
{
for (int i = 0; i < sn; ++i)
{
@ -184,7 +184,7 @@ bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn, u8 tregs[
return true;
}
bool IsOverlapSafe(int dreg, int di, int sn, u8 sregs[], int tn, u8 tregs[])
bool IsOverlapSafe(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL)
{
return IsOverlapSafeAllowS(dreg, di, sn, sregs, tn, tregs) && sregs[di] != dreg;
}
@ -339,23 +339,18 @@ void Jit::Comp_SVQ(u32 op)
void Jit::Comp_VDot(u32 op) {
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
Comp_Generic(op);
return;
}
if (js.HasUnknownPrefix())
DISABLE;
int vd = _VD;
int vs = _VS;
int vt = _VT;
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
// TODO: Force read one of them into regs? probably not.
u8 sregs[4], tregs[4], dregs[1];
GetVectorRegsPrefixS(sregs, sz, vs);
GetVectorRegsPrefixT(tregs, sz, vt);
GetVectorRegsPrefixD(dregs, V_Single, vd);
GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixT(tregs, sz, _VT);
GetVectorRegsPrefixD(dregs, V_Single, _VD);
int n = GetNumVectorElements(sz);
X64Reg tempxreg = XMM0;
if (IsOverlapSafe(dregs[0], 0, n, sregs, n, tregs))
{
@ -387,20 +382,8 @@ void Jit::Comp_VDot(u32 op) {
void Jit::Comp_VecDo3(u32 op) {
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) {
Comp_Generic(op);
return;
}
int vd = _VD;
int vs = _VS;
int vt = _VT;
VectorSize sz = GetVecSize(op);
u8 sregs[4], tregs[4], dregs[4];
GetVectorRegsPrefixS(sregs, sz, vs);
GetVectorRegsPrefixT(tregs, sz, vt);
GetVectorRegsPrefixD(dregs, sz, vd);
if (js.HasUnknownPrefix())
DISABLE;
void (XEmitter::*xmmop)(X64Reg, OpArg) = NULL;
switch (op >> 26)
@ -430,14 +413,16 @@ void Jit::Comp_VecDo3(u32 op) {
}
if (xmmop == NULL)
{
fpr.ReleaseSpillLocks();
Comp_Generic(op);
return;
}
DISABLE;
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
u8 sregs[4], tregs[4], dregs[4];
GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixT(tregs, sz, _VT);
GetVectorRegsPrefixD(dregs, sz, _VD);
X64Reg tempxregs[4];
for (int i = 0; i < n; ++i)
{
@ -480,6 +465,123 @@ void Jit::Comp_VecDo3(u32 op) {
fpr.ReleaseSpillLocks();
}
void Jit::Comp_VV2Op(u32 op) {
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix())
DISABLE;
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
u8 sregs[4], dregs[4];
GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixD(dregs, sz, _VD);
X64Reg tempxregs[4];
for (int i = 0; i < n; ++i)
{
if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs))
{
int reg = fpr.GetTempV();
fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
fpr.SpillLockV(reg);
tempxregs[i] = fpr.VX(reg);
}
else
{
fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);
fpr.SpillLockV(dregs[i]);
tempxregs[i] = fpr.VX(dregs[i]);
}
}
// Warning: sregs[i] and tempxregs[i] may be the same reg.
// Helps for vmov, hurts for vrcp, etc.
for (int i = 0; i < n; ++i)
{
switch ((op >> 16) & 0x1f)
{
case 0: // d[i] = s[i]; break; //vmov
// Probably for swizzle.
if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
MOVSS(tempxregs[i], fpr.V(sregs[i]));
break;
case 1: // d[i] = fabsf(s[i]); break; //vabs
if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
MOVSS(tempxregs[i], fpr.V(sregs[i]));
ANDPS(tempxregs[i], M((void *)&noSignMask));
break;
case 2: // d[i] = -s[i]; break; //vneg
if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
MOVSS(tempxregs[i], fpr.V(sregs[i]));
XORPS(tempxregs[i], M((void *)&signBitLower));
break;
case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat0
if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
MOVSS(tempxregs[i], fpr.V(sregs[i]));
MAXSS(tempxregs[i], M((void *)&zero));
MINSS(tempxregs[i], M((void *)&one));
break;
case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat1
if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
MOVSS(tempxregs[i], fpr.V(sregs[i]));
MAXSS(tempxregs[i], M((void *)&minus_one));
MINSS(tempxregs[i], M((void *)&one));
break;
case 16: // d[i] = 1.0f / s[i]; break; //vrcp
MOVSS(XMM0, M((void *)&one));
DIVSS(XMM0, fpr.V(sregs[i]));
MOVSS(tempxregs[i], R(XMM0));
break;
case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
SQRTSS(XMM0, fpr.V(sregs[i]));
MOVSS(tempxregs[i], M((void *)&one));
DIVSS(tempxregs[i], R(XMM0));
break;
case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
DISABLE;
break;
case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos
DISABLE;
break;
case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2
DISABLE;
break;
case 21: // d[i] = logf(s[i])/log(2.0f); break; //vlog2
DISABLE;
break;
case 22: // d[i] = sqrtf(s[i]); break; //vsqrt
SQRTSS(tempxregs[i], fpr.V(sregs[i]));
break;
case 23: // d[i] = asinf(s[i] * (float)M_2_PI); break; //vasin
DISABLE;
break;
case 24: // d[i] = -1.0f / s[i]; break; // vnrcp
MOVSS(XMM0, M((void *)&one));
DIVSS(XMM0, fpr.V(sregs[i]));
XORPS(XMM0, M((void *)&signBitLower));
MOVSS(tempxregs[i], R(XMM0));
break;
case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin
DISABLE;
break;
case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2
DISABLE;
break;
}
}
for (int i = 0; i < n; ++i)
{
if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))
MOVSS(fpr.V(dregs[i]), tempxregs[i]);
}
ApplyPrefixD(dregs, sz);
fpr.ReleaseSpillLocks();
}
void Jit::Comp_Mftv(u32 op) {
CONDITIONAL_DISABLE;

View file

@ -187,6 +187,7 @@ public:
void Comp_VPFX(u32 op);
void Comp_VDot(u32 op);
void Comp_VecDo3(u32 op);
void Comp_VV2Op(u32 op);
void Comp_Mftv(u32 op);
void Comp_Vmtvc(u32 op);