mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Implement Comp_VV2Op vfpu ops in the x86 jit.
Also some cleanup: remove boilerplate that is not needed and simplify. This makes the Bink video issue slightly better, in the JIT only.
This commit is contained in:
parent
65b2e7ca4b
commit
2dfdf3ffeb
6 changed files with 161 additions and 52 deletions
|
@ -40,6 +40,11 @@ namespace MIPSComp
|
|||
DISABLE;
|
||||
}
|
||||
|
||||
// No native implementation in this backend yet: the entire body is the
// DISABLE macro. NOTE(review): DISABLE is defined outside this view;
// presumably it defers to the generic fallback and returns - confirm.
void Jit::Comp_VV2Op(u32 op) {
	DISABLE;
}
|
||||
|
||||
void Jit::Comp_Mftv(u32 op)
|
||||
{
|
||||
DISABLE;
|
||||
|
|
|
@ -112,6 +112,7 @@ public:
|
|||
void Comp_VPFX(u32 op);
|
||||
void Comp_VDot(u32 op);
|
||||
void Comp_VecDo3(u32 op);
|
||||
void Comp_VV2Op(u32 op);
|
||||
void Comp_Mftv(u32 op);
|
||||
void Comp_Vmtvc(u32 op);
|
||||
|
||||
|
|
|
@ -492,8 +492,8 @@ namespace MIPSInt
|
|||
case 17: d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
|
||||
case 18: d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
|
||||
case 19: d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos
|
||||
case 20: d[i] = powf(2.0f, s[i]); break;
|
||||
case 21: d[i] = logf(s[i])/log(2.0f); break;
|
||||
case 20: d[i] = powf(2.0f, s[i]); break; //vexp2
|
||||
case 21: d[i] = logf(s[i])/log(2.0f); break; //vlog2
|
||||
case 22: d[i] = sqrtf(s[i]); break; //vsqrt
|
||||
case 23: d[i] = asinf(s[i] * (float)M_2_PI); break; //vasin
|
||||
case 24: d[i] = -1.0f / s[i]; break; // vnrcp
|
||||
|
|
|
@ -582,31 +582,31 @@ const MIPSInstruction tableVFPU7[32] =
|
|||
// 110100 00000 10111 0000000000000000
|
||||
const MIPSInstruction tableVFPU4[32] = //110100 00000 xxxxx
|
||||
{
|
||||
INSTR("vmov", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vabs", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vneg", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vmov", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vabs", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vneg", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vidt", &Jit::Comp_Generic, Dis_VectorSet1, Int_Vidt,IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vsat0", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vsat1", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vsat0", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vsat1", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vzero", &Jit::Comp_Generic, Dis_VectorSet1, Int_VVectorInit, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vone", &Jit::Comp_Generic, Dis_VectorSet1, Int_VVectorInit, IS_VFPU|OUT_EAT_PREFIX),
|
||||
//8
|
||||
{-2},{-2},{-2},{-2},{-2},{-2},{-2},{-2},
|
||||
//16
|
||||
INSTR("vrcp", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vrsq", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vsin", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vcos", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vexp2", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vlog2", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vsqrt", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vasin", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vrcp", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vrsq", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vsin", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vcos", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vexp2", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vlog2", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vsqrt", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vasin", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
//24
|
||||
INSTR("vnrcp", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vnrcp", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
|
||||
{-2},
|
||||
INSTR("vnsin", &Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vnsin", &Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op,IS_VFPU|OUT_EAT_PREFIX),
|
||||
{-2},
|
||||
INSTR("vrexp2",&Jit::Comp_Generic, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
INSTR("vrexp2",&Jit::Comp_VV2Op, Dis_VectorSet2, Int_VV2Op, IS_VFPU|OUT_EAT_PREFIX),
|
||||
{-2},{-2},{-2},
|
||||
//32
|
||||
};
|
||||
|
|
|
@ -28,9 +28,9 @@
|
|||
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
||||
// Currently known non working ones should have DISABLE.
|
||||
|
||||
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
|
||||
// #define CONDITIONAL_DISABLE { fpr.ReleaseSpillLocks(); Comp_Generic(op); return; }
|
||||
#define CONDITIONAL_DISABLE ;
|
||||
#define DISABLE { Comp_Generic(op); return; }
|
||||
#define DISABLE { fpr.ReleaseSpillLocks(); Comp_Generic(op); return; }
|
||||
|
||||
|
||||
#define _RS ((op>>21) & 0x1F)
|
||||
|
@ -167,7 +167,7 @@ void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
|
|||
|
||||
// Vector regs can overlap in all sorts of swizzled ways.
|
||||
// This does allow a single overlap in sregs[i].
|
||||
bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn, u8 tregs[])
|
||||
bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL)
|
||||
{
|
||||
for (int i = 0; i < sn; ++i)
|
||||
{
|
||||
|
@ -184,7 +184,7 @@ bool IsOverlapSafeAllowS(int dreg, int di, int sn, u8 sregs[], int tn, u8 tregs[
|
|||
return true;
|
||||
}
|
||||
|
||||
bool IsOverlapSafe(int dreg, int di, int sn, u8 sregs[], int tn, u8 tregs[])
|
||||
bool IsOverlapSafe(int dreg, int di, int sn, u8 sregs[], int tn = 0, u8 tregs[] = NULL)
|
||||
{
|
||||
return IsOverlapSafeAllowS(dreg, di, sn, sregs, tn, tregs) && sregs[di] != dreg;
|
||||
}
|
||||
|
@ -339,23 +339,18 @@ void Jit::Comp_SVQ(u32 op)
|
|||
void Jit::Comp_VDot(u32 op) {
|
||||
CONDITIONAL_DISABLE;
|
||||
|
||||
if (js.HasUnknownPrefix()) {
|
||||
Comp_Generic(op);
|
||||
return;
|
||||
}
|
||||
if (js.HasUnknownPrefix())
|
||||
DISABLE;
|
||||
|
||||
int vd = _VD;
|
||||
int vs = _VS;
|
||||
int vt = _VT;
|
||||
VectorSize sz = GetVecSize(op);
|
||||
int n = GetNumVectorElements(sz);
|
||||
|
||||
// TODO: Force read one of them into regs? probably not.
|
||||
u8 sregs[4], tregs[4], dregs[1];
|
||||
GetVectorRegsPrefixS(sregs, sz, vs);
|
||||
GetVectorRegsPrefixT(tregs, sz, vt);
|
||||
GetVectorRegsPrefixD(dregs, V_Single, vd);
|
||||
GetVectorRegsPrefixS(sregs, sz, _VS);
|
||||
GetVectorRegsPrefixT(tregs, sz, _VT);
|
||||
GetVectorRegsPrefixD(dregs, V_Single, _VD);
|
||||
|
||||
int n = GetNumVectorElements(sz);
|
||||
X64Reg tempxreg = XMM0;
|
||||
if (IsOverlapSafe(dregs[0], 0, n, sregs, n, tregs))
|
||||
{
|
||||
|
@ -387,20 +382,8 @@ void Jit::Comp_VDot(u32 op) {
|
|||
void Jit::Comp_VecDo3(u32 op) {
|
||||
CONDITIONAL_DISABLE;
|
||||
|
||||
if (js.HasUnknownPrefix()) {
|
||||
Comp_Generic(op);
|
||||
return;
|
||||
}
|
||||
|
||||
int vd = _VD;
|
||||
int vs = _VS;
|
||||
int vt = _VT;
|
||||
VectorSize sz = GetVecSize(op);
|
||||
|
||||
u8 sregs[4], tregs[4], dregs[4];
|
||||
GetVectorRegsPrefixS(sregs, sz, vs);
|
||||
GetVectorRegsPrefixT(tregs, sz, vt);
|
||||
GetVectorRegsPrefixD(dregs, sz, vd);
|
||||
if (js.HasUnknownPrefix())
|
||||
DISABLE;
|
||||
|
||||
void (XEmitter::*xmmop)(X64Reg, OpArg) = NULL;
|
||||
switch (op >> 26)
|
||||
|
@ -430,14 +413,16 @@ void Jit::Comp_VecDo3(u32 op) {
|
|||
}
|
||||
|
||||
if (xmmop == NULL)
|
||||
{
|
||||
fpr.ReleaseSpillLocks();
|
||||
Comp_Generic(op);
|
||||
return;
|
||||
}
|
||||
DISABLE;
|
||||
|
||||
VectorSize sz = GetVecSize(op);
|
||||
int n = GetNumVectorElements(sz);
|
||||
|
||||
u8 sregs[4], tregs[4], dregs[4];
|
||||
GetVectorRegsPrefixS(sregs, sz, _VS);
|
||||
GetVectorRegsPrefixT(tregs, sz, _VT);
|
||||
GetVectorRegsPrefixD(dregs, sz, _VD);
|
||||
|
||||
X64Reg tempxregs[4];
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
|
@ -480,6 +465,123 @@ void Jit::Comp_VecDo3(u32 op) {
|
|||
fpr.ReleaseSpillLocks();
|
||||
}
|
||||
|
||||
// Compiles the element-wise, single-source VFPU ops that share the "VV2Op"
// encoding into scalar SSE code. The sub-op is selected by bits 16..20 of op.
//
// Natively handled sub-ops:
//    0 vmov, 1 vabs, 2 vneg, 4 vsat0, 5 vsat1,
//   16 vrcp, 17 vrsq, 22 vsqrt, 24 vnrcp
// Every other sub-op (vsin, vcos, vexp2, vlog2, vasin, vnsin, vrexp2, and
// anything not listed) is handed to Comp_Generic through DISABLE.
//
// Also falls back through DISABLE when the prefix state is unknown at
// compile time (js.HasUnknownPrefix()).
void Jit::Comp_VV2Op(u32 op) {
	CONDITIONAL_DISABLE;

	if (js.HasUnknownPrefix())
		DISABLE;

	const int subop = (op >> 16) & 0x1f;

	// Decide whether we can handle this sub-op *before* touching the register
	// cache. Detecting it only inside the emit loop (as before) meant the
	// fallback was taken after the prefixes had been resolved and after the
	// destinations had already been mapped MAP_NOINIT | MAP_DIRTY (and temps
	// possibly allocated), i.e. with uninitialized registers marked dirty.
	// The default also covers sub-ops that have no case at all, which would
	// otherwise store never-written XMM registers into the destinations.
	switch (subop)
	{
	case 0:  // vmov
	case 1:  // vabs
	case 2:  // vneg
	case 4:  // vsat0
	case 5:  // vsat1
	case 16: // vrcp
	case 17: // vrsq
	case 22: // vsqrt
	case 24: // vnrcp
		break;
	default:
		DISABLE;
	}

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// Pick an XMM register to compute each lane into. If writing dregs[i]
	// directly could clobber a source lane that is still needed, compute into
	// a temp and copy it out at the end; otherwise compute straight into the
	// destination register.
	X64Reg tempxregs[4];
	for (int i = 0; i < n; ++i)
	{
		if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs))
		{
			int reg = fpr.GetTempV();
			fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
			fpr.SpillLockV(reg);
			tempxregs[i] = fpr.VX(reg);
		}
		else
		{
			// When the destination is also this lane's source, its current
			// value must be loaded, so MAP_NOINIT is not used.
			fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);
			fpr.SpillLockV(dregs[i]);
			tempxregs[i] = fpr.VX(dregs[i]);
		}
	}

	// Warning: sregs[i] and tempxregs[i] may be the same reg.
	// Helps for vmov, hurts for vrcp, etc.
	// The ops that cannot work in place go through XMM0 as scratch so the
	// source is fully consumed before tempxregs[i] is overwritten.
	for (int i = 0; i < n; ++i)
	{
		switch (subop)
		{
		case 0: // d[i] = s[i]; break; //vmov
			// Probably for swizzle.
			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				MOVSS(tempxregs[i], fpr.V(sregs[i]));
			break;
		case 1: // d[i] = fabsf(s[i]); break; //vabs
			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				MOVSS(tempxregs[i], fpr.V(sregs[i]));
			ANDPS(tempxregs[i], M((void *)&noSignMask));
			break;
		case 2: // d[i] = -s[i]; break; //vneg
			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				MOVSS(tempxregs[i], fpr.V(sregs[i]));
			XORPS(tempxregs[i], M((void *)&signBitLower));
			break;
		case 4: // if (s[i] < 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;    // vsat0
			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				MOVSS(tempxregs[i], fpr.V(sregs[i]));
			MAXSS(tempxregs[i], M((void *)&zero));
			MINSS(tempxregs[i], M((void *)&one));
			break;
		case 5: // if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break;  // vsat1
			if (!fpr.V(sregs[i]).IsSimpleReg(tempxregs[i]))
				MOVSS(tempxregs[i], fpr.V(sregs[i]));
			MAXSS(tempxregs[i], M((void *)&minus_one));
			MINSS(tempxregs[i], M((void *)&one));
			break;
		case 16: // d[i] = 1.0f / s[i]; break; //vrcp
			MOVSS(XMM0, M((void *)&one));
			DIVSS(XMM0, fpr.V(sregs[i]));
			MOVSS(tempxregs[i], R(XMM0));
			break;
		case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
			SQRTSS(XMM0, fpr.V(sregs[i]));
			MOVSS(tempxregs[i], M((void *)&one));
			DIVSS(tempxregs[i], R(XMM0));
			break;
		case 22: // d[i] = sqrtf(s[i]); break; //vsqrt
			SQRTSS(tempxregs[i], fpr.V(sregs[i]));
			break;
		case 24: // d[i] = -1.0f / s[i]; break; // vnrcp
			MOVSS(XMM0, M((void *)&one));
			DIVSS(XMM0, fpr.V(sregs[i]));
			XORPS(XMM0, M((void *)&signBitLower));
			MOVSS(tempxregs[i], R(XMM0));
			break;
		default:
			// Not reachable: unsupported sub-ops were rejected above.
			break;
		}
	}

	// Copy out any lane that was computed in a temp rather than in place.
	for (int i = 0; i < n; ++i)
	{
		if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))
			MOVSS(fpr.V(dregs[i]), tempxregs[i]);
	}

	ApplyPrefixD(dregs, sz);

	fpr.ReleaseSpillLocks();
}
|
||||
|
||||
void Jit::Comp_Mftv(u32 op) {
|
||||
CONDITIONAL_DISABLE;
|
||||
|
||||
|
|
|
@ -187,6 +187,7 @@ public:
|
|||
void Comp_VPFX(u32 op);
|
||||
void Comp_VDot(u32 op);
|
||||
void Comp_VecDo3(u32 op);
|
||||
void Comp_VV2Op(u32 op);
|
||||
void Comp_Mftv(u32 op);
|
||||
void Comp_Vmtvc(u32 op);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue