Implement vsgn in x86/x64 and ARM jit

This commit is contained in:
Henrik Rydgard 2013-11-07 14:34:08 +01:00
parent 32c95af820
commit 6eb7f94065
7 changed files with 134 additions and 22 deletions

View file

@ -518,7 +518,8 @@ namespace MIPSComp
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix() || disablePrefixes) {
DISABLE;
// Don't think matrix init ops care about prefixes.
// DISABLE;
}
MatrixSize sz = GetMtxSize(op);
@ -639,17 +640,6 @@ namespace MIPSComp
fpr.ReleaseSpillLocksAndDiscardTemps();
}
// JIT entry point for the ops selected below (vfad / vavg, per the case labels).
// Nothing is emitted here yet: DISABLE runs first and both switch cases are
// empty placeholders.
// NOTE(review): DISABLE is presumably a macro that defers to the generic
// fallback and returns, which would make the switch unreachable — confirm
// against the macro definition.
void Jit::Comp_Vhoriz(MIPSOpcode op) {
DISABLE;
// Bits 16..20 of the opcode pick the specific operation.
switch ((op >> 16) & 31) {
case 6: // vfad
break;
case 7: // vavg
break;
}
}
void Jit::Comp_VecDo3(MIPSOpcode op) {
CONDITIONAL_DISABLE;
@ -1679,4 +1669,66 @@ namespace MIPSComp
}
fpr.ReleaseSpillLocksAndDiscardTemps();
}
// JIT entry point for the ops selected below (vfad / vavg, per the case labels).
// Nothing is emitted here yet: DISABLE runs first and both switch cases are
// empty placeholders.
// NOTE(review): DISABLE is presumably a macro that defers to the generic
// fallback and returns, which would make the switch unreachable — confirm
// against the macro definition.
void Jit::Comp_Vhoriz(MIPSOpcode op) {
DISABLE;
// Bits 16..20 of the opcode pick the specific operation.
switch ((op >> 16) & 31) {
case 6: // vfad
break;
case 7: // vavg
break;
}
}
// Emits ARM code for vsgn: for every element of the source vector, the
// destination element receives 0 when the source compares equal to 0.0f,
// otherwise the bit pattern of 1.0f (0x3F800000) combined with the source's
// sign bit, i.e. +1.0f or -1.0f.
//
// op: the instruction being compiled; vector size and the S/D register
//     numbers are decoded from it.
//
// Bails out through DISABLE when the prefix state is unknown or prefixes are
// disabled.
void Jit::Comp_Vsgn(MIPSOpcode op) {
	CONDITIONAL_DISABLE;
	if (js.HasUnknownPrefix() || disablePrefixes) {
		DISABLE;
	}

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// Destination elements that overlap a source element still to be read are
	// redirected to temporaries, and copied into place once all reads are done.
	MIPSReg tempregs[4];
	for (int i = 0; i < n; ++i) {
		if (!IsOverlapSafe(dregs[i], i, n, sregs)) {
			tempregs[i] = fpr.GetTempV();
		} else {
			tempregs[i] = dregs[i];
		}
	}

	for (int i = 0; i < n; ++i) {
		fpr.MapDirtyInV(tempregs[i], sregs[i]);
		// Let's do it in integer registers for now. NEON later.
		// There's gotta be a shorter way, can't find one though that takes
		// care of NaNs like the interpreter (ignores them and just operates on the bits).
		MOVI2F(S0, 0.0f, R0);
		VCMP(fpr.V(sregs[i]), S0);
		VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
		VMOV(R0, fpr.V(sregs[i]));
		// Keep only the sign bit, then OR in the bit pattern of 1.0f.
		AND(R0, R0, AssumeMakeOperand2(0x80000000));
		ORR(R0, R0, AssumeMakeOperand2(0x3F800000));
		// If the source compared equal to zero, the result is zero instead.
		// This has to overwrite R0, since R0 is what gets moved into the
		// result register below.
		SetCC(CC_EQ);
		MOV(R0, AssumeMakeOperand2(0x0));
		SetCC(CC_AL);
		VMOV(fpr.V(tempregs[i]), R0);
	}

	// Move any results that were parked in temporaries to their real destination.
	for (int i = 0; i < n; ++i) {
		if (dregs[i] != tempregs[i]) {
			fpr.MapDirtyInV(dregs[i], tempregs[i]);
			VMOV(fpr.V(dregs[i]), fpr.V(tempregs[i]));
		}
	}

	ApplyPrefixD(dregs, sz);

	fpr.ReleaseSpillLocksAndDiscardTemps();
}
}

View file

@ -233,6 +233,7 @@ public:
void Comp_VCrossQuat(MIPSOpcode op);
void Comp_Vsge(MIPSOpcode op);
void Comp_Vslt(MIPSOpcode op);
void Comp_Vsgn(MIPSOpcode op);
JitBlockCache *GetBlockCache() { return &blocks; }

View file

@ -747,7 +747,7 @@ const MIPSInstruction tableVFPU9[32] = // 110100 00010 xxxxx . ....... . .......
INSTR("vsrt3", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsrt3, IN_OTHER|OUT_OTHER|IS_VFPU),
// TODO: Flags may not be correct (prefixes, etc.)
INSTR("vsrt4", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsrt4, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vsgn", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsgn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vsgn", &Jit::Comp_Vsgn, Dis_Vbfy, Int_Vsgn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INVALID,
//12
INVALID,

View file

@ -1120,4 +1120,7 @@ namespace MIPSComp
// Placeholder: no native code is emitted for vslt here; the body is only DISABLE.
// NOTE(review): DISABLE presumably defers to the generic fallback — confirm
// against the macro definition.
void Jit::Comp_Vslt(MIPSOpcode op) {
DISABLE;
}
// Placeholder: no native code is emitted for vsgn here; the body is only DISABLE.
// NOTE(review): DISABLE presumably defers to the generic fallback — confirm
// against the macro definition.
void Jit::Comp_Vsgn(MIPSOpcode op) {
DISABLE;
}
}

View file

@ -237,6 +237,7 @@ namespace MIPSComp
void Comp_VCrossQuat(MIPSOpcode op);
void Comp_Vsge(MIPSOpcode op);
void Comp_Vslt(MIPSOpcode op);
void Comp_Vsgn(MIPSOpcode op);
// Utility compilation functions

View file

@ -534,7 +534,6 @@ void Jit::Comp_VHdp(MIPSOpcode op) {
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
// TODO: Force read one of them into regs? probably not.
u8 sregs[4], tregs[4], dregs[1];
GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixT(tregs, sz, _VT);
@ -572,6 +571,14 @@ void Jit::Comp_VHdp(MIPSOpcode op) {
fpr.ReleaseSpillLocks();
}
// Placeholder: no native code is emitted for vsge here; the body is only DISABLE.
// NOTE(review): DISABLE presumably defers to the generic fallback — confirm
// against the macro definition.
void Jit::Comp_Vsge(MIPSOpcode op) {
DISABLE;
}
// Placeholder: no native code is emitted for vslt here; the body is only DISABLE.
// NOTE(review): DISABLE presumably defers to the generic fallback — confirm
// against the macro definition.
void Jit::Comp_Vslt(MIPSOpcode op) {
DISABLE;
}
void Jit::Comp_VCrossQuat(MIPSOpcode op) {
CONDITIONAL_DISABLE;
@ -1015,14 +1022,6 @@ void Jit::Comp_Vcmp(MIPSOpcode op) {
gpr.UnlockAllX();
}
// Placeholder: no native code is emitted for vsge here; the body is only DISABLE.
// NOTE(review): DISABLE presumably defers to the generic fallback — confirm
// against the macro definition.
void Jit::Comp_Vsge(MIPSOpcode op) {
DISABLE;
}
// Placeholder: no native code is emitted for vslt here; the body is only DISABLE.
// NOTE(review): DISABLE presumably defers to the generic fallback — confirm
// against the macro definition.
void Jit::Comp_Vslt(MIPSOpcode op) {
DISABLE;
}
// There are no immediates for floating point, so we need to load these
// from RAM. Might as well have a table ready.
extern const float mulTableVi2f[32] = {
@ -1368,6 +1367,60 @@ void Jit::Comp_Vcst(MIPSOpcode op) {
fpr.ReleaseSpillLocks();
}
// Emits x86/x64 code for vsgn: for every element of the source vector, the
// destination element receives 0 when the source compares equal to 0.0f,
// otherwise the source's sign bit combined with the oneOneOneOne constant
// (per the comments below, this yields +/-1.0f).
//
// op: the instruction being compiled; vector size and the S/D register
//     numbers are decoded from it.
//
// Bails out through DISABLE when the prefix state is unknown.
void Jit::Comp_Vsgn(MIPSOpcode op) {
	CONDITIONAL_DISABLE;

	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// Pick the x86 register each result is computed into: the destination's
	// own register when that is overlap-safe, otherwise a locked temporary.
	X64Reg tempxregs[4];
	for (int i = 0; i < n; ++i)
	{
		if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs))
		{
			int reg = fpr.GetTempV();
			fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
			fpr.SpillLockV(reg);
			tempxregs[i] = fpr.VX(reg);
		}
		else
		{
			// When destination and source are the same register its current
			// value is still needed, so it must not be mapped with MAP_NOINIT.
			fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);
			fpr.SpillLockV(dregs[i]);
			tempxregs[i] = fpr.VX(dregs[i]);
		}
	}

	for (int i = 0; i < n; ++i)
	{
		// XMM0 is overwritten by both CMPEQSS and ANDNPS below, so it has to
		// be re-zeroed for every element; otherwise later elements would be
		// compared against the previous result instead of 0.0f.
		XORPS(XMM0, R(XMM0));
		CMPEQSS(XMM0, fpr.V(sregs[i]));  // XMM0 = s[i] == 0.0f
		MOVSS(XMM1, fpr.V(sregs[i]));
		// Preserve sign bit, replace rest with ones
		ANDPS(XMM1, M((void *)&signBitLower));
		ORPS(XMM1, M((void *)&oneOneOneOne));
		// If really was equal to zero, zap. Note that ANDN negates the destination.
		ANDNPS(XMM0, R(XMM1));
		MOVAPS(tempxregs[i], R(XMM0));
	}

	// Copy results that were computed in a temporary into the real destination.
	for (int i = 0; i < n; ++i) {
		if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))
			MOVSS(fpr.V(dregs[i]), tempxregs[i]);
	}

	ApplyPrefixD(dregs, sz);

	fpr.ReleaseSpillLocks();
}
void Jit::Comp_VV2Op(MIPSOpcode op) {
CONDITIONAL_DISABLE;
@ -2018,4 +2071,5 @@ void Jit::Comp_VRot(MIPSOpcode op) {
fpr.ReleaseSpillLocks();
}
}

View file

@ -253,6 +253,7 @@ public:
void Comp_VCrossQuat(MIPSOpcode op);
void Comp_Vsge(MIPSOpcode op);
void Comp_Vslt(MIPSOpcode op);
void Comp_Vsgn(MIPSOpcode op);
void Comp_DoNothing(MIPSOpcode op);