Implement vsgn in x86/x64 and ARM jit

2025-04-02 11:01:50 -04:00 · 2013-11-07 14:34:08 +01:00 · 2013-11-07 14:34:08 +01:00 · 6eb7f94065
commit 6eb7f94065
parent 32c95af820
7 changed files with 134 additions and 22 deletions
--- a/Core/MIPS/ARM/ArmCompVFPU.cpp
+++ b/Core/MIPS/ARM/ArmCompVFPU.cpp
@ -518,7 +518,8 @@ namespace MIPSComp
 		CONDITIONAL_DISABLE;

 		if (js.HasUnknownPrefix() || disablePrefixes) {
-			DISABLE;
+			// Don't think matrix init ops care about prefixes.
+			// DISABLE;
 		}

 		MatrixSize sz = GetMtxSize(op);
@ -639,17 +640,6 @@ namespace MIPSComp
 		fpr.ReleaseSpillLocksAndDiscardTemps();
 	}

-	void Jit::Comp_Vhoriz(MIPSOpcode op) {
-		DISABLE;
-
-		switch ((op >> 16) & 31) {
-		case 6:  // vfad
-			break;
-		case 7:  // vavg
-			break;
-		}
-	}
-
 	void Jit::Comp_VecDo3(MIPSOpcode op) {
 		CONDITIONAL_DISABLE;
 		
@ -1679,4 +1669,66 @@ namespace MIPSComp
 		}
 		fpr.ReleaseSpillLocksAndDiscardTemps();
 	}
+
+	void Jit::Comp_Vhoriz(MIPSOpcode op) {  // Placeholder: this body contains no emitter calls of its own.
+		DISABLE;  // NOTE(review): presumably bails out to the generic path, leaving the switch below as a skeleton - confirm against the macro.
+
+		switch ((op >> 16) & 31) {  // 5-bit field at bits 16..20 selects the operation.
+		case 6:  // vfad
+			break;
+		case 7:  // vavg
+			break;
+		}
+	}
+
+	void Jit::Comp_Vsgn(MIPSOpcode op) {  // vsgn: per element, d = 0 if s compares equal to zero, else 1.0f carrying s's sign bit.
+		CONDITIONAL_DISABLE;
+		if (js.HasUnknownPrefix() || disablePrefixes) {
+			DISABLE;
+		}
+
+		VectorSize sz = GetVecSize(op);
+		int n = GetNumVectorElements(sz);
+
+		u8 sregs[4], dregs[4];
+		GetVectorRegsPrefixS(sregs, sz, _VS);
+		GetVectorRegsPrefixD(dregs, sz, _VD);
+
+		MIPSReg tempregs[4];  // Where each result is first written: dregs[i] itself, or a temp when IsOverlapSafe says no.
+		for (int i = 0; i < n; ++i) {
+			if (!IsOverlapSafe(dregs[i], i, n, sregs)) {
+				tempregs[i] = fpr.GetTempV();
+			} else {
+				tempregs[i] = dregs[i];
+			}
+		}
+
+		for (int i = 0; i < n; ++i) {
+			fpr.MapDirtyInV(tempregs[i], sregs[i]);
+			// Done in integer registers for now; NEON later. The float compare only decides
+			// zero vs. non-zero, while the sign is taken from the raw bits, so NaNs are handled
+			// like the interpreter (ignores them and just operates on the bits).
+			MOVI2F(S0, 0.0f, R0);
+			VCMP(fpr.V(sregs[i]), S0);
+			VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
+			VMOV(R0, fpr.V(sregs[i]));
+			AND(R0, R0, AssumeMakeOperand2(0x80000000));  // Keep only the sign bit.
+			ORR(R0, R0, AssumeMakeOperand2(0x3F800000));  // OR in the bit pattern of 1.0f, giving +/-1.0f.
+			SetCC(CC_EQ);
+			MOV(R0, AssumeMakeOperand2(0x0));  // Source compared equal to zero: result is 0. Must hit R0, the register stored below.
+			SetCC(CC_AL);
+			VMOV(fpr.V(tempregs[i]), R0);
+		}
+
+		for (int i = 0; i < n; ++i) {
+			if (dregs[i] != tempregs[i]) {  // Result went to a temp; copy it into the real destination now.
+				fpr.MapDirtyInV(dregs[i], tempregs[i]);
+				VMOV(fpr.V(dregs[i]), fpr.V(tempregs[i]));
+			}
+		}
+
+		ApplyPrefixD(dregs, sz);
+
+		fpr.ReleaseSpillLocksAndDiscardTemps();
+	}
 }
--- a/Core/MIPS/ARM/ArmJit.h
+++ b/Core/MIPS/ARM/ArmJit.h
@ -233,6 +233,7 @@ public:
 	void Comp_VCrossQuat(MIPSOpcode op);
 	void Comp_Vsge(MIPSOpcode op);
 	void Comp_Vslt(MIPSOpcode op);
+	void Comp_Vsgn(MIPSOpcode op);

 	JitBlockCache *GetBlockCache() { return &blocks; }

--- a/Core/MIPS/MIPSTables.cpp
+++ b/Core/MIPS/MIPSTables.cpp
@ -747,7 +747,7 @@ const MIPSInstruction tableVFPU9[32] = // 110100 00010 xxxxx . ....... . .......
 	INSTR("vsrt3", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsrt3, IN_OTHER|OUT_OTHER|IS_VFPU),
 	// TODO: Flags may not be correct (prefixes, etc.)
 	INSTR("vsrt4", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsrt4, IN_OTHER|OUT_OTHER|IS_VFPU),
-	INSTR("vsgn", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsgn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
+	INSTR("vsgn", &Jit::Comp_Vsgn, Dis_Vbfy, Int_Vsgn, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
 	INVALID,
 	//12
 	INVALID,
--- a/Core/MIPS/PPC/PpcCompVFPU.cpp
+++ b/Core/MIPS/PPC/PpcCompVFPU.cpp
@ -1120,4 +1120,7 @@ namespace MIPSComp
 	void Jit::Comp_Vslt(MIPSOpcode op) {
 		DISABLE;
 	}
+	void Jit::Comp_Vsgn(MIPSOpcode op) {  // PPC backend: vsgn has no native code here; the body is only DISABLE.
+		DISABLE;  // NOTE(review): presumably defers to the generic path - confirm against the macro definition.
+	}
 }
--- a/Core/MIPS/PPC/PpcJit.h
+++ b/Core/MIPS/PPC/PpcJit.h
@ -237,6 +237,7 @@ namespace MIPSComp
 		void Comp_VCrossQuat(MIPSOpcode op);
 		void Comp_Vsge(MIPSOpcode op);
 		void Comp_Vslt(MIPSOpcode op);
+		void Comp_Vsgn(MIPSOpcode op);


 		// Utility compilation functions
--- a/Core/MIPS/x86/CompVFPU.cpp
+++ b/Core/MIPS/x86/CompVFPU.cpp
@ -534,7 +534,6 @@ void Jit::Comp_VHdp(MIPSOpcode op) {
 	VectorSize sz = GetVecSize(op);
 	int n = GetNumVectorElements(sz);

-	// TODO: Force read one of them into regs? probably not.
 	u8 sregs[4], tregs[4], dregs[1];
 	GetVectorRegsPrefixS(sregs, sz, _VS);
 	GetVectorRegsPrefixT(tregs, sz, _VT);
@ -572,6 +571,14 @@ void Jit::Comp_VHdp(MIPSOpcode op) {
 	fpr.ReleaseSpillLocks();
 }

+void Jit::Comp_Vsge(MIPSOpcode op) {  // Stub: vsge has no native x86 code yet; the body is only DISABLE.
+	DISABLE;  // NOTE(review): presumably defers to the generic path - confirm against the macro definition.
+}
+
+void Jit::Comp_Vslt(MIPSOpcode op) {  // Stub: vslt has no native x86 code yet; the body is only DISABLE.
+	DISABLE;  // NOTE(review): presumably defers to the generic path - confirm against the macro definition.
+}
+
 void Jit::Comp_VCrossQuat(MIPSOpcode op) {
 	CONDITIONAL_DISABLE;

@ -1015,14 +1022,6 @@ void Jit::Comp_Vcmp(MIPSOpcode op) {
 	gpr.UnlockAllX();
 }

-void Jit::Comp_Vsge(MIPSOpcode op) {
-	DISABLE;
-}
-
-void Jit::Comp_Vslt(MIPSOpcode op) {
-	DISABLE;
-}
-
 // There are no immediates for floating point, so we need to load these
 // from RAM. Might as well have a table ready.
 extern const float mulTableVi2f[32] = {
@ -1368,6 +1367,60 @@ void Jit::Comp_Vcst(MIPSOpcode op) {
 	fpr.ReleaseSpillLocks();
 }

+void Jit::Comp_Vsgn(MIPSOpcode op) {  // vsgn: per element, d = 0 if s == 0.0f, else the sign bit of s OR'ed with oneOneOneOne.
+	CONDITIONAL_DISABLE;
+
+	if (js.HasUnknownPrefix())
+		DISABLE;
+
+	VectorSize sz = GetVecSize(op);
+	int n = GetNumVectorElements(sz);
+
+	u8 sregs[4], dregs[4];
+	GetVectorRegsPrefixS(sregs, sz, _VS);
+	GetVectorRegsPrefixD(dregs, sz, _VD);
+
+	X64Reg tempxregs[4];  // Host register that receives each result: the mapped dregs[i], or a mapped temp on unsafe overlap.
+	for (int i = 0; i < n; ++i)
+	{
+		if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs))
+		{
+			int reg = fpr.GetTempV();
+			fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
+			fpr.SpillLockV(reg);
+			tempxregs[i] = fpr.VX(reg);
+		}
+		else
+		{
+			fpr.MapRegV(dregs[i], (dregs[i] == sregs[i] ? 0 : MAP_NOINIT) | MAP_DIRTY);  // Load the old value only when it doubles as the source.
+			fpr.SpillLockV(dregs[i]);
+			tempxregs[i] = fpr.VX(dregs[i]);
+		}
+	}
+
+	for (int i = 0; i < n; ++i)
+	{
+		XORPS(XMM0, R(XMM0));  // Must be re-zeroed every iteration: XMM0 is overwritten by the compare mask and the result below.
+		CMPEQSS(XMM0, fpr.V(sregs[i]));  // XMM0 = s[i] == 0.0f
+		MOVSS(XMM1, fpr.V(sregs[i]));
+		// Keep only the sign bit, then OR in oneOneOneOne (presumably 1.0f) to form +/-1.0f.
+		ANDPS(XMM1, M((void *)&signBitLower));
+		ORPS(XMM1, M((void *)&oneOneOneOne));
+		// If really was equal to zero, zap. Note that ANDN negates the destination.
+		ANDNPS(XMM0, R(XMM1));
+		MOVAPS(tempxregs[i], R(XMM0));
+	}
+
+	for (int i = 0; i < n; ++i) {
+		if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))  // Result landed in a temp; copy it into the real destination.
+			MOVSS(fpr.V(dregs[i]), tempxregs[i]);
+	}
+
+	ApplyPrefixD(dregs, sz);
+
+	fpr.ReleaseSpillLocks();
+}
+
 void Jit::Comp_VV2Op(MIPSOpcode op) {
 	CONDITIONAL_DISABLE;

@ -2018,4 +2071,5 @@ void Jit::Comp_VRot(MIPSOpcode op) {
 	fpr.ReleaseSpillLocks();
 }

+
 }
--- a/Core/MIPS/x86/Jit.h
+++ b/Core/MIPS/x86/Jit.h
@ -253,6 +253,7 @@ public:
 	void Comp_VCrossQuat(MIPSOpcode op);
 	void Comp_Vsge(MIPSOpcode op);
 	void Comp_Vslt(MIPSOpcode op);
+	void Comp_Vsgn(MIPSOpcode op);

 	void Comp_DoNothing(MIPSOpcode op);