IR: vcmp, vcmov, vhdp

2025-04-02 11:01:50 -04:00 · 2016-05-12 22:35:31 +02:00 · 2016-05-12 22:35:31 +02:00 · 7268abec61
commit 7268abec61
parent 1851458628
4 changed files with 144 additions and 7 deletions
--- a/Core/MIPS/IR/IRCompVFPU.cpp
+++ b/Core/MIPS/IR/IRCompVFPU.cpp
@ -358,7 +358,38 @@ namespace MIPSComp {
 	}

 	void IRFrontend::Comp_VHdp(MIPSOpcode op) {
-		DISABLE;
+		CONDITIONAL_DISABLE;
+		if (js.HasUnknownPrefix()) {
+			DISABLE;
+		}
+
+		int vd = _VD;
+		int vs = _VS;
+		int vt = _VT;
+		VectorSize sz = GetVecSize(op);
+
+		// TODO: Force read one of them into regs? Probably not.
+		u8 sregs[4], tregs[4], dregs[1];
+		GetVectorRegsPrefixS(sregs, sz, vs);
+		GetVectorRegsPrefixT(tregs, sz, vt);
+		GetVectorRegsPrefixD(dregs, V_Single, vd);  // The destination is a single register, whatever sz is.
+
+		// TODO: applyprefixST here somehow (shuffle, etc...). Below, IRVTEMP_0 is seeded with s[0]*t[0] and used as the running sum.
+		ir.Write(IROp::FMul, IRVTEMP_0, sregs[0], tregs[0]);
+
+		int n = GetNumVectorElements(sz);
+		for (int i = 1; i < n; i++) {
+			// sum += s[i]*t[i], except in the last lane, where only t[i] is added (presumably s[n-1] counts as 1.0 for vhdp - confirm).
+			if (i == n - 1) {
+				ir.Write(IROp::FAdd, IRVTEMP_0, IRVTEMP_0, tregs[i]);
+			} else {
+				ir.Write(IROp::FMul, IRVTEMP_0 + 1, sregs[i], tregs[i]);  // IRVTEMP_0 + 1 holds the per-lane product.
+				ir.Write(IROp::FAdd, IRVTEMP_0, IRVTEMP_0, IRVTEMP_0 + 1);
+			}
+		}
+
+		ir.Write(IROp::FMov, dregs[0], IRVTEMP_0);  // Move the accumulated sum into the destination register.
+		ApplyPrefixD(dregs, V_Single);
 	}

 	static const float MEMORY_ALIGNED16(vavg_table[4]) = { 1.0f, 1.0f / 2.0f, 1.0f / 3.0f, 1.0f / 4.0f };
@ -840,7 +871,6 @@ namespace MIPSComp {

 		MatrixSize sz = GetMtxSize(op);
 		if (sz != M_4x4) {
-			// logBlocks = true;
 			DISABLE;
 		}
 		int n = GetMatrixSide(sz);
@ -1165,16 +1195,66 @@ namespace MIPSComp {
 	}

 	void IRFrontend::Comp_Vcmp(MIPSOpcode op) {
-		// Fiendishly hard...
-		DISABLE;
+		CONDITIONAL_DISABLE;
+		if (js.HasUnknownPrefix())
+			DISABLE;
+
+		VectorSize sz = GetVecSize(op);
+		int n = GetNumVectorElements(sz);
+
+		VCondition cond = (VCondition)(op & 0xF);  // The condition is taken from the low 4 bits of the opcode.
+
+		u8 sregs[4], tregs[4];
+		GetVectorRegsPrefixS(sregs, sz, _VS);
+		GetVectorRegsPrefixT(tregs, sz, _VT);
+
+		int mask = 0;
+		for (int i = 0; i < n; i++) {
+			ir.Write(IROp::FCmpVfpuBit, cond | (i << 4), sregs[i], tregs[i]);  // First operand packs the condition (low nibble) and the CC bit index (upper bits).
+			mask |= (1 << i);  // Record each compared lane for the aggregate op.
+		}
+		ir.Write(IROp::FCmpVfpuAggregate, mask);  // Sets CC bit 4 if any masked bit is set, and bit 5 if all are set.
 	}

 	void IRFrontend::Comp_Vcmov(MIPSOpcode op) {
-		// Fiendishly hard...
-		DISABLE;
+		// Emits one FCmovVfpuCC per lane: each S lane is copied into D only if the selected VFPU_CTRL_CC bit matches.
+		CONDITIONAL_DISABLE;
+		if (js.HasUnknownPrefix()) {
+			DISABLE;
+		}
+
+		// The last FCmovVfpuCC operand packs the CC bit index (low 7 bits) and the bit value required for the copy (bit 7).
+		VectorSize sz = GetVecSize(op);
+		int n = GetNumVectorElements(sz);
+
+		u8 sregs[4], dregs[4];
+		GetVectorRegsPrefixS(sregs, sz, _VS);
+		GetVectorRegsPrefixD(dregs, sz, _VD);
+		int tf = (op >> 19) & 1;  // Opcode bit 19; its inverse is the CC bit value that triggers the copy.
+		int imm3 = (op >> 16) & 7;  // Opcode bits 16-18: a single CC bit to test when < 6, per-lane bits otherwise.
+
+		for (int i = 0; i < n; ++i) {
+			// Simplification: Disable if overlap unsafe
+			if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs)) {
+				DISABLE;
+			}
+		}
+		if (imm3 < 6) {
+			// Test one bit of CC. This bit decides whether none or all subregisters are copied.
+			for (int i = 0; i < n; i++) {
+				ir.Write(IROp::FCmovVfpuCC, dregs[i], sregs[i], (imm3) | ((!tf) << 7));
+			}
+		} else {
+			// Look at the bottom four bits of CC to individually decide if the subregisters should be copied.
+			for (int i = 0; i < n; i++) {
+				ir.Write(IROp::FCmovVfpuCC, dregs[i], sregs[i], (i) | ((!tf) << 7));
+			}
+		}
+		ApplyPrefixD(dregs, sz);
 	}

 	void IRFrontend::Comp_Viim(MIPSOpcode op) {
+		CONDITIONAL_DISABLE;
 		if (js.HasUnknownPrefix())
 			DISABLE;

@ -1186,6 +1266,7 @@ namespace MIPSComp {
 	}

 	void IRFrontend::Comp_Vfim(MIPSOpcode op) {
+		CONDITIONAL_DISABLE;
 		if (js.HasUnknownPrefix())
 			DISABLE;

--- a/Core/MIPS/IR/IRInst.cpp
+++ b/Core/MIPS/IR/IRInst.cpp
@ -97,7 +97,9 @@ static const IRMeta irMeta[] = {
 	{ IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" },
 	{ IROp::SetCtrlVFPUReg, "SetCtrlVFPUReg", "TC" },
 	{ IROp::SetCtrlVFPUFReg, "SetCtrlVFPUFReg", "TF" },
-
+	{ IROp::FCmovVfpuCC, "FCmovVfpuCC", "FFI" },
+	{ IROp::FCmpVfpuBit, "FCmpVfpuBit", "IFF" },
+	{ IROp::FCmpVfpuAggregate, "FCmpVfpuAggregate", ""},
 	{ IROp::Vec4Init, "Vec4Init", "Fv" },
 	{ IROp::Vec4Shuffle, "Vec4Shuffle", "FFs" },
 	{ IROp::Vec4Mov, "Vec4Mov", "FF" },
--- a/Core/MIPS/IR/IRInst.h
+++ b/Core/MIPS/IR/IRInst.h
@ -134,6 +134,8 @@ enum class IROp : u8 {
 	FCmp,

 	FCmovVfpuCC,
+	FCmpVfpuBit,
+	FCmpVfpuAggregate,

 	// Rounding Mode
 	RestoreRoundingMode,
@ -157,6 +159,12 @@ enum class IROp : u8 {
 	Vec4Scale,
 	Vec4Dot,

+	// vx2i
+	Vec4ExpandU16ToU32Hi,
+	Vec4ExpandU8ToU32Hi,
+	Vec4ExpandS16ToS32Hi,
+	Vec4ExpandS8ToS32Hi,
+
 	// Slow special functions. Used on singles.
 	FSin,
 	FCos,
--- a/Core/MIPS/IR/IRInterpreter.cpp
+++ b/Core/MIPS/IR/IRInterpreter.cpp
@ -32,6 +32,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
 	const IRInst *end = inst + count;
 	while (inst != end) {
 		switch (inst->op) {
+		case IROp::Nop:
+			_assert_(false);
+			break;
 		case IROp::SetConst:
 			mips->r[inst->dest] = constPool[inst->src1];
 			break;
@ -209,6 +212,49 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
 #endif
 			break;

+		case IROp::FCmpVfpuBit:
+		{
+			int op = inst->dest & 0xF;
+			int bit = inst->dest >> 4;
+			int result = 0;
+			switch (op) {
+			case VC_EQ: result = mips->f[inst->src1] == mips->f[inst->src2]; break;
+			case VC_NE: result = mips->f[inst->src1] != mips->f[inst->src2]; break;
+			case VC_LT: result = mips->f[inst->src1] < mips->f[inst->src2]; break;
+			case VC_LE: result = mips->f[inst->src1] <= mips->f[inst->src2]; break;
+			case VC_GT: result = mips->f[inst->src1] > mips->f[inst->src2]; break;
+			case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break;
+			case VC_EZ: result = mips->f[inst->src1] == 0.0f; break;
+			case VC_NZ: result = mips->f[inst->src1] != 0.0f; break;
+			case VC_TR: result = 1; break;
+			case VC_FL: result = 0; break;
+			default:
+				result = 0;
+			}
+			if (result != 0) {
+				mips->vfpuCtrl[VFPU_CTRL_CC] |= (1 << bit);
+			} else {
+				mips->vfpuCtrl[VFPU_CTRL_CC] &= ~(1 << bit);
+			}
+		}
+			break;
+
+		case IROp::FCmpVfpuAggregate:
+		{
+			int mask = inst->dest;
+			u32 cc = mips->vfpuCtrl[VFPU_CTRL_CC];
+			int a = (cc & mask) ? 0x10 : 0x00;
+			int b = (cc & mask) == mask ? 0x20 : 0x00;
+			mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | a | b;;
+		}
+			break;
+
+		case IROp::FCmovVfpuCC:
+			if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0x7f)) & 1) == (inst->src2 >> 7)) {
+				mips->f[inst->dest] = mips->f[inst->src1];
+			}
+			break;
+
 		// Not quickly implementable on all platforms, unfortunately.
 		case IROp::Vec4Dot:
 		{