arm64jit: Implement vector unpacks.

This commit is contained in:
Unknown W. Brackets 2023-09-07 19:53:07 -07:00
parent e03ae26d20
commit c523273d51
3 changed files with 60 additions and 2 deletions

View file

@ -3684,6 +3684,12 @@ void ARM64FloatEmitter::USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
{
USHLL(src_size, Rd, Rn, shift, true);
}
// SHLL public entry point: forwards to the shared four-argument encoder with
// the `upper` flag cleared. There is no shift parameter; the shift is tied to
// src_size.
void ARM64FloatEmitter::SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
	const bool useUpper = false;
	SHLL(src_size, Rd, Rn, useUpper);
}
// SHLL2 public entry point: same as SHLL, but forwards to the shared
// four-argument encoder with the `upper` flag set.
void ARM64FloatEmitter::SHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
	const bool useUpper = true;
	SHLL(src_size, Rd, Rn, useUpper);
}
void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
{
SXTL(src_size, Rd, Rn, false);
@ -3723,6 +3729,11 @@ void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift,
EmitShiftImm(upper, 1, imm >> 3, imm & 7, 0x14, Rd, Rn);
}
// Shared encoder behind SHLL (upper == false) and SHLL2 (upper == true).
// The shift amount is implied by src_size, so there is no shift argument.
// src_size must be exactly 8, 16 or 32; it is passed on as src_size >> 4,
// i.e. 0, 1 or 2 respectively.
void ARM64FloatEmitter::SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper) {
	// A plain "<= 32" check would let values such as 0 or 24 through, and the
	// shift below would then silently encode them as a different element size.
	_assert_msg_(src_size == 8 || src_size == 16 || src_size == 32, "%s src_size must be 8, 16, or 32", __FUNCTION__);
	Emit2RegMisc(upper, 1, src_size >> 4, 0b10011, Rd, Rn);
}
void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
{
_assert_msg_(shift > 0, "%s shift amount must be greater than zero!", __FUNCTION__);

View file

@ -976,6 +976,9 @@ public:
void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
// SHLL/SHLL2 take no shift argument: the shift amount always equals src_size.
// The implementation asserts that src_size is at most 32.
void SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void SHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
@ -1034,6 +1037,7 @@ private:
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
// Common implementation for the public SHLL (upper = false) and SHLL2 (upper = true).
void SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);

View file

@ -713,9 +713,52 @@ void Arm64JitBackend::CompIR_VecPack(IRInst inst) {
break;
case IROp::Vec2Unpack16To31:
case IROp::Vec4Unpack8To32:
// Viewed as 16-bit: ABxx -> 0A0B, then shift a zero into the sign place.
if (Overlap(inst.dest, 2, inst.src1, 1)) {
regs_.MapVec2(inst.dest, MIPSMap::DIRTY);
} else {
regs_.Map(inst);
}
if (inst.src1 == inst.dest + 1) {
fp_.USHLL2(16, regs_.FQ(inst.dest), regs_.FD(inst.src1), 15);
} else {
fp_.USHLL(16, regs_.FQ(inst.dest), regs_.FD(inst.src1), 15);
}
break;
case IROp::Vec2Unpack16To32:
CompIR_Generic(inst);
// Just Vec2Unpack16To31, without the shift.
if (Overlap(inst.dest, 2, inst.src1, 1)) {
regs_.MapVec2(inst.dest, MIPSMap::DIRTY);
} else {
regs_.Map(inst);
}
if (inst.src1 == inst.dest + 1) {
fp_.SHLL2(16, regs_.FQ(inst.dest), regs_.FD(inst.src1));
} else {
fp_.SHLL(16, regs_.FQ(inst.dest), regs_.FD(inst.src1));
}
break;
case IROp::Vec4Unpack8To32:
// Viewed as 8-bit: ABCD -> 000A000B000C000D.
if (Overlap(inst.dest, 4, inst.src1, 1)) {
regs_.MapVec4(inst.dest, MIPSMap::DIRTY);
if (inst.dest == inst.src1 + 2) {
fp_.SHLL2(8, regs_.FQ(inst.dest), regs_.FD(inst.src1 & ~3));
} else if (inst.dest != inst.src1) {
fp_.DUP(32, regs_.FQ(inst.dest), regs_.FQ(inst.src1), inst.src1 & 3);
fp_.SHLL(8, regs_.FQ(inst.dest), regs_.FD(inst.dest));
} else {
fp_.SHLL(8, regs_.FQ(inst.dest), regs_.FD(inst.src1));
}
fp_.SHLL(16, regs_.FQ(inst.dest), regs_.FD(inst.dest));
} else {
regs_.Map(inst);
// Two steps: ABCD -> 0A0B0C0D, then to 000A000B000C000D.
fp_.SHLL(8, regs_.FQ(inst.dest), regs_.FD(inst.src1));
fp_.SHLL(16, regs_.FQ(inst.dest), regs_.FD(inst.dest));
}
break;
default: