mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
arm64jit: Implement vector unpacks.
This commit is contained in:
parent
e03ae26d20
commit
c523273d51
3 changed files with 60 additions and 2 deletions
|
@ -3684,6 +3684,12 @@ void ARM64FloatEmitter::USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
|
|||
{
|
||||
USHLL(src_size, Rd, Rn, shift, true);
|
||||
}
|
||||
// Public SHLL entry point: forwards to the four-argument SHLL overload with
// upper = false. No shift operand is taken; only src_size, Rd and Rn are passed on.
void ARM64FloatEmitter::SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
|
||||
SHLL(src_size, Rd, Rn, false);
|
||||
}
|
||||
// Public SHLL2 entry point: forwards to the four-argument SHLL overload with
// upper = true, otherwise identical to the three-argument SHLL wrapper.
void ARM64FloatEmitter::SHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
|
||||
SHLL(src_size, Rd, Rn, true);
|
||||
}
|
||||
void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
SXTL(src_size, Rd, Rn, false);
|
||||
|
@ -3723,6 +3729,11 @@ void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift,
|
|||
EmitShiftImm(upper, 1, imm >> 3, imm & 7, 0x14, Rd, Rn);
|
||||
}
|
||||
|
||||
// Shared implementation behind the public SHLL (upper = false) and SHLL2
// (upper = true) wrappers. Emits a single two-register misc instruction with
// opcode 0b10011.
//   src_size: source element width in bits; asserted to be at most 32.
//   Rd, Rn:   destination and source registers, passed through unchanged.
//   upper:    passed as the first argument to Emit2RegMisc.
void ARM64FloatEmitter::SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper) {
|
||||
// NOTE(review): only the upper bound is checked; values such as 0 or 24 would
// pass the assert. Presumably callers only ever pass 8, 16 or 32 - confirm.
_assert_msg_(src_size <= 32, "%s shift amount cannot be 64", __FUNCTION__);
|
||||
// src_size >> 4 maps 8/16/32 to 0/1/2; presumably this is the element-size field.
Emit2RegMisc(upper, 1, src_size >> 4, 0b10011, Rd, Rn);
|
||||
}
|
||||
|
||||
void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
|
||||
{
|
||||
_assert_msg_(shift > 0, "%s shift amount must be greater than zero!", __FUNCTION__);
|
||||
|
|
|
@ -976,6 +976,9 @@ public:
|
|||
void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
// SHLL/SHLL2 take no shift argument: the shift amount is implicitly equal to src_size.
|
||||
void SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void SHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
|
||||
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
|
@ -1034,6 +1037,7 @@ private:
|
|||
|
||||
void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
|
||||
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
|
||||
void SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
|
||||
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
|
||||
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
|
||||
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
|
||||
|
|
|
@ -713,9 +713,52 @@ void Arm64JitBackend::CompIR_VecPack(IRInst inst) {
|
|||
break;
|
||||
|
||||
// Native handling of Vec2Unpack16To31: widens the 16-bit halves of src1 into
// the two 32-bit lanes of dest using USHLL/USHLL2 with a shift of 15.
case IROp::Vec2Unpack16To31:
|
||||
// NOTE(review): this label also appears further down in the same switch with its
// own implementation. This listing has lost its diff +/- markers, so the line
// below is presumably a removed line from the old code - confirm against upstream.
case IROp::Vec4Unpack8To32:
|
||||
// Viewed as 16-bit: ABxx -> 0A0B, then shift a zero into the sign place.
|
||||
// When src1 overlaps the two-register dest range, map dest as a dirty vec2
// instead of mapping the instruction's operands individually.
if (Overlap(inst.dest, 2, inst.src1, 1)) {
|
||||
regs_.MapVec2(inst.dest, MIPSMap::DIRTY);
|
||||
} else {
|
||||
regs_.Map(inst);
|
||||
}
|
||||
// src1 being the register right after dest selects the "2" (upper) form.
if (inst.src1 == inst.dest + 1) {
|
||||
fp_.USHLL2(16, regs_.FQ(inst.dest), regs_.FD(inst.src1), 15);
|
||||
} else {
|
||||
fp_.USHLL(16, regs_.FQ(inst.dest), regs_.FD(inst.src1), 15);
|
||||
}
|
||||
break;
|
||||
|
||||
// Native handling of Vec2Unpack16To32: same register mapping as the
// Vec2Unpack16To31 case, but emitted with SHLL/SHLL2 (16-bit source size)
// where that case uses USHLL/USHLL2 with a shift of 15.
case IROp::Vec2Unpack16To32:
|
||||
// NOTE(review): this listing has lost its diff +/- markers. The generic
// fallback call below is followed by a full native implementation, so it is
// presumably a removed line from the old code - confirm against upstream.
CompIR_Generic(inst);
|
||||
// Just Vec2Unpack16To31, without the shift.
|
||||
// When src1 overlaps the two-register dest range, map dest as a dirty vec2
// instead of mapping the instruction's operands individually.
if (Overlap(inst.dest, 2, inst.src1, 1)) {
|
||||
regs_.MapVec2(inst.dest, MIPSMap::DIRTY);
|
||||
} else {
|
||||
regs_.Map(inst);
|
||||
}
|
||||
// src1 being the register right after dest selects the "2" (upper) form.
if (inst.src1 == inst.dest + 1) {
|
||||
fp_.SHLL2(16, regs_.FQ(inst.dest), regs_.FD(inst.src1));
|
||||
} else {
|
||||
fp_.SHLL(16, regs_.FQ(inst.dest), regs_.FD(inst.src1));
|
||||
}
|
||||
break;
|
||||
|
||||
// Native handling of Vec4Unpack8To32: two widening steps, SHLL with an 8-bit
// source size followed by SHLL with a 16-bit source size.
case IROp::Vec4Unpack8To32:
|
||||
// Viewed as 8-bit: ABCD -> 000A000B000C000D.
|
||||
// Overlapping case: src1 lies within the four-register dest range, so dest is
// mapped as a dirty vec4 and the first widening step depends on where src1 sits.
if (Overlap(inst.dest, 4, inst.src1, 1)) {
|
||||
regs_.MapVec4(inst.dest, MIPSMap::DIRTY);
|
||||
// NOTE(review): the Vec2 cases in this switch test inst.src1 == inst.dest + 1,
// while this compares inst.dest == inst.src1 + 2 (operands the other way round).
// Confirm which register is expected to be the higher-numbered one here.
if (inst.dest == inst.src1 + 2) {
|
||||
fp_.SHLL2(8, regs_.FQ(inst.dest), regs_.FD(inst.src1 & ~3));
|
||||
} else if (inst.dest != inst.src1) {
|
||||
// Copy the 32-bit lane selected by (src1 & 3) across dest, then widen from dest.
fp_.DUP(32, regs_.FQ(inst.dest), regs_.FQ(inst.src1), inst.src1 & 3);
|
||||
fp_.SHLL(8, regs_.FQ(inst.dest), regs_.FD(inst.dest));
|
||||
} else {
|
||||
// dest == src1: widen directly from src1.
fp_.SHLL(8, regs_.FQ(inst.dest), regs_.FD(inst.src1));
|
||||
}
|
||||
// Second widening step, shared by all three overlapping sub-cases.
fp_.SHLL(16, regs_.FQ(inst.dest), regs_.FD(inst.dest));
|
||||
} else {
|
||||
regs_.Map(inst);
|
||||
// Two steps: ABCD -> 0A0B0C0D, then to 000A000B000C000D.
|
||||
fp_.SHLL(8, regs_.FQ(inst.dest), regs_.FD(inst.src1));
|
||||
fp_.SHLL(16, regs_.FQ(inst.dest), regs_.FD(inst.dest));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
Loading…
Add table
Reference in a new issue