arm64jit: Implement convert/int conversions.

This commit is contained in:
Unknown W. Brackets 2023-09-07 21:28:22 -07:00
parent b698c673a8
commit 7a5cdafdf3
4 changed files with 140 additions and 34 deletions

View file

@ -2234,6 +2234,45 @@ void ARM64FloatEmitter::FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round) {
EmitConvertScalarToInt(Rd, Rn, round, true);
}
// Emits FCVTZS: floating point to signed fixed-point, rounding toward zero.
//   Rd    - destination. A scalar FP register selects the SIMD scalar encoding;
//           any other register is encoded through the general-register form.
//   Rn    - floating point source register.
//   scale - number of fractional bits in the fixed-point result.
void ARM64FloatEmitter::FCVTZS(ARM64Reg Rd, ARM64Reg Rn, int scale) {
	if (IsScalar(Rd)) {
		// SIMD scalar form: the 7-bit shift field holds (2 * element size) - fbits.
		const int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		Write32((1 << 30) | (0 << 29) | (0x1F << 24) | (imm << 16) | (0x1F << 11) | (1 << 10) | (Rn << 5) | Rd);
	} else {
		// Read the width/type predicates before the registers are reduced to their 5-bit numbers.
		const bool sf = Is64Bit(Rd);
		// The type field describes the floating point source, not the integer destination.
		const u32 type = IsDouble(Rn) ? 1 : 0;
		const int rmode = 3;
		const int opcode = 0;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		// The general-register form stores the fractional bit count as 64 - fbits,
		// matching what SCVTF/UCVTF pass to EmitConversion2.
		Write32((sf << 31) | (0 << 29) | (0x1E << 24) | (type << 22) | (rmode << 19) | (opcode << 16) | ((64 - scale) << 10) | (Rn << 5) | Rd);
	}
}
// Emits FCVTZU: floating point to unsigned fixed-point, rounding toward zero.
//   Rd    - destination. A scalar FP register selects the SIMD scalar encoding;
//           any other register is encoded through the general-register form.
//   Rn    - floating point source register.
//   scale - number of fractional bits in the fixed-point result.
void ARM64FloatEmitter::FCVTZU(ARM64Reg Rd, ARM64Reg Rn, int scale) {
	if (IsScalar(Rd)) {
		// SIMD scalar form: the 7-bit shift field holds (2 * element size) - fbits.
		const int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		// Bit 29 set distinguishes this from the signed variant.
		Write32((1 << 30) | (1 << 29) | (0x1F << 24) | (imm << 16) | (0x1F << 11) | (1 << 10) | (Rn << 5) | Rd);
	} else {
		// Read the width/type predicates before the registers are reduced to their 5-bit numbers.
		const bool sf = Is64Bit(Rd);
		// The type field describes the floating point source, not the integer destination.
		const u32 type = IsDouble(Rn) ? 1 : 0;
		const int rmode = 3;
		const int opcode = 1;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		// The general-register form stores the fractional bit count as 64 - fbits,
		// matching what SCVTF/UCVTF pass to EmitConversion2.
		Write32((sf << 31) | (0 << 29) | (0x1E << 24) | (type << 22) | (rmode << 19) | (opcode << 16) | ((64 - scale) << 10) | (Rn << 5) | Rd);
	}
}
void ARM64FloatEmitter::EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn)
{
Rd = DecodeReg(Rd);
@ -3098,6 +3137,14 @@ void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1B, Rd, Rn);
}
// Vector float -> signed fixed-point conversion with `scale` fractional bits.
void ARM64FloatEmitter::FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale) {
	// The shift field is (2 * size) - scale; EmitShiftImm takes it split
	// into the part above the low three bits and the low three bits.
	const int shiftField = size * 2 - scale;
	const int upperBits = shiftField >> 3;
	const int lowerBits = shiftField & 7;
	EmitShiftImm(IsQuad(Rd), false, upperBits, lowerBits, 0x1F, Rd, Rn);
}
// Vector float -> unsigned fixed-point conversion with `scale` fractional bits.
void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale) {
	// The shift field is (2 * size) - scale; EmitShiftImm takes it split
	// into the part above the low three bits and the low three bits.
	const int shiftField = size * 2 - scale;
	const int upperBits = shiftField >> 3;
	const int lowerBits = shiftField & 7;
	// Same as the signed variant except for the second argument being true.
	EmitShiftImm(IsQuad(Rd), true, upperBits, lowerBits, 0x1F, Rd, Rn);
}
void ARM64FloatEmitter::FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
EmitThreeSame(1, size >> 6, 0x1F, Rd, Rn, Rm);
@ -3539,22 +3586,38 @@ void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn)
// Emits SCVTF: signed fixed-point to floating point.
//   Rd    - floating point destination register.
//   Rn    - source. A scalar FP register selects the SIMD scalar encoding;
//           otherwise the conversion is emitted through EmitConversion2.
//   scale - number of fractional bits in the fixed-point source.
void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)
{
	if (IsScalar(Rn)) {
		// SIMD scalar form: the 7-bit shift field holds (2 * element size) - fbits.
		const int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		// Exactly one instruction is emitted here; the registers are already
		// decoded, so they must not also be routed through EmitConversion2.
		Write32((1 << 30) | (0 << 29) | (0x1F << 24) | (imm << 16) | (0x1C << 11) | (1 << 10) | (Rn << 5) | Rd);
	} else {
		const bool sf = Is64Bit(Rn);
		const u32 type = IsDouble(Rd) ? 1 : 0;

		EmitConversion2(sf, 0, false, type, 0, 2, 64 - scale, Rd, Rn);
	}
}
// Emits UCVTF: unsigned fixed-point to floating point.
//   Rd    - floating point destination register.
//   Rn    - source. A scalar FP register selects the SIMD scalar encoding;
//           otherwise the conversion is emitted through EmitConversion2.
//   scale - number of fractional bits in the fixed-point source.
void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)
{
	if (IsScalar(Rn)) {
		// SIMD scalar form: the 7-bit shift field holds (2 * element size) - fbits.
		const int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		// Exactly one instruction is emitted here; the registers are already
		// decoded, so they must not also be routed through EmitConversion2.
		// Bit 29 set distinguishes this from the signed variant.
		Write32((1 << 30) | (1 << 29) | (0x1F << 24) | (imm << 16) | (0x1C << 11) | (1 << 10) | (Rn << 5) | Rd);
	} else {
		const bool sf = Is64Bit(Rn);
		const u32 type = IsDouble(Rd) ? 1 : 0;

		EmitConversion2(sf, 0, false, type, 0, 3, 64 - scale, Rd, Rn);
	}
}
void ARM64FloatEmitter::FCMP(ARM64Reg Rn, ARM64Reg Rm)

View file

@ -866,6 +866,8 @@ public:
void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
// Vector float to fixed-point conversions (signed / unsigned).
// scale is subtracted from twice the element size to form the shift field,
// i.e. it is the number of fractional bits in the result.
// NOTE(review): size is presumably the element width in bits (32 or 64) - confirm against callers.
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn);
@ -931,6 +933,8 @@ public:
// and one that outputs to a scalar fp register.
void FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
void FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
// Scaled float to fixed-point conversions (signed / unsigned); scale is the number of fractional bits.
// A scalar fp Rd selects the SIMD scalar encoding, any other Rd takes the general-register path.
void FCVTZS(ARM64Reg Rd, ARM64Reg Rn, int scale);
void FCVTZU(ARM64Reg Rd, ARM64Reg Rn, int scale);
// Scalar convert int to float. No rounding mode specifier necessary.
void SCVTF(ARM64Reg Rd, ARM64Reg Rn);

View file

@ -374,7 +374,15 @@ void Arm64JitBackend::CompIR_FCvt(IRInst inst) {
switch (inst.op) {
case IROp::FCvtWS:
CompIR_Generic(inst);
// TODO: Unfortunately, we don't currently have the hasSetRounding flag, could skip lookup.
regs_.Map(inst);
fp_.FMOV(S0, regs_.F(inst.src1));
MOVP2R(SCRATCH1_64, &currentRoundingFunc_);
LDR(INDEX_UNSIGNED, SCRATCH1_64, SCRATCH1_64, 0);
BLR(SCRATCH1_64);
fp_.FMOV(regs_.F(inst.dest), S0);
break;
case IROp::FCvtSW:
@ -383,8 +391,40 @@ void Arm64JitBackend::CompIR_FCvt(IRInst inst) {
break;
case IROp::FCvtScaledWS:
if (IRRoundMode(inst.src2 >> 6) == IRRoundMode::CAST_1) {
regs_.Map(inst);
// NAN would convert to zero, so detect it specifically and replace with 0x7FFFFFFF.
fp_.MVNI(32, EncodeRegToDouble(SCRATCHF2), 0x80, 24);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src1));
fp_.FCVTZS(regs_.F(inst.dest), regs_.F(inst.src1), inst.src2 & 0x1F);
fp_.FCSEL(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF2, CC_VC);
} else {
RoundingMode rm;
switch (IRRoundMode(inst.src2 >> 6)) {
case IRRoundMode::RINT_0: rm = RoundingMode::ROUND_N; break;
case IRRoundMode::CEIL_2: rm = RoundingMode::ROUND_P; break;
case IRRoundMode::FLOOR_3: rm = RoundingMode::ROUND_M; break;
default:
_assert_msg_(false, "Invalid rounding mode for FCvtScaledWS");
}
// Unfortunately, only Z has a direct scaled instruction.
// We'll have to multiply.
regs_.Map(inst);
fp_.MOVI2F(SCRATCHF1, (float)(1UL << (inst.src2 & 0x1F)), SCRATCH1);
// This is for the NAN result.
fp_.MVNI(32, EncodeRegToDouble(SCRATCHF2), 0x80, 24);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src1));
fp_.FMUL(regs_.F(inst.dest), regs_.F(inst.src1), SCRATCHF1);
fp_.FCVTS(regs_.F(inst.dest), regs_.F(inst.dest), rm);
fp_.FCSEL(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF2, CC_VC);
}
break;
case IROp::FCvtScaledSW:
CompIR_Generic(inst);
// TODO: This is probably preceded by a GPR transfer, might be ideal to combine.
regs_.Map(inst);
fp_.SCVTF(regs_.F(inst.dest), regs_.F(inst.src1), inst.src2 & 0x1F);
break;
default:

View file

@ -233,30 +233,29 @@ void RiscVJitBackend::CompIR_FCvt(IRInst inst) {
break;
case IROp::FCvtScaledWS:
if (cpu_info.RiscV_D) {
Round rm = Round::NEAREST_EVEN;
switch (inst.src2 >> 6) {
case 0: rm = Round::NEAREST_EVEN; break;
case 1: rm = Round::TOZERO; break;
case 2: rm = Round::UP; break;
case 3: rm = Round::DOWN; break;
}
tempReg = regs_.MapWithFPRTemp(inst);
// Prepare the double src1 and the multiplier.
FCVT(FConv::D, FConv::S, regs_.F(inst.dest), regs_.F(inst.src1));
LI(SCRATCH1, 1UL << (inst.src2 & 0x1F));
FCVT(FConv::D, FConv::WU, tempReg, SCRATCH1, rm);
FMUL(64, regs_.F(inst.dest), regs_.F(inst.dest), tempReg, rm);
// NAN and clamping should all be correct.
FCVT(FConv::W, FConv::D, SCRATCH1, regs_.F(inst.dest), rm);
// TODO: Could combine with a transfer, often is one...
FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
} else {
CompIR_Generic(inst);
{
Round rm = Round::NEAREST_EVEN;
switch (inst.src2 >> 6) {
case 0: rm = Round::NEAREST_EVEN; break;
case 1: rm = Round::TOZERO; break;
case 2: rm = Round::UP; break;
case 3: rm = Round::DOWN; break;
default:
_assert_msg_(false, "Invalid rounding mode for FCvtScaledWS");
}
tempReg = regs_.MapWithFPRTemp(inst);
// Prepare the multiplier.
LI(SCRATCH1, 1UL << (inst.src2 & 0x1F));
FCVT(FConv::S, FConv::WU, tempReg, SCRATCH1, rm);
FMUL(32, regs_.F(inst.dest), regs_.F(inst.src1), tempReg, rm);
// NAN and clamping should all be correct.
FCVT(FConv::W, FConv::S, SCRATCH1, regs_.F(inst.dest), rm);
// TODO: Could combine with a transfer, often is one...
FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
break;
}
case IROp::FCvtScaledSW:
// TODO: This is probably preceded by a GPR transfer, might be ideal to combine.