mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
arm64jit: Implement convert/int conversions.
This commit is contained in:
parent
b698c673a8
commit
7a5cdafdf3
4 changed files with 140 additions and 34 deletions
|
@ -2234,6 +2234,45 @@ void ARM64FloatEmitter::FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round) {
|
|||
EmitConvertScalarToInt(Rd, Rn, round, true);
|
||||
}
|
||||
|
||||
// Emits FCVTZS with a fixed-point scale: converts the float in Rn to a signed
// integer, rounding toward zero, with `scale` fractional bits in the result.
//
// Rd selects the form:
//  - FP scalar Rd: the SIMD scalar shift-by-immediate form (result stays in an FP register).
//  - Otherwise (GPR Rd): the float-to-GPR fixed-point conversion form.
void ARM64FloatEmitter::FCVTZS(ARM64Reg Rd, ARM64Reg Rn, int scale) {
	if (IsScalar(Rd)) {
		// immh:immb holds (2 * element bits) - fractional bits.
		int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		Write32((1 << 30) | (0 << 29) | (0x1F << 24) | (imm << 16) | (0x1F << 11) | (1 << 10) | (Rn << 5) | Rd);
	} else {
		// sf follows the integer destination width.
		bool sf = Is64Bit(Rd);
		// The precision comes from the float source; Rd is a GPR in this branch.
		u32 type = IsDouble(Rn) ? 1 : 0;
		int rmode = 3;
		int opcode = 0;
		// Strip the register class bits so only the 5-bit index lands in the opcode.
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		// The scale field is encoded as 64 - fractional bits, as in SCVTF/UCVTF.
		Write32((sf << 31) | (0 << 29) | (0x1E << 24) | (type << 22) | (rmode << 19) | (opcode << 16) | ((64 - scale) << 10) | (Rn << 5) | Rd);
	}
}
|
||||
|
||||
// Emits FCVTZU with a fixed-point scale: converts the float in Rn to an unsigned
// integer, rounding toward zero, with `scale` fractional bits in the result.
//
// Rd selects the form:
//  - FP scalar Rd: the SIMD scalar shift-by-immediate form (result stays in an FP register).
//  - Otherwise (GPR Rd): the float-to-GPR fixed-point conversion form.
void ARM64FloatEmitter::FCVTZU(ARM64Reg Rd, ARM64Reg Rn, int scale) {
	if (IsScalar(Rd)) {
		// immh:immb holds (2 * element bits) - fractional bits.
		int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		// Bit 29 set is the only difference from the signed form.
		Write32((1 << 30) | (1 << 29) | (0x1F << 24) | (imm << 16) | (0x1F << 11) | (1 << 10) | (Rn << 5) | Rd);
	} else {
		// sf follows the integer destination width.
		bool sf = Is64Bit(Rd);
		// The precision comes from the float source; Rd is a GPR in this branch.
		u32 type = IsDouble(Rn) ? 1 : 0;
		int rmode = 3;
		int opcode = 1;
		// Strip the register class bits so only the 5-bit index lands in the opcode.
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		// The scale field is encoded as 64 - fractional bits, as in SCVTF/UCVTF.
		Write32((sf << 31) | (0 << 29) | (0x1E << 24) | (type << 22) | (rmode << 19) | (opcode << 16) | ((64 - scale) << 10) | (Rn << 5) | Rd);
	}
}
|
||||
|
||||
void ARM64FloatEmitter::EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
Rd = DecodeReg(Rd);
|
||||
|
@ -3098,6 +3137,14 @@ void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn)
|
|||
{
|
||||
Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1B, Rd, Rn);
|
||||
}
|
||||
// Vector FCVTZS with a scale: the shift immediate is (2 * size) - scale,
// split into its upper bits and its low three bits for EmitShiftImm.
void ARM64FloatEmitter::FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale) {
	const int shiftImm = size * 2 - scale;
	const int upperBits = shiftImm >> 3;
	const int lowerBits = shiftImm & 7;
	// Second argument is false here and true in FCVTZU (presumably the unsigned bit).
	EmitShiftImm(IsQuad(Rd), false, upperBits, lowerBits, 0x1F, Rd, Rn);
}
|
||||
// Vector FCVTZU with a scale: the shift immediate is (2 * size) - scale,
// split into its upper bits and its low three bits for EmitShiftImm.
void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale) {
	const int shiftImm = size * 2 - scale;
	const int upperBits = shiftImm >> 3;
	const int lowerBits = shiftImm & 7;
	// Second argument is true here and false in FCVTZS (presumably the unsigned bit).
	EmitShiftImm(IsQuad(Rd), true, upperBits, lowerBits, 0x1F, Rd, Rn);
}
|
||||
void ARM64FloatEmitter::FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
|
||||
{
|
||||
EmitThreeSame(1, size >> 6, 0x1F, Rd, Rn, Rm);
|
||||
|
@ -3539,22 +3586,38 @@ void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn)
|
|||
|
||||
// Emits SCVTF with a fixed-point scale: converts the signed integer in Rn,
// treated as having `scale` fractional bits, to a float in Rd.
//
// Rn selects the form:
//  - FP scalar Rn: the SIMD scalar shift-by-immediate form (integer already in an FP register).
//  - Otherwise (GPR Rn): the GPR-to-float fixed-point conversion via EmitConversion2.
// Exactly one instruction is emitted per call.
void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)
{
	if (IsScalar(Rn)) {
		// immh:immb holds (2 * element bits) - fractional bits.
		int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		Write32((1 << 30) | (0 << 29) | (0x1F << 24) | (imm << 16) | (0x1C << 11) | (1 << 10) | (Rn << 5) | Rd);
	} else {
		// sf follows the integer source width, type follows the float destination.
		bool sf = Is64Bit(Rn);
		u32 type = IsDouble(Rd) ? 1 : 0;

		// The scale field is encoded as 64 - fractional bits.
		EmitConversion2(sf, 0, false, type, 0, 2, 64 - scale, Rd, Rn);
	}
}
|
||||
|
||||
// Emits UCVTF with a fixed-point scale: converts the unsigned integer in Rn,
// treated as having `scale` fractional bits, to a float in Rd.
//
// Rn selects the form:
//  - FP scalar Rn: the SIMD scalar shift-by-immediate form (integer already in an FP register).
//  - Otherwise (GPR Rn): the GPR-to-float fixed-point conversion via EmitConversion2.
// Exactly one instruction is emitted per call.
void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)
{
	if (IsScalar(Rn)) {
		// immh:immb holds (2 * element bits) - fractional bits.
		int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		// Bit 29 set is the only difference from the signed form.
		Write32((1 << 30) | (1 << 29) | (0x1F << 24) | (imm << 16) | (0x1C << 11) | (1 << 10) | (Rn << 5) | Rd);
	} else {
		// sf follows the integer source width, type follows the float destination.
		bool sf = Is64Bit(Rn);
		u32 type = IsDouble(Rd) ? 1 : 0;

		// The scale field is encoded as 64 - fractional bits.
		EmitConversion2(sf, 0, false, type, 0, 3, 64 - scale, Rd, Rn);
	}
}
|
||||
|
||||
void ARM64FloatEmitter::FCMP(ARM64Reg Rn, ARM64Reg Rm)
|
||||
|
|
|
@ -866,6 +866,8 @@ public:
|
|||
void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
// Scaled vector variants of the two conversions above. The definitions encode
// scale as an immediate of size * 2 - scale (presumably the fractional bit count).
void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
|
||||
void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
|
||||
void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
|
||||
void FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
|
@ -931,6 +933,8 @@ public:
|
|||
// and one that outputs to a scalar fp register.
|
||||
void FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
|
||||
void FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
|
||||
// Scaled convert float to int. The definitions pick the encoding based on
// whether Rd is an FP scalar register or not (NOTE(review): scale is presumably
// the number of fractional bits - confirm against callers).
void FCVTZS(ARM64Reg Rd, ARM64Reg Rn, int scale);
|
||||
void FCVTZU(ARM64Reg Rd, ARM64Reg Rn, int scale);
|
||||
|
||||
// Scalar convert int to float. No rounding mode specifier necessary.
|
||||
void SCVTF(ARM64Reg Rd, ARM64Reg Rn);
|
||||
|
|
|
@ -374,7 +374,15 @@ void Arm64JitBackend::CompIR_FCvt(IRInst inst) {
|
|||
|
||||
switch (inst.op) {
|
||||
case IROp::FCvtWS:
|
||||
CompIR_Generic(inst);
|
||||
// TODO: Unfortunately, we don't currently have the hasSetRounding flag, could skip lookup.
|
||||
regs_.Map(inst);
|
||||
fp_.FMOV(S0, regs_.F(inst.src1));
|
||||
|
||||
MOVP2R(SCRATCH1_64, ¤tRoundingFunc_);
|
||||
LDR(INDEX_UNSIGNED, SCRATCH1_64, SCRATCH1_64, 0);
|
||||
BLR(SCRATCH1_64);
|
||||
|
||||
fp_.FMOV(regs_.F(inst.dest), S0);
|
||||
break;
|
||||
|
||||
case IROp::FCvtSW:
|
||||
|
@ -383,8 +391,40 @@ void Arm64JitBackend::CompIR_FCvt(IRInst inst) {
|
|||
break;
|
||||
|
||||
case IROp::FCvtScaledWS:
|
||||
if (IRRoundMode(inst.src2 >> 6) == IRRoundMode::CAST_1) {
|
||||
regs_.Map(inst);
|
||||
// NAN would convert to zero, so detect it specifically and replace with 0x7FFFFFFF.
|
||||
fp_.MVNI(32, EncodeRegToDouble(SCRATCHF2), 0x80, 24);
|
||||
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src1));
|
||||
fp_.FCVTZS(regs_.F(inst.dest), regs_.F(inst.src1), inst.src2 & 0x1F);
|
||||
fp_.FCSEL(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF2, CC_VC);
|
||||
} else {
|
||||
RoundingMode rm;
|
||||
switch (IRRoundMode(inst.src2 >> 6)) {
|
||||
case IRRoundMode::RINT_0: rm = RoundingMode::ROUND_N; break;
|
||||
case IRRoundMode::CEIL_2: rm = RoundingMode::ROUND_P; break;
|
||||
case IRRoundMode::FLOOR_3: rm = RoundingMode::ROUND_M; break;
|
||||
default:
|
||||
_assert_msg_(false, "Invalid rounding mode for FCvtScaledWS");
|
||||
}
|
||||
|
||||
// Unfortunately, only Z has a direct scaled instruction.
|
||||
// We'll have to multiply.
|
||||
regs_.Map(inst);
|
||||
fp_.MOVI2F(SCRATCHF1, (float)(1UL << (inst.src2 & 0x1F)), SCRATCH1);
|
||||
// This is for the NAN result.
|
||||
fp_.MVNI(32, EncodeRegToDouble(SCRATCHF2), 0x80, 24);
|
||||
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src1));
|
||||
fp_.FMUL(regs_.F(inst.dest), regs_.F(inst.src1), SCRATCHF1);
|
||||
fp_.FCVTS(regs_.F(inst.dest), regs_.F(inst.dest), rm);
|
||||
fp_.FCSEL(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF2, CC_VC);
|
||||
}
|
||||
break;
|
||||
|
||||
case IROp::FCvtScaledSW:
|
||||
CompIR_Generic(inst);
|
||||
// TODO: This is probably proceeded by a GPR transfer, might be ideal to combine.
|
||||
regs_.Map(inst);
|
||||
fp_.SCVTF(regs_.F(inst.dest), regs_.F(inst.src1), inst.src2 & 0x1F);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
|
@ -233,30 +233,29 @@ void RiscVJitBackend::CompIR_FCvt(IRInst inst) {
|
|||
break;
|
||||
|
||||
case IROp::FCvtScaledWS:
|
||||
if (cpu_info.RiscV_D) {
|
||||
Round rm = Round::NEAREST_EVEN;
|
||||
switch (inst.src2 >> 6) {
|
||||
case 0: rm = Round::NEAREST_EVEN; break;
|
||||
case 1: rm = Round::TOZERO; break;
|
||||
case 2: rm = Round::UP; break;
|
||||
case 3: rm = Round::DOWN; break;
|
||||
}
|
||||
|
||||
tempReg = regs_.MapWithFPRTemp(inst);
|
||||
// Prepare the double src1 and the multiplier.
|
||||
FCVT(FConv::D, FConv::S, regs_.F(inst.dest), regs_.F(inst.src1));
|
||||
LI(SCRATCH1, 1UL << (inst.src2 & 0x1F));
|
||||
FCVT(FConv::D, FConv::WU, tempReg, SCRATCH1, rm);
|
||||
|
||||
FMUL(64, regs_.F(inst.dest), regs_.F(inst.dest), tempReg, rm);
|
||||
// NAN and clamping should all be correct.
|
||||
FCVT(FConv::W, FConv::D, SCRATCH1, regs_.F(inst.dest), rm);
|
||||
// TODO: Could combine with a transfer, often is one...
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
|
||||
} else {
|
||||
CompIR_Generic(inst);
|
||||
{
|
||||
Round rm = Round::NEAREST_EVEN;
|
||||
switch (inst.src2 >> 6) {
|
||||
case 0: rm = Round::NEAREST_EVEN; break;
|
||||
case 1: rm = Round::TOZERO; break;
|
||||
case 2: rm = Round::UP; break;
|
||||
case 3: rm = Round::DOWN; break;
|
||||
default:
|
||||
_assert_msg_(false, "Invalid rounding mode for FCvtScaledWS");
|
||||
}
|
||||
|
||||
tempReg = regs_.MapWithFPRTemp(inst);
|
||||
// Prepare the multiplier.
|
||||
LI(SCRATCH1, 1UL << (inst.src2 & 0x1F));
|
||||
FCVT(FConv::S, FConv::WU, tempReg, SCRATCH1, rm);
|
||||
|
||||
FMUL(32, regs_.F(inst.dest), regs_.F(inst.src1), tempReg, rm);
|
||||
// NAN and clamping should all be correct.
|
||||
FCVT(FConv::W, FConv::S, SCRATCH1, regs_.F(inst.dest), rm);
|
||||
// TODO: Could combine with a transfer, often is one...
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
|
||||
break;
|
||||
}
|
||||
|
||||
case IROp::FCvtScaledSW:
|
||||
// TODO: This is probably proceeded by a GPR transfer, might be ideal to combine.
|
||||
|
|
Loading…
Add table
Reference in a new issue