mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
riscv: Implement Zfa encoding.
Not yet enabled/detected.
This commit is contained in:
parent
ecb7f93418
commit
15cb782f85
6 changed files with 248 additions and 47 deletions
|
@ -75,6 +75,11 @@ static inline bool SupportsFloatHalf(bool allowMin = false) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Reports whether the Zfa instructions (FLI, FMINM/FMAXM, FROUND) may be emitted.
// Currently always false: there's no CPU detection hooked up for it yet.
static inline bool SupportsFloatExtra() {
	// TODO: cpu_info.RiscV_Zfa
	constexpr bool zfaAvailable = false;
	return zfaAvailable;
}
|
||||
|
||||
enum class Opcode32 {
|
||||
// Note: invalid, just used for FixupBranch.
|
||||
ZERO = 0b0000000,
|
||||
|
@ -156,6 +161,8 @@ enum class Funct3 {
|
|||
|
||||
FMIN = 0b000,
|
||||
FMAX = 0b001,
|
||||
FMINM = 0b010,
|
||||
FMAXM = 0b011,
|
||||
|
||||
FMV = 0b000,
|
||||
FCLASS = 0b001,
|
||||
|
@ -2216,6 +2223,162 @@ void RiscVEmitter::FCLASS(int bits, RiscVReg rd, RiscVReg rs1) {
|
|||
Write32(EncodeR(Opcode32::OP_FP, rd, Funct3::FCLASS, rs1, F0, BitsToFunct2(bits), Funct5::FMV_TOX));
|
||||
}
|
||||
|
||||
// Single-precision bit patterns for the 32 constants FLI can load.
// The position in this table is the immediate that EncodeFLImm() hands back.
static const uint32_t FLIvalues[32] = {
	/*  0 */ 0xBF800000, // -1.0
	/*  1 */ 0x00800000, // FLT_MIN (special: the minimum normal depends on the float width)
	/*  2 */ 0x37800000, // 2^-16
	/*  3 */ 0x38000000, // 2^-15
	/*  4 */ 0x3B800000, // 2^-8
	/*  5 */ 0x3C000000, // 2^-7
	/*  6 */ 0x3D800000, // 0.0625
	/*  7 */ 0x3E000000, // 0.125
	/*  8 */ 0x3E800000, // 0.25
	/*  9 */ 0x3EA00000, // 0.3125
	/* 10 */ 0x3EC00000, // 0.375
	/* 11 */ 0x3EE00000, // 0.4375
	/* 12 */ 0x3F000000, // 0.5
	/* 13 */ 0x3F200000, // 0.625
	/* 14 */ 0x3F400000, // 0.75
	/* 15 */ 0x3F600000, // 0.875
	/* 16 */ 0x3F800000, // 1.0
	/* 17 */ 0x3FA00000, // 1.25
	/* 18 */ 0x3FC00000, // 1.5
	/* 19 */ 0x3FE00000, // 1.75
	/* 20 */ 0x40000000, // 2.0
	/* 21 */ 0x40200000, // 2.5
	/* 22 */ 0x40400000, // 3.0
	/* 23 */ 0x40800000, // 4.0
	/* 24 */ 0x41000000, // 8.0
	/* 25 */ 0x41800000, // 16.0
	/* 26 */ 0x43000000, // 128.0
	/* 27 */ 0x43800000, // 256.0
	/* 28 */ 0x47000000, // 2^15
	/* 29 */ 0x47800000, // 2^16
	/* 30 */ 0x7F800000, // +infinity
	/* 31 */ 0x7FC00000, // quiet NaN
};
|
||||
|
||||
// Finds the FLI immediate for a constant, returned as the register-slot value to encode.
// bits is the float width (16, 32, or 64) and v is the constant to load.
// Returns INVALID_REG when FLI can't produce the constant at that width.
static RiscVReg EncodeFLImm(int bits, double v) {
	// The table stores single-precision patterns, so search with v narrowed to float.
	float f = (float)v;
	int index = -1;
	for (size_t i = 0; i < ARRAY_SIZE(FLIvalues); ++i) {
		if (memcmp(&f, &FLIvalues[i], sizeof(float)) == 0) {
			index = (int)i;
			break;
		}
	}

	// Narrowing may have rounded v (or overflowed it to infinity.)  For a 64-bit load that
	// would silently substitute a different constant, so require an exact round trip.
	// Compared bitwise so that NAN can still match itself.
	if (bits == 64 && index != -1) {
		double widened = (double)f;
		if (memcmp(&widened, &v, sizeof(double)) != 0)
			index = -1;
	}

	// Index 1 (the minimum normal) is width-specific, so it's resolved separately below.
	// For 16-bit, 2/3 are subnormal and 29 is not possible.  Just avoid for now.
	if (index != -1 && index != 1 && (bits > 16 || (index != 2 && index != 3 && index != 29)))
		return (RiscVReg)index;

	if (bits == 64) {
		// Bit pattern of the smallest positive normal double.
		uint64_t dmin = 0x0010000000000000ULL;
		if (memcmp(&v, &dmin, 8) == 0)
			return F1;
	} else if (bits == 32 && index == 1) {
		return F1;
	} else if (bits == 16) {
		// pow(2, -14) as a double: the 16-bit counterpart of the minimum normal.
		uint64_t hmin = 0x3F10000000000000ULL;
		if (memcmp(&v, &hmin, 8) == 0)
			return F1;
	}

	return INVALID_REG;
}
|
||||
|
||||
bool RiscVEmitter::CanFLI(int bits, double v) const {
|
||||
if (!SupportsFloatExtra())
|
||||
return false;
|
||||
if (bits == 16 && !SupportsFloatHalf())
|
||||
return false;
|
||||
if (bits > FloatBitsSupported())
|
||||
return false;
|
||||
return EncodeFLImm(bits, v) != INVALID_REG;
|
||||
}
|
||||
|
||||
// Same check as the float overload, but takes the constant as raw single-precision bits.
bool RiscVEmitter::CanFLI(int bits, uint32_t pattern) const {
	static_assert(sizeof(float) == sizeof(uint32_t), "pattern must be float-sized");

	// Copy the bits rather than cast, so the exact pattern is preserved.
	float asFloat;
	memcpy(&asFloat, &pattern, sizeof(asFloat));
	return CanFLI(bits, asFloat);
}
|
||||
|
||||
void RiscVEmitter::FLI(int bits, RiscVReg rd, double v) {
|
||||
_assert_msg_(SupportsFloatExtra(), "%s cannot be used without Zfa", __func__);
|
||||
_assert_msg_(bits <= FloatBitsSupported(), "FLI cannot be used for %d bits, only %d/%d supported", bits, BitsSupported(), FloatBitsSupported());
|
||||
_assert_msg_(IsFPR(rd), "%s rd of wrong type", __func__);
|
||||
|
||||
RiscVReg imm = EncodeFLImm(bits, v);
|
||||
_assert_msg_(imm != INVALID_REG, "FLI with unsupported constant %f for %d bits", v, bits);
|
||||
Write32(EncodeR(Opcode32::OP_FP, rd, Funct3::FMV, imm, F1, BitsToFunct2(bits, false), Funct5::FMV_FROMX));
|
||||
}
|
||||
|
||||
// Emits FLI for a constant supplied as raw single-precision bits.
void RiscVEmitter::FLI(int bits, RiscVReg rd, uint32_t pattern) {
	static_assert(sizeof(float) == sizeof(uint32_t), "pattern must be float-sized");

	// Copy the bits into a float and defer to the value-based overload.
	float asFloat;
	memcpy(&asFloat, &pattern, sizeof(asFloat));
	FLI(bits, rd, asFloat);
}
|
||||
|
||||
void RiscVEmitter::FMINM(int bits, RiscVReg rd, RiscVReg rs1, RiscVReg rs2) {
|
||||
_assert_msg_(SupportsFloatExtra(), "%s cannot be used without Zfa", __func__);
|
||||
Write32(EncodeFR(Opcode32::OP_FP, rd, Funct3::FMINM, rs1, rs2, BitsToFunct2(bits), Funct5::FMINMAX));
|
||||
}
|
||||
|
||||
void RiscVEmitter::FMAXM(int bits, RiscVReg rd, RiscVReg rs1, RiscVReg rs2) {
|
||||
_assert_msg_(SupportsFloatExtra(), "%s cannot be used without Zfa", __func__);
|
||||
Write32(EncodeFR(Opcode32::OP_FP, rd, Funct3::FMAXM, rs1, rs2, BitsToFunct2(bits), Funct5::FMINMAX));
|
||||
}
|
||||
|
||||
void RiscVEmitter::FROUND(int bits, RiscVReg rd, RiscVReg rs1, Round rm) {
|
||||
_assert_msg_(SupportsFloatExtra(), "%s cannot be used without Zfa", __func__);
|
||||
_assert_msg_(bits <= FloatBitsSupported(), "FROUND for %d float bits, only %d supported", bits, FloatBitsSupported());
|
||||
_assert_msg_(IsFPR(rd), "%s rd of wrong type", __func__);
|
||||
_assert_msg_(IsFPR(rs1), "%s rs1 of wrong type", __func__);
|
||||
|
||||
Funct2 toFmt = BitsToFunct2(bits, false);
|
||||
Write32(EncodeR(Opcode32::OP_FP, rd, (Funct3)rm, rs1, F4, toFmt, Funct5::FCVT_SZ));
|
||||
}
|
||||
|
||||
void RiscVEmitter::QuickFLI(int bits, RiscVReg rd, double v, RiscVReg scratchReg) {
|
||||
if (CanFLI(bits, v)) {
|
||||
FLI(bits, rd, v);
|
||||
} else if (bits == 64) {
|
||||
LI(scratchReg, v);
|
||||
FMV(FMv::D, FMv::X, rd, scratchReg);
|
||||
} else if (bits <= 32) {
|
||||
QuickFLI(32, rd, (float)v, scratchReg);
|
||||
} else {
|
||||
_assert_msg_(false, "Unsupported QuickFLI bits");
|
||||
}
|
||||
}
|
||||
|
||||
void RiscVEmitter::QuickFLI(int bits, RiscVReg rd, uint32_t pattern, RiscVReg scratchReg) {
|
||||
if (CanFLI(bits, pattern)) {
|
||||
FLI(bits, rd, pattern);
|
||||
} else if (bits == 32) {
|
||||
LI(scratchReg, (int32_t)pattern);
|
||||
FMV(FMv::W, FMv::X, rd, scratchReg);
|
||||
} else if (bits == 16) {
|
||||
LI(scratchReg, (int16_t)pattern);
|
||||
FMV(FMv::H, FMv::X, rd, scratchReg);
|
||||
} else {
|
||||
_assert_msg_(false, "Unsupported QuickFLI bits");
|
||||
}
|
||||
}
|
||||
|
||||
void RiscVEmitter::QuickFLI(int bits, RiscVReg rd, float v, RiscVReg scratchReg) {
|
||||
if (CanFLI(bits, v)) {
|
||||
FLI(bits, rd, v);
|
||||
} else if (bits == 64) {
|
||||
QuickFLI(32, rd, (double)v, scratchReg);
|
||||
} else if (bits == 32) {
|
||||
LI(scratchReg, v);
|
||||
FMV(FMv::D, FMv::X, rd, scratchReg);
|
||||
} else {
|
||||
_assert_msg_(false, "Unsupported QuickFLI bits");
|
||||
}
|
||||
}
|
||||
|
||||
void RiscVEmitter::CSRRW(RiscVReg rd, Csr csr, RiscVReg rs1) {
|
||||
_assert_msg_(SupportsZicsr(), "%s instruction not supported", __func__);
|
||||
_assert_msg_((u32)csr <= 0x00000FFF, "%s with invalid CSR number", __func__);
|
||||
|
|
|
@ -421,6 +421,26 @@ public:
|
|||
void FLE(int bits, RiscVReg rd, RiscVReg rs1, RiscVReg rs2);
|
||||
void FCLASS(int bits, RiscVReg rd, RiscVReg rs1);
|
||||
|
||||
// Additional floating point (Zfa.)
|
||||
bool CanFLI(int bits, double v) const;
|
||||
bool CanFLI(int bits, uint32_t pattern) const;
|
||||
// Float convenience overload: widens v and uses the double check.
bool CanFLI(int bits, float v) const {
	const double widened = (double)v;
	return CanFLI(bits, widened);
}
|
||||
void FLI(int bits, RiscVReg rd, double v);
|
||||
void FLI(int bits, RiscVReg rd, uint32_t pattern);
|
||||
// Float convenience overload: widens v and emits via the double version.
void FLI(int bits, RiscVReg rd, float v) {
	const double widened = (double)v;
	FLI(bits, rd, widened);
}
|
||||
void FMINM(int bits, RiscVReg rd, RiscVReg rs1, RiscVReg rs2);
|
||||
void FMAXM(int bits, RiscVReg rd, RiscVReg rs1, RiscVReg rs2);
|
||||
void FROUND(int bits, RiscVReg rd, RiscVReg rs1, Round rm = Round::DYNAMIC);
|
||||
|
||||
// Convenience helper for FLI support.
|
||||
void QuickFLI(int bits, RiscVReg rd, double v, RiscVReg scratchReg);
|
||||
void QuickFLI(int bits, RiscVReg rd, uint32_t pattern, RiscVReg scratchReg);
|
||||
void QuickFLI(int bits, RiscVReg rd, float v, RiscVReg scratchReg);
|
||||
|
||||
// Control state register manipulation.
|
||||
void CSRRW(RiscVReg rd, Csr csr, RiscVReg rs1);
|
||||
void CSRRS(RiscVReg rd, Csr csr, RiscVReg rs1);
|
||||
|
|
|
@ -246,8 +246,7 @@ void RiscVJitBackend::CompIR_FCvt(IRInst inst) {
|
|||
|
||||
tempReg = regs_.MapWithFPRTemp(inst);
|
||||
// Prepare the multiplier.
|
||||
LI(SCRATCH1, 1UL << (inst.src2 & 0x1F));
|
||||
FCVT(FConv::S, FConv::WU, tempReg, SCRATCH1, rm);
|
||||
QuickFLI(32, tempReg, (float)(1UL << (inst.src2 & 0x1F)), SCRATCH1);
|
||||
|
||||
FMUL(32, regs_.F(inst.dest), regs_.F(inst.src1), tempReg, rm);
|
||||
// NAN and clamping should all be correct.
|
||||
|
@ -264,8 +263,7 @@ void RiscVJitBackend::CompIR_FCvt(IRInst inst) {
|
|||
FCVT(FConv::S, FConv::W, regs_.F(inst.dest), SCRATCH1);
|
||||
|
||||
// Pre-divide so we can avoid any actual divide.
|
||||
LI(SCRATCH1, 1.0f / (1UL << (inst.src2 & 0x1F)));
|
||||
FMV(FMv::W, FMv::X, tempReg, SCRATCH1);
|
||||
QuickFLI(32, tempReg, 1.0f / (1UL << (inst.src2 & 0x1F)), SCRATCH1);
|
||||
FMUL(32, regs_.F(inst.dest), regs_.F(inst.dest), tempReg);
|
||||
break;
|
||||
|
||||
|
@ -292,8 +290,7 @@ void RiscVJitBackend::CompIR_FSat(IRInst inst) {
|
|||
FCVT(FConv::S, FConv::W, tempReg, R_ZERO);
|
||||
// FLE here is intentional to convert -0.0 to +0.0.
|
||||
FLE(32, SCRATCH1, regs_.F(inst.src1), tempReg);
|
||||
LI(SCRATCH2, 1.0f);
|
||||
FMV(FMv::W, FMv::X, tempReg, SCRATCH2);
|
||||
QuickFLI(32, tempReg, 1.0f, SCRATCH2);
|
||||
FLT(32, SCRATCH2, tempReg, regs_.F(inst.src1));
|
||||
|
||||
skipLower = BEQ(SCRATCH1, R_ZERO);
|
||||
|
@ -315,8 +312,7 @@ void RiscVJitBackend::CompIR_FSat(IRInst inst) {
|
|||
FMV(32, regs_.F(inst.dest), regs_.F(inst.src1));
|
||||
|
||||
// First, set SCRATCH1 = clamp to negative, SCRATCH2 = clamp to positive.
|
||||
LI(SCRATCH2, -1.0f);
|
||||
FMV(FMv::W, FMv::X, tempReg, SCRATCH2);
|
||||
QuickFLI(32, tempReg, -1.0f, SCRATCH2);
|
||||
FLT(32, SCRATCH1, regs_.F(inst.src1), tempReg);
|
||||
FNEG(32, tempReg, tempReg);
|
||||
FLT(32, SCRATCH2, tempReg, regs_.F(inst.src1));
|
||||
|
@ -621,8 +617,7 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) {
|
|||
FSQRT(32, regs_.F(inst.dest), regs_.F(inst.src1));
|
||||
|
||||
// Ugh, we can't really avoid a temp here. Probably not worth a permanent one.
|
||||
LI(SCRATCH1, 1.0f);
|
||||
FMV(FMv::W, FMv::X, tempReg, SCRATCH1);
|
||||
QuickFLI(32, tempReg, 1.0f, SCRATCH1);
|
||||
FDIV(32, regs_.F(inst.dest), tempReg, regs_.F(inst.dest));
|
||||
break;
|
||||
|
||||
|
@ -635,8 +630,7 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) {
|
|||
FDIV(32, regs_.F(inst.dest), regs_.F(inst.dest), regs_.F(inst.src1));
|
||||
} else {
|
||||
tempReg = regs_.MapWithFPRTemp(inst);
|
||||
LI(SCRATCH1, 1.0f);
|
||||
FMV(FMv::W, FMv::X, tempReg, SCRATCH1);
|
||||
QuickFLI(32, tempReg, 1.0f, SCRATCH1);
|
||||
FDIV(32, regs_.F(inst.dest), tempReg, regs_.F(inst.src1));
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -50,14 +50,10 @@ void RiscVJitBackend::CompIR_Basic(IRInst inst) {
|
|||
|
||||
case IROp::SetConstF:
|
||||
regs_.Map(inst);
|
||||
if (inst.constant == 0) {
|
||||
if (inst.constant == 0)
|
||||
FCVT(FConv::S, FConv::W, regs_.F(inst.dest), R_ZERO);
|
||||
} else {
|
||||
// TODO: In the future, could use FLI if it's approved.
|
||||
// Also, is FCVT faster?
|
||||
LI(SCRATCH1, (int32_t)inst.constant);
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
|
||||
}
|
||||
else
|
||||
QuickFLI(32, regs_.F(inst.dest), inst.constant, SCRATCH1);
|
||||
break;
|
||||
|
||||
case IROp::Downcount:
|
||||
|
|
|
@ -54,56 +54,86 @@ void RiscVJitBackend::CompIR_VecAssign(IRInst inst) {
|
|||
break;
|
||||
|
||||
case Vec4Init::AllONE:
|
||||
LI(SCRATCH1, 1.0f);
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
|
||||
for (int i = 1; i < 4; ++i)
|
||||
FMV(32, regs_.F(inst.dest + i), regs_.F(inst.dest));
|
||||
if (CanFLI(32, 1.0f)) {
|
||||
for (int i = 0; i < 4; ++i)
|
||||
FLI(32, regs_.F(inst.dest + i), 1.0f);
|
||||
} else {
|
||||
LI(SCRATCH1, 1.0f);
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
|
||||
for (int i = 1; i < 4; ++i)
|
||||
FMV(32, regs_.F(inst.dest + i), regs_.F(inst.dest));
|
||||
}
|
||||
break;
|
||||
|
||||
case Vec4Init::AllMinusONE:
|
||||
LI(SCRATCH1, -1.0f);
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
|
||||
for (int i = 1; i < 4; ++i)
|
||||
FMV(32, regs_.F(inst.dest + i), regs_.F(inst.dest));
|
||||
if (CanFLI(32, -1.0f)) {
|
||||
for (int i = 0; i < 4; ++i)
|
||||
FLI(32, regs_.F(inst.dest + i), -1.0f);
|
||||
} else {
|
||||
LI(SCRATCH1, -1.0f);
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
|
||||
for (int i = 1; i < 4; ++i)
|
||||
FMV(32, regs_.F(inst.dest + i), regs_.F(inst.dest));
|
||||
}
|
||||
break;
|
||||
|
||||
case Vec4Init::Set_1000:
|
||||
LI(SCRATCH1, 1.0f);
|
||||
if (!CanFLI(32, 1.0f))
|
||||
LI(SCRATCH1, 1.0f);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (i == 0)
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest + i), SCRATCH1);
|
||||
else
|
||||
if (i == 0) {
|
||||
if (CanFLI(32, 1.0f))
|
||||
FLI(32, regs_.F(inst.dest + i), 1.0f);
|
||||
else
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest + i), SCRATCH1);
|
||||
} else {
|
||||
FCVT(FConv::S, FConv::W, regs_.F(inst.dest + i), R_ZERO);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case Vec4Init::Set_0100:
|
||||
LI(SCRATCH1, 1.0f);
|
||||
if (!CanFLI(32, 1.0f))
|
||||
LI(SCRATCH1, 1.0f);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (i == 1)
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest + i), SCRATCH1);
|
||||
else
|
||||
if (i == 1) {
|
||||
if (CanFLI(32, 1.0f))
|
||||
FLI(32, regs_.F(inst.dest + i), 1.0f);
|
||||
else
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest + i), SCRATCH1);
|
||||
} else {
|
||||
FCVT(FConv::S, FConv::W, regs_.F(inst.dest + i), R_ZERO);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case Vec4Init::Set_0010:
|
||||
LI(SCRATCH1, 1.0f);
|
||||
if (!CanFLI(32, 1.0f))
|
||||
LI(SCRATCH1, 1.0f);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (i == 2)
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest + i), SCRATCH1);
|
||||
else
|
||||
if (i == 2) {
|
||||
if (CanFLI(32, 1.0f))
|
||||
FLI(32, regs_.F(inst.dest + i), 1.0f);
|
||||
else
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest + i), SCRATCH1);
|
||||
} else {
|
||||
FCVT(FConv::S, FConv::W, regs_.F(inst.dest + i), R_ZERO);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case Vec4Init::Set_0001:
|
||||
LI(SCRATCH1, 1.0f);
|
||||
if (!CanFLI(32, 1.0f))
|
||||
LI(SCRATCH1, 1.0f);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (i == 3)
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest + i), SCRATCH1);
|
||||
else
|
||||
if (i == 3) {
|
||||
if (CanFLI(32, 1.0f))
|
||||
FLI(32, regs_.F(inst.dest + i), 1.0f);
|
||||
else
|
||||
FMV(FMv::W, FMv::X, regs_.F(inst.dest + i), SCRATCH1);
|
||||
} else {
|
||||
FCVT(FConv::S, FConv::W, regs_.F(inst.dest + i), R_ZERO);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -223,10 +223,8 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
|
|||
}
|
||||
|
||||
// TODO: Only load these when needed?
|
||||
LI(scratchReg, by128);
|
||||
FMV(FMv::W, FMv::X, by128Reg, scratchReg);
|
||||
LI(scratchReg, by32768);
|
||||
FMV(FMv::W, FMv::X, by32768Reg, scratchReg);
|
||||
QuickFLI(32, by128Reg, by128, scratchReg);
|
||||
QuickFLI(32, by32768Reg, by32768, scratchReg);
|
||||
if (posThroughStep) {
|
||||
LI(scratchReg, const65535);
|
||||
FMV(FMv::W, FMv::X, const65535Reg, scratchReg);
|
||||
|
|
Loading…
Add table
Reference in a new issue