Mirror of https://github.com/hrydgard/ppsspp.git
arm64jit: Implement FMin/FMax.

Commit c8f888fab0 (parent 6c3547d7ae)
3 changed files with 113 additions and 1 deletion
@@ -3011,6 +3011,12 @@ void ARM64FloatEmitter::BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitThreeSame(1, 1, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::BIT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
	EmitThreeSame(1, 2, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::BIF(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
	EmitThreeSame(1, 3, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
{
	u32 imm5 = 0;
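Side note (not part of the diff): the three bitwise-select variants added above differ only in which operand acts as the bit selector. A scalar C++ model of their per-bit behavior, for reference:

#include <cstdint>

// BSL: Rd is the selector - result takes Rn bits where Rd is 1, Rm bits where Rd is 0.
// BIT: Rm is the selector - Rn bits are inserted into Rd where Rm is 1.
// BIF: Rm is the selector - Rn bits are inserted into Rd where Rm is 0.
uint64_t bsl(uint64_t rd, uint64_t rn, uint64_t rm) { return (rn & rd) | (rm & ~rd); }
uint64_t bit(uint64_t rd, uint64_t rn, uint64_t rm) { return (rn & rm) | (rd & ~rm); }
uint64_t bif(uint64_t rd, uint64_t rn, uint64_t rm) { return (rn & ~rm) | (rd & rm); }

BIT is what the FCondAssign change further down uses to conditionally overwrite the result register.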
@@ -3184,6 +3190,61 @@ void ARM64FloatEmitter::XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
	Emit2RegMisc(true, 0, dest_size >> 4, 0x12, Rd, Rn);
}

void ARM64FloatEmitter::CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
	_assert_msg_(IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
	EmitThreeSame(true, size >> 4, 0b10001, Rd, Rn, Rm);
}

void ARM64FloatEmitter::CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
	_assert_msg_(IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
	EmitThreeSame(false, size >> 4, 0b00111, Rd, Rn, Rm);
}

void ARM64FloatEmitter::CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
	_assert_msg_(IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
	EmitThreeSame(false, size >> 4, 0b00110, Rd, Rn, Rm);
}

void ARM64FloatEmitter::CMHI(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
	_assert_msg_(IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
	EmitThreeSame(true, size >> 4, 0b00110, Rd, Rn, Rm);
}

void ARM64FloatEmitter::CMHS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
	_assert_msg_(IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
	EmitThreeSame(true, size >> 4, 0b00111, Rd, Rn, Rm);
}

void ARM64FloatEmitter::CMTST(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
	_assert_msg_(IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
	EmitThreeSame(false, size >> 4, 0b10001, Rd, Rn, Rm);
}

void ARM64FloatEmitter::CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
	_assert_msg_(IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
	Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01001, Rd, Rn);
}

void ARM64FloatEmitter::CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
	_assert_msg_(IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
	Emit2RegMisc(IsQuad(Rd), true, size >> 4, 0b01000, Rd, Rn);
}

void ARM64FloatEmitter::CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
	_assert_msg_(IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
	Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01000, Rd, Rn);
}

void ARM64FloatEmitter::CMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
	_assert_msg_(IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
	Emit2RegMisc(IsQuad(Rd), true, size >> 4, 0b01001, Rd, Rn);
}

void ARM64FloatEmitter::CMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
	_assert_msg_(IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
	Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01010, Rd, Rn);
}

// Move
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
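Side note (not part of the diff): all of these compares write a per-element mask of all ones where the condition holds and all zeros where it does not. A scalar C++ model for the 32-bit case, including the CMLT-against-zero form the FMin/FMax path below relies on:

#include <cstdint>

// CMEQ (three-register form): all-ones mask when the elements are equal.
uint32_t cmeq(uint32_t a, uint32_t b) { return a == b ? 0xFFFFFFFFu : 0u; }
// CMLT (zero variant): all-ones mask when the signed element is negative,
// i.e. it broadcasts the sign bit across the element.
uint32_t cmlt_zero(int32_t a) { return a < 0 ? 0xFFFFFFFFu : 0u; }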
@@ -851,6 +851,8 @@ public:
	void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void BIT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void BIF(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
	void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
@@ -894,6 +896,18 @@ public:
	void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);

	void CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMHI(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMHS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMTST(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void CMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void CMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn);

	// Move
	void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn);
@@ -143,16 +143,53 @@ void Arm64JitBackend::CompIR_FCompare(IRInst inst) {
void Arm64JitBackend::CompIR_FCondAssign(IRInst inst) {
	CONDITIONAL_DISABLE;

	// For Vec4, we could basically just ORR FCMPGE/FCMPLE together, but overlap is trickier.
	regs_.Map(inst);
	fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
	FixupBranch unordered = B(CC_VS);

	switch (inst.op) {
	case IROp::FMin:
		fp_.FMIN(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
		break;

	case IROp::FMax:
		fp_.FMAX(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
		break;

	default:
		INVALIDOP;
		break;
	}

	FixupBranch orderedDone = B();

	// Not sure if this path is fast, trying to optimize it to be small but correct.
	// Probably an uncommon path.
	SetJumpTarget(unordered);
	fp_.AND(EncodeRegToDouble(SCRATCHF1), regs_.FD(inst.src1), regs_.FD(inst.src2));
	// SCRATCHF1 = 0xFFFFFFFF if sign bit set on both, 0x00000000 otherwise.
	fp_.CMLT(32, EncodeRegToDouble(SCRATCHF1), EncodeRegToDouble(SCRATCHF1));

	switch (inst.op) {
	case IROp::FMin:
		fp_.SMAX(32, EncodeRegToDouble(SCRATCHF2), regs_.FD(inst.src1), regs_.FD(inst.src2));
		fp_.SMIN(32, regs_.FD(inst.dest), regs_.FD(inst.src1), regs_.FD(inst.src2));
		break;

	case IROp::FMax:
		fp_.SMIN(32, EncodeRegToDouble(SCRATCHF2), regs_.FD(inst.src1), regs_.FD(inst.src2));
		fp_.SMAX(32, regs_.FD(inst.dest), regs_.FD(inst.src1), regs_.FD(inst.src2));
		break;

	default:
		INVALIDOP;
		break;
	}
	// Replace dest with SCRATCHF2 if both were less than zero.
	fp_.BIT(regs_.FD(inst.dest), EncodeRegToDouble(SCRATCHF2), EncodeRegToDouble(SCRATCHF1));

	SetJumpTarget(orderedDone);
}

void Arm64JitBackend::CompIR_FCvt(IRInst inst) {
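Side note (not part of the diff): the fallback path works because comparing IEEE-754 single bit patterns as signed integers matches float ordering whenever at least one sign bit is clear; when both operands are negative, the integer order is reversed, so the sequence computes both SMIN and SMAX and uses BIT with the "both negative" mask to pick the reversed result. A C++ sketch of what the emitted FMin sequence computes per lane (names are illustrative, not from the commit):

#include <cstdint>
#include <cstring>

uint32_t bits(float f) { uint32_t u; std::memcpy(&u, &f, sizeof(u)); return u; }

uint32_t modelFMin(float a, float b) {
	int32_t ia = (int32_t)bits(a), ib = (int32_t)bits(b);
	// fp_.AND + fp_.CMLT: all-ones mask when both sign bits are set.
	uint32_t bothNeg = (ia & ib) < 0 ? 0xFFFFFFFFu : 0u;
	int32_t smin = ia < ib ? ia : ib;  // fp_.SMIN -> dest
	int32_t smax = ia > ib ? ia : ib;  // fp_.SMAX -> SCRATCHF2
	// fp_.BIT: take the reversed (smax) result where the mask is set.
	return ((uint32_t)smax & bothNeg) | ((uint32_t)smin & ~bothNeg);
}

Since this treats the operands as plain bit patterns, it also yields a well-defined result when one operand is NaN, which is what the CC_VS branch guards.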