mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
648 lines
18 KiB
C++
648 lines
18 KiB
C++
// Copyright (c) 2023- PPSSPP Project.
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, version 2.0 or later versions.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
// Official git repository and contact information can be found at
|
|
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
|
|
|
#include "Core/MIPS/RiscV/RiscVJit.h"
|
|
#include "Core/MIPS/RiscV/RiscVRegCache.h"
|
|
|
|
// This file contains compilation for floating point related instructions.
|
|
//
|
|
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
|
// Currently known non-working ones should have DISABLE. No flags because that's in IR already.
|
|
|
|
// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; }
|
|
#define CONDITIONAL_DISABLE {}
|
|
#define DISABLE { CompIR_Generic(inst); return; }
|
|
#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; }
|
|
|
|
namespace MIPSComp {
|
|
|
|
using namespace RiscVGen;
|
|
using namespace RiscVJitConstants;
|
|
|
|
// Compiles the basic single precision arithmetic IR ops.
// FAdd/FSub/FMul/FDiv read src1 and src2, FSqrt/FNeg read only src1; all write dest.
// Any other op asserts and is handed to CompIR_Generic.
void RiscVJitBackend::CompIR_FArith(IRInst inst) {
	CONDITIONAL_DISABLE;

	const auto op = inst.op;
	const bool twoSources = op == IROp::FAdd || op == IROp::FSub || op == IROp::FMul || op == IROp::FDiv;
	const bool oneSource = op == IROp::FSqrt || op == IROp::FNeg;
	if (!twoSources && !oneSource)
		INVALIDOP;

	// Every supported op maps its registers the same way before emitting anything.
	regs_.Map(inst);
	const auto fd = regs_.F(inst.dest);
	const auto fs = regs_.F(inst.src1);

	if (oneSource) {
		if (op == IROp::FSqrt)
			FSQRT(32, fd, fs);
		else
			FNEG(32, fd, fs);
		return;
	}

	// Only the two-source ops look at src2.
	const auto ft = regs_.F(inst.src2);
	switch (op) {
	case IROp::FAdd:
		FADD(32, fd, fs, ft);
		break;

	case IROp::FSub:
		FSUB(32, fd, fs, ft);
		break;

	case IROp::FMul:
		// We'll assume everyone will make it such that 0 * infinity = NAN properly.
		// See blame on this comment if that proves untrue.
		FMUL(32, fd, fs, ft);
		break;

	case IROp::FDiv:
		FDIV(32, fd, fs, ft);
		break;

	default:
		// Not reachable, other ops were rejected above.
		break;
	}
}
|
|
|
|
// Compiles IROp::FMin and IROp::FMax (dest = min/max of src1 and src2.)
//
// When neither input classifies as NAN, the native FMIN/FMAX is used.  Otherwise,
// the raw bits are compared as integers so that NAN takes part in the comparison,
// flipping the comparison when both values are negative.
// Clobbers SCRATCH1, SCRATCH2, and R_RA.  Any other op asserts and uses CompIR_Generic.
void RiscVJitBackend::CompIR_FCondAssign(IRInst inst) {
	CONDITIONAL_DISABLE;
	if (inst.op != IROp::FMin && inst.op != IROp::FMax)
		INVALIDOP;
	const bool maxCondition = inst.op == IROp::FMax;

	// FMin and FMax are used by VFPU and handle NAN/INF as just a larger exponent.
	regs_.Map(inst);

	// Reserve the temp (only needed without Zbb) before any branch is emitted.
	// NOTE(review): assumes GetAndLockTempGPR() may emit spill code to free a register.
	// If it did so after the branch below, that code would only run on the NAN path.
	RiscVReg isSrc1LowerReg = INVALID_REG;
	if (!cpu_info.RiscV_Zbb)
		isSrc1LowerReg = regs_.GetAndLockTempGPR();

	FCLASS(32, SCRATCH1, regs_.F(inst.src1));
	FCLASS(32, SCRATCH2, regs_.F(inst.src2));

	// If either side is a NAN, it needs to participate in the comparison.
	OR(SCRATCH1, SCRATCH1, SCRATCH2);
	// NAN is either 0x100 or 0x200.
	ANDI(SCRATCH1, SCRATCH1, 0x300);
	FixupBranch useNormalCond = BEQ(SCRATCH1, R_ZERO);

	// Time to use bits... classify won't help because it ignores -NAN.
	FMV(FMv::X, FMv::W, SCRATCH1, regs_.F(inst.src1));
	FMV(FMv::X, FMv::W, SCRATCH2, regs_.F(inst.src2));

	// If both are negative, we flip the comparison (not two's complement.)
	// We cheat and use RA...
	AND(R_RA, SCRATCH1, SCRATCH2);
	SRLIW(R_RA, R_RA, 31);

	if (cpu_info.RiscV_Zbb) {
		// With integer MIN/MAX available, just pick the opposite one when both are negative.
		FixupBranch swapCompare = BNE(R_RA, R_ZERO);
		if (maxCondition)
			MAX(SCRATCH1, SCRATCH1, SCRATCH2);
		else
			MIN(SCRATCH1, SCRATCH1, SCRATCH2);
		FixupBranch skipSwapCompare = J();
		SetJumpTarget(swapCompare);
		if (maxCondition)
			MIN(SCRATCH1, SCRATCH1, SCRATCH2);
		else
			MAX(SCRATCH1, SCRATCH1, SCRATCH2);
		SetJumpTarget(skipSwapCompare);
	} else {
		SLT(isSrc1LowerReg, SCRATCH1, SCRATCH2);
		// Flip the flag (to reverse the min/max) based on if both were negative.
		XOR(isSrc1LowerReg, isSrc1LowerReg, R_RA);
		FixupBranch useSrc1;
		if (maxCondition)
			useSrc1 = BEQ(isSrc1LowerReg, R_ZERO);
		else
			useSrc1 = BNE(isSrc1LowerReg, R_ZERO);
		// Falling through means src2 is the result, SCRATCH1 already holds src1 otherwise.
		MV(SCRATCH1, SCRATCH2);
		SetJumpTarget(useSrc1);
	}

	FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
	FixupBranch finish = J();

	SetJumpTarget(useNormalCond);
	if (maxCondition)
		FMAX(32, regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
	else
		FMIN(32, regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
	SetJumpTarget(finish);
}
|
|
|
|
// Compiles the simple float assignment ops:
//  - FMov: copy src1 to dest (nothing is emitted when they are the same register.)
//  - FAbs: dest = absolute value of src1.
//  - FSign: dest = 0 if src1 is either zero, otherwise 1.0 carrying src1's sign bit.
// Any other op asserts and is handed to CompIR_Generic.
void RiscVJitBackend::CompIR_FAssign(IRInst inst) {
	CONDITIONAL_DISABLE;

	if (inst.op == IROp::FMov) {
		// A move onto itself needs no code, and no mapping either.
		if (inst.dest == inst.src1)
			return;
		regs_.Map(inst);
		FMV(32, regs_.F(inst.dest), regs_.F(inst.src1));
		return;
	}

	if (inst.op == IROp::FAbs) {
		regs_.Map(inst);
		FABS(32, regs_.F(inst.dest), regs_.F(inst.src1));
		return;
	}

	if (inst.op != IROp::FSign)
		INVALIDOP;

	regs_.Map(inst);
	const auto fd = regs_.F(inst.dest);
	const auto fs = regs_.F(inst.src1);

	// Zero classifies as 0x08 (negative) or 0x10 (positive), so SCRATCH1 = 1 when src1 is NOT zero.
	FCLASS(32, SCRATCH1, fs);
	ANDI(SCRATCH1, SCRATCH1, 0x18);
	SEQZ(SCRATCH1, SCRATCH1);
	// For zero, SCRATCH1 is already the result bits (0), so skip ahead.
	// Probably non-zero is the common case, so we make that the straight line.
	FixupBranch isZero = BEQ(SCRATCH1, R_ZERO);
	LI(SCRATCH1, 1.0f);

	// Grab the raw bits of the source, all we want is its sign.
	FMV(FMv::X, FMv::W, SCRATCH2, fs);
	// Shift right then back left so only bit 31 (the sign) survives, and merge into the 1.0.
	SRAIW(SCRATCH2, SCRATCH2, 31);
	SLLIW(SCRATCH2, SCRATCH2, 31);
	OR(SCRATCH1, SCRATCH1, SCRATCH2);

	SetJumpTarget(isZero);
	FMV(FMv::W, FMv::X, fd, SCRATCH1);
}
|
|
|
|
// Compiles FRound/FTrunc/FCeil/FFloor: converts src1 to a 32-bit integer using the
// op's rounding mode, and stores the integer bits in the dest float register.
// Clobbers SCRATCH1.  Any other op asserts and is handed to CompIR_Generic.
void RiscVJitBackend::CompIR_FRound(IRInst inst) {
	CONDITIONAL_DISABLE;

	// TODO: If this is followed by a GPR transfer, might want to combine.
	regs_.Map(inst);

	// The ops differ only in which rounding mode the conversion uses.
	Round mode = Round::NEAREST_EVEN;
	switch (inst.op) {
	case IROp::FRound:
		mode = Round::NEAREST_EVEN;
		break;

	case IROp::FTrunc:
		mode = Round::TOZERO;
		break;

	case IROp::FCeil:
		mode = Round::UP;
		break;

	case IROp::FFloor:
		mode = Round::DOWN;
		break;

	default:
		INVALIDOP;
		break;
	}

	FCVT(FConv::W, FConv::S, SCRATCH1, regs_.F(inst.src1), mode);
	// The result stays in the float register as raw integer bits.
	FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
}
|
|
|
|
// Compiles conversions between floats and integers held (as raw bits) in float registers.
//  - FCvtWS: always handed to CompIR_Generic.
//  - FCvtSW: integer bits in src1 -> float in dest.
//  - FCvtScaledWS: src1 * 2^(src2 & 0x1F), converted to integer using rounding mode (src2 >> 6.)
//  - FCvtScaledSW: integer bits in src1 -> float, multiplied by 1 / 2^(src2 & 0x1F.)
// Clobbers SCRATCH1.  Any other op asserts and is handed to CompIR_Generic.
void RiscVJitBackend::CompIR_FCvt(IRInst inst) {
	CONDITIONAL_DISABLE;

	if (inst.op == IROp::FCvtWS) {
		CompIR_Generic(inst);
		return;
	}

	if (inst.op == IROp::FCvtSW) {
		// TODO: This is probably preceded by a GPR transfer, might be ideal to combine.
		regs_.Map(inst);
		FMV(FMv::X, FMv::W, SCRATCH1, regs_.F(inst.src1));
		FCVT(FConv::S, FConv::W, regs_.F(inst.dest), SCRATCH1);
		return;
	}

	if (inst.op != IROp::FCvtScaledWS && inst.op != IROp::FCvtScaledSW)
		INVALIDOP;

	// Both scaled ops keep the power of two in the low 5 bits of src2.
	const int scaleBits = inst.src2 & 0x1F;

	if (inst.op == IROp::FCvtScaledWS) {
		Round rm = Round::NEAREST_EVEN;
		switch (inst.src2 >> 6) {
		case 0:
			rm = Round::NEAREST_EVEN;
			break;
		case 1:
			rm = Round::TOZERO;
			break;
		case 2:
			rm = Round::UP;
			break;
		case 3:
			rm = Round::DOWN;
			break;
		default:
			_assert_msg_(false, "Invalid rounding mode for FCvtScaledWS");
		}

		const RiscVReg scaleReg = regs_.MapWithFPRTemp(inst);
		const auto fd = regs_.F(inst.dest);
		// Prepare the multiplier.
		QuickFLI(32, scaleReg, (float)(1UL << scaleBits), SCRATCH1);

		FMUL(32, fd, regs_.F(inst.src1), scaleReg, rm);
		// NAN and clamping should all be correct.
		FCVT(FConv::W, FConv::S, SCRATCH1, fd, rm);
		// TODO: Could combine with a transfer, often is one...
		FMV(FMv::W, FMv::X, fd, SCRATCH1);
		return;
	}

	// What's left is FCvtScaledSW.
	// TODO: This is probably preceded by a GPR transfer, might be ideal to combine.
	const RiscVReg scaleReg = regs_.MapWithFPRTemp(inst);
	const auto fd = regs_.F(inst.dest);
	FMV(FMv::X, FMv::W, SCRATCH1, regs_.F(inst.src1));
	FCVT(FConv::S, FConv::W, fd, SCRATCH1);

	// Pre-divide so we can avoid any actual divide.
	// This must come after the convert, since it's given SCRATCH1 too.
	QuickFLI(32, scaleReg, 1.0f / (1UL << scaleBits), SCRATCH1);
	FMUL(32, fd, fd, scaleReg);
}
|
|
|
|
// Compiles the saturation ops, clamping src1 into dest:
//  - FSat0_1: to the range [0.0, 1.0]
//  - FSatMinus1_1: to the range [-1.0, 1.0]
// Clobbers SCRATCH1 and SCRATCH2.  Any other op asserts and is handed to CompIR_Generic.
void RiscVJitBackend::CompIR_FSat(IRInst inst) {
	CONDITIONAL_DISABLE;

	if (inst.op != IROp::FSat0_1 && inst.op != IROp::FSatMinus1_1)
		INVALIDOP;

	// Both ops need a temp for the limit, and start from dest = src1.
	const RiscVReg limitReg = regs_.MapWithFPRTemp(inst);
	const auto fd = regs_.F(inst.dest);
	const auto fs = regs_.F(inst.src1);
	if (inst.dest != inst.src1)
		FMV(32, fd, fs);

	if (inst.op == IROp::FSat0_1) {
		// Compute both flags up front: SCRATCH1 = clamp to zero, SCRATCH2 = clamp to one.
		FCVT(FConv::S, FConv::W, limitReg, R_ZERO);
		// FLE here is intentional to convert -0.0 to +0.0.
		FLE(32, SCRATCH1, fs, limitReg);
		QuickFLI(32, limitReg, 1.0f, SCRATCH2);
		FLT(32, SCRATCH2, limitReg, fs);

		FixupBranch notBelow = BEQ(SCRATCH1, R_ZERO);
		FCVT(FConv::S, FConv::W, fd, R_ZERO);
		FixupBranch clampedLow = J();

		SetJumpTarget(notBelow);
		FixupBranch notAbove = BEQ(SCRATCH2, R_ZERO);
		// Still has 1.0 in it.
		FMV(32, fd, limitReg);

		SetJumpTarget(clampedLow);
		SetJumpTarget(notAbove);
		return;
	}

	// FSatMinus1_1: SCRATCH1 = clamp to negative, SCRATCH2 = clamp to positive.
	QuickFLI(32, limitReg, -1.0f, SCRATCH2);
	FLT(32, SCRATCH1, fs, limitReg);
	FNEG(32, limitReg, limitReg);
	FLT(32, SCRATCH2, limitReg, fs);

	// But we can actually do one branch, using sign-injection to keep the original sign.
	OR(SCRATCH1, SCRATCH1, SCRATCH2);

	FixupBranch inRange = BEQ(SCRATCH1, R_ZERO);
	FSGNJ(32, fd, limitReg, fd);
	SetJumpTarget(inRange);
}
|
|
|
|
// Compiles float comparisons and the VFPU condition code (CC) ops:
//  - FCmp: compares src1/src2 per the IRFpCompareMode in dest, result goes to IRREG_FPCOND.
//  - FCmovVfpuCC: moves src1 to dest when a chosen CC bit matches bit 7 of src2.
//  - FCmpVfpuBit: evaluates a VCondition (low 4 bits of dest) into CC bit (dest >> 4.)
//  - FCmpVfpuAggregate: rebuilds the any (0x10) and all (0x20) CC bits from the bits masked by dest.
// Clobbers SCRATCH1 and SCRATCH2.  Any other op asserts and is handed to CompIR_Generic.
void RiscVJitBackend::CompIR_FCompare(IRInst inst) {
	CONDITIONAL_DISABLE;

	constexpr IRReg IRREG_VFPU_CC = IRREG_VFPU_CTRL_BASE + VFPU_CTRL_CC;

	// Maps the instruction along with FPCOND (NOINIT), and gives back FPCOND's native register.
	auto mapWithFpcond = [&]() {
		regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
		return regs_.R(IRREG_FPCOND);
	};

	// Leaves SCRATCH1 non-zero if src1 or src2 classifies as NAN.  Also overwrites SCRATCH2.
	auto emitEitherNanBits = [&]() {
		FCLASS(32, SCRATCH1, regs_.F(inst.src1));
		FCLASS(32, SCRATCH2, regs_.F(inst.src2));
		OR(SCRATCH1, SCRATCH1, SCRATCH2);
		// NAN is 0x100 or 0x200.
		ANDI(SCRATCH1, SCRATCH1, 0x300);
	};

	// Sets SCRATCH1 to 0 or 1 based on whether src1's class matches classMask.
	// Bit 2 of dest selects the negated form of the condition.
	auto emitClassCheck = [&](int classMask) {
		regs_.MapFPR(inst.src1);
		FCLASS(32, SCRATCH1, regs_.F(inst.src1));
		ANDI(SCRATCH1, SCRATCH1, classMask);
		if ((inst.dest & 4) == 0)
			SNEZ(SCRATCH1, SCRATCH1);
		else
			SEQZ(SCRATCH1, SCRATCH1);
	};

	switch (inst.op) {
	case IROp::FCmp:
		switch (inst.dest) {
		case IRFpCompareMode::False:
			regs_.SetGPRImm(IRREG_FPCOND, 0);
			break;

		case IRFpCompareMode::EitherUnordered:
		{
			const auto fpcond = mapWithFpcond();
			emitEitherNanBits();
			SNEZ(fpcond, SCRATCH1);
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;
		}

		case IRFpCompareMode::EqualOrdered:
		{
			const auto fpcond = mapWithFpcond();
			FEQ(32, fpcond, regs_.F(inst.src1), regs_.F(inst.src2));
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;
		}

		case IRFpCompareMode::EqualUnordered:
		{
			const auto fpcond = mapWithFpcond();
			FEQ(32, fpcond, regs_.F(inst.src1), regs_.F(inst.src2));

			// Now let's just OR in the unordered check.
			emitEitherNanBits();
			SNEZ(SCRATCH1, SCRATCH1);
			OR(fpcond, fpcond, SCRATCH1);
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;
		}

		case IRFpCompareMode::LessEqualOrdered:
		{
			const auto fpcond = mapWithFpcond();
			FLE(32, fpcond, regs_.F(inst.src1), regs_.F(inst.src2));
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;
		}

		case IRFpCompareMode::LessEqualUnordered:
		{
			// Expressed as NOT (src2 < src1.)
			const auto fpcond = mapWithFpcond();
			FLT(32, fpcond, regs_.F(inst.src2), regs_.F(inst.src1));
			SEQZ(fpcond, fpcond);
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;
		}

		case IRFpCompareMode::LessOrdered:
		{
			const auto fpcond = mapWithFpcond();
			FLT(32, fpcond, regs_.F(inst.src1), regs_.F(inst.src2));
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;
		}

		case IRFpCompareMode::LessUnordered:
		{
			// Expressed as NOT (src2 <= src1.)
			const auto fpcond = mapWithFpcond();
			FLE(32, fpcond, regs_.F(inst.src2), regs_.F(inst.src1));
			SEQZ(fpcond, fpcond);
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;
		}

		default:
			_assert_msg_(false, "Unexpected IRFpCompareMode %d", inst.dest);
		}
		break;

	case IROp::FCmovVfpuCC:
	{
		regs_.MapWithExtra(inst, { { 'G', IRREG_VFPU_CC, 1, MIPSMap::INIT } });
		const auto cc = regs_.R(IRREG_VFPU_CC);
		const int testBit = inst.src2 & 0xF;
		const bool moveWhenSet = ((inst.src2 >> 7) & 1) != 0;

		// Isolate the tested CC bit in SCRATCH1.
		if (testBit == 0) {
			ANDI(SCRATCH1, cc, 1);
		} else if (cpu_info.RiscV_Zbs) {
			BEXTI(SCRATCH1, cc, testBit);
		} else {
			SRLI(SCRATCH1, cc, testBit);
			ANDI(SCRATCH1, SCRATCH1, 1);
		}

		// Branch around the move when the bit doesn't have the wanted value.
		FixupBranch skipMove = moveWhenSet ? BEQ(SCRATCH1, R_ZERO) : BNE(SCRATCH1, R_ZERO);
		FMV(32, regs_.F(inst.dest), regs_.F(inst.src1));
		SetJumpTarget(skipMove);
		break;
	}

	case IROp::FCmpVfpuBit:
	{
		regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);

		// First, get the condition's result as 0 or 1 in SCRATCH1.
		switch (VCondition(inst.dest & 0xF)) {
		case VC_EQ:
			regs_.Map(inst);
			FEQ(32, SCRATCH1, regs_.F(inst.src1), regs_.F(inst.src2));
			break;
		case VC_NE:
			regs_.Map(inst);
			FEQ(32, SCRATCH1, regs_.F(inst.src1), regs_.F(inst.src2));
			SEQZ(SCRATCH1, SCRATCH1);
			break;
		case VC_LT:
			regs_.Map(inst);
			FLT(32, SCRATCH1, regs_.F(inst.src1), regs_.F(inst.src2));
			break;
		case VC_LE:
			regs_.Map(inst);
			FLE(32, SCRATCH1, regs_.F(inst.src1), regs_.F(inst.src2));
			break;
		case VC_GT:
			regs_.Map(inst);
			FLT(32, SCRATCH1, regs_.F(inst.src2), regs_.F(inst.src1));
			break;
		case VC_GE:
			regs_.Map(inst);
			FLE(32, SCRATCH1, regs_.F(inst.src2), regs_.F(inst.src1));
			break;
		case VC_EZ:
		case VC_NZ:
			// Zero is either 0x10 or 0x08.
			emitClassCheck(0x18);
			break;
		case VC_EN:
		case VC_NN:
			// NAN is either 0x100 or 0x200.
			emitClassCheck(0x300);
			break;
		case VC_EI:
		case VC_NI:
			// Infinity is either 0x80 or 0x01.
			emitClassCheck(0x81);
			break;
		case VC_ES:
		case VC_NS:
			// Infinity is either 0x80 or 0x01, NAN is either 0x100 or 0x200.
			emitClassCheck(0x381);
			break;
		case VC_TR:
			LI(SCRATCH1, 1);
			break;
		case VC_FL:
			LI(SCRATCH1, 0);
			break;
		}

		// Then replace just the target bit of CC with that result.
		const int ccBit = inst.dest >> 4;
		const auto cc = regs_.R(IRREG_VFPU_CC);
		ANDI(cc, cc, ~(1 << ccBit));
		if (ccBit != 0)
			SLLI(SCRATCH1, SCRATCH1, ccBit);
		OR(cc, cc, SCRATCH1);
		break;
	}

	case IROp::FCmpVfpuAggregate:
	{
		regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);
		const auto cc = regs_.R(IRREG_VFPU_CC);

		// Start with only the bits being aggregated.
		ANDI(SCRATCH1, cc, inst.dest);
		if (inst.dest == 1) {
			// With a single bit, any and all are the same thing.
			// Negate so 1 becomes all bits set and zero stays zero, then mask to 0x30.
			NEG(SCRATCH1, SCRATCH1);
			ANDI(SCRATCH1, SCRATCH1, 0x30);
		} else {
			// If none are set, SCRATCH1 is already right: zero for both any and all.
			FixupBranch noneSet = BEQ(SCRATCH1, R_ZERO);

			// To compare to inst.dest for "all", let's simply subtract it and compare to zero.
			ADDI(SCRATCH1, SCRATCH1, -inst.dest);
			SEQZ(SCRATCH1, SCRATCH1);
			// Now we combine with the "any" bit.
			SLLI(SCRATCH1, SCRATCH1, 5);
			ORI(SCRATCH1, SCRATCH1, 0x10);

			SetJumpTarget(noneSet);
		}

		// Reject the old any/all bits and replace them with our own.
		ANDI(cc, cc, ~0x30);
		OR(cc, cc, SCRATCH1);
		break;
	}

	default:
		INVALIDOP;
		break;
	}
}
|
|
|
|
// Compiles the rounding mode IR ops by calling the matching helper.
// UpdateRoundingMode emits nothing.  Any other op asserts and is handed to CompIR_Generic.
void RiscVJitBackend::CompIR_RoundingMode(IRInst inst) {
	CONDITIONAL_DISABLE;

	if (inst.op == IROp::RestoreRoundingMode) {
		RestoreRoundingMode();
	} else if (inst.op == IROp::ApplyRoundingMode) {
		ApplyRoundingMode();
	} else if (inst.op != IROp::UpdateRoundingMode) {
		INVALIDOP;
	}
	// For UpdateRoundingMode, we don't need to do anything.
	// Instructions allow a "dynamic" rounding mode.
}
|
|
|
|
// Compiles the special float functions:
//  - FSin/FCos/FAsin: calls out to vfpu_sin/vfpu_cos/vfpu_asin.
//  - FRSqrt: dest = 1.0 / sqrt(src1)
//  - FRecip: dest = 1.0 / src1
// Clobbers SCRATCH1.  Any other op asserts and is handed to CompIR_Generic.
void RiscVJitBackend::CompIR_FSpecial(IRInst inst) {
	CONDITIONAL_DISABLE;

#ifdef __riscv_float_abi_soft
#error Currently hard float is required.
#endif

	// Emits a call to a float(float) helper, passing src1 in F10 and taking the result from F10.
	auto emitHelperCall = [&](float (*helper)(float)) {
		regs_.FlushBeforeCall();
		WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);

		// The source may still be in a register (a non-volatile one.)  If not, read it from the context.
		// TODO: May have to handle a transfer if SIMD here.
		if (!regs_.IsFPRMapped(inst.src1)) {
			int offset = offsetof(MIPSState, f) + inst.src1 * 4;
			FL(32, F10, CTXREG, offset);
		} else {
			FMV(32, F10, regs_.F(inst.src1));
		}
		QuickCallFunction(helper, SCRATCH1);

		regs_.MapFPR(inst.dest, MIPSMap::NOINIT);
		// If it's already F10, we're done - MapReg doesn't actually overwrite the reg in that case.
		const auto fd = regs_.F(inst.dest);
		if (fd != F10) {
			FMV(32, fd, F10);
		}

		WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	};

	switch (inst.op) {
	case IROp::FSin:
		emitHelperCall(&vfpu_sin);
		break;

	case IROp::FCos:
		emitHelperCall(&vfpu_cos);
		break;

	case IROp::FAsin:
		emitHelperCall(&vfpu_asin);
		break;

	case IROp::FRSqrt:
	{
		const RiscVReg oneReg = regs_.MapWithFPRTemp(inst);
		const auto fd = regs_.F(inst.dest);
		FSQRT(32, fd, regs_.F(inst.src1));

		// Ugh, we can't really avoid a temp here. Probably not worth a permanent one.
		QuickFLI(32, oneReg, 1.0f, SCRATCH1);
		FDIV(32, fd, oneReg, fd);
		break;
	}

	case IROp::FRecip:
		if (inst.dest == inst.src1) {
			// Overwriting dest would lose the source, so the 1.0 has to go in a temp.
			const RiscVReg oneReg = regs_.MapWithFPRTemp(inst);
			QuickFLI(32, oneReg, 1.0f, SCRATCH1);
			FDIV(32, regs_.F(inst.dest), oneReg, regs_.F(inst.src1));
		} else {
			// This is the easy case, dest itself can hold the 1.0.
			regs_.Map(inst);
			LI(SCRATCH1, 1.0f);
			FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
			FDIV(32, regs_.F(inst.dest), regs_.F(inst.dest), regs_.F(inst.src1));
		}
		break;

	default:
		INVALIDOP;
		break;
	}
}
|
|
|
|
} // namespace MIPSComp
|