x86jit: Implement trig/reciprocals.
Parent: 4b1c809886
Commit: 61a99b4bac

9 changed files with 176 additions and 5 deletions
@@ -1689,6 +1689,9 @@ void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp,
 void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));}
 
+void XEmitter::MOVSHDUP(X64Reg regOp, OpArg arg) { WriteSSEOp(0xF3, sseMOVHPfromRM, regOp, arg); }
+void XEmitter::MOVSLDUP(X64Reg regOp, OpArg arg) { WriteSSEOp(0xF3, sseMOVLPfromRM, regOp, arg); }
+
 void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);}
 void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);}
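
The two new emitter functions wrap SSE3's duplicate-moves. As a rough scalar model of what the hardware does (illustrative only, not code from this commit):

#include <cstdio>

// MOVSHDUP copies each odd lane into the even lane below it;
// MOVSLDUP copies each even lane into the odd lane above it.
static void movshdup(float dst[4], const float src[4]) {
	dst[0] = src[1]; dst[1] = src[1];
	dst[2] = src[3]; dst[3] = src[3];
}

static void movsldup(float dst[4], const float src[4]) {
	dst[0] = src[0]; dst[1] = src[0];
	dst[2] = src[2]; dst[3] = src[2];
}

int main() {
	const float v[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
	float hi[4], lo[4];
	movshdup(hi, v);  // { 1, 1, 3, 3 }
	movsldup(lo, v);  // { 0, 0, 2, 2 }
	printf("%g %g %g %g\n", hi[0], hi[1], hi[2], hi[3]);
	printf("%g %g %g %g\n", lo[0], lo[1], lo[2], lo[3]);
	return 0;
}

The jit only needs MOVSHDUP's lane 1 to lane 0 effect, used later in this commit to extract lane 1 without a shuffle immediate.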

@@ -746,6 +746,10 @@ public:
 	void MOVD_xmm(const OpArg &arg, X64Reg src);
 	void MOVQ_xmm(OpArg arg, X64Reg src);
 
+	// SSE3: Some additional moves.
+	void MOVSHDUP(X64Reg regOp1, OpArg arg);
+	void MOVSLDUP(X64Reg regOp1, OpArg arg);
+
 	// SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question.
 	void MOVMSKPS(X64Reg dest, OpArg arg);
 	void MOVMSKPD(X64Reg dest, OpArg arg);

@@ -159,6 +159,15 @@ bool IRNativeRegCacheBase::IsFPRMapped(IRReg fpr) {
 	return mr[fpr + 32].loc == MIPSLoc::FREG || mr[fpr + 32].loc == MIPSLoc::VREG;
 }
 
+int IRNativeRegCacheBase::GetFPRLane(IRReg fpr) {
+	_dbg_assert_(IsValidFPR(fpr));
+	if (mr[fpr + 32].loc == MIPSLoc::FREG || mr[fpr + 32].loc == MIPSLoc::VREG) {
+		int l = mr[fpr + 32].lane;
+		return l == -1 ? 0 : l;
+	}
+	return -1;
+}
+
 bool IRNativeRegCacheBase::IsGPRMappedAsPointer(IRReg gpr) {
 	_dbg_assert_(IsValidGPR(gpr));
 	if (mr[gpr].loc == MIPSLoc::REG) {
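
GetFPRLane answers: if this MIPS float is currently in a host vector register, which lane holds it? A condensed restatement of the contract (a sketch of the semantics, not the code itself):

// -1: not in a host FP/vector register at all.
//  0: mapped as a plain scalar (an internal lane of -1 normalizes to 0).
//  n: lane n of a SIMD-mapped vector.
static int NormalizedLane(bool mappedToHostReg, int internalLane) {
	if (!mappedToHostReg)
		return -1;
	return internalLane == -1 ? 0 : internalLane;
}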

@@ -163,6 +163,7 @@ public:
 	bool IsFPRMapped(IRReg fpr);
 	bool IsGPRMappedAsPointer(IRReg gpr);
 	bool IsGPRMappedAsStaticPointer(IRReg gpr);
+	int GetFPRLane(IRReg fpr);
 
 	bool IsGPRImm(IRReg gpr);
 	bool IsGPR2Imm(IRReg base);

@@ -578,9 +578,10 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) {
 #error Currently hard float is required.
 #endif
 
-	auto callFuncF_F = [&](float (*func)(float)){
+	auto callFuncF_F = [&](float (*func)(float)) {
 		regs_.FlushBeforeCall();
 		// It might be in a non-volatile register.
+		// TODO: May have to handle a transfer if SIMD here.
 		if (regs_.IsFPRMapped(inst.src1)) {
 			FMV(32, F10, regs_.F(inst.src1));
 		} else {

@@ -45,6 +45,7 @@ alignas(16) const u32 reverseQNAN[4] = { 0x803FFFFF, 0x803FFFFF, 0x803FFFFF, 0x8
 	alignas(16) const u32 noSignMask[4] = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF };
 	alignas(16) const u32 positiveInfinity[4] = { 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000 };
 	alignas(16) const u32 signBitAll[4] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+	alignas(16) const u32 ones[4] = { 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000 };
 } simdConstants;
 
 void X64JitBackend::CompIR_FArith(IRInst inst) {

@@ -646,16 +647,154 @@ void X64JitBackend::CompIR_FSat(IRInst inst) {
 	}
 }
 
+#if X64JIT_USE_XMM_CALL
+static float X64JIT_XMM_CALL x64_sin(float f) {
+	return vfpu_sin(f);
+}
+
+static float X64JIT_XMM_CALL x64_cos(float f) {
+	return vfpu_cos(f);
+}
+
+static float X64JIT_XMM_CALL x64_asin(float f) {
+	return vfpu_asin(f);
+}
+#else
+static uint32_t x64_sin(uint32_t v) {
+	float f;
+	memcpy(&f, &v, sizeof(v));
+	f = vfpu_sin(f);
+	memcpy(&v, &f, sizeof(v));
+	return v;
+}
+
+static uint32_t x64_cos(uint32_t v) {
+	float f;
+	memcpy(&f, &v, sizeof(v));
+	f = vfpu_cos(f);
+	memcpy(&v, &f, sizeof(v));
+	return v;
+}
+
+static uint32_t x64_asin(uint32_t v) {
+	float f;
+	memcpy(&f, &v, sizeof(v));
+	f = vfpu_asin(f);
+	memcpy(&v, &f, sizeof(v));
+	return v;
+}
+#endif
+
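
Two flavors of the call shims: when X64JIT_USE_XMM_CALL is set, the float travels to and from the helper directly in XMM0; otherwise it is smuggled through a 32-bit GPR, with memcpy doing the bit-for-bit float/int reinterpretation without undefined behavior. In C++20 the same wrapper could use std::bit_cast; a sketch, assuming vfpu_sin is declared as in this file (the name x64_sin_bits is hypothetical):

#include <bit>
#include <cstdint>

float vfpu_sin(float angle);  // assumed declaration, per this diff's usage

static uint32_t x64_sin_bits(uint32_t v) {  // hypothetical name, not in the commit
	return std::bit_cast<uint32_t>(vfpu_sin(std::bit_cast<float>(v)));
}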
 void X64JitBackend::CompIR_FSpecial(IRInst inst) {
 	CONDITIONAL_DISABLE;
 
+	// TODO: Regcache... maybe emitter helper too?
+	auto laneToReg0 = [&](X64Reg dest, X64Reg src, int lane) {
+		if (lane == 0) {
+			if (dest != src)
+				MOVAPS(dest, R(src));
+		} else if (lane == 1 && cpu_info.bSSE3) {
+			MOVSHDUP(dest, R(src));
+		} else if (lane == 2) {
+			MOVHLPS(dest, src);
+		} else if (cpu_info.bAVX) {
+			VPERMILPS(128, dest, R(src), VFPU_SWIZZLE(lane, lane, lane, lane));
+		} else {
+			if (dest != src)
+				MOVAPS(dest, R(src));
+			SHUFPS(dest, R(dest), VFPU_SWIZZLE(lane, lane, lane, lane));
+		}
+	};
+
+	auto callFuncF_F = [&](const void *func) {
+		regs_.FlushBeforeCall();
+
+#if X64JIT_USE_XMM_CALL
+		if (regs_.IsFPRMapped(inst.src1)) {
+			int lane = regs_.GetFPRLane(inst.src1);
+			laneToReg0(XMM0, regs_.FX(inst.src1), lane);
+		} else {
+			// Account for CTXREG being increased by 128 to reduce imm sizes.
+			int offset = offsetof(MIPSState, f) + inst.src1 * 4 - 128;
+			MOVSS(XMM0, MDisp(CTXREG, offset));
+		}
+		ABI_CallFunction((const void *)func);
+
+		// It's already in place, NOINIT won't modify.
+		regs_.MapFPR(inst.dest, MIPSMap::NOINIT | X64Map::XMM0);
+#else
+		if (regs_.IsFPRMapped(inst.src1)) {
+			int lane = regs_.GetFPRLane(inst.src1);
+			if (lane == 0) {
+				MOVD_xmm(R(SCRATCH1), regs_.FX(inst.src1));
+			} else {
+				laneToReg0(XMM0, regs_.FX(inst.src1), lane);
+				MOVD_xmm(R(SCRATCH1), XMM0);
+			}
+		} else {
+			int offset = offsetof(MIPSState, f) + inst.src1 * 4;
+			MOV(32, R(SCRATCH1), MDisp(CTXREG, offset));
+		}
+		ABI_CallFunctionR((const void *)func, SCRATCH1);
+
+		regs_.MapFPR(inst.dest, MIPSMap::NOINIT);
+		MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));
+#endif
+	};
+
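
laneToReg0 picks the cheapest way to get an arbitrary lane of src into lane 0 of dest: a plain move for lane 0, SSE3 MOVSHDUP for lane 1, MOVHLPS for lane 2, and AVX VPERMILPS or a SHUFPS broadcast otherwise. An intrinsics model of those choices (illustrative; the AVX path is omitted):

#include <immintrin.h>

static __m128 BroadcastLaneToLow(__m128 v, int lane) {
	switch (lane) {
	case 1:  return _mm_movehdup_ps(v);   // MOVSHDUP: lane 1 -> lane 0
	case 2:  return _mm_movehl_ps(v, v);  // MOVHLPS: lanes 2,3 -> lanes 0,1
	case 3:  return _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3));  // SHUFPS broadcast
	default: return v;                    // already in lane 0
	}
}

Only lane 0 of the result matters: callFuncF_F then stages the value in XMM0 (XMM-call path) or in a scratch GPR before the ABI call.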
 	switch (inst.op) {
 	case IROp::FSin:
+		callFuncF_F((const void *)&x64_sin);
+		break;
+
 	case IROp::FCos:
+		callFuncF_F((const void *)&x64_cos);
+		break;
+
 	case IROp::FRSqrt:
+	{
+		X64Reg tempReg = regs_.MapWithFPRTemp(inst);
+		SQRTSS(tempReg, regs_.F(inst.src1));
+
+		if (RipAccessible(&simdConstants.ones)) {
+			MOVSS(regs_.FX(inst.dest), M(&simdConstants.ones));  // rip accessible
+		} else {
+			MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.ones));
+			MOVSS(regs_.FX(inst.dest), MatR(SCRATCH1));
+		}
+		DIVSS(regs_.FX(inst.dest), R(tempReg));
+		break;
+	}
+
 	case IROp::FRecip:
+		if (inst.dest != inst.src1) {
+			regs_.Map(inst);
+			if (RipAccessible(&simdConstants.ones)) {
+				MOVSS(regs_.FX(inst.dest), M(&simdConstants.ones));  // rip accessible
+			} else {
+				MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.ones));
+				MOVSS(regs_.FX(inst.dest), MatR(SCRATCH1));
+			}
+			DIVSS(regs_.FX(inst.dest), regs_.F(inst.src1));
+		} else {
+			X64Reg tempReg = regs_.MapWithFPRTemp(inst);
+			if (RipAccessible(&simdConstants.ones)) {
+				MOVSS(tempReg, M(&simdConstants.ones));  // rip accessible
+			} else {
+				MOV(PTRBITS, R(SCRATCH1), ImmPtr(&simdConstants.ones));
+				MOVSS(tempReg, MatR(SCRATCH1));
+			}
+			if (cpu_info.bAVX) {
+				VDIVSS(regs_.FX(inst.dest), tempReg, regs_.F(inst.src1));
+			} else {
+				DIVSS(tempReg, regs_.F(inst.src1));
+				MOVSS(regs_.FX(inst.dest), R(tempReg));
+			}
+		}
+		break;
+
 	case IROp::FAsin:
-		CompIR_Generic(inst);
+		callFuncF_F((const void *)&x64_asin);
 		break;
 
 	default:
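
FRSqrt and FRecip stay inline instead of calling a helper: both load 1.0f from the new simdConstants.ones and divide. SQRTSS plus DIVSS (or the non-destructive VDIVSS on AVX, which avoids the extra MOVSS when dest aliases src1) give the correctly rounded result, whereas the approximation instructions RSQRTSS/RCPSS would only deliver roughly 12 bits of precision. The scalar meaning, as a sketch (not the jit's own code):

#include <cmath>
#include <immintrin.h>

// FRSqrt: SQRTSS into a temp, then DIVSS of the 1.0f constant by it.
static float frsqrt(float x) {
	return 1.0f / std::sqrt(x);
}

// FRecip: MOVSS of 1.0f (simdConstants.ones), then DIVSS/VDIVSS by x.
static float frecip(float x) {
	__m128 one = _mm_set_ss(1.0f);
	return _mm_cvtss_f32(_mm_div_ss(one, _mm_set_ss(x)));
}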

@@ -32,7 +32,7 @@
 #if PPSSPP_PLATFORM(WINDOWS) && (defined(_MSC_VER) || defined(__clang__) || defined(__INTEL_COMPILER))
 #define X64JIT_XMM_CALL __vectorcall
 #define X64JIT_USE_XMM_CALL 1
-#elif PPSSPP_ARCH(AMD64) && !PPSSPP_PLATFORM(WINDOWS)
+#elif PPSSPP_ARCH(AMD64)
 // SystemV ABI supports XMM registers.
 #define X64JIT_XMM_CALL
 #define X64JIT_USE_XMM_CALL 1
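
The relaxed #elif works because the XMM fast path only needs one guarantee: the first float argument and the float return value live in XMM0, which holds for both the Win64 ABI and the SystemV AMD64 ABI. One way to picture the two call shapes this macro selects (illustrative):

#include <cstdint>

#if X64JIT_USE_XMM_CALL
// Argument and result travel in XMM0; no GPR round-trip needed.
typedef float (X64JIT_XMM_CALL *FloatFunc)(float);
#else
// Argument and result travel in a 32-bit GPR as raw float bits.
typedef uint32_t (*FloatFunc)(uint32_t);
#endif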

@@ -103,6 +103,13 @@ const int *X64IRRegCache::GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &c
 #endif
 		};
 
+		if ((flags & X64Map::MASK) == X64Map::XMM0) {
+			// Certain cases require this reg.
+			static const int blendReg[] = { XMM0 };
+			count = 1;
+			return blendReg;
+		}
+
 		count = ARRAY_SIZE(allocationOrder);
 		return allocationOrder;
 	} else {
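
When a mapping carries the X64Map::XMM0 flag, the allocation order collapses to a single candidate, so the value lands exactly where the upcoming instruction or call needs it (the blendReg name reflects the other XMM0-pinned user, BLENDVPS). The shape of the pattern, as a minimal model (not the real allocator):

// Illustrative only; register indices are placeholders.
static const int *AllocationOrder(bool demandXMM0, int &count) {
	static const int usual[] = { 5, 6, 7, 8 };
	static const int xmm0Only[] = { 0 };
	if (demandXMM0) {
		count = 1;
		return xmm0Only;
	}
	count = (int)(sizeof(usual) / sizeof(usual[0]));
	return usual;
}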

@@ -238,6 +245,11 @@ void X64IRRegCache::MapWithFlags(IRInst inst, X64Map destFlags, X64Map src1Flags
 			flushReg(RDX);
 			break;
 
+		case X64Map::XMM0:
+			if (nr[XMMToNativeReg(XMM0)].mipsReg != mapping[i].reg)
+				flushReg(XMMToNativeReg(XMM0));
+			break;
+
 		default:
 			break;
 		}

@@ -45,10 +45,12 @@ enum class X64Map : uint8_t {
 	NONE = 0,
 	// On 32-bit: EAX, EBX, ECX, EDX
 	LOW_SUBREG = 0x10,
-	// EDX/RDX
+	// EDX/RDX for DIV/MUL/similar.
 	HIGH_DATA = 0x20,
-	// ECX/RCX
+	// ECX/RCX only, for shifts.
 	SHIFT = 0x30,
+	// XMM0 for BLENDVPS, funcs.
+	XMM0 = 0x40,
 	MASK = 0xF0,
 };
 static inline MIPSMap operator |(const MIPSMap &lhs, const X64Map &rhs) {
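
X64Map values occupy the high nibble (MASK = 0xF0), presumably clear of the low bits MIPSMap itself uses, so a specific-register demand can ride along in the same flags value through the operator| above; this commit does exactly that with regs_.MapFPR(inst.dest, MIPSMap::NOINIT | X64Map::XMM0). A minimal model of the packing (illustrative):

#include <cstdint>

enum class X64Map : uint8_t {
	NONE = 0,
	LOW_SUBREG = 0x10,
	HIGH_DATA = 0x20,
	SHIFT = 0x30,
	XMM0 = 0x40,
	MASK = 0xF0,
};

// Does this flags byte demand XMM0 specifically?
static bool DemandsXMM0(uint8_t flags) {
	return (flags & (uint8_t)X64Map::MASK) == (uint8_t)X64Map::XMM0;
}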