mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
VFPU JIT: start setting up infrastructure. very incomplete. vdot works if undisabled, but isn't complete.
This commit is contained in:
parent
68991511ee
commit
2738417040
8 changed files with 215 additions and 23 deletions
|
@ -462,7 +462,9 @@ public:
|
|||
// SSE/SSE2: Floating point bitwise (yes)
|
||||
void CMPSS(X64Reg regOp, OpArg arg, u8 compare);
|
||||
void CMPSD(X64Reg regOp, OpArg arg, u8 compare);
|
||||
void ANDSS(X64Reg regOp, OpArg arg);
|
||||
|
||||
// I don't think these exist
|
||||
/*
|
||||
void ANDSD(X64Reg regOp, OpArg arg);
|
||||
void ANDNSS(X64Reg regOp, OpArg arg);
|
||||
void ANDNSD(X64Reg regOp, OpArg arg);
|
||||
|
@ -470,6 +472,7 @@ public:
|
|||
void ORSD(X64Reg regOp, OpArg arg);
|
||||
void XORSS(X64Reg regOp, OpArg arg);
|
||||
void XORSD(X64Reg regOp, OpArg arg);
|
||||
*/
|
||||
|
||||
// SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
|
||||
void ADDPS(X64Reg regOp, OpArg arg);
|
||||
|
|
|
@ -490,7 +490,7 @@ const MIPSInstruction tableVFPU0[8] =
|
|||
const MIPSInstruction tableVFPU1[8] =
|
||||
{
|
||||
INSTR("vmul",&Jit::Comp_Generic, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
|
||||
INSTR("vdot",&Jit::Comp_Generic, Dis_VectorDot, Int_VDot, IS_VFPU),
|
||||
INSTR("vdot",&Jit::Comp_VDot, Dis_VectorDot, Int_VDot, IS_VFPU),
|
||||
INSTR("vscl",&Jit::Comp_Generic, Dis_VScl, Int_VScl, IS_VFPU),
|
||||
{-2},
|
||||
INSTR("vhdp",&Jit::Comp_Generic, Dis_Generic, Int_VHdp, IS_VFPU),
|
||||
|
|
|
@ -185,7 +185,9 @@ namespace MIPSComp
|
|||
if ((doImm == &RType3_ImmAdd || doImm == &RType3_ImmOr) && (rs == 0 || rt == 0))
|
||||
{
|
||||
gpr.BindToRegister(rd, rd == rs || rd == rt, true);
|
||||
MOV(32, gpr.R(rd), gpr.R(rt == 0 ? rs : rt));
|
||||
int rsource = rt == 0 ? rs : rt;
|
||||
if (rsource != rd)
|
||||
MOV(32, gpr.R(rd), gpr.R(rsource));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
#include "../MIPSVFPUUtils.h"
|
||||
#include "RegCache.h"
|
||||
|
||||
// VERY UNFINISHED
|
||||
|
||||
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
||||
// Functions currently known not to work should have DISABLE.
|
||||
|
||||
|
@ -45,6 +47,99 @@ using namespace Gen;
|
|||
namespace MIPSComp
|
||||
{
|
||||
|
||||
// Scalar bounds used as memory operands by the destination-prefix saturation
// code in ApplyPrefixD: saturation mode 1 clamps between `zero` and `one`,
// mode 3 clamps between `minus_one` and `one`.
static const float one = 1.0f;
static const float minus_one = -1.0f;
// Previously initialised to -1.0f, which turned the mode-1 clamp into [-1, 1].
static const float zero = 0.0f;
|
||||
|
||||
// Four-lane mask with every bit except bit 31 set; ApplyPrefixST ANDs it in (ANDPS) when a prefix lane has its "abs" bit set.
const u32 GC_ALIGNED16( noSignMask[4] ) = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
|
||||
// Only bit 31 of the lowest lane set; ApplyPrefixST XORs it in (XORPS) when a prefix lane has its "negate" bit set.
const u32 GC_ALIGNED16( signBitLower[4] ) = {0x80000000, 0, 0, 0};
|
||||
|
||||
// Compiles a VFPU prefix-setting instruction. The new S, T or D prefix value is
// recorded in the JIT state (and flagged as known) so later instructions compiled
// in this block can use it, and a store of the value into the matching vfpuCtrl
// slot is emitted as well.
void Jit::Comp_VPFX(u32 op)
{
	const int prefixBits = op & 0xFFFFF;   // low 20 bits: the prefix value
	const int target = (op >> 24) & 3;     // 0 = S, 1 = T, 2 = D

	if (target == 0) {
		js.prefixS = prefixBits;
		js.prefixSKnown = true;
	} else if (target == 1) {
		js.prefixT = prefixBits;
		js.prefixTKnown = true;
	} else if (target == 2) {
		js.prefixD = prefixBits;
		js.prefixDKnown = true;
	}
	// target == 3 updates no tracked state; only the store below is emitted.

	// TODO: Defer this to end of block
	MOV(32, M((void *)&mips_->vfpuCtrl[VFPU_CTRL_SPREFIX + target]), Imm32(prefixBits));
}
|
||||
|
||||
|
||||
// TODO: Got register value ownership issues. We need to be sure that if we modify input
|
||||
// like this, it does NOT get written back!
|
||||
// Applies a source (S or T) prefix to the register list in `vregs`.
//
// For each of the first GetNumVectorElements(sz) lanes the prefix supplies:
//   bits 2*i..2*i+1 : lane select (or constant index when the constant bit is set)
//   bit  8+i        : abs (or "upper half of the constant table" for constants)
//   bit  12+i       : use a constant instead of a register
//   bit  16+i       : negate
// `vregs` is rewritten in place to reflect the swizzle, and code is emitted to
// perform abs / constant load / negate directly on the mapped registers.
void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
	// 0xE4 selects lanes 0,1,2,3 in order with no modifier bits: nothing to do.
	if (prefix == 0xE4)
		return;

	static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};

	const int count = GetNumVectorElements(sz);

	// Snapshot the incoming order so every swizzle reads the unmodified list.
	u8 unswizzled[4];
	for (int lane = 0; lane < count; lane++)
		unswizzled[lane] = vregs[lane];

	for (int lane = 0; lane < count; lane++) {
		const int select = (prefix >> (lane * 2)) & 3;
		const bool wantAbs = ((prefix >> (8 + lane)) & 1) != 0;
		const bool wantConstant = ((prefix >> (12 + lane)) & 1) != 0;
		const bool wantNegate = ((prefix >> (16 + lane)) & 1) != 0;

		if (wantConstant) {
			// The abs bit picks the second group of four constants.
			MOVSS(fpr.VX(vregs[lane]), M((void *)&constantArray[select + (wantAbs ? 4 : 0)]));
		} else {
			vregs[lane] = unswizzled[select];
			if (wantAbs)
				ANDPS(fpr.VX(vregs[lane]), M((void *)&noSignMask));
		}

		if (wantNegate)
			XORPS(fpr.VX(vregs[lane]), M((void *)&signBitLower));
	}
}
|
||||
|
||||
void Jit::ApplyPrefixD(const u8 *vregs, u32 prefix, VectorSize sz, bool onlyWriteMask) {
|
||||
_assert_(js.prefixDKnown);
|
||||
if (!prefix) return;
|
||||
|
||||
int n = GetNumVectorElements(sz);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
int mask = (prefix >> (8 + i)) & 1;
|
||||
js.writeMask[i] = mask ? true : false;
|
||||
if (onlyWriteMask)
|
||||
continue;
|
||||
if (!mask) {
|
||||
int sat = (prefix >> (i * 2)) & 3;
|
||||
if (sat == 1)
|
||||
{
|
||||
MAXSS(fpr.VX(vregs[i]), M((void *)&zero));
|
||||
MINSS(fpr.VX(vregs[i]), M((void *)&one));
|
||||
}
|
||||
else if (sat == 3)
|
||||
{
|
||||
MAXSS(fpr.VX(vregs[i]), M((void *)&minus_one));
|
||||
MINSS(fpr.VX(vregs[i]), M((void *)&one));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Jit::Comp_SVQ(u32 op)
|
||||
{
|
||||
int imm = (signed short)(op&0xFFFC);
|
||||
|
@ -58,32 +153,28 @@ void Jit::Comp_SVQ(u32 op)
|
|||
if (!g_Config.bFastMemory) {
|
||||
DISABLE;
|
||||
}
|
||||
fpr.Flush();
|
||||
gpr.BindToRegister(rs, true, true);
|
||||
|
||||
u8 vregs[4];
|
||||
GetVectorRegs(vregs, V_Quad, vt);
|
||||
|
||||
MOV(32, R(EAX), gpr.R(rs));
|
||||
// Just copy 4 words the easiest way while not wasting registers.
|
||||
#ifndef _M_X64
|
||||
AND(32, R(EAX), Imm32(0x3FFFFFFF));
|
||||
#endif
|
||||
fpr.MapRegsV(vregs, V_Quad, MAP_DIRTY | MAP_NOINIT);
|
||||
|
||||
// MOVSS to prime any crazy cache mechanism that might assume that there's a float somewhere...
|
||||
for (int i = 0; i < 4; i++) {
|
||||
#ifdef _M_X64
|
||||
MOVSS((X64Reg)(XMM0 + i), MComplex(RBX, EAX, 1, i * 4 + imm));
|
||||
MOVSS(fpr.VX(vregs[i]), MComplex(RBX, EAX, 1, i * 4 + imm));
|
||||
#else
|
||||
MOVSS((X64Reg)(XMM0 + i), MDisp(EAX, (u32)(Memory::base + i * 4 + imm)));
|
||||
MOVSS(fpr.VX(vregs[i]), MDisp(EAX, (u32)(Memory::base + i * 4 + imm)));
|
||||
#endif
|
||||
}
|
||||
|
||||
// It would be pretty nice to have these in registers for the next instruction...
|
||||
// Even if we don't use real SIMD there's still 8 or 16 scalar float registers.
|
||||
for (int i = 0; i < 4; i++) {
|
||||
MOVSS(M((void *)&mips_->v[vregs[i]]), (X64Reg)(XMM0 + i));
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
fpr.ReleaseSpillLocks();
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -107,18 +198,18 @@ void Jit::Comp_SVQ(u32 op)
|
|||
|
||||
// It would be pretty nice to have these in registers for the next instruction...
|
||||
// Even if we don't use real SIMD there's still 8 or 16 scalar float registers.
|
||||
for (int i = 0; i < 4; i++) {
|
||||
MOVSS((X64Reg)(XMM0 + i), M((void *)&mips_->v[vregs[i]]));
|
||||
}
|
||||
|
||||
fpr.MapRegsV(vregs, V_Quad, 0);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
#ifdef _M_X64
|
||||
MOVSS(MComplex(RBX, EAX, 1, i * 4 + imm), (X64Reg)(XMM0 + i));
|
||||
MOVSS(MComplex(RBX, EAX, 1, i * 4 + imm), fpr.VX(vregs[i]));
|
||||
#else
|
||||
MOVSS(MDisp(EAX, (u32)(Memory::base + i * 4 + imm)), (X64Reg)(XMM0 + i));
|
||||
MOVSS(MDisp(EAX, (u32)(Memory::base + i * 4 + imm)), fpr.VX(vregs[i]));
|
||||
#endif
|
||||
}
|
||||
|
||||
fpr.ReleaseSpillLocks();
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
break;
|
||||
|
@ -129,4 +220,48 @@ void Jit::Comp_SVQ(u32 op)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// Compiles the VFPU dot product: d = sum over i of s[i] * t[i], accumulated in
// XMM0 with XMM1 as scratch, then stored to the single destination register.
//
// Still disabled (DISABLE below), and S/T/D prefixes are not applied yet.
void Jit::Comp_VDot(u32 op) {
	DISABLE;
	// WARNING: No prefix support!

	int vd = _VD;
	int vs = _VS;
	int vt = _VT;
	VectorSize sz = GetVecSize(op);

	// TODO: Force read one of them into regs? probably not.
	u8 sregs[4], tregs[4], dregs[4];
	GetVectorRegs(sregs, sz, vs);
	GetVectorRegs(tregs, sz, vt);
	// The result is a single value, so resolve the destination as V_Single
	// (it is mapped as V_Single below) rather than with the source size.
	GetVectorRegs(dregs, V_Single, vd);

	// TODO: applyprefixST here somehow (shuffle, etc...)

	// XMM0 = s[0] * t[0]
	MOVSS(XMM0, fpr.V(sregs[0]));
	MULSS(XMM0, fpr.V(tregs[0]));

	int n = GetNumVectorElements(sz);
	for (int i = 1; i < n; i++)
	{
		// XMM0 += s[i] * t[i]
		MOVSS(XMM1, fpr.V(sregs[i]));
		MULSS(XMM1, fpr.V(tregs[i]));
		ADDSS(XMM0, R(XMM1));
	}
	fpr.ReleaseSpillLocks();

	// The destination is overwritten entirely: no need to load it (MAP_NOINIT),
	// but it must be marked dirty so the new value is not lost.
	fpr.MapRegsV(dregs, V_Single, MAP_NOINIT | MAP_DIRTY);

	// TODO: applyprefixD here somehow (write mask etc..)

	// Store through the resolved register index, not the raw vd operand field.
	MOVSS(fpr.V(dregs[0]), XMM0);

	fpr.ReleaseSpillLocks();

	js.EatPrefix();
}
|
||||
|
||||
|
||||
}
|
|
@ -184,6 +184,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
|
|||
js.curBlock = b;
|
||||
js.compiling = true;
|
||||
js.inDelaySlot = false;
|
||||
js.PrefixStart();
|
||||
|
||||
// We add a check before the block, used when entering from a linked block.
|
||||
b->checkedEntry = GetCodePtr();
|
||||
|
|
|
@ -55,6 +55,29 @@ struct JitState
|
|||
int downcountAmount;
|
||||
bool compiling; // TODO: get rid of this in favor of using analysis results to determine end of block
|
||||
JitBlock *curBlock;
|
||||
|
||||
// VFPU prefix magic
|
||||
u32 prefixS;
|
||||
u32 prefixT;
|
||||
u32 prefixD;
|
||||
bool writeMask[4];
|
||||
bool prefixSKnown;
|
||||
bool prefixTKnown;
|
||||
bool prefixDKnown;
|
||||
// Marks all three VFPU prefixes (S, T, D) as unknown.
void PrefixStart() {
	prefixSKnown = prefixTKnown = prefixDKnown = false;
}
|
||||
// Resets the tracked prefixes to fixed values (S and T to 0xE4, D to 0),
// marks all three as known, and clears the per-lane write mask.
void EatPrefix() {
	prefixS = 0xE4;
	prefixT = 0xE4;
	prefixD = 0x0;
	prefixSKnown = prefixTKnown = prefixDKnown = true;
	for (int lane = 0; lane < 4; lane++)
		writeMask[lane] = false;
}
|
||||
};
|
||||
|
||||
enum CompileDelaySlotFlags
|
||||
|
@ -111,6 +134,10 @@ public:
|
|||
void Comp_mxc1(u32 op);
|
||||
|
||||
void Comp_SVQ(u32 op);
|
||||
void Comp_VPFX(u32 op);
|
||||
void Comp_VDot(u32 op);
|
||||
void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz);
|
||||
void ApplyPrefixD(const u8 *vregs, u32 prefix, VectorSize sz, bool onlyWriteMask = false);
|
||||
|
||||
JitBlockCache *GetBlockCache() { return &blocks; }
|
||||
AsmRoutineManager &Asm() { return asm_; }
|
||||
|
|
|
@ -48,16 +48,32 @@ void FPURegCache::SpillLock(int p1, int p2, int p3, int p4) {
|
|||
if (p4 != 0xFF) regs[p4].locked = true;
|
||||
}
|
||||
|
||||
void FPURegCache::SpillLockV(const u8 *v, VectorSize vsz) {
|
||||
for (int i = 0; i < GetNumVectorElements(vsz); i++) {
|
||||
vregs[i].locked = true;
|
||||
void FPURegCache::SpillLockV(const u8 *v, VectorSize sz) {
|
||||
for (int i = 0; i < GetNumVectorElements(sz); i++) {
|
||||
vregs[v[i]].locked = true;
|
||||
}
|
||||
}
|
||||
|
||||
void FPURegCache::SpillLockV(int vec, VectorSize vsz) {
|
||||
void FPURegCache::SpillLockV(int vec, VectorSize sz) {
|
||||
u8 v[4];
|
||||
GetVectorRegs(v, vsz, vec);
|
||||
SpillLockV(v, vsz);
|
||||
GetVectorRegs(v, sz, vec);
|
||||
SpillLockV(v, sz);
|
||||
}
|
||||
|
||||
void FPURegCache::MapRegsV(int vec, VectorSize sz, int flags) {
|
||||
u8 v[4];
|
||||
GetVectorRegs(v, sz, vec);
|
||||
SpillLockV(v, sz);
|
||||
for (int i = 0; i < GetNumVectorElements(sz); i++) {
|
||||
BindToRegister(v[i] + 32, (flags & MAP_NOINIT) == 0, (flags & MAP_DIRTY) != 0);
|
||||
}
|
||||
}
|
||||
|
||||
void FPURegCache::MapRegsV(const u8 *v, VectorSize sz, int flags) {
|
||||
SpillLockV(v, sz);
|
||||
for (int i = 0; i < GetNumVectorElements(sz); i++) {
|
||||
BindToRegister(v[i] + 32, (flags & MAP_NOINIT) == 0, (flags & MAP_DIRTY) != 0);
|
||||
}
|
||||
}
|
||||
|
||||
void FPURegCache::ReleaseSpillLocks() {
|
||||
|
@ -85,6 +101,7 @@ void FPURegCache::BindToRegister(int i, bool doLoad, bool makeDirty) {
|
|||
} else {
|
||||
// There are no immediates in the FPR reg file, so we already had this in a register. Make dirty as necessary.
|
||||
xregs[RX(i)].dirty |= makeDirty;
|
||||
_assert_msg_(DYNA_REC, regs[i].location.IsSimpleReg(), "not loaded and not simple.");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -47,6 +47,11 @@ struct MIPSCachedFPReg {
|
|||
bool locked;
|
||||
};
|
||||
|
||||
// Bit flags accepted by FPURegCache::MapRegsV; combine with bitwise OR.
enum {
	MAP_DIRTY  = 1 << 0,  // mark the mapped registers dirty
	MAP_NOINIT = 1 << 1,  // do not load the current value when binding
};
|
||||
|
||||
// The PSP has 160 FP registers: 32 FPRs + 128 VFPU registers.
|
||||
// Soon we will support them all.
|
||||
|
||||
|
@ -89,6 +94,8 @@ public:
|
|||
void SpillLock(int p1, int p2=0xff, int p3=0xff, int p4=0xff);
|
||||
void ReleaseSpillLocks();
|
||||
|
||||
void MapRegsV(int vec, VectorSize vsz, int flags);
|
||||
void MapRegsV(const u8 *v, VectorSize vsz, int flags);
|
||||
void SpillLockV(const u8 *v, VectorSize vsz);
|
||||
void SpillLockV(int vec, VectorSize vsz);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue