mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #700 from unknownbrackets/jit-vfpu
Flush vfpu prefixes, initial implementation of VecDo3
This commit is contained in:
commit
c45d4b0357
9 changed files with 183 additions and 45 deletions
|
@ -35,6 +35,11 @@ namespace MIPSComp
|
|||
DISABLE;
|
||||
}
|
||||
|
||||
// Three-operand VFPU vector ops have no native code path in this backend yet.
// DISABLE presumably expands to the Comp_Generic fallback, as it does in the
// sibling JIT sources -- confirm against this file's macro definition.
void Jit::Comp_VecDo3(u32 op) {
	DISABLE;
}
|
||||
|
||||
void Jit::Comp_Mftv(u32 op)
|
||||
{
|
||||
DISABLE;
|
||||
|
|
|
@ -110,6 +110,7 @@ public:
|
|||
void Comp_SVQ(u32 op);
|
||||
void Comp_VPFX(u32 op);
|
||||
void Comp_VDot(u32 op);
|
||||
void Comp_VecDo3(u32 op);
|
||||
void Comp_Mftv(u32 op);
|
||||
void Comp_Vmtvc(u32 op);
|
||||
|
||||
|
|
|
@ -363,11 +363,11 @@ const MIPSInstruction tableCop2[32] =
|
|||
INSTR("mfc2", &Jit::Comp_Generic, Dis_Generic, 0, OUT_RT),
|
||||
{-2},
|
||||
INSTR("cfc2", &Jit::Comp_Generic, Dis_Generic, 0, 0),
|
||||
INSTR("mfv", &Jit::Comp_Mftv, Dis_Mftv, Int_Mftv, 0),
|
||||
INSTR("mfv", &Jit::Comp_Mftv, Dis_Mftv, Int_Mftv, IS_VFPU),
|
||||
INSTR("mtc2", &Jit::Comp_Generic, Dis_Generic, 0, IN_RT),
|
||||
{-2},
|
||||
INSTR("ctc2", &Jit::Comp_Generic, Dis_Generic, 0, 0),
|
||||
INSTR("mtv", &Jit::Comp_Mftv, Dis_Mftv, Int_Mftv, 0),
|
||||
INSTR("mtv", &Jit::Comp_Mftv, Dis_Mftv, Int_Mftv, IS_VFPU),
|
||||
|
||||
{Cop2BC2},
|
||||
INSTR("??", &Jit::Comp_Generic, Dis_Generic, 0, 0),
|
||||
|
@ -478,17 +478,17 @@ const MIPSInstruction tableCop1BC[32] =
|
|||
|
||||
const MIPSInstruction tableVFPU0[8] =
|
||||
{
|
||||
INSTR("vadd",&Jit::Comp_Generic, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
|
||||
INSTR("vsub",&Jit::Comp_Generic, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
|
||||
INSTR("vadd",&Jit::Comp_VecDo3, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
|
||||
INSTR("vsub",&Jit::Comp_VecDo3, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
|
||||
INSTR("vsbn",&Jit::Comp_Generic, Dis_VectorSet3, Int_Vsbn, IS_VFPU),
|
||||
{-2}, {-2}, {-2}, {-2},
|
||||
|
||||
INSTR("vdiv",&Jit::Comp_Generic, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
|
||||
INSTR("vdiv",&Jit::Comp_VecDo3, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
|
||||
};
|
||||
|
||||
const MIPSInstruction tableVFPU1[8] =
|
||||
{
|
||||
INSTR("vmul",&Jit::Comp_Generic, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
|
||||
INSTR("vmul",&Jit::Comp_VecDo3, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
|
||||
INSTR("vdot",&Jit::Comp_VDot, Dis_VectorDot, Int_VDot, IS_VFPU),
|
||||
INSTR("vscl",&Jit::Comp_Generic, Dis_VScl, Int_VScl, IS_VFPU),
|
||||
{-2},
|
||||
|
|
|
@ -32,9 +32,9 @@ using namespace MIPSAnalyst;
|
|||
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
||||
// Currently known non working ones should have DISABLE.
|
||||
|
||||
//#define CONDITIONAL_DISABLE Comp_Generic(op); return;
|
||||
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
|
||||
#define CONDITIONAL_DISABLE ;
|
||||
#define DISABLE Comp_Generic(op); return;
|
||||
#define DISABLE { Comp_Generic(op); return; }
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
|
|
|
@ -34,9 +34,9 @@
|
|||
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
||||
// Currently known non working ones should have DISABLE.
|
||||
|
||||
// #define CONDITIONAL_DISABLE Comp_Generic(op); return;
|
||||
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
|
||||
#define CONDITIONAL_DISABLE ;
|
||||
#define DISABLE Comp_Generic(op); return;
|
||||
#define DISABLE { Comp_Generic(op); return; }
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
|
|
|
@ -35,9 +35,9 @@
|
|||
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
||||
// Currently known non working ones should have DISABLE.
|
||||
|
||||
// #define CONDITIONAL_DISABLE Comp_Generic(op); return;
|
||||
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
|
||||
#define CONDITIONAL_DISABLE ;
|
||||
#define DISABLE Comp_Generic(op); return;
|
||||
#define DISABLE { Comp_Generic(op); return; }
|
||||
|
||||
namespace MIPSComp
|
||||
{
|
||||
|
|
|
@ -28,9 +28,9 @@
|
|||
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
|
||||
// Currently known non working ones should have DISABLE.
|
||||
|
||||
// #define CONDITIONAL_DISABLE Comp_Generic(op); return;
|
||||
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
|
||||
#define CONDITIONAL_DISABLE ;
|
||||
#define DISABLE Comp_Generic(op); return;
|
||||
#define DISABLE { Comp_Generic(op); return; }
|
||||
|
||||
|
||||
#define _RS ((op>>21) & 0x1F)
|
||||
|
@ -62,19 +62,17 @@ void Jit::Comp_VPFX(u32 op)
|
|||
switch (regnum) {
|
||||
case 0: // S
|
||||
js.prefixS = data;
|
||||
js.prefixSKnown = true;
|
||||
js.prefixSFlag = JitState::PREFIX_KNOWN_DIRTY;
|
||||
break;
|
||||
case 1: // T
|
||||
js.prefixT = data;
|
||||
js.prefixTKnown = true;
|
||||
js.prefixTFlag = JitState::PREFIX_KNOWN_DIRTY;
|
||||
break;
|
||||
case 2: // D
|
||||
js.prefixD = data;
|
||||
js.prefixDKnown = true;
|
||||
js.prefixDFlag = JitState::PREFIX_KNOWN_DIRTY;
|
||||
break;
|
||||
}
|
||||
// TODO: Defer this to end of block
|
||||
MOV(32, M((void *)&mips_->vfpuCtrl[VFPU_CTRL_SPREFIX + regnum]), Imm32(data));
|
||||
}
|
||||
|
||||
|
||||
|
@ -114,7 +112,7 @@ void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
|
|||
}
|
||||
|
||||
void Jit::ApplyPrefixD(const u8 *vregs, u32 prefix, VectorSize sz, bool onlyWriteMask) {
|
||||
_assert_(js.prefixDKnown);
|
||||
_assert_(js.prefixDFlag & JitState::PREFIX_KNOWN);
|
||||
if (!prefix) return;
|
||||
|
||||
int n = GetNumVectorElements(sz);
|
||||
|
@ -202,8 +200,7 @@ void Jit::Comp_SV(u32 op) {
|
|||
break;
|
||||
|
||||
default:
|
||||
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
|
||||
break;
|
||||
DISABLE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -288,10 +285,15 @@ void Jit::Comp_SVQ(u32 op)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
void Jit::Comp_VDot(u32 op) {
|
||||
DISABLE;
|
||||
|
||||
// WARNING: No prefix support!
|
||||
if (js.MayHavePrefix()) {
|
||||
Comp_Generic(op);
|
||||
js.EatPrefix();
|
||||
return;
|
||||
}
|
||||
|
||||
int vd = _VD;
|
||||
int vs = _VS;
|
||||
|
@ -309,7 +311,6 @@ void Jit::Comp_VDot(u32 op) {
|
|||
MOVSS(XMM0, fpr.V(sregs[0]));
|
||||
MULSS(XMM0, fpr.V(tregs[0]));
|
||||
|
||||
float sum = 0.0f;
|
||||
int n = GetNumVectorElements(sz);
|
||||
for (int i = 1; i < n; i++)
|
||||
{
|
||||
|
@ -331,6 +332,78 @@ void Jit::Comp_VDot(u32 op) {
|
|||
js.EatPrefix();
|
||||
}
|
||||
|
||||
// Compiles the element-wise three-operand VFPU ops (vadd, vsub, vdiv, vmul)
// by emitting one scalar SSE instruction per vector lane.
//
// The function currently starts with DISABLE, so every op is routed through
// Comp_Generic and the native path below is not reached.
void Jit::Comp_VecDo3(u32 op) {
	DISABLE;

	// WARNING: No prefix support!
	// Any prefix that is unknown or non-default forces the generic fallback.
	if (js.MayHavePrefix()) {
		Comp_Generic(op);
		js.EatPrefix();
		return;
	}

	const int destReg = _VD;
	const int srcReg = _VS;
	const int tgtReg = _VT;
	const VectorSize size = GetVecSize(op);

	u8 sregs[4], tregs[4], dregs[4];
	GetVectorRegs(sregs, size, srcReg);
	GetVectorRegs(tregs, size, tgtReg);
	GetVectorRegs(dregs, size, destReg);

	// Select the scalar SSE emitter matching the opcode group and sub-opcode.
	void (XEmitter::*emitOp)(X64Reg, OpArg) = NULL;
	const u32 group = op >> 26;
	const u32 subop = (op >> 23) & 7;
	if (group == 24) {
		// VFPU0
		if (subop == 0)
			emitOp = &XEmitter::ADDSS;  // vadd: d[i] = s[i] + t[i]
		else if (subop == 1)
			emitOp = &XEmitter::SUBSS;  // vsub: d[i] = s[i] - t[i]
		else if (subop == 7)
			emitOp = &XEmitter::DIVSS;  // vdiv: d[i] = s[i] / t[i]
	} else if (group == 25) {
		// VFPU1
		if (subop == 0)
			emitOp = &XEmitter::MULSS;  // vmul: d[i] = s[i] * t[i]
	}

	// Anything not matched above goes through the generic fallback.
	if (emitOp == NULL) {
		Comp_Generic(op);
		js.EatPrefix();
		return;
	}

	const int count = GetNumVectorElements(size);
	// We need at least n temporaries...
	// NOTE(review): the flush presumably frees XMM0..XMM(count-1) for use as
	// scratch registers -- confirm against the FPU register cache.
	if (count > 2) {
		fpr.Flush();
	}

	// Three separate passes: load every s lane, apply the op with every t lane,
	// then store every result. Presumably this keeps results correct when vd
	// overlaps vs or vt -- verify.
	for (int lane = 0; lane < count; ++lane) {
		const X64Reg temp = (X64Reg) (XMM0 + lane);
		MOVSS(temp, fpr.V(sregs[lane]));
	}
	for (int lane = 0; lane < count; ++lane) {
		const X64Reg temp = (X64Reg) (XMM0 + lane);
		(this->*emitOp)(temp, fpr.V(tregs[lane]));
	}
	for (int lane = 0; lane < count; ++lane) {
		const X64Reg temp = (X64Reg) (XMM0 + lane);
		MOVSS(fpr.V(dregs[lane]), temp);
	}

	fpr.ReleaseSpillLocks();

	js.EatPrefix();
}
|
||||
|
||||
void Jit::Comp_Mftv(u32 op) {
|
||||
CONDITIONAL_DISABLE;
|
||||
|
||||
|
@ -346,6 +419,8 @@ void Jit::Comp_Mftv(u32 op) {
|
|||
gpr.BindToRegister(rt, false, true);
|
||||
MOV(32, gpr.R(rt), fpr.V(imm));
|
||||
} else if (imm < 128 + VFPU_CTRL_MAX) { //mtvc
|
||||
// In case we have a saved prefix.
|
||||
FlushPrefixV();
|
||||
gpr.BindToRegister(rt, false, true);
|
||||
MOV(32, gpr.R(rt), M(&currentMIPS->vfpuCtrl[imm - 128]));
|
||||
} else {
|
||||
|
@ -367,11 +442,11 @@ void Jit::Comp_Mftv(u32 op) {
|
|||
|
||||
// TODO: Optimization if rt is Imm?
|
||||
if (imm - 128 == VFPU_CTRL_SPREFIX) {
|
||||
js.prefixSKnown = false;
|
||||
js.prefixSFlag = JitState::PREFIX_UNKNOWN;
|
||||
} else if (imm - 128 == VFPU_CTRL_TPREFIX) {
|
||||
js.prefixTKnown = false;
|
||||
js.prefixTFlag = JitState::PREFIX_UNKNOWN;
|
||||
} else if (imm - 128 == VFPU_CTRL_DPREFIX) {
|
||||
js.prefixDKnown = false;
|
||||
js.prefixDFlag = JitState::PREFIX_UNKNOWN;
|
||||
}
|
||||
} else {
|
||||
//ERROR
|
||||
|
@ -381,8 +456,6 @@ void Jit::Comp_Mftv(u32 op) {
|
|||
|
||||
default:
|
||||
DISABLE;
|
||||
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -396,11 +469,11 @@ void Jit::Comp_Vmtvc(u32 op) {
|
|||
fpr.ReleaseSpillLocks();
|
||||
|
||||
if (imm - 128 == VFPU_CTRL_SPREFIX) {
|
||||
js.prefixSKnown = false;
|
||||
js.prefixSFlag = JitState::PREFIX_UNKNOWN;
|
||||
} else if (imm - 128 == VFPU_CTRL_TPREFIX) {
|
||||
js.prefixTKnown = false;
|
||||
js.prefixTFlag = JitState::PREFIX_UNKNOWN;
|
||||
} else if (imm - 128 == VFPU_CTRL_DPREFIX) {
|
||||
js.prefixDKnown = false;
|
||||
js.prefixDFlag = JitState::PREFIX_UNKNOWN;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -109,6 +109,34 @@ void Jit::FlushAll()
|
|||
{
|
||||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
FlushPrefixV();
|
||||
}
|
||||
|
||||
void Jit::FlushPrefixV()
|
||||
{
|
||||
if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0)
|
||||
{
|
||||
MOV(32, M((void *)&mips_->vfpuCtrl[VFPU_CTRL_SPREFIX]), Imm32(js.prefixS));
|
||||
js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY);
|
||||
}
|
||||
|
||||
if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0)
|
||||
{
|
||||
MOV(32, M((void *)&mips_->vfpuCtrl[VFPU_CTRL_TPREFIX]), Imm32(js.prefixT));
|
||||
js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY);
|
||||
}
|
||||
|
||||
if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0)
|
||||
{
|
||||
MOV(32, M((void *)&mips_->vfpuCtrl[VFPU_CTRL_DPREFIX]), Imm32(js.prefixD));
|
||||
|
||||
_dbg_assert_msg_(JIT, sizeof(bool) <= 4, "Bools shouldn't be that big?");
|
||||
const size_t bool_stride = 4 / sizeof(bool);
|
||||
for (size_t i = 0; i < ARRAY_SIZE(mips_->vfpuWriteMask); i += bool_stride)
|
||||
MOV(32, M((void *)&mips_->vfpuWriteMask[i]), Imm32(*(u32 *)&js.writeMask[i]));
|
||||
|
||||
js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY);
|
||||
}
|
||||
}
|
||||
|
||||
void Jit::WriteDowncount(int offset)
|
||||
|
@ -259,6 +287,10 @@ void Jit::Comp_Generic(u32 op)
|
|||
}
|
||||
else
|
||||
_dbg_assert_msg_(JIT, 0, "Trying to compile instruction that can't be interpreted");
|
||||
|
||||
// Might have eaten prefixes, hard to tell...
|
||||
if ((MIPSGetInfo(op) & IS_VFPU) != 0)
|
||||
js.PrefixStart();
|
||||
}
|
||||
|
||||
void Jit::WriteExit(u32 destination, int exit_num)
|
||||
|
|
|
@ -48,6 +48,14 @@ struct JitOptions
|
|||
|
||||
struct JitState
|
||||
{
|
||||
// Bit flags describing what the JIT knows about a cached VFPU prefix.
enum PrefixState
{
	// Value is not known at compile time.
	PREFIX_UNKNOWN = 0x00,
	// Cached value is known.
	PREFIX_KNOWN = 0x01,
	// Cached value still has to be written back (see Jit::FlushPrefixV).
	PREFIX_DIRTY = 0x10,
	// Known and pending write-back.
	PREFIX_KNOWN_DIRTY = PREFIX_KNOWN | PREFIX_DIRTY,
};
|
||||
|
||||
u32 compilerPC;
|
||||
u32 blockStart;
|
||||
bool cancel;
|
||||
|
@ -62,22 +70,39 @@ struct JitState
|
|||
u32 prefixT;
|
||||
u32 prefixD;
|
||||
bool writeMask[4];
|
||||
bool prefixSKnown;
|
||||
bool prefixTKnown;
|
||||
bool prefixDKnown;
|
||||
PrefixState prefixSFlag;
|
||||
PrefixState prefixTFlag;
|
||||
PrefixState prefixDFlag;
|
||||
// Marks all three prefixes (S, T, D) as unknown.
void PrefixStart() {
	prefixSKnown = prefixTKnown = prefixDKnown = false;
	prefixSFlag = prefixTFlag = prefixDFlag = PREFIX_UNKNOWN;
}
|
||||
bool MayHavePrefix() const {
|
||||
if (!(prefixSFlag & PREFIX_KNOWN) || !(prefixTFlag & PREFIX_KNOWN) || !(prefixDFlag & PREFIX_KNOWN)) {
|
||||
return true;
|
||||
} else if (prefixS != 0xE4 || prefixT != 0xE4 || prefixD != 0) {
|
||||
return true;
|
||||
} else if (writeMask[0] || writeMask[1] || writeMask[2] || writeMask[3]) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
void EatPrefix() {
|
||||
prefixSKnown = true;
|
||||
prefixTKnown = true;
|
||||
prefixDKnown = true;
|
||||
prefixS = 0xE4;
|
||||
prefixT = 0xE4;
|
||||
prefixD = 0x0;
|
||||
writeMask[0] = writeMask[1] = writeMask[2] = writeMask[3] = false;
|
||||
if ((prefixSFlag & PREFIX_KNOWN) == 0 || prefixS != 0xE4) {
|
||||
prefixSFlag = PREFIX_KNOWN_DIRTY;
|
||||
prefixS = 0xE4;
|
||||
}
|
||||
if ((prefixTFlag & PREFIX_KNOWN) == 0 || prefixT != 0xE4) {
|
||||
prefixTFlag = PREFIX_KNOWN_DIRTY;
|
||||
prefixT = 0xE4;
|
||||
}
|
||||
if ((prefixDFlag & PREFIX_KNOWN) == 0 || prefixD != 0x0 || writeMask[0] || writeMask[1] || writeMask[2] || writeMask[3]) {
|
||||
prefixDFlag = PREFIX_KNOWN_DIRTY;
|
||||
prefixD = 0x0;
|
||||
writeMask[0] = writeMask[1] = writeMask[2] = writeMask[3] = false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -140,6 +165,7 @@ public:
|
|||
void Comp_SVQ(u32 op);
|
||||
void Comp_VPFX(u32 op);
|
||||
void Comp_VDot(u32 op);
|
||||
void Comp_VecDo3(u32 op);
|
||||
void Comp_Mftv(u32 op);
|
||||
void Comp_Vmtvc(u32 op);
|
||||
|
||||
|
@ -155,6 +181,7 @@ public:
|
|||
void ClearCacheAt(u32 em_address);
|
||||
private:
|
||||
void FlushAll();
|
||||
void FlushPrefixV();
|
||||
void WriteDowncount(int offset = 0);
|
||||
|
||||
// See CompileDelaySlotFlags for flags.
|
||||
|
|
Loading…
Add table
Reference in a new issue