Merge pull request #700 from unknownbrackets/jit-vfpu

Flush vfpu prefixes, initial implementation of VecDo3
This commit is contained in:
Henrik Rydgård 2013-02-15 09:53:43 -08:00
commit c45d4b0357
9 changed files with 183 additions and 45 deletions

View file

@ -35,6 +35,11 @@ namespace MIPSComp
DISABLE;
}
// JIT compile hook for the element-wise VFPU ops that the instruction tables
// route to Comp_VecDo3 (vadd, vsub, vdiv, vmul).
// No native code is emitted in this file yet: the body is only DISABLE.
// NOTE(review): DISABLE is presumably the same "Comp_Generic(op); return;" macro
// defined in the other Comp* files of this change - confirm for this file.
void Jit::Comp_VecDo3(u32 op)
{
DISABLE;
}
void Jit::Comp_Mftv(u32 op)
{
DISABLE;

View file

@ -110,6 +110,7 @@ public:
void Comp_SVQ(u32 op);
void Comp_VPFX(u32 op);
void Comp_VDot(u32 op);
void Comp_VecDo3(u32 op);
void Comp_Mftv(u32 op);
void Comp_Vmtvc(u32 op);

View file

@ -363,11 +363,11 @@ const MIPSInstruction tableCop2[32] =
INSTR("mfc2", &Jit::Comp_Generic, Dis_Generic, 0, OUT_RT),
{-2},
INSTR("cfc2", &Jit::Comp_Generic, Dis_Generic, 0, 0),
INSTR("mfv", &Jit::Comp_Mftv, Dis_Mftv, Int_Mftv, 0),
INSTR("mfv", &Jit::Comp_Mftv, Dis_Mftv, Int_Mftv, IS_VFPU),
INSTR("mtc2", &Jit::Comp_Generic, Dis_Generic, 0, IN_RT),
{-2},
INSTR("ctc2", &Jit::Comp_Generic, Dis_Generic, 0, 0),
INSTR("mtv", &Jit::Comp_Mftv, Dis_Mftv, Int_Mftv, 0),
INSTR("mtv", &Jit::Comp_Mftv, Dis_Mftv, Int_Mftv, IS_VFPU),
{Cop2BC2},
INSTR("??", &Jit::Comp_Generic, Dis_Generic, 0, 0),
@ -478,17 +478,17 @@ const MIPSInstruction tableCop1BC[32] =
// VFPU0 instruction table (8 slots). Slot order appears to match the
// (op >> 23) & 7 selector used by Jit::Comp_VecDo3: 0 = vadd, 1 = vsub, 7 = vdiv.
// NOTE(review): vadd/vsub/vdiv are each listed twice below (once with
// Comp_Generic, once with Comp_VecDo3), which exceeds the 8 declared slots;
// this looks like removed and added rows shown together - confirm that only
// the Comp_VecDo3 rows remain.
const MIPSInstruction tableVFPU0[8] =
{
INSTR("vadd",&Jit::Comp_Generic, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
INSTR("vsub",&Jit::Comp_Generic, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
INSTR("vadd",&Jit::Comp_VecDo3, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
INSTR("vsub",&Jit::Comp_VecDo3, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
INSTR("vsbn",&Jit::Comp_Generic, Dis_VectorSet3, Int_Vsbn, IS_VFPU),
{-2}, {-2}, {-2}, {-2},
INSTR("vdiv",&Jit::Comp_Generic, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
INSTR("vdiv",&Jit::Comp_VecDo3, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
};
const MIPSInstruction tableVFPU1[8] =
{
INSTR("vmul",&Jit::Comp_Generic, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
INSTR("vmul",&Jit::Comp_VecDo3, Dis_VectorSet3, Int_VecDo3, IS_VFPU),
INSTR("vdot",&Jit::Comp_VDot, Dis_VectorDot, Int_VDot, IS_VFPU),
INSTR("vscl",&Jit::Comp_Generic, Dis_VScl, Int_VScl, IS_VFPU),
{-2},

View file

@ -32,9 +32,9 @@ using namespace MIPSAnalyst;
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
//#define CONDITIONAL_DISABLE Comp_Generic(op); return;
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE Comp_Generic(op); return;
#define DISABLE { Comp_Generic(op); return; }
namespace MIPSComp
{

View file

@ -34,9 +34,9 @@
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
// #define CONDITIONAL_DISABLE Comp_Generic(op); return;
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE Comp_Generic(op); return;
#define DISABLE { Comp_Generic(op); return; }
namespace MIPSComp
{

View file

@ -35,9 +35,9 @@
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
// #define CONDITIONAL_DISABLE Comp_Generic(op); return;
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE Comp_Generic(op); return;
#define DISABLE { Comp_Generic(op); return; }
namespace MIPSComp
{

View file

@ -28,9 +28,9 @@
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
// #define CONDITIONAL_DISABLE Comp_Generic(op); return;
// #define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE Comp_Generic(op); return;
#define DISABLE { Comp_Generic(op); return; }
#define _RS ((op>>21) & 0x1F)
@ -62,19 +62,17 @@ void Jit::Comp_VPFX(u32 op)
switch (regnum) {
case 0: // S
js.prefixS = data;
js.prefixSKnown = true;
js.prefixSFlag = JitState::PREFIX_KNOWN_DIRTY;
break;
case 1: // T
js.prefixT = data;
js.prefixTKnown = true;
js.prefixTFlag = JitState::PREFIX_KNOWN_DIRTY;
break;
case 2: // D
js.prefixD = data;
js.prefixDKnown = true;
js.prefixDFlag = JitState::PREFIX_KNOWN_DIRTY;
break;
}
// TODO: Defer this to end of block
MOV(32, M((void *)&mips_->vfpuCtrl[VFPU_CTRL_SPREFIX + regnum]), Imm32(data));
}
@ -114,7 +112,7 @@ void Jit::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
}
void Jit::ApplyPrefixD(const u8 *vregs, u32 prefix, VectorSize sz, bool onlyWriteMask) {
_assert_(js.prefixDKnown);
_assert_(js.prefixDFlag & JitState::PREFIX_KNOWN);
if (!prefix) return;
int n = GetNumVectorElements(sz);
@ -202,8 +200,7 @@ void Jit::Comp_SV(u32 op) {
break;
default:
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
break;
DISABLE;
}
}
@ -288,10 +285,15 @@ void Jit::Comp_SVQ(u32 op)
}
}
void Jit::Comp_VDot(u32 op) {
DISABLE;
// WARNING: No prefix support!
if (js.MayHavePrefix()) {
Comp_Generic(op);
js.EatPrefix();
return;
}
int vd = _VD;
int vs = _VS;
@ -309,7 +311,6 @@ void Jit::Comp_VDot(u32 op) {
MOVSS(XMM0, fpr.V(sregs[0]));
MULSS(XMM0, fpr.V(tregs[0]));
float sum = 0.0f;
int n = GetNumVectorElements(sz);
for (int i = 1; i < n; i++)
{
@ -331,6 +332,78 @@ void Jit::Comp_VDot(u32 op) {
js.EatPrefix();
}
// Compiles the element-wise VFPU ops d[i] = s[i] <op> t[i] for vadd, vsub,
// vdiv and vmul by emitting one scalar SSE instruction per vector element.
// Any opcode not recognised below is handed to Comp_Generic.
void Jit::Comp_VecDo3(u32 op) {
	// Still switched off: everything after this line is kept for when the
	// native path gets enabled.
	DISABLE;

	// WARNING: No prefix support!
	if (js.MayHavePrefix())
	{
		Comp_Generic(op);
		js.EatPrefix();
		return;
	}

	const int dstVec = _VD;
	const int srcVec = _VS;
	const int tgtVec = _VT;
	const VectorSize sz = GetVecSize(op);

	u8 sregs[4], tregs[4], dregs[4];
	GetVectorRegs(sregs, sz, srcVec);
	GetVectorRegs(tregs, sz, tgtVec);
	GetVectorRegs(dregs, sz, dstVec);

	// Pick the scalar SSE emitter from the major opcode and the 3-bit sub-opcode.
	const u32 major = op >> 26;
	const u32 minor = (op >> 23) & 7;
	void (XEmitter::*emitScalar)(X64Reg, OpArg) = NULL;
	if (major == 24)  // VFPU0
	{
		if (minor == 0)
			emitScalar = &XEmitter::ADDSS;  // vadd: d[i] = s[i] + t[i]
		else if (minor == 1)
			emitScalar = &XEmitter::SUBSS;  // vsub: d[i] = s[i] - t[i]
		else if (minor == 7)
			emitScalar = &XEmitter::DIVSS;  // vdiv: d[i] = s[i] / t[i]
	}
	else if (major == 25)  // VFPU1
	{
		if (minor == 0)
			emitScalar = &XEmitter::MULSS;  // vmul: d[i] = s[i] * t[i]
	}

	if (emitScalar == NULL)
	{
		Comp_Generic(op);
		js.EatPrefix();
		return;
	}

	const int count = GetNumVectorElements(sz);

	// We need at least `count` temporaries (XMM0 upward), so flush the
	// FPU register cache when more than two are required.
	if (count > 2)
		fpr.Flush();

	// Load every source lane first, then combine, then store: results only
	// reach the destination registers after all inputs have been read.
	for (int lane = 0; lane < count; ++lane)
	{
		MOVSS((X64Reg) (XMM0 + lane), fpr.V(sregs[lane]));
	}
	for (int lane = 0; lane < count; ++lane)
	{
		(this->*emitScalar)((X64Reg) (XMM0 + lane), fpr.V(tregs[lane]));
	}
	for (int lane = 0; lane < count; ++lane)
	{
		MOVSS(fpr.V(dregs[lane]), (X64Reg) (XMM0 + lane));
	}

	fpr.ReleaseSpillLocks();
	js.EatPrefix();
}
void Jit::Comp_Mftv(u32 op) {
CONDITIONAL_DISABLE;
@ -346,6 +419,8 @@ void Jit::Comp_Mftv(u32 op) {
gpr.BindToRegister(rt, false, true);
MOV(32, gpr.R(rt), fpr.V(imm));
} else if (imm < 128 + VFPU_CTRL_MAX) { //mtvc
// In case we have a saved prefix.
FlushPrefixV();
gpr.BindToRegister(rt, false, true);
MOV(32, gpr.R(rt), M(&currentMIPS->vfpuCtrl[imm - 128]));
} else {
@ -367,11 +442,11 @@ void Jit::Comp_Mftv(u32 op) {
// TODO: Optimization if rt is Imm?
if (imm - 128 == VFPU_CTRL_SPREFIX) {
js.prefixSKnown = false;
js.prefixSFlag = JitState::PREFIX_UNKNOWN;
} else if (imm - 128 == VFPU_CTRL_TPREFIX) {
js.prefixTKnown = false;
js.prefixTFlag = JitState::PREFIX_UNKNOWN;
} else if (imm - 128 == VFPU_CTRL_DPREFIX) {
js.prefixDKnown = false;
js.prefixDFlag = JitState::PREFIX_UNKNOWN;
}
} else {
//ERROR
@ -381,8 +456,6 @@ void Jit::Comp_Mftv(u32 op) {
default:
DISABLE;
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
break;
}
}
@ -396,11 +469,11 @@ void Jit::Comp_Vmtvc(u32 op) {
fpr.ReleaseSpillLocks();
if (imm - 128 == VFPU_CTRL_SPREFIX) {
js.prefixSKnown = false;
js.prefixSFlag = JitState::PREFIX_UNKNOWN;
} else if (imm - 128 == VFPU_CTRL_TPREFIX) {
js.prefixTKnown = false;
js.prefixTFlag = JitState::PREFIX_UNKNOWN;
} else if (imm - 128 == VFPU_CTRL_DPREFIX) {
js.prefixDKnown = false;
js.prefixDFlag = JitState::PREFIX_UNKNOWN;
}
}
}

View file

@ -109,6 +109,34 @@ void Jit::FlushAll()
{
gpr.Flush();
fpr.Flush();
FlushPrefixV();
}
void Jit::FlushPrefixV()
{
if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0)
{
MOV(32, M((void *)&mips_->vfpuCtrl[VFPU_CTRL_SPREFIX]), Imm32(js.prefixS));
js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY);
}
if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0)
{
MOV(32, M((void *)&mips_->vfpuCtrl[VFPU_CTRL_TPREFIX]), Imm32(js.prefixT));
js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY);
}
if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0)
{
MOV(32, M((void *)&mips_->vfpuCtrl[VFPU_CTRL_DPREFIX]), Imm32(js.prefixD));
_dbg_assert_msg_(JIT, sizeof(bool) <= 4, "Bools shouldn't be that big?");
const size_t bool_stride = 4 / sizeof(bool);
for (size_t i = 0; i < ARRAY_SIZE(mips_->vfpuWriteMask); i += bool_stride)
MOV(32, M((void *)&mips_->vfpuWriteMask[i]), Imm32(*(u32 *)&js.writeMask[i]));
js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY);
}
}
void Jit::WriteDowncount(int offset)
@ -259,6 +287,10 @@ void Jit::Comp_Generic(u32 op)
}
else
_dbg_assert_msg_(JIT, 0, "Trying to compile instruction that can't be interpreted");
// Might have eaten prefixes, hard to tell...
if ((MIPSGetInfo(op) & IS_VFPU) != 0)
js.PrefixStart();
}
void Jit::WriteExit(u32 destination, int exit_num)

View file

@ -48,6 +48,14 @@ struct JitOptions
struct JitState
{
// Per-prefix tracking bits used by the JIT for the VFPU S/T/D prefixes.
//   PREFIX_KNOWN - set when the prefix value is tracked at compile time
//                  (tested by MayHavePrefix and ApplyPrefixD).
//   PREFIX_DIRTY - set when the tracked value still has to be stored;
//                  FlushPrefixV emits the store and clears this bit.
enum PrefixState
{
	PREFIX_UNKNOWN = 0,
	PREFIX_KNOWN = 1 << 0,
	PREFIX_DIRTY = 1 << 4,
	PREFIX_KNOWN_DIRTY = PREFIX_KNOWN | PREFIX_DIRTY,
};
u32 compilerPC;
u32 blockStart;
bool cancel;
@ -62,22 +70,39 @@ struct JitState
u32 prefixT;
u32 prefixD;
bool writeMask[4];
bool prefixSKnown;
bool prefixTKnown;
bool prefixDKnown;
PrefixState prefixSFlag;
PrefixState prefixTFlag;
PrefixState prefixDFlag;
// Marks the S, T and D prefixes as unknown.
// NOTE(review): this listing assigns both the bool *Known members and the
// PrefixState *Flag members; that looks like removed and added lines shown
// together - confirm that only the *Flag assignments remain.
void PrefixStart() {
prefixSKnown = false;
prefixTKnown = false;
prefixDKnown = false;
prefixSFlag = PREFIX_UNKNOWN;
prefixTFlag = PREFIX_UNKNOWN;
prefixDFlag = PREFIX_UNKNOWN;
}
// Returns false only when all three prefixes are flagged PREFIX_KNOWN, hold
// the values EatPrefix resets them to (S and T == 0xE4, D == 0), and no write
// mask entry is set. Returns true in every other case.
bool MayHavePrefix() const {
	const bool everyPrefixKnown =
		(prefixSFlag & PREFIX_KNOWN) && (prefixTFlag & PREFIX_KNOWN) && (prefixDFlag & PREFIX_KNOWN);
	if (!everyPrefixKnown)
		return true;

	const bool allAtDefault = prefixS == 0xE4 && prefixT == 0xE4 && prefixD == 0;
	if (!allAtDefault)
		return true;

	return writeMask[0] || writeMask[1] || writeMask[2] || writeMask[3];
}
// Resets the tracked prefixes to S = 0xE4, T = 0xE4, D = 0 with an all-false
// write mask - the same values MayHavePrefix treats as "no prefix".
// NOTE(review): the unconditional assignments to the bool *Known members and
// the conditional *Flag blocks both appear below; that looks like removed and
// added lines shown together - confirm which set is live.
void EatPrefix() {
prefixSKnown = true;
prefixTKnown = true;
prefixDKnown = true;
prefixS = 0xE4;
prefixT = 0xE4;
prefixD = 0x0;
writeMask[0] = writeMask[1] = writeMask[2] = writeMask[3] = false;
// Flag S as known and dirty only if it was unknown or not already 0xE4.
if ((prefixSFlag & PREFIX_KNOWN) == 0 || prefixS != 0xE4) {
prefixSFlag = PREFIX_KNOWN_DIRTY;
prefixS = 0xE4;
}
// Same rule for T.
if ((prefixTFlag & PREFIX_KNOWN) == 0 || prefixT != 0xE4) {
prefixTFlag = PREFIX_KNOWN_DIRTY;
prefixT = 0xE4;
}
// D is also reset when any write mask entry is set.
if ((prefixDFlag & PREFIX_KNOWN) == 0 || prefixD != 0x0 || writeMask[0] || writeMask[1] || writeMask[2] || writeMask[3]) {
prefixDFlag = PREFIX_KNOWN_DIRTY;
prefixD = 0x0;
writeMask[0] = writeMask[1] = writeMask[2] = writeMask[3] = false;
}
}
};
@ -140,6 +165,7 @@ public:
void Comp_SVQ(u32 op);
void Comp_VPFX(u32 op);
void Comp_VDot(u32 op);
void Comp_VecDo3(u32 op);
void Comp_Mftv(u32 op);
void Comp_Vmtvc(u32 op);
@ -155,6 +181,7 @@ public:
void ClearCacheAt(u32 em_address);
private:
void FlushAll();
void FlushPrefixV();
void WriteDowncount(int offset = 0);
// See CompileDelaySlotFlags for flags.