More armjit-fpu work - dot product working for example. Add some non working DISABLEd stuff too.

This commit is contained in:
Henrik Rydgard 2013-02-16 02:06:02 +01:00
parent 81589b67e5
commit b8abb77eee
11 changed files with 307 additions and 36 deletions

View file

@ -2802,7 +2802,7 @@ std::vector<DebugThreadInfo> GetThreadsInfo()
DebugThreadInfo info;
info.id = *iter;
strncpy(info.name,t->GetName(),KERNELOBJECT_MAX_NAME_LENGTH);
info.name[KERNELOBJECT_MAX_NAME_LENGTH+1] = 0;
info.name[KERNELOBJECT_MAX_NAME_LENGTH] = 0;
info.status = t->nt.status;
info.entrypoint = t->nt.entrypoint;
info.curPC = t->context.pc;

View file

@ -159,7 +159,7 @@ void Jit::GenerateFixedCode()
// MOV(R0, R13);
// QuickCallFunction(R1, (void *)&ShowPC);
LDR(R0, R10, offsetof(MIPSState, pc));
LDR(R0, CTXREG, offsetof(MIPSState, pc));
BIC(R0, R0, Operand2(0xC0, 4)); // &= 0x3FFFFFFF
LDR(R0, R11, R0, true, true);
AND(R1, R0, Operand2(0xFC, 4)); // rotation is to the right, in 2-bit increments.

View file

@ -222,7 +222,7 @@ void Jit::Comp_mxc1(u32 op)
{
case 0: // R(rt) = FI(fs); break; //mfc1
// Let's just go through RAM for now.
fpr.FlushMipsReg(fs);
fpr.FlushR(fs);
gpr.MapReg(rt, MAP_DIRTY | MAP_NOINIT);
LDR(gpr.R(rt), CTXREG, fpr.GetMipsRegOffset(fs));
return;
@ -233,7 +233,7 @@ void Jit::Comp_mxc1(u32 op)
case 4: //FI(fs) = R(rt); break; //mtc1
// Let's just go through RAM for now.
gpr.FlushMipsReg(rt);
gpr.FlushR(rt);
fpr.MapReg(fs, MAP_DIRTY | MAP_NOINIT);
VLDR(fpr.R(fs), CTXREG, gpr.GetMipsRegOffset(rt));
return;

View file

@ -42,9 +42,46 @@ namespace MIPSComp
}
}
// Compiles the single-element VFPU memory ops: lv.s (major opcode 50) loads one
// vector element from memory, sv.s (major opcode 58) stores one. Any other
// opcode is handed to DISABLE.
//
// op: the raw instruction word. The 16-bit immediate (low two bits masked off,
//     then sign-extended) is the address offset; the low two bits of op supply
//     bits 5-6 of the vector register index.
void Jit::Comp_SV(u32 op) {
	CONDITIONAL_DISABLE;

	const s32 offset = (signed short)(op & 0xFFFC);
	const int vreg = ((op >> 16) & 0x1f) | ((op & 3) << 5);
	const int base = _RS;
	const u32 major = op >> 26;

	if (major == 50) {
		// lv.s: VI(vt) = Memory::Read_U32(addr);
		gpr.MapReg(base);
		SetR0ToEffectiveAddress(base, offset);
		// R11 is added to the guest address before the access; presumably it
		// holds the host base of emulated memory - TODO confirm.
		ADD(R0, R0, R11);
		// The destination is fully overwritten, so no need to load its old value.
		fpr.MapRegV(vreg, MAP_DIRTY | MAP_NOINIT);
		fpr.ReleaseSpillLocks();
		VLDR(fpr.V(vreg), R0, 0);
	} else if (major == 58) {
		// sv.s: Memory::Write_U32(VI(vt), addr);
		gpr.MapReg(base);
		SetR0ToEffectiveAddress(base, offset);
		ADD(R0, R0, R11);
		// Source register: mapped with the default flags so its value is available.
		fpr.MapRegV(vreg);
		fpr.ReleaseSpillLocks();
		VSTR(fpr.V(vreg), R0, 0);
	} else {
		DISABLE;
	}
}
void Jit::Comp_SVQ(u32 op)
{
DISABLE;
CONDITIONAL_DISABLE;
int imm = (signed short)(op&0xFFFC);
int vt = (((op >> 16) & 0x1f)) | ((op&1) << 5);
@ -56,11 +93,12 @@ namespace MIPSComp
{
gpr.MapReg(rs);
SetR0ToEffectiveAddress(rs, imm);
ADD(R0, R0, R11);
u8 vregs[4];
GetVectorRegs(vregs, V_Quad, vt);
fpr.MapRegsV(vregs, V_Quad, MAP_DIRTY | MAP_NOINIT);
fpr.ReleaseSpillLocks();
// Just copy 4 words the easiest way while not wasting registers.
for (int i = 0; i < 4; i++)
VLDR(fpr.V(vregs[i]), R0, i * 4);
}
@ -68,17 +106,14 @@ namespace MIPSComp
case 62: //sv.q
{
DISABLE;
gpr.MapReg(rs);
SetR0ToEffectiveAddress(rs, imm);
ADD(R0, R0, R11);
u8 vregs[4];
GetVectorRegs(vregs, V_Quad, vt);
// Even if we don't use real SIMD there's still 8 or 16 scalar float registers.
fpr.MapRegsV(vregs, V_Quad, 0);
fpr.ReleaseSpillLocks();
// Just copy 4 words the easiest way while not wasting registers.
for (int i = 0; i < 4; i++)
VSTR(fpr.V(vregs[i]), R0, i * 4);
}
@ -92,25 +127,205 @@ namespace MIPSComp
// Emits ARM VFP code for a VFPU dot product: the elements of vector vs and
// vector vt are multiplied pairwise, the products are summed in S0, and the
// sum is moved into the single register vd.
// Instructions that may carry a prefix are not handled here; they are passed
// to Comp_Generic instead.
void Jit::Comp_VDot(u32 op)
{
// NOTE(review): an active `DISABLE;` directly followed by the commented-out
// `// DISABLE;` looks like a removed line left over from the diff this text
// was taken from. If DISABLE bails out of the function (presumably - confirm),
// everything below is dead as written.
DISABLE;
// DISABLE;
CONDITIONAL_DISABLE;
// WARNING: No prefix support!
if (js.MayHavePrefix()) {
Comp_Generic(op);
js.EatPrefix();
return;
}
int vd = _VD;
int vs = _VS;
int vt = _VT;
VectorSize sz = GetVecSize(op);
// TODO: Force read one of them into regs? probably not.
u8 sregs[4], tregs[4];
GetVectorRegs(sregs, sz, vs);
GetVectorRegs(tregs, sz, vt);
// TODO: applyprefixST here somehow (shuffle, etc...)
// Both inputs are mapped with flags 0, i.e. neither MAP_DIRTY nor MAP_NOINIT.
fpr.MapRegsV(sregs, sz, 0);
fpr.MapRegsV(tregs, sz, 0);
// S0 accumulates the sum and S1 holds each further product. Both are used
// here as fixed scratch registers rather than obtained from the register cache.
VMUL(S0, fpr.V(sregs[0]), fpr.V(tregs[0]));
int n = GetNumVectorElements(sz);
for (int i = 1; i < n; i++)
{
// sum += s[i]*t[i];
VMUL(S1, fpr.V(sregs[i]), fpr.V(tregs[i]));
VADD(S0, S0, S1);
}
// Inputs are no longer needed once the sum is in S0; release them before
// mapping the destination.
fpr.ReleaseSpillLocks();
// The destination is fully overwritten, hence MAP_NOINIT.
fpr.MapRegV(vd, MAP_NOINIT | MAP_DIRTY);
// TODO: applyprefixD here somehow (write mask etc..)
VMOV(fpr.V(vd), S0);
fpr.ReleaseSpillLocks();
js.EatPrefix();
}
// Emits code for element-wise three-operand VFPU arithmetic (vadd, vsub, vdiv,
// vmul): d[i] = s[i] <op> t[i] for each element of the vector size encoded in
// op. Results are first computed into the TEMP0.. temporaries and then copied
// to the destination registers. Instructions that may carry a prefix, and
// opcodes with no matching emitter method, are passed to Comp_Generic.
void Jit::Comp_VecDo3(u32 op)
{
// NOTE(review): the duplicated DISABLE looks like an old and a new diff line
// shown together; the function is marked as still buggy either way.
DISABLE;
DISABLE; // Still buggy
// WARNING: No prefix support!
if (js.MayHavePrefix())
{
Comp_Generic(op);
js.EatPrefix();
return;
}
int vd = _VD;
int vs = _VS;
int vt = _VT;
VectorSize sz = GetVecSize(op);
u8 sregs[4], tregs[4], dregs[4];
GetVectorRegs(sregs, sz, vs);
GetVectorRegs(tregs, sz, vt);
GetVectorRegs(dregs, sz, vd);
// Pointer to the emitter method that performs the per-element operation;
// stays NULL when the opcode is not one of the cases below.
void (ARMXEmitter::*triop)(ARMReg, ARMReg, ARMReg) = NULL;
switch (op >> 26)
{
case 24: //VFPU0
switch ((op >> 23)&7)
{
case 0: // d[i] = s[i] + t[i]; break; //vadd
triop = &ARMXEmitter::VADD;
break;
case 1: // d[i] = s[i] - t[i]; break; //vsub
triop = &ARMXEmitter::VSUB;
break;
case 7: // d[i] = s[i] / t[i]; break; //vdiv
triop = &ARMXEmitter::VDIV;
break;
}
break;
case 25: //VFPU1
switch ((op >> 23)&7)
{
case 0: // d[i] = s[i] * t[i]; break; //vmul
triop = &ARMXEmitter::VMUL;
break;
}
break;
}
if (triop == NULL)
{
Comp_Generic(op);
js.EatPrefix();
return;
}
int n = GetNumVectorElements(sz);
fpr.MapRegsV(sregs, sz, 0);
fpr.MapRegsV(tregs, sz, 0);
// TEMP1..TEMP3 are mapped up front; TEMP0 + i is mapped (again, for i > 0)
// inside the loop below.
fpr.MapReg(TEMP1);
fpr.MapReg(TEMP2);
fpr.MapReg(TEMP3);
for (int i = 0; i < n; ++i) {
fpr.MapReg(TEMP0 + i);
(this->*triop)(fpr.R(TEMP0 + i), fpr.V(sregs[i]), fpr.V(tregs[i]));
// NOTE(review): sregs[i]/tregs[i] are vector register numbers and are read
// through V(), which adds 32 before calling R(). ReleaseSpillLock receives
// them without that offset, so it may release a different entry - confirm.
fpr.ReleaseSpillLock(sregs[i]);
fpr.ReleaseSpillLock(tregs[i]);
}
// Destinations are fully overwritten by the copy below, hence MAP_NOINIT.
fpr.MapRegsV(dregs, sz, MAP_DIRTY | MAP_NOINIT);
// TODO: Can avoid this when no overlap
for (int i = 0; i < n; i++) {
VMOV(fpr.V(dregs[i]), fpr.R(TEMP0 + i));
}
fpr.ReleaseSpillLocks();
js.EatPrefix();
}
void Jit::Comp_Mftv(u32 op)
{
DISABLE;
}
// DISABLE;
CONDITIONAL_DISABLE;
void Jit::Comp_SV(u32 op) {
DISABLE;
int imm = op & 0xFF;
int rt = _RT;
switch ((op >> 21) & 0x1f)
{
case 3: //mfv / mfvc
// rt = 0, imm = 255 appears to be used as a CPU interlock by some games.
if (rt != 0) {
if (imm < 128) { //R(rt) = VI(imm);
fpr.FlushV(imm);
gpr.MapReg(rt, MAP_NOINIT | MAP_DIRTY);
LDR(gpr.R(rt), CTXREG, fpr.GetMipsRegOffsetV(imm));
} else if (imm < 128 + VFPU_CTRL_MAX) { //mtvc
DISABLE;
// In case we have a saved prefix.
//FlushPrefixV();
//gpr.BindToRegister(rt, false, true);
//MOV(32, gpr.R(rt), M(&currentMIPS->vfpuCtrl[imm - 128]));
} else {
//ERROR - maybe need to make this value too an "interlock" value?
_dbg_assert_msg_(CPU,0,"mfv - invalid register");
}
}
break;
case 7: //mtv
if (imm < 128) {
gpr.FlushR(rt);
fpr.MapRegV(imm, MAP_DIRTY | MAP_NOINIT);
VLDR(fpr.V(imm), CTXREG, gpr.GetMipsRegOffset(rt));
} else if (imm < 128 + VFPU_CTRL_MAX) { //mtvc //currentMIPS->vfpuCtrl[imm - 128] = R(rt);
DISABLE;
//gpr.BindToRegister(rt, true, false);
//MOV(32, M(&currentMIPS->vfpuCtrl[imm - 128]), gpr.R(rt));
// TODO: Optimization if rt is Imm?
//if (imm - 128 == VFPU_CTRL_SPREFIX) {
//js.prefixSFlag = JitState::PREFIX_UNKNOWN;
//} else if (imm - 128 == VFPU_CTRL_TPREFIX) {
// js.prefixTFlag = JitState::PREFIX_UNKNOWN;
//} else if (imm - 128 == VFPU_CTRL_DPREFIX) {
// js.prefixDFlag = JitState::PREFIX_UNKNOWN;
//}
} else {
//ERROR
_dbg_assert_msg_(CPU,0,"mtv - invalid register");
}
break;
default:
DISABLE;
}
}
// Compiles vmtvc: stores VFPU register vs into the VFPU control register
// selected by the low 8 bits of op (values 128 .. 128 + VFPU_CTRL_MAX - 1).
// Any other selector emits nothing. Starts with DISABLE, so the code below is
// presumably not reached yet - confirm against the macro definition.
void Jit::Comp_Vmtvc(u32 op) {
	DISABLE;

	const int srcVreg = _VS;
	const int imm = op & 0xFF;

	// Only selectors that fall inside the control register block are handled.
	if (imm < 128 || imm >= 128 + VFPU_CTRL_MAX)
		return;

	const int ctrlIndex = imm - 128;

	fpr.MapRegV(srcVreg, 0);
	// R0 = address of vfpuCtrl[ctrlIndex] inside the MIPSState context.
	ADD(R0, CTXREG, offsetof(MIPSState, vfpuCtrl[0]) + ctrlIndex * 4);
	VSTR(fpr.V(srcVreg), R0, 0);
	fpr.ReleaseSpillLocks();

	// Writing a prefix control register means the JIT no longer knows the
	// value of that prefix.
	if (ctrlIndex == VFPU_CTRL_SPREFIX) {
		js.prefixSFlag = ArmJitState::PREFIX_UNKNOWN;
	} else if (ctrlIndex == VFPU_CTRL_TPREFIX) {
		js.prefixTFlag = ArmJitState::PREFIX_UNKNOWN;
	} else if (ctrlIndex == VFPU_CTRL_DPREFIX) {
		js.prefixDFlag = ArmJitState::PREFIX_UNKNOWN;
	}
}
}

View file

@ -180,12 +180,7 @@ void ArmRegCache::FlushArmReg(ARMReg r) {
ar[r].mipsReg = -1;
}
void ArmRegCache::FlushMipsReg(MIPSReg r) {
/*
if (r == 0) {
ERROR_LOG(JIT, "Flushing r0");
return;
}*/
void ArmRegCache::FlushR(MIPSReg r) {
switch (mr[r].loc) {
case ML_IMM:
// IMM is always "dirty".
@ -219,7 +214,7 @@ void ArmRegCache::FlushMipsReg(MIPSReg r) {
void ArmRegCache::FlushAll() {
for (int i = 0; i < NUM_MIPSREG; i++) {
FlushMipsReg(i);
FlushR(i);
}
// Sanity check
for (int i = 0; i < NUM_ARMREG; i++) {

View file

@ -93,7 +93,7 @@ public:
void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
void MapDirtyDirtyInIn(MIPSReg rd1, MIPSReg rd2, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
void FlushArmReg(ARMReg r);
void FlushMipsReg(MIPSReg r);
void FlushR(MIPSReg r);
void FlushAll();

View file

@ -47,8 +47,19 @@ static const ARMReg *GetMIPSAllocationOrder(int &count) {
static const ARMReg allocationOrder[] = {
S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15
};
count = sizeof(allocationOrder) / sizeof(const int);
return allocationOrder;
// With NEON, we'll have many more.
static const ARMReg allocationOrderNEON[] = {
S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15,
S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31
};
bool useNEON = false; // TODO: Use cpu detect
if (useNEON) {
count = sizeof(allocationOrderNEON) / sizeof(const int);
return allocationOrderNEON;
} else {
count = sizeof(allocationOrder) / sizeof(const int);
return allocationOrder;
}
}
ARMReg ArmRegCacheFPU::MapReg(MIPSReg mipsReg, int mapFlags) {
@ -79,7 +90,7 @@ allocate:
// That means it's free. Grab it, and load the value into it (if requested).
ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false;
if (!(mapFlags & MAP_NOINIT)) {
if (mr[mipsReg].loc == ML_MEM) {
if (mr[mipsReg].loc == ML_MEM && mipsReg < TEMP0) {
emit->VLDR((ARMReg)(reg + S0), CTXREG, GetMipsRegOffset(mipsReg));
}
}
@ -180,7 +191,6 @@ void ArmRegCacheFPU::FlushArmReg(ARMReg r) {
if (ar[reg].isDirty && mr[ar[reg].mipsReg].loc == ML_ARMREG)
{
//INFO_LOG(HLE, "Flushing ARM reg %i", reg);
emit->VSTR(r, CTXREG, GetMipsRegOffset(ar[reg].mipsReg));
}
// IMMs won't be in an ARM reg.
@ -193,7 +203,7 @@ void ArmRegCacheFPU::FlushArmReg(ARMReg r) {
ar[reg].mipsReg = -1;
}
void ArmRegCacheFPU::FlushMipsReg(MIPSReg r) {
void ArmRegCacheFPU::FlushR(MIPSReg r) {
switch (mr[r].loc) {
case ML_IMM:
// IMM is always "dirty".
@ -203,7 +213,7 @@ void ArmRegCacheFPU::FlushMipsReg(MIPSReg r) {
case ML_ARMREG:
if (mr[r].reg == (int)INVALID_REG) {
ERROR_LOG(HLE, "FlushMipsReg: MipsReg had bad ArmReg");
ERROR_LOG(HLE, "FlushR: MipsReg had bad ArmReg");
}
if (ar[mr[r].reg].isDirty) {
//INFO_LOG(HLE, "Flushing dirty reg %i", mr[r].reg);
@ -225,9 +235,42 @@ void ArmRegCacheFPU::FlushMipsReg(MIPSReg r) {
mr[r].reg = (int)INVALID_REG;
}
// Drops whatever the cache holds for MIPS FPU register r WITHOUT writing the
// value back, then marks r as living in memory with no ARM register attached.
// Intended for values that are no longer needed (FlushAll uses it for the
// TEMP registers).
//
// r: index into the mr[] table.
void ArmRegCacheFPU::DiscardR(MIPSReg r) {
	switch (mr[r].loc) {
	case ML_IMM:
		// IMM is not allowed for FP (yet), so reaching this state is an error.
		ERROR_LOG(HLE, "Imm in FP register?");
		break;

	case ML_ARMREG:
		if (mr[r].reg == (int)INVALID_REG) {
			// Inconsistent state: report it, and do not use the invalid
			// register number as an index into ar[].
			ERROR_LOG(HLE, "DiscardR: MipsReg had bad ArmReg");
		} else {
			// Note that we DO NOT write it back here. That's the whole point of Discard.
			ar[mr[r].reg].isDirty = false;
			ar[mr[r].reg].mipsReg = -1;
		}
		break;

	case ML_MEM:
		// Already there, nothing to do.
		break;

	default:
		//BAD
		break;
	}

	// Whatever the previous state was, r now counts as in memory and unmapped.
	mr[r].loc = ML_MEM;
	mr[r].reg = (int)INVALID_REG;
}
void ArmRegCacheFPU::FlushAll() {
// Discard temps!
for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; i++) {
DiscardR(i);
}
for (int i = 0; i < NUM_MIPSFPUREG; i++) {
FlushMipsReg(i);
FlushR(i);
}
// Sanity check
for (int i = 0; i < NUM_ARMFPUREG; i++) {
@ -239,7 +282,7 @@ void ArmRegCacheFPU::FlushAll() {
int ArmRegCacheFPU::GetMipsRegOffset(MIPSReg r) {
// These are offsets within the MIPSState structure. First there are the GPRS, then FPRS, then the "VFPURs".
if (r < 32 + 128)
if (r < 32 + 128 + NUM_TEMPS)
return (r + 32) << 2;
ERROR_LOG(JIT, "bad mips register %i", r);
return 0; // or what?

View file

@ -28,7 +28,12 @@
using namespace ArmGen;
enum {
TOTAL_MAPPABLE_MIPSFPUREGS = 32 + 128,
NUM_TEMPS = 4,
TEMP0 = 32 + 128,
TEMP1 = TEMP0 + 1,
TEMP2 = TEMP0 + 2,
TEMP3 = TEMP0 + 3,
TOTAL_MAPPABLE_MIPSFPUREGS = 32 + 128 + NUM_TEMPS,
};
struct FPURegARM {
@ -59,6 +64,10 @@ public:
// it's being kept allocated.
void SpillLock(MIPSReg reg, MIPSReg reg2 = -1, MIPSReg reg3 = -1, MIPSReg reg4 = -1);
void ReleaseSpillLocks();
// Clears the spill lock of a single register entry, leaving all other locks
// in place (ReleaseSpillLocks() is the release-everything counterpart).
// mipsreg is used directly as an index into mr[]; no offset is applied here,
// unlike V()/FlushV()/DiscardV() which add 32 for vector registers.
void ReleaseSpillLock(int mipsreg)
{
mr[mipsreg].spillLock = false;
}
void SetImm(MIPSReg reg, u32 immVal);
bool IsImm(MIPSReg reg) const;
@ -71,7 +80,10 @@ public:
void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true);
void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
void FlushArmReg(ARMReg r);
void FlushMipsReg(MIPSReg r);
void FlushR(MIPSReg r);
void FlushV(MIPSReg r) { FlushR(r + 32); }
void DiscardR(MIPSReg r);
void DiscardV(MIPSReg r) { DiscardR(r + 32);}
void FlushAll();
@ -81,7 +93,7 @@ public:
ARMReg V(int vreg) { return R(vreg + 32); }
void MapRegV(int vreg, int flags);
void MapRegV(int vreg, int flags = 0);
// NOTE: These require you to release spill locks manually!
void MapRegsV(int vec, VectorSize vsz, int flags);

View file

@ -304,7 +304,7 @@ void Jit::Comp_VDot(u32 op) {
u8 sregs[4], tregs[4], dregs[4];
GetVectorRegs(sregs, sz, vs);
GetVectorRegs(tregs, sz, vt);
GetVectorRegs(dregs, sz, vd);
GetVectorRegs(dregs, V_Single, vd);
// TODO: applyprefixST here somehow (shuffle, etc...)

View file

@ -79,6 +79,11 @@ void FPURegCache::MapRegsV(const u8 *v, VectorSize sz, int flags) {
}
}
// Unlocks a single register entry so it may be spilled again; the other
// entries keep their lock state. mipsreg indexes regs[] directly and is not
// range-checked.
void FPURegCache::ReleaseSpillLock(int mipsreg)
{
regs[mipsreg].locked = false;
}
void FPURegCache::ReleaseSpillLocks() {
for (int i = 0; i < NUM_MIPS_FPRS; i++)
regs[i].locked = false;

View file

@ -95,6 +95,7 @@ public:
// Register locking. Prevents them from being spilled.
void SpillLock(int p1, int p2=0xff, int p3=0xff, int p4=0xff);
void ReleaseSpillLock(int mipsrega);
void ReleaseSpillLocks();
void MapRegV(int vreg, int flags);