Add lv.q/sv.q support to the x86 jit.

This commit is contained in:
Unknown W. Brackets 2013-01-26 10:07:05 -08:00
parent b77ce99d01
commit 0e8e9697c5
3 changed files with 87 additions and 43 deletions

View file

@ -139,6 +139,7 @@ void Jit::ApplyPrefixD(const u8 *vregs, u32 prefix, VectorSize sz, bool onlyWrit
}
}
static u32 GC_ALIGNED16(ssLoadStoreTemp[1]);
void Jit::Comp_SVQ(u32 op)
{
@ -150,28 +151,30 @@ void Jit::Comp_SVQ(u32 op)
{
case 54: //lv.q
{
if (!g_Config.bFastMemory) {
DISABLE;
}
gpr.BindToRegister(rs, true, true);
u8 vregs[4];
GetVectorRegs(vregs, V_Quad, vt);
MOV(32, R(EAX), gpr.R(rs));
// Just copy 4 words the easiest way while not wasting registers.
#ifndef _M_X64
AND(32, R(EAX), Imm32(0x3FFFFFFF));
#endif
fpr.MapRegsV(vregs, V_Quad, MAP_DIRTY | MAP_NOINIT);
// MOVSS to prime any crazy cache mechanism that might assume that there's a float somewhere...
for (int i = 0; i < 4; i++) {
#ifdef _M_X64
MOVSS(fpr.VX(vregs[i]), MComplex(RBX, EAX, 1, i * 4 + imm));
#else
MOVSS(fpr.VX(vregs[i]), MDisp(EAX, (u32)(Memory::base + i * 4 + imm)));
#endif
JitSafeMem safe(this, rs, imm);
OpArg src;
if (safe.PrepareRead(src))
{
// Just copy 4 words the easiest way while not wasting registers.
for (int i = 0; i < 4; i++)
MOVSS(fpr.VX(vregs[i]), safe.NextFastAddress(i * 4));
}
if (safe.PrepareSlowRead((void *) &Memory::Read_U32))
{
for (int i = 0; i < 4; i++)
{
safe.NextSlowRead((void *) &Memory::Read_U32, i * 4);
MOV(32, M((void *)&ssLoadStoreTemp), R(EAX));
MOVSS(fpr.VX(vregs[i]), M((void *)&ssLoadStoreTemp));
}
}
safe.Finish();
gpr.UnlockAll();
fpr.ReleaseSpillLocks();
@ -180,34 +183,29 @@ void Jit::Comp_SVQ(u32 op)
case 62: //sv.q
{
if (!g_Config.bFastMemory) {
DISABLE;
}
fpr.Flush();
gpr.BindToRegister(rs, true, true);
u8 vregs[4];
GetVectorRegs(vregs, V_Quad, vt);
MOV(32, R(EAX), gpr.R(rs));
// Just copy 4 words the easiest way while not wasting registers.
#ifndef _M_X64
AND(32, R(EAX), Imm32(0x3FFFFFFF));
#endif
// MOVSS to prime any crazy cache mechanism that might assume that there's a float somewhere...
// It would be pretty nice to have these in registers for the next instruction...
// Even if we don't use real SIMD there's still 8 or 16 scalar float registers.
fpr.MapRegsV(vregs, V_Quad, 0);
for (int i = 0; i < 4; i++) {
#ifdef _M_X64
MOVSS(MComplex(RBX, EAX, 1, i * 4 + imm), fpr.VX(vregs[i]));
#else
MOVSS(MDisp(EAX, (u32)(Memory::base + i * 4 + imm)), fpr.VX(vregs[i]));
#endif
JitSafeMem safe(this, rs, imm);
OpArg dest;
if (safe.PrepareWrite(dest))
{
for (int i = 0; i < 4; i++)
MOVSS(safe.NextFastAddress(i * 4), fpr.VX(vregs[i]));
}
if (safe.PrepareSlowWrite())
{
for (int i = 0; i < 4; i++)
{
MOVSS(M((void *)&ssLoadStoreTemp), fpr.VX(vregs[i]));
safe.DoSlowWrite((void *) &Memory::Write_U32, M((void *)&ssLoadStoreTemp), i * 4);
}
}
safe.Finish();
fpr.ReleaseSpillLocks();
gpr.UnlockAll();

View file

@ -353,6 +353,26 @@ bool Jit::JitSafeMem::PrepareRead(OpArg &src)
return true;
}
// Builds the memory operand for a follow-up fast-path access located
// `suboffset` past this access's base (guest address + offset_).
// Intended for multi-word transfers: the caller asks for one operand per word.
// NOTE(review): the register path relies on xaddr_ already being set up,
// presumably by an earlier PrepareRead()/PrepareWrite() - confirm at call sites.
OpArg Jit::JitSafeMem::NextFastAddress(int suboffset)
{
	if (!jit_->gpr.IsImmediate(raddr_))
	{
		// Guest address is only known at runtime: index off the address register.
#ifdef _M_IX86
		return MDisp(xaddr_, (u32) Memory::base + offset_ + suboffset);
#else
		// NOTE(review): RBX appears to hold the memory base on x64 - confirm.
		return MComplex(RBX, xaddr_, SCALE_1, offset_ + suboffset);
#endif
	}

	// Guest address is a compile-time constant: fold everything into the operand.
	u32 immAddr = jit_->gpr.GetImmediate32(raddr_) + offset_ + suboffset;
#ifdef _M_IX86
	return M(Memory::base + immAddr);
#else
	return MDisp(RBX, immAddr);
#endif
}
OpArg Jit::JitSafeMem::PrepareMemoryOpArg()
{
// We may not even need to move into EAX as a temporary.
@ -368,7 +388,6 @@ OpArg Jit::JitSafeMem::PrepareMemoryOpArg()
xaddr_ = EAX;
}
X64Reg xaddrResult = xaddr_;
if (!g_Config.bFastMemory)
{
// Is it in physical ram?
@ -387,14 +406,14 @@ OpArg Jit::JitSafeMem::PrepareMemoryOpArg()
if (xaddr_ != EAX)
jit_->MOV(32, R(EAX), R(xaddr_));
jit_->AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
xaddrResult = EAX;
xaddr_ = EAX;
#endif
}
#ifdef _M_IX86
return MDisp(xaddrResult, (u32) Memory::base + offset_);
return MDisp(xaddr_, (u32) Memory::base + offset_);
#else
return MComplex(RBX, xaddrResult, SCALE_1, offset_);
return MComplex(RBX, xaddr_, SCALE_1, offset_);
#endif
}
@ -432,15 +451,15 @@ bool Jit::JitSafeMem::PrepareSlowWrite()
return false;
}
void Jit::JitSafeMem::DoSlowWrite(void *safeFunc, const OpArg src)
void Jit::JitSafeMem::DoSlowWrite(void *safeFunc, const OpArg src, int suboffset)
{
if (jit_->gpr.IsImmediate(raddr_))
{
u32 addr = jit_->gpr.GetImmediate32(raddr_) + offset_;
jit_->MOV(32, R(EAX), Imm32(addr));
jit_->MOV(32, R(EAX), Imm32(addr + suboffset));
}
else
jit_->LEA(32, EAX, MDisp(xaddr_, offset_));
jit_->LEA(32, EAX, MDisp(xaddr_, offset_ + suboffset));
jit_->ABI_CallFunctionAA(jit_->thunks.ProtectFunction(safeFunc, 2), src, R(EAX));
needsCheck_ = true;
@ -473,6 +492,28 @@ bool Jit::JitSafeMem::PrepareSlowRead(void *safeFunc)
return false;
}
void Jit::JitSafeMem::NextSlowRead(void *safeFunc, int suboffset)
{
_dbg_assert_msg_(JIT, !g_Config.bFastMemory, "NextSlowRead() called in fast memory mode?");
// For simplicity, do nothing for 0. We already read in PrepareSlowRead().
if (suboffset == 0)
return;
if (jit_->gpr.IsImmediate(raddr_))
{
u32 addr = jit_->gpr.GetImmediate32(raddr_) + offset_;
_dbg_assert_msg_(JIT, !Memory::IsValidAddress(addr), "NextSlowRead() for a valid immediate address?");
jit_->MOV(32, R(EAX), Imm32(addr + suboffset));
}
// For GPR, if xaddr_ was the dest register, this will be wrong. Don't use in GPR.
else
jit_->LEA(32, EAX, MDisp(xaddr_, offset_ + suboffset));
jit_->ABI_CallFunctionA(jit_->thunks.ProtectFunction(safeFunc, 1), R(EAX));
}
void Jit::JitSafeMem::Finish()
{
if (needsCheck_)

View file

@ -194,12 +194,17 @@ private:
// Emit code preceding a slow write call, returns true if slow write is needed.
bool PrepareSlowWrite();
// Emit a slow write from src.
void DoSlowWrite(void *safeFunc, const OpArg src);
void DoSlowWrite(void *safeFunc, const OpArg src, int suboffset = 0);
// Emit code necessary for a memory read, returns true if MOV from src is needed.
bool PrepareRead(OpArg &src);
// Emit code for a slow read call, and returns true if result is in EAX.
bool PrepareSlowRead(void *safeFunc);
// WARNING: Only works for non-GPR. Do not use for reads into GPR.
OpArg NextFastAddress(int suboffset);
// WARNING: Only works for non-GPR. Do not use for reads into GPR.
void NextSlowRead(void *safeFunc, int suboffset);
// Cleans up final code for the memory access.
void Finish();