mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Add lv.q/sv.q support to the x86 jit.
This commit is contained in:
parent
b77ce99d01
commit
0e8e9697c5
3 changed files with 87 additions and 43 deletions
|
@ -139,6 +139,7 @@ void Jit::ApplyPrefixD(const u8 *vregs, u32 prefix, VectorSize sz, bool onlyWrit
|
|||
}
|
||||
}
|
||||
|
||||
static u32 GC_ALIGNED16(ssLoadStoreTemp[1]);
|
||||
|
||||
void Jit::Comp_SVQ(u32 op)
|
||||
{
|
||||
|
@ -150,28 +151,30 @@ void Jit::Comp_SVQ(u32 op)
|
|||
{
|
||||
case 54: //lv.q
|
||||
{
|
||||
if (!g_Config.bFastMemory) {
|
||||
DISABLE;
|
||||
}
|
||||
gpr.BindToRegister(rs, true, true);
|
||||
|
||||
u8 vregs[4];
|
||||
GetVectorRegs(vregs, V_Quad, vt);
|
||||
MOV(32, R(EAX), gpr.R(rs));
|
||||
// Just copy 4 words the easiest way while not wasting registers.
|
||||
#ifndef _M_X64
|
||||
AND(32, R(EAX), Imm32(0x3FFFFFFF));
|
||||
#endif
|
||||
fpr.MapRegsV(vregs, V_Quad, MAP_DIRTY | MAP_NOINIT);
|
||||
|
||||
// MOVSS to prime any crazy cache mechanism that might assume that there's a float somewhere...
|
||||
for (int i = 0; i < 4; i++) {
|
||||
#ifdef _M_X64
|
||||
MOVSS(fpr.VX(vregs[i]), MComplex(RBX, EAX, 1, i * 4 + imm));
|
||||
#else
|
||||
MOVSS(fpr.VX(vregs[i]), MDisp(EAX, (u32)(Memory::base + i * 4 + imm)));
|
||||
#endif
|
||||
JitSafeMem safe(this, rs, imm);
|
||||
OpArg src;
|
||||
if (safe.PrepareRead(src))
|
||||
{
|
||||
// Just copy 4 words the easiest way while not wasting registers.
|
||||
for (int i = 0; i < 4; i++)
|
||||
MOVSS(fpr.VX(vregs[i]), safe.NextFastAddress(i * 4));
|
||||
}
|
||||
if (safe.PrepareSlowRead((void *) &Memory::Read_U32))
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
safe.NextSlowRead((void *) &Memory::Read_U32, i * 4);
|
||||
MOV(32, M((void *)&ssLoadStoreTemp), R(EAX));
|
||||
MOVSS(fpr.VX(vregs[i]), M((void *)&ssLoadStoreTemp));
|
||||
}
|
||||
}
|
||||
safe.Finish();
|
||||
|
||||
gpr.UnlockAll();
|
||||
fpr.ReleaseSpillLocks();
|
||||
|
@ -180,34 +183,29 @@ void Jit::Comp_SVQ(u32 op)
|
|||
|
||||
case 62: //sv.q
|
||||
{
|
||||
if (!g_Config.bFastMemory) {
|
||||
DISABLE;
|
||||
}
|
||||
fpr.Flush();
|
||||
gpr.BindToRegister(rs, true, true);
|
||||
|
||||
u8 vregs[4];
|
||||
GetVectorRegs(vregs, V_Quad, vt);
|
||||
|
||||
MOV(32, R(EAX), gpr.R(rs));
|
||||
// Just copy 4 words the easiest way while not wasting registers.
|
||||
#ifndef _M_X64
|
||||
AND(32, R(EAX), Imm32(0x3FFFFFFF));
|
||||
#endif
|
||||
// MOVSS to prime any crazy cache mechanism that might assume that there's a float somewhere...
|
||||
|
||||
// It would be pretty nice to have these in registers for the next instruction...
|
||||
// Even if we don't use real SIMD there's still 8 or 16 scalar float registers.
|
||||
|
||||
fpr.MapRegsV(vregs, V_Quad, 0);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
#ifdef _M_X64
|
||||
MOVSS(MComplex(RBX, EAX, 1, i * 4 + imm), fpr.VX(vregs[i]));
|
||||
#else
|
||||
MOVSS(MDisp(EAX, (u32)(Memory::base + i * 4 + imm)), fpr.VX(vregs[i]));
|
||||
#endif
|
||||
JitSafeMem safe(this, rs, imm);
|
||||
OpArg dest;
|
||||
if (safe.PrepareWrite(dest))
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
MOVSS(safe.NextFastAddress(i * 4), fpr.VX(vregs[i]));
|
||||
}
|
||||
if (safe.PrepareSlowWrite())
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
MOVSS(M((void *)&ssLoadStoreTemp), fpr.VX(vregs[i]));
|
||||
safe.DoSlowWrite((void *) &Memory::Write_U32, M((void *)&ssLoadStoreTemp), i * 4);
|
||||
}
|
||||
}
|
||||
safe.Finish();
|
||||
|
||||
fpr.ReleaseSpillLocks();
|
||||
gpr.UnlockAll();
|
||||
|
|
|
@ -353,6 +353,26 @@ bool Jit::JitSafeMem::PrepareRead(OpArg &src)
|
|||
return true;
|
||||
}
|
||||
|
||||
// Returns the memory operand for a follow-up fast-path access located
// `suboffset` bytes past this access's address (raddr_ + offset_).
//
// WARNING: Only works for non-GPR accesses. Do not use for reads into GPR.
//
// suboffset: byte offset added on top of offset_ for this access.
// Returns: an OpArg addressing the host memory for that location.
OpArg Jit::JitSafeMem::NextFastAddress(int suboffset)
{
	if (jit_->gpr.IsImmediate(raddr_))
	{
		// The guest address is known at compile time, so fold everything into one constant.
		u32 addr = jit_->gpr.GetImmediate32(raddr_) + offset_ + suboffset;

#ifdef _M_IX86
		// The register path for 32-bit masks the address with MEMVIEW32_MASK before
		// it is combined with Memory::base (see PrepareMemoryOpArg); apply the same
		// mask here so both paths resolve a given address to the same host location.
		return M(Memory::base + (addr & Memory::MEMVIEW32_MASK));
#else
		return MDisp(RBX, addr);
#endif
	}

	// Not an immediate: address relative to xaddr_.
	// NOTE(review): presumably xaddr_ was set up by an earlier Prepare*() call - confirm at call sites.
#ifdef _M_IX86
	return MDisp(xaddr_, (u32) Memory::base + offset_ + suboffset);
#else
	return MComplex(RBX, xaddr_, SCALE_1, offset_ + suboffset);
#endif
}
|
||||
|
||||
OpArg Jit::JitSafeMem::PrepareMemoryOpArg()
|
||||
{
|
||||
// We may not even need to move into EAX as a temporary.
|
||||
|
@ -368,7 +388,6 @@ OpArg Jit::JitSafeMem::PrepareMemoryOpArg()
|
|||
xaddr_ = EAX;
|
||||
}
|
||||
|
||||
X64Reg xaddrResult = xaddr_;
|
||||
if (!g_Config.bFastMemory)
|
||||
{
|
||||
// Is it in physical ram?
|
||||
|
@ -387,14 +406,14 @@ OpArg Jit::JitSafeMem::PrepareMemoryOpArg()
|
|||
if (xaddr_ != EAX)
|
||||
jit_->MOV(32, R(EAX), R(xaddr_));
|
||||
jit_->AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
xaddrResult = EAX;
|
||||
xaddr_ = EAX;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef _M_IX86
|
||||
return MDisp(xaddrResult, (u32) Memory::base + offset_);
|
||||
return MDisp(xaddr_, (u32) Memory::base + offset_);
|
||||
#else
|
||||
return MComplex(RBX, xaddrResult, SCALE_1, offset_);
|
||||
return MComplex(RBX, xaddr_, SCALE_1, offset_);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -432,15 +451,15 @@ bool Jit::JitSafeMem::PrepareSlowWrite()
|
|||
return false;
|
||||
}
|
||||
|
||||
void Jit::JitSafeMem::DoSlowWrite(void *safeFunc, const OpArg src)
|
||||
void Jit::JitSafeMem::DoSlowWrite(void *safeFunc, const OpArg src, int suboffset)
|
||||
{
|
||||
if (jit_->gpr.IsImmediate(raddr_))
|
||||
{
|
||||
u32 addr = jit_->gpr.GetImmediate32(raddr_) + offset_;
|
||||
jit_->MOV(32, R(EAX), Imm32(addr));
|
||||
jit_->MOV(32, R(EAX), Imm32(addr + suboffset));
|
||||
}
|
||||
else
|
||||
jit_->LEA(32, EAX, MDisp(xaddr_, offset_));
|
||||
jit_->LEA(32, EAX, MDisp(xaddr_, offset_ + suboffset));
|
||||
|
||||
jit_->ABI_CallFunctionAA(jit_->thunks.ProtectFunction(safeFunc, 2), src, R(EAX));
|
||||
needsCheck_ = true;
|
||||
|
@ -473,6 +492,28 @@ bool Jit::JitSafeMem::PrepareSlowRead(void *safeFunc)
|
|||
return false;
|
||||
}
|
||||
|
||||
void Jit::JitSafeMem::NextSlowRead(void *safeFunc, int suboffset)
|
||||
{
|
||||
_dbg_assert_msg_(JIT, !g_Config.bFastMemory, "NextSlowRead() called in fast memory mode?");
|
||||
|
||||
// For simplicity, do nothing for 0. We already read in PrepareSlowRead().
|
||||
if (suboffset == 0)
|
||||
return;
|
||||
|
||||
if (jit_->gpr.IsImmediate(raddr_))
|
||||
{
|
||||
u32 addr = jit_->gpr.GetImmediate32(raddr_) + offset_;
|
||||
_dbg_assert_msg_(JIT, !Memory::IsValidAddress(addr), "NextSlowRead() for a valid immediate address?");
|
||||
|
||||
jit_->MOV(32, R(EAX), Imm32(addr + suboffset));
|
||||
}
|
||||
// For GPR, if xaddr_ was the dest register, this will be wrong. Don't use in GPR.
|
||||
else
|
||||
jit_->LEA(32, EAX, MDisp(xaddr_, offset_ + suboffset));
|
||||
|
||||
jit_->ABI_CallFunctionA(jit_->thunks.ProtectFunction(safeFunc, 1), R(EAX));
|
||||
}
|
||||
|
||||
void Jit::JitSafeMem::Finish()
|
||||
{
|
||||
if (needsCheck_)
|
||||
|
|
|
@ -194,12 +194,17 @@ private:
|
|||
// Emit code preceding a slow write call, returns true if slow write is needed.
|
||||
bool PrepareSlowWrite();
|
||||
// Emit a slow write from src.
|
||||
void DoSlowWrite(void *safeFunc, const OpArg src);
|
||||
void DoSlowWrite(void *safeFunc, const OpArg src, int suboffset = 0);
|
||||
|
||||
// Emit code necessary for a memory read, returns true if MOV from src is needed.
|
||||
bool PrepareRead(OpArg &src);
|
||||
// Emit code for a slow read call, and returns true if result is in EAX.
|
||||
bool PrepareSlowRead(void *safeFunc);
|
||||
|
||||
// WARNING: Only works for non-GPR. Do not use for reads into GPR.
|
||||
OpArg NextFastAddress(int suboffset);
|
||||
// WARNING: Only works for non-GPR. Do not use for reads into GPR.
|
||||
void NextSlowRead(void *safeFunc, int suboffset);
|
||||
|
||||
// Cleans up final code for the memory access.
|
||||
void Finish();
|
||||
|
|
Loading…
Add table
Reference in a new issue