mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #2647 from unknownbrackets/jit-minor
Fix x64 jit bugs (memchecks and lwl/lwr/swl/swr)
This commit is contained in:
commit
8988d697e3
6 changed files with 133 additions and 61 deletions
|
@ -559,6 +559,7 @@ void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
|
|||
PUSH(R14);
|
||||
PUSH(R15);
|
||||
PUSH(R15); //just to align stack. duped push/pop doesn't hurt.
|
||||
// TODO: XMM?
|
||||
}
|
||||
|
||||
void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
|
||||
|
|
|
@ -57,6 +57,14 @@ public:
|
|||
Shutdown();
|
||||
}
|
||||
void *ProtectFunction(void *function, int num_params);
|
||||
|
||||
const u8 *GetSaveRegsFunction() const {
|
||||
return save_regs;
|
||||
}
|
||||
const u8 *GetLoadRegsFunction() const {
|
||||
return load_regs;
|
||||
}
|
||||
|
||||
private:
|
||||
void Init();
|
||||
void Shutdown();
|
||||
|
|
|
@ -104,25 +104,28 @@ namespace MIPSComp
|
|||
|
||||
void Jit::CompITypeMemUnpairedLR(u32 op, bool isStore)
|
||||
{
|
||||
// TODO: ECX getting overwritten? Why?
|
||||
DISABLE;
|
||||
|
||||
CONDITIONAL_DISABLE;
|
||||
int o = op>>26;
|
||||
int offset = (signed short)(op&0xFFFF);
|
||||
int rt = _RT;
|
||||
int rs = _RS;
|
||||
|
||||
X64Reg shiftReg = ECX;
|
||||
gpr.FlushLockX(ECX, EDX);
|
||||
#ifdef _M_X64
|
||||
// On x64, we need ECX for CL, but it's also the first arg and gets lost. Annoying.
|
||||
gpr.FlushLockX(R9);
|
||||
shiftReg = R9;
|
||||
#endif
|
||||
|
||||
gpr.Lock(rt);
|
||||
gpr.BindToRegister(rt, true, !isStore);
|
||||
|
||||
// Grab the offset from alignment for shifting (<< 3 for bytes -> bits.)
|
||||
MOV(32, R(ECX), gpr.R(rs));
|
||||
ADD(32, R(ECX), Imm32(offset));
|
||||
AND(32, R(ECX), Imm32(3));
|
||||
SHL(32, R(ECX), Imm8(3));
|
||||
MOV(32, R(shiftReg), gpr.R(rs));
|
||||
ADD(32, R(shiftReg), Imm32(offset));
|
||||
AND(32, R(shiftReg), Imm32(3));
|
||||
SHL(32, R(shiftReg), Imm8(3));
|
||||
|
||||
{
|
||||
JitSafeMem safe(this, rs, offset, ~3);
|
||||
|
@ -133,10 +136,10 @@ namespace MIPSComp
|
|||
if (!src.IsSimpleReg(EAX))
|
||||
MOV(32, R(EAX), src);
|
||||
|
||||
CompITypeMemUnpairedLRInner(op);
|
||||
CompITypeMemUnpairedLRInner(op, shiftReg);
|
||||
}
|
||||
if (safe.PrepareSlowRead((void *) &Memory::Read_U32))
|
||||
CompITypeMemUnpairedLRInner(op);
|
||||
CompITypeMemUnpairedLRInner(op, shiftReg);
|
||||
safe.Finish();
|
||||
}
|
||||
|
||||
|
@ -156,37 +159,69 @@ namespace MIPSComp
|
|||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit::CompITypeMemUnpairedLRInner(u32 op)
|
||||
void Jit::CompITypeMemUnpairedLRInner(u32 op, X64Reg shiftReg)
|
||||
{
|
||||
CONDITIONAL_DISABLE;
|
||||
int o = op>>26;
|
||||
int rt = _RT;
|
||||
|
||||
// Make sure we have the shift for the target in ECX.
|
||||
if (shiftReg != ECX)
|
||||
MOV(32, R(ECX), R(shiftReg));
|
||||
|
||||
// Now use that shift (left on target, right on source.)
|
||||
switch (o)
|
||||
{
|
||||
case 34: //lwl
|
||||
// First clear the target bits.
|
||||
MOV(32, R(EDX), Imm32(0x00ffffff));
|
||||
SHR(32, R(EDX), R(CL));
|
||||
AND(32, gpr.R(rt), R(EDX));
|
||||
break;
|
||||
|
||||
// Adjust the shift to the bits we want.
|
||||
case 38: //lwr
|
||||
SHR(32, R(EAX), R(CL));
|
||||
break;
|
||||
|
||||
case 42: //swl
|
||||
MOV(32, R(EDX), Imm32(0xffffff00));
|
||||
SHL(32, R(EDX), R(CL));
|
||||
AND(32, R(EAX), R(EDX));
|
||||
break;
|
||||
|
||||
case 46: //swr
|
||||
MOV(32, R(EDX), gpr.R(rt));
|
||||
SHL(32, R(EDX), R(CL));
|
||||
// EDX is already the target value to write, but may be overwritten below. Save it.
|
||||
PUSH(EDX);
|
||||
break;
|
||||
|
||||
default:
|
||||
_dbg_assert_msg_(JIT, 0, "Unsupported left/right load/store instruction.");
|
||||
}
|
||||
|
||||
// Flip ECX around from 3 bytes / 24 bits.
|
||||
if (shiftReg == ECX)
|
||||
{
|
||||
MOV(32, R(EDX), Imm32(24));
|
||||
SUB(32, R(EDX), R(ECX));
|
||||
MOV(32, R(ECX), R(EDX));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(ECX), Imm32(24));
|
||||
SUB(32, R(ECX), R(shiftReg));
|
||||
}
|
||||
|
||||
// Use the flipped shift (left on source, right on target) and write target.
|
||||
switch (o)
|
||||
{
|
||||
case 34: //lwl
|
||||
SHL(32, R(EAX), R(CL));
|
||||
|
||||
OR(32, gpr.R(rt), R(EAX));
|
||||
break;
|
||||
|
||||
case 38: //lwr
|
||||
// Adjust the shift to the bits we want.
|
||||
SHR(32, R(EAX), R(CL));
|
||||
|
||||
// Clear the target bits we're replacing.
|
||||
MOV(32, R(EDX), Imm32(24));
|
||||
SUB(32, R(EDX), R(ECX));
|
||||
MOV(32, R(ECX), R(EDX));
|
||||
MOV(32, R(EDX), Imm32(0xffffff00));
|
||||
SHL(32, R(EDX), R(CL));
|
||||
AND(32, gpr.R(rt), R(EDX));
|
||||
|
@ -195,15 +230,6 @@ namespace MIPSComp
|
|||
break;
|
||||
|
||||
case 42: //swl
|
||||
// First clear the target memory bits.
|
||||
MOV(32, R(EDX), Imm32(0xffffff00));
|
||||
SHL(32, R(EDX), R(CL));
|
||||
AND(32, R(EAX), R(EDX));
|
||||
|
||||
// Flip the shift, and adjust the shift in a temporary.
|
||||
MOV(32, R(EDX), Imm32(24));
|
||||
SUB(32, R(EDX), R(ECX));
|
||||
MOV(32, R(ECX), R(EDX));
|
||||
MOV(32, R(EDX), gpr.R(rt));
|
||||
SHR(32, R(EDX), R(CL));
|
||||
|
||||
|
@ -211,19 +237,11 @@ namespace MIPSComp
|
|||
break;
|
||||
|
||||
case 46: //swr
|
||||
// Adjust the shift to the bits we want.
|
||||
MOV(32, R(EDX), gpr.R(rt));
|
||||
SHL(32, R(EDX), R(CL));
|
||||
PUSH(EDX);
|
||||
|
||||
// Clear the target bits we're replacing.
|
||||
MOV(32, R(EDX), Imm32(24));
|
||||
SUB(32, R(EDX), R(ECX));
|
||||
MOV(32, R(ECX), R(EDX));
|
||||
MOV(32, R(EDX), Imm32(0x00ffffff));
|
||||
SHR(32, R(EDX), R(CL));
|
||||
AND(32, R(EAX), R(EDX));
|
||||
|
||||
// This is the target value we saved earlier.
|
||||
POP(EDX);
|
||||
OR(32, R(EDX), R(EAX));
|
||||
break;
|
||||
|
|
|
@ -292,14 +292,17 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
|
|||
{
|
||||
// TODO: Save/restore?
|
||||
FlushAll();
|
||||
CMP(32, M((void*)&coreState), Imm32(0));
|
||||
FixupBranch skipCheck = J_CC(CC_E);
|
||||
CMP(32, M((void*)&coreState), Imm32(CORE_RUNNING));
|
||||
FixupBranch skipCheck1 = J_CC(CC_E);
|
||||
CMP(32, M((void*)&coreState), Imm32(CORE_NEXTFRAME));
|
||||
FixupBranch skipCheck2 = J_CC(CC_E);
|
||||
if (js.afterOp & JitState::AFTER_REWIND_PC_BAD_STATE)
|
||||
MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
|
||||
else
|
||||
MOV(32, M(&mips_->pc), Imm32(js.compilerPC + 4));
|
||||
WriteSyscallExit();
|
||||
SetJumpTarget(skipCheck);
|
||||
SetJumpTarget(skipCheck1);
|
||||
SetJumpTarget(skipCheck2);
|
||||
|
||||
js.afterOp = JitState::AFTER_NONE;
|
||||
}
|
||||
|
@ -355,11 +358,14 @@ void Jit::WriteExit(u32 destination, int exit_num)
|
|||
// If we need to verify coreState and rewind, we may not jump yet.
|
||||
if (js.afterOp & (JitState::AFTER_CORE_STATE | JitState::AFTER_REWIND_PC_BAD_STATE))
|
||||
{
|
||||
CMP(32, M((void*)&coreState), Imm32(0));
|
||||
FixupBranch skipCheck = J_CC(CC_E);
|
||||
CMP(32, M((void*)&coreState), Imm32(CORE_RUNNING));
|
||||
FixupBranch skipCheck1 = J_CC(CC_E);
|
||||
CMP(32, M((void*)&coreState), Imm32(CORE_NEXTFRAME));
|
||||
FixupBranch skipCheck2 = J_CC(CC_E);
|
||||
MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
|
||||
WriteSyscallExit();
|
||||
SetJumpTarget(skipCheck);
|
||||
SetJumpTarget(skipCheck1);
|
||||
SetJumpTarget(skipCheck2);
|
||||
|
||||
js.afterOp = JitState::AFTER_NONE;
|
||||
}
|
||||
|
@ -392,11 +398,14 @@ void Jit::WriteExitDestInEAX()
|
|||
// If we need to verify coreState and rewind, we may not jump yet.
|
||||
if (js.afterOp & (JitState::AFTER_CORE_STATE | JitState::AFTER_REWIND_PC_BAD_STATE))
|
||||
{
|
||||
CMP(32, M((void*)&coreState), Imm32(0));
|
||||
FixupBranch skipCheck = J_CC(CC_E);
|
||||
CMP(32, M((void*)&coreState), Imm32(CORE_RUNNING));
|
||||
FixupBranch skipCheck1 = J_CC(CC_E);
|
||||
CMP(32, M((void*)&coreState), Imm32(CORE_NEXTFRAME));
|
||||
FixupBranch skipCheck2 = J_CC(CC_E);
|
||||
MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
|
||||
WriteSyscallExit();
|
||||
SetJumpTarget(skipCheck);
|
||||
SetJumpTarget(skipCheck1);
|
||||
SetJumpTarget(skipCheck2);
|
||||
|
||||
js.afterOp = JitState::AFTER_NONE;
|
||||
}
|
||||
|
@ -418,13 +427,13 @@ void Jit::WriteExitDestInEAX()
|
|||
SetJumpTarget(tooLow);
|
||||
SetJumpTarget(tooHigh);
|
||||
|
||||
ABI_CallFunctionA(thunks.ProtectFunction((void *) Memory::GetPointer, 1), R(EAX));
|
||||
CallProtectedFunction((void *) Memory::GetPointer, R(EAX));
|
||||
CMP(32, R(EAX), Imm32(0));
|
||||
FixupBranch skip = J_CC(CC_NE);
|
||||
|
||||
// TODO: "Ignore" this so other threads can continue?
|
||||
if (g_Config.bIgnoreBadMemAccess)
|
||||
ABI_CallFunctionA(thunks.ProtectFunction((void *) Core_UpdateState, 1), Imm32(CORE_ERROR));
|
||||
CallProtectedFunction((void *) Core_UpdateState, Imm32(CORE_ERROR));
|
||||
|
||||
SUB(32, M(&currentMIPS->downcount), Imm32(0));
|
||||
JMP(asm_.dispatcherCheckCoreState, true);
|
||||
|
@ -607,7 +616,6 @@ OpArg Jit::JitSafeMem::PrepareMemoryOpArg(ReadType type)
|
|||
jit_->SUB(32, R(xaddr_), Imm32(offset_));
|
||||
}
|
||||
|
||||
|
||||
#ifdef _M_IX86
|
||||
return MDisp(xaddr_, (u32) Memory::base + offset_);
|
||||
#else
|
||||
|
@ -657,7 +665,7 @@ void Jit::JitSafeMem::DoSlowWrite(void *safeFunc, const OpArg src, int suboffset
|
|||
jit_->AND(32, R(EAX), Imm32(alignMask_));
|
||||
}
|
||||
|
||||
jit_->ABI_CallFunctionAA(jit_->thunks.ProtectFunction(safeFunc, 2), src, R(EAX));
|
||||
jit_->CallProtectedFunction(safeFunc, src, R(EAX));
|
||||
needsCheck_ = true;
|
||||
}
|
||||
|
||||
|
@ -680,7 +688,7 @@ bool Jit::JitSafeMem::PrepareSlowRead(void *safeFunc)
|
|||
jit_->AND(32, R(EAX), Imm32(alignMask_));
|
||||
}
|
||||
|
||||
jit_->ABI_CallFunctionA(jit_->thunks.ProtectFunction(safeFunc, 1), R(EAX));
|
||||
jit_->CallProtectedFunction(safeFunc, R(EAX));
|
||||
needsCheck_ = true;
|
||||
return true;
|
||||
}
|
||||
|
@ -710,7 +718,7 @@ void Jit::JitSafeMem::NextSlowRead(void *safeFunc, int suboffset)
|
|||
jit_->AND(32, R(EAX), Imm32(alignMask_));
|
||||
}
|
||||
|
||||
jit_->ABI_CallFunctionA(jit_->thunks.ProtectFunction(safeFunc, 1), R(EAX));
|
||||
jit_->CallProtectedFunction(safeFunc, R(EAX));
|
||||
}
|
||||
|
||||
bool Jit::JitSafeMem::ImmValid()
|
||||
|
@ -755,7 +763,7 @@ void Jit::JitSafeMem::MemCheckImm(ReadType type)
|
|||
return;
|
||||
|
||||
jit_->MOV(32, M(&jit_->mips_->pc), Imm32(jit_->js.compilerPC));
|
||||
jit_->ABI_CallFunctionCCC(jit_->thunks.ProtectFunction((void *)&JitMemCheck, 3), iaddr_, size_, type == MEM_WRITE ? 1 : 0);
|
||||
jit_->CallProtectedFunction((void *)&JitMemCheck, iaddr_, size_, type == MEM_WRITE ? 1 : 0);
|
||||
|
||||
jit_->CMP(32, M((void*)&coreState), Imm32(0));
|
||||
skipChecks_.push_back(jit_->J_CC(CC_NE, true));
|
||||
|
@ -790,11 +798,14 @@ void Jit::JitSafeMem::MemCheckAsm(ReadType type)
|
|||
skipNext = jit_->J_CC(CC_NE);
|
||||
}
|
||||
|
||||
jit_->PUSH(xaddr_);
|
||||
// Keep the stack 16-byte aligned, just PUSH/POP 4 times.
|
||||
for (int i = 0; i < 4; ++i)
|
||||
jit_->PUSH(xaddr_);
|
||||
jit_->MOV(32, M(&jit_->mips_->pc), Imm32(jit_->js.compilerPC));
|
||||
jit_->ADD(32, R(xaddr_), Imm32(offset_));
|
||||
jit_->ABI_CallFunctionACC(jit_->thunks.ProtectFunction((void *)&JitMemCheck, 3), R(xaddr_), size_, type == MEM_WRITE ? 1 : 0);
|
||||
jit_->POP(xaddr_);
|
||||
jit_->CallProtectedFunction((void *)&JitMemCheck, R(xaddr_), size_, type == MEM_WRITE ? 1 : 0);
|
||||
for (int i = 0; i < 4; ++i)
|
||||
jit_->POP(xaddr_);
|
||||
|
||||
jit_->SetJumpTarget(skipNext);
|
||||
if (it->end != 0)
|
||||
|
@ -809,6 +820,34 @@ void Jit::JitSafeMem::MemCheckAsm(ReadType type)
|
|||
}
|
||||
}
|
||||
|
||||
void Jit::CallProtectedFunction(void *func, const OpArg &arg1)
|
||||
{
|
||||
// We don't regcache RCX, so the below is safe (and also faster, maybe branch prediction?)
|
||||
ABI_CallFunctionA(thunks.ProtectFunction(func, 1), arg1);
|
||||
}
|
||||
|
||||
void Jit::CallProtectedFunction(void *func, const OpArg &arg1, const OpArg &arg2)
|
||||
{
|
||||
// We don't regcache RCX/RDX, so the below is safe (and also faster, maybe branch prediction?)
|
||||
ABI_CallFunctionAA(thunks.ProtectFunction(func, 2), arg1, arg2);
|
||||
}
|
||||
|
||||
void Jit::CallProtectedFunction(void *func, const u32 arg1, const u32 arg2, const u32 arg3)
|
||||
{
|
||||
// On x64, we need to save R8, which is caller saved.
|
||||
ABI_CallFunction((void *)thunks.GetSaveRegsFunction());
|
||||
ABI_CallFunctionCCC(func, arg1, arg2, arg3);
|
||||
ABI_CallFunction((void *)thunks.GetLoadRegsFunction());
|
||||
}
|
||||
|
||||
void Jit::CallProtectedFunction(void *func, const OpArg &arg1, const u32 arg2, const u32 arg3)
|
||||
{
|
||||
// On x64, we need to save R8, which is caller saved.
|
||||
ABI_CallFunction((void *)thunks.GetSaveRegsFunction());
|
||||
ABI_CallFunctionACC(func, arg1, arg2, arg3);
|
||||
ABI_CallFunction((void *)thunks.GetLoadRegsFunction());
|
||||
}
|
||||
|
||||
void Jit::Comp_DoNothing(u32 op) { }
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -275,11 +275,16 @@ private:
|
|||
void CompITypeMemRead(u32 op, u32 bits, void (XEmitter::*mov)(int, int, X64Reg, OpArg), void *safeFunc);
|
||||
void CompITypeMemWrite(u32 op, u32 bits, void *safeFunc);
|
||||
void CompITypeMemUnpairedLR(u32 op, bool isStore);
|
||||
void CompITypeMemUnpairedLRInner(u32 op);
|
||||
void CompITypeMemUnpairedLRInner(u32 op, X64Reg shiftReg);
|
||||
|
||||
void CompFPTriArith(u32 op, void (XEmitter::*arith)(X64Reg reg, OpArg), bool orderMatters);
|
||||
void CompFPComp(int lhs, int rhs, u8 compare, bool allowNaN = false);
|
||||
|
||||
void CallProtectedFunction(void *func, const OpArg &arg1);
|
||||
void CallProtectedFunction(void *func, const OpArg &arg1, const OpArg &arg2);
|
||||
void CallProtectedFunction(void *func, const u32 arg1, const u32 arg2, const u32 arg3);
|
||||
void CallProtectedFunction(void *func, const OpArg &arg1, const u32 arg2, const u32 arg3);
|
||||
|
||||
JitBlockCache blocks;
|
||||
JitOptions jo;
|
||||
JitState js;
|
||||
|
|
|
@ -26,15 +26,16 @@ using namespace Gen;
|
|||
|
||||
static const int allocationOrder[] =
|
||||
{
|
||||
// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into this.
|
||||
// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into this.
|
||||
// On x64, RCX and RDX are the first args. CallProtectedFunction() assumes they're not regcached.
|
||||
#ifdef _M_X64
|
||||
#ifdef _WIN32
|
||||
RSI, RDI, R13, R14, R8, R9, R10, R11, R12, //, RCX
|
||||
RSI, RDI, R13, R14, R8, R9, R10, R11, R12,
|
||||
#else
|
||||
RBP, R13, R14, R8, R9, R10, R11, R12, //, RCX
|
||||
RBP, R13, R14, R8, R9, R10, R11, R12,
|
||||
#endif
|
||||
#elif _M_IX86
|
||||
ESI, EDI, EBP, EDX, ECX, // Let's try to free up EBX as well.
|
||||
ESI, EDI, EBP, EDX, ECX, // Let's try to free up EBX as well.
|
||||
#endif
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue