Merge pull request #2647 from unknownbrackets/jit-minor

Fix x64 jit bugs (memchecks and lwl/lwr/swl/swr)
This commit is contained in:
Henrik Rydgård 2013-07-06 09:35:12 -07:00
commit 8988d697e3
6 changed files with 133 additions and 61 deletions

View file

@ -559,6 +559,7 @@ void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
PUSH(R14);
PUSH(R15);
PUSH(R15); //just to align stack. duped push/pop doesn't hurt.
// TODO: XMM?
}
void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {

View file

@ -57,6 +57,14 @@ public:
Shutdown();
}
void *ProtectFunction(void *function, int num_params);
// Entry point of the emitted stub that saves caller-saved registers.
const u8 *GetSaveRegsFunction() const { return save_regs; }
// Entry point of the emitted stub that restores caller-saved registers.
const u8 *GetLoadRegsFunction() const { return load_regs; }
private:
void Init();
void Shutdown();

View file

@ -104,25 +104,28 @@ namespace MIPSComp
void Jit::CompITypeMemUnpairedLR(u32 op, bool isStore)
{
// TODO: ECX getting overwritten? Why?
DISABLE;
CONDITIONAL_DISABLE;
int o = op>>26;
int offset = (signed short)(op&0xFFFF);
int rt = _RT;
int rs = _RS;
X64Reg shiftReg = ECX;
gpr.FlushLockX(ECX, EDX);
#ifdef _M_X64
// On x64, we need ECX for CL, but it's also the first arg and gets lost. Annoying.
gpr.FlushLockX(R9);
shiftReg = R9;
#endif
gpr.Lock(rt);
gpr.BindToRegister(rt, true, !isStore);
// Grab the offset from alignment for shifting (<< 3 for bytes -> bits.)
MOV(32, R(ECX), gpr.R(rs));
ADD(32, R(ECX), Imm32(offset));
AND(32, R(ECX), Imm32(3));
SHL(32, R(ECX), Imm8(3));
MOV(32, R(shiftReg), gpr.R(rs));
ADD(32, R(shiftReg), Imm32(offset));
AND(32, R(shiftReg), Imm32(3));
SHL(32, R(shiftReg), Imm8(3));
{
JitSafeMem safe(this, rs, offset, ~3);
@ -133,10 +136,10 @@ namespace MIPSComp
if (!src.IsSimpleReg(EAX))
MOV(32, R(EAX), src);
CompITypeMemUnpairedLRInner(op);
CompITypeMemUnpairedLRInner(op, shiftReg);
}
if (safe.PrepareSlowRead((void *) &Memory::Read_U32))
CompITypeMemUnpairedLRInner(op);
CompITypeMemUnpairedLRInner(op, shiftReg);
safe.Finish();
}
@ -156,37 +159,69 @@ namespace MIPSComp
gpr.UnlockAllX();
}
void Jit::CompITypeMemUnpairedLRInner(u32 op)
void Jit::CompITypeMemUnpairedLRInner(u32 op, X64Reg shiftReg)
{
CONDITIONAL_DISABLE;
int o = op>>26;
int rt = _RT;
// Make sure we have the shift for the target in ECX.
if (shiftReg != ECX)
MOV(32, R(ECX), R(shiftReg));
// Now use that shift (left on target, right on source.)
switch (o)
{
case 34: //lwl
// First clear the target bits.
MOV(32, R(EDX), Imm32(0x00ffffff));
SHR(32, R(EDX), R(CL));
AND(32, gpr.R(rt), R(EDX));
break;
// Adjust the shift to the bits we want.
case 38: //lwr
SHR(32, R(EAX), R(CL));
break;
case 42: //swl
MOV(32, R(EDX), Imm32(0xffffff00));
SHL(32, R(EDX), R(CL));
AND(32, R(EAX), R(EDX));
break;
case 46: //swr
MOV(32, R(EDX), gpr.R(rt));
SHL(32, R(EDX), R(CL));
// EDX is already the target value to write, but may be overwritten below. Save it.
PUSH(EDX);
break;
default:
_dbg_assert_msg_(JIT, 0, "Unsupported left/right load/store instruction.");
}
// Flip ECX around from 3 bytes / 24 bits.
if (shiftReg == ECX)
{
MOV(32, R(EDX), Imm32(24));
SUB(32, R(EDX), R(ECX));
MOV(32, R(ECX), R(EDX));
}
else
{
MOV(32, R(ECX), Imm32(24));
SUB(32, R(ECX), R(shiftReg));
}
// Use the flipped shift (left on source, right on target) and write target.
switch (o)
{
case 34: //lwl
SHL(32, R(EAX), R(CL));
OR(32, gpr.R(rt), R(EAX));
break;
case 38: //lwr
// Adjust the shift to the bits we want.
SHR(32, R(EAX), R(CL));
// Clear the target bits we're replacing.
MOV(32, R(EDX), Imm32(24));
SUB(32, R(EDX), R(ECX));
MOV(32, R(ECX), R(EDX));
MOV(32, R(EDX), Imm32(0xffffff00));
SHL(32, R(EDX), R(CL));
AND(32, gpr.R(rt), R(EDX));
@ -195,15 +230,6 @@ namespace MIPSComp
break;
case 42: //swl
// First clear the target memory bits.
MOV(32, R(EDX), Imm32(0xffffff00));
SHL(32, R(EDX), R(CL));
AND(32, R(EAX), R(EDX));
// Flip the shift, and adjust the shift in a temporary.
MOV(32, R(EDX), Imm32(24));
SUB(32, R(EDX), R(ECX));
MOV(32, R(ECX), R(EDX));
MOV(32, R(EDX), gpr.R(rt));
SHR(32, R(EDX), R(CL));
@ -211,19 +237,11 @@ namespace MIPSComp
break;
case 46: //swr
// Adjust the shift to the bits we want.
MOV(32, R(EDX), gpr.R(rt));
SHL(32, R(EDX), R(CL));
PUSH(EDX);
// Clear the target bits we're replacing.
MOV(32, R(EDX), Imm32(24));
SUB(32, R(EDX), R(ECX));
MOV(32, R(ECX), R(EDX));
MOV(32, R(EDX), Imm32(0x00ffffff));
SHR(32, R(EDX), R(CL));
AND(32, R(EAX), R(EDX));
// This is the target value we saved earlier.
POP(EDX);
OR(32, R(EDX), R(EAX));
break;

View file

@ -292,14 +292,17 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
{
// TODO: Save/restore?
FlushAll();
CMP(32, M((void*)&coreState), Imm32(0));
FixupBranch skipCheck = J_CC(CC_E);
CMP(32, M((void*)&coreState), Imm32(CORE_RUNNING));
FixupBranch skipCheck1 = J_CC(CC_E);
CMP(32, M((void*)&coreState), Imm32(CORE_NEXTFRAME));
FixupBranch skipCheck2 = J_CC(CC_E);
if (js.afterOp & JitState::AFTER_REWIND_PC_BAD_STATE)
MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
else
MOV(32, M(&mips_->pc), Imm32(js.compilerPC + 4));
WriteSyscallExit();
SetJumpTarget(skipCheck);
SetJumpTarget(skipCheck1);
SetJumpTarget(skipCheck2);
js.afterOp = JitState::AFTER_NONE;
}
@ -355,11 +358,14 @@ void Jit::WriteExit(u32 destination, int exit_num)
// If we need to verify coreState and rewind, we may not jump yet.
if (js.afterOp & (JitState::AFTER_CORE_STATE | JitState::AFTER_REWIND_PC_BAD_STATE))
{
CMP(32, M((void*)&coreState), Imm32(0));
FixupBranch skipCheck = J_CC(CC_E);
CMP(32, M((void*)&coreState), Imm32(CORE_RUNNING));
FixupBranch skipCheck1 = J_CC(CC_E);
CMP(32, M((void*)&coreState), Imm32(CORE_NEXTFRAME));
FixupBranch skipCheck2 = J_CC(CC_E);
MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
WriteSyscallExit();
SetJumpTarget(skipCheck);
SetJumpTarget(skipCheck1);
SetJumpTarget(skipCheck2);
js.afterOp = JitState::AFTER_NONE;
}
@ -392,11 +398,14 @@ void Jit::WriteExitDestInEAX()
// If we need to verify coreState and rewind, we may not jump yet.
if (js.afterOp & (JitState::AFTER_CORE_STATE | JitState::AFTER_REWIND_PC_BAD_STATE))
{
CMP(32, M((void*)&coreState), Imm32(0));
FixupBranch skipCheck = J_CC(CC_E);
CMP(32, M((void*)&coreState), Imm32(CORE_RUNNING));
FixupBranch skipCheck1 = J_CC(CC_E);
CMP(32, M((void*)&coreState), Imm32(CORE_NEXTFRAME));
FixupBranch skipCheck2 = J_CC(CC_E);
MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
WriteSyscallExit();
SetJumpTarget(skipCheck);
SetJumpTarget(skipCheck1);
SetJumpTarget(skipCheck2);
js.afterOp = JitState::AFTER_NONE;
}
@ -418,13 +427,13 @@ void Jit::WriteExitDestInEAX()
SetJumpTarget(tooLow);
SetJumpTarget(tooHigh);
ABI_CallFunctionA(thunks.ProtectFunction((void *) Memory::GetPointer, 1), R(EAX));
CallProtectedFunction((void *) Memory::GetPointer, R(EAX));
CMP(32, R(EAX), Imm32(0));
FixupBranch skip = J_CC(CC_NE);
// TODO: "Ignore" this so other threads can continue?
if (g_Config.bIgnoreBadMemAccess)
ABI_CallFunctionA(thunks.ProtectFunction((void *) Core_UpdateState, 1), Imm32(CORE_ERROR));
CallProtectedFunction((void *) Core_UpdateState, Imm32(CORE_ERROR));
SUB(32, M(&currentMIPS->downcount), Imm32(0));
JMP(asm_.dispatcherCheckCoreState, true);
@ -607,7 +616,6 @@ OpArg Jit::JitSafeMem::PrepareMemoryOpArg(ReadType type)
jit_->SUB(32, R(xaddr_), Imm32(offset_));
}
#ifdef _M_IX86
return MDisp(xaddr_, (u32) Memory::base + offset_);
#else
@ -657,7 +665,7 @@ void Jit::JitSafeMem::DoSlowWrite(void *safeFunc, const OpArg src, int suboffset
jit_->AND(32, R(EAX), Imm32(alignMask_));
}
jit_->ABI_CallFunctionAA(jit_->thunks.ProtectFunction(safeFunc, 2), src, R(EAX));
jit_->CallProtectedFunction(safeFunc, src, R(EAX));
needsCheck_ = true;
}
@ -680,7 +688,7 @@ bool Jit::JitSafeMem::PrepareSlowRead(void *safeFunc)
jit_->AND(32, R(EAX), Imm32(alignMask_));
}
jit_->ABI_CallFunctionA(jit_->thunks.ProtectFunction(safeFunc, 1), R(EAX));
jit_->CallProtectedFunction(safeFunc, R(EAX));
needsCheck_ = true;
return true;
}
@ -710,7 +718,7 @@ void Jit::JitSafeMem::NextSlowRead(void *safeFunc, int suboffset)
jit_->AND(32, R(EAX), Imm32(alignMask_));
}
jit_->ABI_CallFunctionA(jit_->thunks.ProtectFunction(safeFunc, 1), R(EAX));
jit_->CallProtectedFunction(safeFunc, R(EAX));
}
bool Jit::JitSafeMem::ImmValid()
@ -755,7 +763,7 @@ void Jit::JitSafeMem::MemCheckImm(ReadType type)
return;
jit_->MOV(32, M(&jit_->mips_->pc), Imm32(jit_->js.compilerPC));
jit_->ABI_CallFunctionCCC(jit_->thunks.ProtectFunction((void *)&JitMemCheck, 3), iaddr_, size_, type == MEM_WRITE ? 1 : 0);
jit_->CallProtectedFunction((void *)&JitMemCheck, iaddr_, size_, type == MEM_WRITE ? 1 : 0);
jit_->CMP(32, M((void*)&coreState), Imm32(0));
skipChecks_.push_back(jit_->J_CC(CC_NE, true));
@ -790,11 +798,14 @@ void Jit::JitSafeMem::MemCheckAsm(ReadType type)
skipNext = jit_->J_CC(CC_NE);
}
jit_->PUSH(xaddr_);
// Keep the stack 16-byte aligned, just PUSH/POP 4 times.
for (int i = 0; i < 4; ++i)
jit_->PUSH(xaddr_);
jit_->MOV(32, M(&jit_->mips_->pc), Imm32(jit_->js.compilerPC));
jit_->ADD(32, R(xaddr_), Imm32(offset_));
jit_->ABI_CallFunctionACC(jit_->thunks.ProtectFunction((void *)&JitMemCheck, 3), R(xaddr_), size_, type == MEM_WRITE ? 1 : 0);
jit_->POP(xaddr_);
jit_->CallProtectedFunction((void *)&JitMemCheck, R(xaddr_), size_, type == MEM_WRITE ? 1 : 0);
for (int i = 0; i < 4; ++i)
jit_->POP(xaddr_);
jit_->SetJumpTarget(skipNext);
if (it->end != 0)
@ -809,6 +820,34 @@ void Jit::JitSafeMem::MemCheckAsm(ReadType type)
}
}
void Jit::CallProtectedFunction(void *func, const OpArg &arg1)
{
// We don't regcache RCX, so the below is safe (and also faster, maybe branch prediction?)
ABI_CallFunctionA(thunks.ProtectFunction(func, 1), arg1);
}
void Jit::CallProtectedFunction(void *func, const OpArg &arg1, const OpArg &arg2)
{
// We don't regcache RCX/RDX, so the below is safe (and also faster, maybe branch prediction?)
ABI_CallFunctionAA(thunks.ProtectFunction(func, 2), arg1, arg2);
}
// Emits a three-constant-argument call, bracketed by the save/load-regs
// stubs because on x64 the third arg uses R8, which is caller saved.
void Jit::CallProtectedFunction(void *func, const u32 arg1, const u32 arg2, const u32 arg3)
{
	void *saveStub = (void *)thunks.GetSaveRegsFunction();
	void *loadStub = (void *)thunks.GetLoadRegsFunction();

	ABI_CallFunction(saveStub);
	ABI_CallFunctionCCC(func, arg1, arg2, arg3);
	ABI_CallFunction(loadStub);
}
// Emits a call taking an OpArg plus two constants, bracketed by the
// save/load-regs stubs because on x64 the third arg uses R8, which is
// caller saved.
void Jit::CallProtectedFunction(void *func, const OpArg &arg1, const u32 arg2, const u32 arg3)
{
	void *saveStub = (void *)thunks.GetSaveRegsFunction();
	void *loadStub = (void *)thunks.GetLoadRegsFunction();

	ABI_CallFunction(saveStub);
	ABI_CallFunctionACC(func, arg1, arg2, arg3);
	ABI_CallFunction(loadStub);
}
void Jit::Comp_DoNothing(u32 op) { }
} // namespace

View file

@ -275,11 +275,16 @@ private:
void CompITypeMemRead(u32 op, u32 bits, void (XEmitter::*mov)(int, int, X64Reg, OpArg), void *safeFunc);
void CompITypeMemWrite(u32 op, u32 bits, void *safeFunc);
void CompITypeMemUnpairedLR(u32 op, bool isStore);
void CompITypeMemUnpairedLRInner(u32 op);
void CompITypeMemUnpairedLRInner(u32 op, X64Reg shiftReg);
void CompFPTriArith(u32 op, void (XEmitter::*arith)(X64Reg reg, OpArg), bool orderMatters);
void CompFPComp(int lhs, int rhs, u8 compare, bool allowNaN = false);
void CallProtectedFunction(void *func, const OpArg &arg1);
void CallProtectedFunction(void *func, const OpArg &arg1, const OpArg &arg2);
void CallProtectedFunction(void *func, const u32 arg1, const u32 arg2, const u32 arg3);
void CallProtectedFunction(void *func, const OpArg &arg1, const u32 arg2, const u32 arg3);
JitBlockCache blocks;
JitOptions jo;
JitState js;

View file

@ -26,15 +26,16 @@ using namespace Gen;
static const int allocationOrder[] =
{
// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into this.
// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into this.
// On x64, RCX and RDX are the first args. CallProtectedFunction() assumes they're not regcached.
#ifdef _M_X64
#ifdef _WIN32
RSI, RDI, R13, R14, R8, R9, R10, R11, R12, //, RCX
RSI, RDI, R13, R14, R8, R9, R10, R11, R12,
#else
RBP, R13, R14, R8, R9, R10, R11, R12, //, RCX
RBP, R13, R14, R8, R9, R10, R11, R12,
#endif
#elif _M_IX86
ESI, EDI, EBP, EDX, ECX, // Let's try to free up EBX as well.
ESI, EDI, EBP, EDX, ECX, // Let's try to free up EBX as well.
#endif
};