From 19f2b35679a32ff958cf9b85ec8731ace37ea8dd Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets"
Date: Sat, 6 Jul 2013 00:22:09 -0700
Subject: [PATCH 1/4] Keep the stack aligned when tripping memchecks.

---
 Core/MIPS/x86/Jit.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp
index d712bdb778..11080096ae 100644
--- a/Core/MIPS/x86/Jit.cpp
+++ b/Core/MIPS/x86/Jit.cpp
@@ -790,11 +790,14 @@ void Jit::JitSafeMem::MemCheckAsm(ReadType type)
 		skipNext = jit_->J_CC(CC_NE);
 	}
-	jit_->PUSH(xaddr_);
+	// Keep the stack 16-byte aligned, just PUSH/POP 4 times.
+	for (int i = 0; i < 4; ++i)
+		jit_->PUSH(xaddr_);
 	jit_->MOV(32, M(&jit_->mips_->pc), Imm32(jit_->js.compilerPC));
 	jit_->ADD(32, R(xaddr_), Imm32(offset_));
 	jit_->ABI_CallFunctionACC(jit_->thunks.ProtectFunction((void *)&JitMemCheck, 3), R(xaddr_), size_, type == MEM_WRITE ? 1 : 0);
-	jit_->POP(xaddr_);
+	for (int i = 0; i < 4; ++i)
+		jit_->POP(xaddr_);
 
 	jit_->SetJumpTarget(skipNext);
 
 	if (it->end != 0)

From 662ae772141237ce99b65e88a8b7de4090820f09 Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets"
Date: Sat, 6 Jul 2013 00:54:53 -0700
Subject: [PATCH 2/4] Save regs before/after 3-arg func calls on x86.

This fixes bugs that occurred only on x64 when ABI_CallFunctionACC and
similar were used. Things were breaking because R8 (arg 3) was not
being saved.
---
 Common/ABI.cpp        |  1 +
 Common/Thunk.h        |  8 ++++++++
 Core/MIPS/x86/Jit.cpp | 43 +++++++++++++++++++++++++++++++++++--------
 Core/MIPS/x86/Jit.h   |  5 +++++
 4 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/Common/ABI.cpp b/Common/ABI.cpp
index bf683b6fe4..75185cc7ab 100644
--- a/Common/ABI.cpp
+++ b/Common/ABI.cpp
@@ -559,6 +559,7 @@ void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
 	PUSH(R14);
 	PUSH(R15);
 	PUSH(R15); //just to align stack. duped push/pop doesn't hurt.
+	// TODO: XMM?
 }
 
 void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
diff --git a/Common/Thunk.h b/Common/Thunk.h
index f841897bd6..7ee758b5fc 100644
--- a/Common/Thunk.h
+++ b/Common/Thunk.h
@@ -57,6 +57,14 @@ public:
 		Shutdown();
 	}
 	void *ProtectFunction(void *function, int num_params);
+
+	const u8 *GetSaveRegsFunction() const {
+		return save_regs;
+	}
+	const u8 *GetLoadRegsFunction() const {
+		return load_regs;
+	}
+
 private:
 	void Init();
 	void Shutdown();
diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp
index 11080096ae..7f05b3d504 100644
--- a/Core/MIPS/x86/Jit.cpp
+++ b/Core/MIPS/x86/Jit.cpp
@@ -418,13 +418,13 @@ void Jit::WriteExitDestInEAX()
 	SetJumpTarget(tooLow);
 	SetJumpTarget(tooHigh);
 
-	ABI_CallFunctionA(thunks.ProtectFunction((void *) Memory::GetPointer, 1), R(EAX));
+	CallProtectedFunction((void *) Memory::GetPointer, R(EAX));
 	CMP(32, R(EAX), Imm32(0));
 	FixupBranch skip = J_CC(CC_NE);
 
 	// TODO: "Ignore" this so other threads can continue?
 	if (g_Config.bIgnoreBadMemAccess)
-		ABI_CallFunctionA(thunks.ProtectFunction((void *) Core_UpdateState, 1), Imm32(CORE_ERROR));
+		CallProtectedFunction((void *) Core_UpdateState, Imm32(CORE_ERROR));
 
 	SUB(32, M(&currentMIPS->downcount), Imm32(0));
 	JMP(asm_.dispatcherCheckCoreState, true);
@@ -607,7 +607,6 @@ OpArg Jit::JitSafeMem::PrepareMemoryOpArg(ReadType type)
 			jit_->SUB(32, R(xaddr_), Imm32(offset_));
 	}
 
-
 #ifdef _M_IX86
 	return MDisp(xaddr_, (u32) Memory::base + offset_);
 #else
@@ -657,7 +656,7 @@ void Jit::JitSafeMem::DoSlowWrite(void *safeFunc, const OpArg src, int suboffset
 		jit_->AND(32, R(EAX), Imm32(alignMask_));
 	}
 
-	jit_->ABI_CallFunctionAA(jit_->thunks.ProtectFunction(safeFunc, 2), src, R(EAX));
+	jit_->CallProtectedFunction(safeFunc, src, R(EAX));
 	needsCheck_ = true;
 }
 
@@ -680,7 +679,7 @@ bool Jit::JitSafeMem::PrepareSlowRead(void *safeFunc)
 			jit_->AND(32, R(EAX), Imm32(alignMask_));
 		}
 
-		jit_->ABI_CallFunctionA(jit_->thunks.ProtectFunction(safeFunc, 1), R(EAX));
+		jit_->CallProtectedFunction(safeFunc, R(EAX));
 		needsCheck_ = true;
 		return true;
 	}
@@ -710,7 +709,7 @@ void Jit::JitSafeMem::NextSlowRead(void *safeFunc, int suboffset)
 		jit_->AND(32, R(EAX), Imm32(alignMask_));
 	}
 
-	jit_->ABI_CallFunctionA(jit_->thunks.ProtectFunction(safeFunc, 1), R(EAX));
+	jit_->CallProtectedFunction(safeFunc, R(EAX));
 }
 
 bool Jit::JitSafeMem::ImmValid()
@@ -755,7 +754,7 @@ void Jit::JitSafeMem::MemCheckImm(ReadType type)
 			return;
 
 		jit_->MOV(32, M(&jit_->mips_->pc), Imm32(jit_->js.compilerPC));
-		jit_->ABI_CallFunctionCCC(jit_->thunks.ProtectFunction((void *)&JitMemCheck, 3), iaddr_, size_, type == MEM_WRITE ? 1 : 0);
+		jit_->CallProtectedFunction((void *)&JitMemCheck, iaddr_, size_, type == MEM_WRITE ? 1 : 0);
 
 		jit_->CMP(32, M((void*)&coreState), Imm32(0));
 		skipChecks_.push_back(jit_->J_CC(CC_NE, true));
@@ -795,7 +794,7 @@ void Jit::JitSafeMem::MemCheckAsm(ReadType type)
 			jit_->PUSH(xaddr_);
 		jit_->MOV(32, M(&jit_->mips_->pc), Imm32(jit_->js.compilerPC));
 		jit_->ADD(32, R(xaddr_), Imm32(offset_));
-		jit_->ABI_CallFunctionACC(jit_->thunks.ProtectFunction((void *)&JitMemCheck, 3), R(xaddr_), size_, type == MEM_WRITE ? 1 : 0);
+		jit_->CallProtectedFunction((void *)&JitMemCheck, R(xaddr_), size_, type == MEM_WRITE ? 1 : 0);
 		for (int i = 0; i < 4; ++i)
 			jit_->POP(xaddr_);
 
@@ -812,6 +811,34 @@ void Jit::JitSafeMem::MemCheckAsm(ReadType type)
 	}
 }
 
+void Jit::CallProtectedFunction(void *func, const OpArg &arg1)
+{
+	// We don't regcache RCX, so the below is safe (and also faster, maybe branch prediction?)
+	ABI_CallFunctionA(thunks.ProtectFunction(func, 1), arg1);
+}
+
+void Jit::CallProtectedFunction(void *func, const OpArg &arg1, const OpArg &arg2)
+{
+	// We don't regcache RCX/RDX, so the below is safe (and also faster, maybe branch prediction?)
+	ABI_CallFunctionAA(thunks.ProtectFunction(func, 2), arg1, arg2);
+}
+
+void Jit::CallProtectedFunction(void *func, const u32 arg1, const u32 arg2, const u32 arg3)
+{
+	// On x64, we need to save R8, which is caller saved.
+	ABI_CallFunction((void *)thunks.GetSaveRegsFunction());
+	ABI_CallFunctionCCC(func, arg1, arg2, arg3);
+	ABI_CallFunction((void *)thunks.GetLoadRegsFunction());
+}
+
+void Jit::CallProtectedFunction(void *func, const OpArg &arg1, const u32 arg2, const u32 arg3)
+{
+	// On x64, we need to save R8, which is caller saved.
+	ABI_CallFunction((void *)thunks.GetSaveRegsFunction());
+	ABI_CallFunctionACC(func, arg1, arg2, arg3);
+	ABI_CallFunction((void *)thunks.GetLoadRegsFunction());
+}
+
 void Jit::Comp_DoNothing(u32 op) { }
 
 } // namespace
diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h
index b3f2054217..3434dee7a2 100644
--- a/Core/MIPS/x86/Jit.h
+++ b/Core/MIPS/x86/Jit.h
@@ -280,6 +280,11 @@ private:
 	void CompFPTriArith(u32 op, void (XEmitter::*arith)(X64Reg reg, OpArg), bool orderMatters);
 	void CompFPComp(int lhs, int rhs, u8 compare, bool allowNaN = false);
 
+	void CallProtectedFunction(void *func, const OpArg &arg1);
+	void CallProtectedFunction(void *func, const OpArg &arg1, const OpArg &arg2);
+	void CallProtectedFunction(void *func, const u32 arg1, const u32 arg2, const u32 arg3);
+	void CallProtectedFunction(void *func, const OpArg &arg1, const u32 arg2, const u32 arg3);
+
 	JitBlockCache blocks;
 	JitOptions jo;
 	JitState js;

From 2d15eb2acd09980a8e9ae689e72cdd2d5c2a9872 Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets"
Date: Sat, 6 Jul 2013 01:21:52 -0700
Subject: [PATCH 3/4] Re-enable lwl/lwr/swl/swr on the x86 jit.

Now correctly handling ECX on x64.
---
 Core/MIPS/x86/CompLoadStore.cpp | 92 ++++++++++++++++++++-------------
 Core/MIPS/x86/Jit.h             |  2 +-
 Core/MIPS/x86/RegCache.cpp      |  9 ++--
 3 files changed, 61 insertions(+), 42 deletions(-)

diff --git a/Core/MIPS/x86/CompLoadStore.cpp b/Core/MIPS/x86/CompLoadStore.cpp
index 413d50407c..2ff24b8279 100644
--- a/Core/MIPS/x86/CompLoadStore.cpp
+++ b/Core/MIPS/x86/CompLoadStore.cpp
@@ -104,25 +104,28 @@ namespace MIPSComp
 	void Jit::CompITypeMemUnpairedLR(u32 op, bool isStore)
 	{
-		// TODO: ECX getting overwritten? Why?
-		DISABLE;
-
 		CONDITIONAL_DISABLE;
 		int o = op>>26;
 		int offset = (signed short)(op&0xFFFF);
 		int rt = _RT;
 		int rs = _RS;
 
+		X64Reg shiftReg = ECX;
 		gpr.FlushLockX(ECX, EDX);
+#ifdef _M_X64
+		// On x64, we need ECX for CL, but it's also the first arg and gets lost. Annoying.
+		gpr.FlushLockX(R9);
+		shiftReg = R9;
+#endif
+
 		gpr.Lock(rt);
 		gpr.BindToRegister(rt, true, !isStore);
 
 		// Grab the offset from alignment for shifting (<< 3 for bytes -> bits.)
-		MOV(32, R(ECX), gpr.R(rs));
-		ADD(32, R(ECX), Imm32(offset));
-		AND(32, R(ECX), Imm32(3));
-		SHL(32, R(ECX), Imm8(3));
+		MOV(32, R(shiftReg), gpr.R(rs));
+		ADD(32, R(shiftReg), Imm32(offset));
+		AND(32, R(shiftReg), Imm32(3));
+		SHL(32, R(shiftReg), Imm8(3));
 
 		{
 			JitSafeMem safe(this, rs, offset, ~3);
@@ -133,10 +136,10 @@ namespace MIPSComp
 				if (!src.IsSimpleReg(EAX))
 					MOV(32, R(EAX), src);
 
-				CompITypeMemUnpairedLRInner(op);
+				CompITypeMemUnpairedLRInner(op, shiftReg);
 			}
 			if (safe.PrepareSlowRead((void *) &Memory::Read_U32))
-				CompITypeMemUnpairedLRInner(op);
+				CompITypeMemUnpairedLRInner(op, shiftReg);
 			safe.Finish();
 		}
 
@@ -156,37 +159,69 @@ namespace MIPSComp
 		gpr.UnlockAllX();
 	}
 
-	void Jit::CompITypeMemUnpairedLRInner(u32 op)
+	void Jit::CompITypeMemUnpairedLRInner(u32 op, X64Reg shiftReg)
 	{
 		CONDITIONAL_DISABLE;
 		int o = op>>26;
 		int rt = _RT;
 
+		// Make sure we have the shift for the target in ECX.
+		if (shiftReg != ECX)
+			MOV(32, R(ECX), R(shiftReg));
+
+		// Now use that shift (left on target, right on source.)
 		switch (o)
 		{
 		case 34: //lwl
-			// First clear the target bits.
 			MOV(32, R(EDX), Imm32(0x00ffffff));
 			SHR(32, R(EDX), R(CL));
 			AND(32, gpr.R(rt), R(EDX));
+			break;
 
-			// Adjust the shift to the bits we want.
+		case 38: //lwr
+			SHR(32, R(EAX), R(CL));
+			break;
+
+		case 42: //swl
+			MOV(32, R(EDX), Imm32(0xffffff00));
+			SHL(32, R(EDX), R(CL));
+			AND(32, R(EAX), R(EDX));
+			break;
+
+		case 46: //swr
+			MOV(32, R(EDX), gpr.R(rt));
+			SHL(32, R(EDX), R(CL));
+			// EDX is already the target value to write, but may be overwritten below. Save it.
+			PUSH(EDX);
+			break;
+
+		default:
+			_dbg_assert_msg_(JIT, 0, "Unsupported left/right load/store instruction.");
+		}
+
+		// Flip ECX around from 3 bytes / 24 bits.
+		if (shiftReg == ECX)
+		{
 			MOV(32, R(EDX), Imm32(24));
 			SUB(32, R(EDX), R(ECX));
 			MOV(32, R(ECX), R(EDX));
+		}
+		else
+		{
+			MOV(32, R(ECX), Imm32(24));
+			SUB(32, R(ECX), R(shiftReg));
+		}
+
+		// Use the flipped shift (left on source, right on target) and write target.
+		switch (o)
+		{
+		case 34: //lwl
 			SHL(32, R(EAX), R(CL));
 			OR(32, gpr.R(rt), R(EAX));
 			break;
 
 		case 38: //lwr
-			// Adjust the shift to the bits we want.
-			SHR(32, R(EAX), R(CL));
-
-			// Clear the target bits we're replacing.
-			MOV(32, R(EDX), Imm32(24));
-			SUB(32, R(EDX), R(ECX));
-			MOV(32, R(ECX), R(EDX));
 			MOV(32, R(EDX), Imm32(0xffffff00));
 			SHL(32, R(EDX), R(CL));
 			AND(32, gpr.R(rt), R(EDX));
@@ -195,15 +230,6 @@ namespace MIPSComp
 			break;
 
 		case 42: //swl
-			// First clear the target memory bits.
-			MOV(32, R(EDX), Imm32(0xffffff00));
-			SHL(32, R(EDX), R(CL));
-			AND(32, R(EAX), R(EDX));
-
-			// Flip the shift, and adjust the shift in a temporary.
-			MOV(32, R(EDX), Imm32(24));
-			SUB(32, R(EDX), R(ECX));
-			MOV(32, R(ECX), R(EDX));
 			MOV(32, R(EDX), gpr.R(rt));
 			SHR(32, R(EDX), R(CL));
 
@@ -211,19 +237,11 @@ namespace MIPSComp
 			break;
 
 		case 46: //swr
-			// Adjust the shift to the bits we want.
-			MOV(32, R(EDX), gpr.R(rt));
-			SHL(32, R(EDX), R(CL));
-			PUSH(EDX);
-
-			// Clear the target bits we're replacing.
-			MOV(32, R(EDX), Imm32(24));
-			SUB(32, R(EDX), R(ECX));
-			MOV(32, R(ECX), R(EDX));
 			MOV(32, R(EDX), Imm32(0x00ffffff));
 			SHR(32, R(EDX), R(CL));
 			AND(32, R(EAX), R(EDX));
 
+			// This is the target value we saved earlier.
 			POP(EDX);
 			OR(32, R(EDX), R(EAX));
 			break;
diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h
index 3434dee7a2..c999165a02 100644
--- a/Core/MIPS/x86/Jit.h
+++ b/Core/MIPS/x86/Jit.h
@@ -275,7 +275,7 @@ private:
 	void CompITypeMemRead(u32 op, u32 bits, void (XEmitter::*mov)(int, int, X64Reg, OpArg), void *safeFunc);
 	void CompITypeMemWrite(u32 op, u32 bits, void *safeFunc);
 	void CompITypeMemUnpairedLR(u32 op, bool isStore);
-	void CompITypeMemUnpairedLRInner(u32 op);
+	void CompITypeMemUnpairedLRInner(u32 op, X64Reg shiftReg);
 	void CompFPTriArith(u32 op, void (XEmitter::*arith)(X64Reg reg, OpArg), bool orderMatters);
 	void CompFPComp(int lhs, int rhs, u8 compare, bool allowNaN = false);
 
diff --git a/Core/MIPS/x86/RegCache.cpp b/Core/MIPS/x86/RegCache.cpp
index 7c36b0f37e..d1547c77b8 100644
--- a/Core/MIPS/x86/RegCache.cpp
+++ b/Core/MIPS/x86/RegCache.cpp
@@ -26,15 +26,16 @@ using namespace Gen;
 
 static const int allocationOrder[] =
 {
-	// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into this.
+	// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into this.
+	// On x64, RCX and RDX are the first args. CallProtectedFunction() assumes they're not regcached.
 #ifdef _M_X64
 #ifdef _WIN32
-	RSI, RDI, R13, R14, R8, R9, R10, R11, R12, //, RCX
+	RSI, RDI, R13, R14, R8, R9, R10, R11, R12,
 #else
-	RBP, R13, R14, R8, R9, R10, R11, R12, //, RCX
+	RBP, R13, R14, R8, R9, R10, R11, R12,
 #endif
 #elif _M_IX86
-	ESI, EDI, EBP, EDX, ECX, // Let's try to free up EBX as well.
+	ESI, EDI, EBP, EDX, ECX, // Let's try to free up EBX as well.
 #endif
 };

From 2b4344f61d856a181647dca40b521b850c23ccb7 Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets"
Date: Sat, 6 Jul 2013 03:30:21 -0700
Subject: [PATCH 4/4] Don't rewind the PC on memcheck w/ CORE_NEXTFRAME.

If the memcheck doesn't hit, we would still rewind the PC, causing
weirdness. This is likely to happen if you memcheck an address that is
hit first thing in a vblank interrupt handler, for example.
---
 Core/MIPS/x86/Jit.cpp | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp
index 7f05b3d504..78b34986bf 100644
--- a/Core/MIPS/x86/Jit.cpp
+++ b/Core/MIPS/x86/Jit.cpp
@@ -292,14 +292,17 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
 	{
 		// TODO: Save/restore?
 		FlushAll();
 
-		CMP(32, M((void*)&coreState), Imm32(0));
-		FixupBranch skipCheck = J_CC(CC_E);
+		CMP(32, M((void*)&coreState), Imm32(CORE_RUNNING));
+		FixupBranch skipCheck1 = J_CC(CC_E);
+		CMP(32, M((void*)&coreState), Imm32(CORE_NEXTFRAME));
+		FixupBranch skipCheck2 = J_CC(CC_E);
 		if (js.afterOp & JitState::AFTER_REWIND_PC_BAD_STATE)
 			MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
 		else
 			MOV(32, M(&mips_->pc), Imm32(js.compilerPC + 4));
 		WriteSyscallExit();
-		SetJumpTarget(skipCheck);
+		SetJumpTarget(skipCheck1);
+		SetJumpTarget(skipCheck2);
 
 		js.afterOp = JitState::AFTER_NONE;
 	}
@@ -355,11 +358,14 @@ void Jit::WriteExit(u32 destination, int exit_num)
 	// If we need to verify coreState and rewind, we may not jump yet.
 	if (js.afterOp & (JitState::AFTER_CORE_STATE | JitState::AFTER_REWIND_PC_BAD_STATE))
 	{
-		CMP(32, M((void*)&coreState), Imm32(0));
-		FixupBranch skipCheck = J_CC(CC_E);
+		CMP(32, M((void*)&coreState), Imm32(CORE_RUNNING));
+		FixupBranch skipCheck1 = J_CC(CC_E);
+		CMP(32, M((void*)&coreState), Imm32(CORE_NEXTFRAME));
+		FixupBranch skipCheck2 = J_CC(CC_E);
 		MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
 		WriteSyscallExit();
-		SetJumpTarget(skipCheck);
+		SetJumpTarget(skipCheck1);
+		SetJumpTarget(skipCheck2);
 
 		js.afterOp = JitState::AFTER_NONE;
 	}
@@ -392,11 +398,14 @@ void Jit::WriteExitDestInEAX()
 	// If we need to verify coreState and rewind, we may not jump yet.
 	if (js.afterOp & (JitState::AFTER_CORE_STATE | JitState::AFTER_REWIND_PC_BAD_STATE))
 	{
-		CMP(32, M((void*)&coreState), Imm32(0));
-		FixupBranch skipCheck = J_CC(CC_E);
+		CMP(32, M((void*)&coreState), Imm32(CORE_RUNNING));
+		FixupBranch skipCheck1 = J_CC(CC_E);
+		CMP(32, M((void*)&coreState), Imm32(CORE_NEXTFRAME));
+		FixupBranch skipCheck2 = J_CC(CC_E);
 		MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
 		WriteSyscallExit();
-		SetJumpTarget(skipCheck);
+		SetJumpTarget(skipCheck1);
+		SetJumpTarget(skipCheck2);
 
 		js.afterOp = JitState::AFTER_NONE;
 	}
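
Editor's note on PATCH 3: the lwl/lwr merge that the jit emits (the 0x00ffffff / 0xffffff00 masks and the "flip ECX around from 24 bits" step) can be checked against a small standalone C++ model. The sketch below is illustrative only, not PPSSPP code: EmulateLWL/EmulateLWR are hypothetical names, and it assumes the PSP's little-endian byte order. It folds the two switch statements of the patch (pre-flip and post-flip shifts) into single expressions.

// check_lwlr.cpp -- standalone model of the lwl/lwr merge logic.
#include <cinttypes>
#include <cstdint>
#include <cstdio>

// lwl (little-endian): merge the low (addr & 3) + 1 bytes of the aligned
// word into the high bytes of rt. The 0x00ffffff >> shift mask and the
// 24 - shift flip match the constants the jit emits.
static uint32_t EmulateLWL(uint32_t rt, uint32_t mem, uint32_t addr) {
	uint32_t shift = (addr & 3) * 8;          // byte offset -> bit shift
	uint32_t keepMask = 0x00ffffff >> shift;  // bits of rt to preserve
	return (rt & keepMask) | (mem << (24 - shift));
}

// lwr (little-endian): merge the high 4 - (addr & 3) bytes of the aligned
// word into the low bytes of rt, using the mirrored 0xffffff00 mask.
static uint32_t EmulateLWR(uint32_t rt, uint32_t mem, uint32_t addr) {
	uint32_t shift = (addr & 3) * 8;
	uint32_t keepMask = 0xffffff00u << (24 - shift);
	return (rt & keepMask) | (mem >> shift);
}

int main() {
	// Memory bytes 00 11 22 33 44 55 66 77 as two little-endian words.
	uint32_t mem0 = 0x33221100, mem1 = 0x77665544;
	// Classic unaligned-load pair at address 1: lwr rt, 0(a); lwl rt, 3(a).
	uint32_t rt = 0;
	rt = EmulateLWR(rt, mem0, 1);   // fills low 3 bytes: 33 22 11
	rt = EmulateLWL(rt, mem1, 4);   // fills high byte: 44
	printf("%08" PRIx32 "\n", rt);  // prints 44332211
	return 0;
}

Running this prints 44332211, the unaligned word starting at byte 1, which is what the emitted x86 should compute for the same lwr/lwl pair.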