diff --git a/Common/x64Emitter.h b/Common/x64Emitter.h
index ccd7531a43..e568547c97 100644
--- a/Common/x64Emitter.h
+++ b/Common/x64Emitter.h
@@ -173,7 +173,6 @@ struct OpArg
 	void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const;
 	void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const;
 	void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const;
-	void WriteFloatModRM(XEmitter *emit, FloatOp op);
 	void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits);
 	// This one is public - must be written to
 	u64 offset;  // use RIP-relative as much as possible - 64-bit immediates are not available.
diff --git a/Core/MIPS/ARM/ArmCompVFPUNEON.cpp b/Core/MIPS/ARM/ArmCompVFPUNEON.cpp
index 9db9a2e6ed..fe49e2e509 100644
--- a/Core/MIPS/ARM/ArmCompVFPUNEON.cpp
+++ b/Core/MIPS/ARM/ArmCompVFPUNEON.cpp
@@ -716,8 +716,6 @@ void ArmJit::CompNEON_Mftv(MIPSOpcode op) {
 			gpr.MapReg(rt);
 			STR(gpr.R(rt), CTXREG, offsetof(MIPSState, vfpuCtrl) + 4 * (imm - 128));
 		}
-		//gpr.BindToRegister(rt, true, false);
-		//MOV(32, M(&currentMIPS->vfpuCtrl[imm - 128]), gpr.R(rt));
 		// TODO: Optimization if rt is Imm?
 
 		// Set these BEFORE disable!
diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp
index 87e02f10de..176cfd9a3f 100644
--- a/Core/MIPS/x86/CompVFPU.cpp
+++ b/Core/MIPS/x86/CompVFPU.cpp
@@ -1548,7 +1548,7 @@ void Jit::Comp_Vi2f(MIPSOpcode op) {
 	if (*mult != 1.0f) {
 		if (RipAccessible(mult)) {
-			MOVSS(XMM1, M(mult));
+			MOVSS(XMM1, M(mult)); // rip accessible
 		} else {
 			MOV(PTRBITS, R(TEMPREG), ImmPtr(mult));
 			MOVSS(XMM1, MatR(TEMPREG));
 		}
@@ -1656,14 +1656,14 @@ void Jit::Comp_Vh2f(MIPSOpcode op) {
 	// OK, 16 bits in each word.
 	// Let's go. Deep magic here.
 	MOVAPS(XMM1, R(XMM0));
-	ANDPS(XMM0, M(&mask_nosign[0])); // xmm0 = expmant
+	ANDPS(XMM0, M(&mask_nosign[0])); // xmm0 = expmant. If not RIP accessible, we bailed out above.
 	XORPS(XMM1, R(XMM0)); // xmm1 = justsign = expmant ^ xmm0
 	MOVAPS(tempR, R(XMM0));
-	PCMPGTD(tempR, M(&was_infnan[0])); // xmm2 = b_wasinfnan
+	PCMPGTD(tempR, M(&was_infnan[0])); // xmm2 = b_wasinfnan. If not RIP accessible, we bailed out above.
 	PSLLD(XMM0, 13);
 	MULPS(XMM0, M(magic)); /// xmm0 = scaled
 	PSLLD(XMM1, 16); // xmm1 = sign
-	ANDPS(tempR, M(&exp_infnan[0]));
+	ANDPS(tempR, M(&exp_infnan[0])); // If not RIP accessible, we bailed out above.
 	ORPS(XMM1, R(tempR));
 	ORPS(XMM0, R(XMM1));
 
@@ -1747,9 +1747,9 @@ void Jit::Comp_Vx2i(MIPSOpcode op) {
 		// vuc2i is a bit special. It spreads out the bits like this:
 		// s[0] = 0xDDCCBBAA -> d[0] = (0xAAAAAAAA >> 1), d[1] = (0xBBBBBBBB >> 1), etc.
 		MOVSS(XMM0, fpr.V(sregs[0]));
-		if (cpu_info.bSSSE3) {
+		if (cpu_info.bSSSE3 && RipAccessible(vuc2i_shuffle)) {
 			// Not really different speed. Generates a bit less code.
-			PSHUFB(XMM0, M(&vuc2i_shuffle[0]));
+			PSHUFB(XMM0, M(&vuc2i_shuffle[0])); // rip accessible
 		} else {
 			// First, we change 0xDDCCBBAA to 0xDDDDCCCCBBBBAAAA.
 			PUNPCKLBW(XMM0, R(XMM0));
@@ -1757,7 +1757,7 @@ void Jit::Comp_Vx2i(MIPSOpcode op) {
 			PUNPCKLWD(XMM0, R(XMM0));
 		}
 	} else {
-		if (cpu_info.bSSSE3) {
+		if (cpu_info.bSSSE3 && RipAccessible(vc2i_shuffle)) {
 			MOVSS(XMM0, fpr.V(sregs[0]));
 			PSHUFB(XMM0, M(&vc2i_shuffle[0]));
 		} else {
@@ -3269,7 +3269,7 @@ void Jit::Comp_Vi2x(MIPSOpcode op) {
 		// At this point, everything is aligned in the high bits of our lanes.
 		if (cpu_info.bSSSE3) {
 			if (RipAccessible(vi2xc_shuffle)) {
-				PSHUFB(dst0, bits == 8 ? M(vi2xc_shuffle) : M(vi2xs_shuffle));
+				PSHUFB(dst0, bits == 8 ? M(vi2xc_shuffle) : M(vi2xs_shuffle)); // rip accessible
 			} else {
 				MOV(PTRBITS, R(TEMPREG), bits == 8 ? ImmPtr(vi2xc_shuffle) : ImmPtr(vi2xs_shuffle));
 				PSHUFB(dst0, MatR(TEMPREG));
diff --git a/Core/MIPS/x86/JitSafeMem.cpp b/Core/MIPS/x86/JitSafeMem.cpp
index 1716c5b819..e22dd61da4 100644
--- a/Core/MIPS/x86/JitSafeMem.cpp
+++ b/Core/MIPS/x86/JitSafeMem.cpp
@@ -91,7 +91,7 @@ bool JitSafeMem::PrepareWrite(OpArg &dest, int size)
 #endif
 
 #if PPSSPP_ARCH(32BIT)
-		dest = M(Memory::base + addr);
+		dest = M(Memory::base + addr); // 32-bit only
 #else
 		dest = MDisp(MEMBASEREG, addr);
 #endif
@@ -120,7 +120,7 @@ bool JitSafeMem::PrepareRead(OpArg &src, int size)
 #endif
 
 #if PPSSPP_ARCH(32BIT)
-		src = M(Memory::base + addr);
+		src = M(Memory::base + addr); // 32-bit only
 #else
 		src = MDisp(MEMBASEREG, addr);
 #endif
@@ -144,7 +144,7 @@ OpArg JitSafeMem::NextFastAddress(int suboffset)
 #endif
 
 #if PPSSPP_ARCH(32BIT)
-	return M(Memory::base + addr);
+	return M(Memory::base + addr); // 32-bit only
 #else
 	return MDisp(MEMBASEREG, addr);
 #endif
diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp
index 82b4c7e002..d0ad572b7b 100644
--- a/GPU/Common/VertexDecoderX86.cpp
+++ b/GPU/Common/VertexDecoderX86.cpp
@@ -932,7 +932,7 @@ void VertexDecoderJitCache::Jit_Color4444() {
 	// Spread to RGBA -> R00GB00A.
 	PUNPCKLBW(fpScratchReg, R(fpScratchReg));
 	if (RipAccessible(&color4444mask[0])) {
-		PAND(fpScratchReg, M(&color4444mask[0]));
+		PAND(fpScratchReg, M(&color4444mask[0])); // rip accessible
 	} else {
 		MOV(PTRBITS, R(tempReg1), ImmPtr(&color4444mask));
 		PAND(fpScratchReg, MatR(tempReg1));
diff --git a/GPU/Software/SamplerX86.cpp b/GPU/Software/SamplerX86.cpp
index fb84580082..df8e89c8b3 100644
--- a/GPU/Software/SamplerX86.cpp
+++ b/GPU/Software/SamplerX86.cpp
@@ -210,7 +210,11 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
 	MOVD_xmm(fpScratchReg5, MDisp(RSP, 24));
 	CVTDQ2PS(fpScratchReg5, R(fpScratchReg5));
 	SHUFPS(fpScratchReg5, R(fpScratchReg5), _MM_SHUFFLE(0, 0, 0, 0));
-	MULPS(fpScratchReg5, M(by256));
+	if (RipAccessible(by256)) {
+		MULPS(fpScratchReg5, M(by256)); // rip accessible
+	} else {
+		Crash(); // TODO: fall back to loading the pointer through a scratch register.
+	}
 
 	MOVAPS(XMM0, M(ones));
 	SUBPS(XMM0, R(fpScratchReg5));
@@ -551,7 +555,11 @@ static const u32 MEMORY_ALIGNED16(color4444mask[4]) = { 0xf00ff00f, 0xf00ff00f,
 bool SamplerJitCache::Jit_Decode4444() {
 	MOVD_xmm(fpScratchReg1, R(resultReg));
 	PUNPCKLBW(fpScratchReg1, R(fpScratchReg1));
-	PAND(fpScratchReg1, M(color4444mask));
+	if (RipAccessible(color4444mask)) {
+		PAND(fpScratchReg1, M(color4444mask)); // rip accessible
+	} else {
+		Crash(); // TODO: fall back to loading the pointer through a scratch register.
+	}
 	MOVSS(fpScratchReg2, R(fpScratchReg1));
 	MOVSS(fpScratchReg3, R(fpScratchReg1));
 	PSRLW(fpScratchReg2, 4);
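
Reviewer note: the RipAccessible() guards above exist because an x86-64 M() operand encodes as a RIP-relative signed 32-bit displacement, so it can only reach data within roughly +/-2 GB of the generated code. A minimal sketch of the idea follows; the helper name and safety margin are assumptions of this note, not the emitter's actual implementation (the real check is RipAccessible() in the x64 emitter):

	#include <cstddef>
	#include <cstdint>

	// Hypothetical stand-in for the emitter's RipAccessible(). A RIP-relative
	// operand stores a signed 32-bit displacement from the next instruction,
	// so the data must lie within +/-2 GB of the current emit position.
	static bool RipAccessibleSketch(const void *emitPos, const void *data) {
		ptrdiff_t dist = (const uint8_t *)data - (const uint8_t *)emitPos;
		// Leave a small margin for the length of the instruction itself.
		return dist >= INT32_MIN + 128 && dist <= INT32_MAX - 128;
	}

When the guard fails, the fallback used throughout this diff materializes the absolute pointer with MOV(PTRBITS, R(TEMPREG), ImmPtr(...)) and addresses through the register with MatR(TEMPREG), at the cost of one extra instruction and a scratch register. The "// 32-bit only" sites in JitSafeMem need no guard because on x86-32 any absolute address fits in a plain disp32.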
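
The "deep magic" in Comp_Vh2f is the standard bit-trick half-to-float expansion. Below is a scalar sketch of what the SSE sequence computes per lane; the constant values here are assumptions and should correspond to the jit's mask_nosign, was_infnan, exp_infnan and magic tables:

	#include <cstdint>
	#include <cstring>

	// Scalar model of the per-lane work in Comp_Vh2f (constants assumed).
	static float HalfToFloatSketch(uint16_t h) {
		const uint32_t mask_nosign = 0x7fff;      // everything but the sign bit
		const uint32_t was_infnan = 0x7bff;       // largest finite half, raw bits
		const uint32_t exp_infnan = 255 << 23;    // float exponent field, all ones
		const uint32_t magic = (254 - 15) << 23;  // 2^112 as float bits: rebias 15 -> 127

		uint32_t expmant = h & mask_nosign;       // ANDPS: xmm0 = expmant
		uint32_t justsign = h ^ expmant;          // XORPS: xmm1 = sign bit only
		uint32_t shifted = expmant << 13;         // PSLLD 13: align into f32 fields

		float f, fmagic;
		std::memcpy(&f, &shifted, sizeof(f));
		std::memcpy(&fmagic, &magic, sizeof(fmagic));
		f *= fmagic;                              // MULPS magic: rebias the exponent

		uint32_t bits;
		std::memcpy(&bits, &f, sizeof(bits));
		if (expmant > was_infnan)                 // PCMPGTD + ANDPS + ORPS
			bits |= exp_infnan;                   // force Inf/NaN exponent back to all ones
		bits |= justsign << 16;                   // PSLLD 16 + ORPS: reattach the sign

		std::memcpy(&f, &bits, sizeof(f));
		return f;
	}

The multiply by magic does double duty: it rebiases the exponent and converts half denormals to the correct float values, so only Inf/NaN need the explicit patch-up.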