mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
More RIP elimination
This commit is contained in:
parent
7c1ae5b3e6
commit
7c3b37c561
8 changed files with 97 additions and 65 deletions
|
@ -1073,7 +1073,7 @@ public:
|
||||||
class XCodeBlock : public CodeBlock<XEmitter> {
|
class XCodeBlock : public CodeBlock<XEmitter> {
|
||||||
public:
|
public:
|
||||||
void PoisonMemory(int offset) override;
|
void PoisonMemory(int offset) override;
|
||||||
bool RipAccessible(void *ptr) const {
|
bool RipAccessible(const void *ptr) const {
|
||||||
#ifdef _M_IX86
|
#ifdef _M_IX86
|
||||||
return true;
|
return true;
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -232,6 +232,8 @@ public:
|
||||||
u32 intBranchExit;
|
u32 intBranchExit;
|
||||||
u32 jitBranchExit;
|
u32 jitBranchExit;
|
||||||
|
|
||||||
|
u32 savedPC;
|
||||||
|
|
||||||
static const u32 FCR0_VALUE = 0x00003351;
|
static const u32 FCR0_VALUE = 0x00003351;
|
||||||
|
|
||||||
#if defined(PPSSPP_ARCH_X86) || defined(PPSSPP_ARCH_AMD64)
|
#if defined(PPSSPP_ARCH_X86) || defined(PPSSPP_ARCH_AMD64)
|
||||||
|
|
|
@ -114,15 +114,10 @@ void Jit::GenerateFixedCode(JitOptions &jo) {
|
||||||
// This is the most common situation.
|
// This is the most common situation.
|
||||||
TEST(32, MIPSSTATE_VAR(fcr31), Imm32(0x01000003));
|
TEST(32, MIPSSTATE_VAR(fcr31), Imm32(0x01000003));
|
||||||
FixupBranch skip = J_CC(CC_Z);
|
FixupBranch skip = J_CC(CC_Z);
|
||||||
#ifdef _M_X64
|
|
||||||
// TODO: Move the hasSetRounding flag somewhere we can reach it through the context pointer, or something.
|
// TODO: Move the hasSetRounding flag somewhere we can reach it through the context pointer, or something.
|
||||||
MOV(64, R(RAX), Imm64((uintptr_t)&js.hasSetRounding));
|
MOV(PTRBITS, R(RAX), ImmPtr(&js.hasSetRounding));
|
||||||
MOV(8, MatR(RAX), Imm8(1));
|
MOV(8, MatR(RAX), Imm8(1));
|
||||||
#else
|
|
||||||
MOV(8, M(&js.hasSetRounding), Imm8(1));
|
|
||||||
#endif
|
|
||||||
SetJumpTarget(skip);
|
SetJumpTarget(skip);
|
||||||
|
|
||||||
RET();
|
RET();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -153,7 +148,12 @@ void Jit::GenerateFixedCode(JitOptions &jo) {
|
||||||
FixupBranch bailCoreState = J_CC(CC_S, true);
|
FixupBranch bailCoreState = J_CC(CC_S, true);
|
||||||
|
|
||||||
SetJumpTarget(skipToCoreStateCheck);
|
SetJumpTarget(skipToCoreStateCheck);
|
||||||
CMP(32, M(&coreState), Imm32(0));
|
if (RipAccessible((const void *)&coreState)) {
|
||||||
|
CMP(32, M(&coreState), Imm32(0));
|
||||||
|
} else {
|
||||||
|
MOV(PTRBITS, R(RAX), ImmPtr((const void *)&coreState));
|
||||||
|
CMP(32, MatR(RAX), Imm32(0));
|
||||||
|
}
|
||||||
FixupBranch badCoreState = J_CC(CC_NZ, true);
|
FixupBranch badCoreState = J_CC(CC_NZ, true);
|
||||||
FixupBranch skipToRealDispatch2 = J(); //skip the sync and compare first time
|
FixupBranch skipToRealDispatch2 = J(); //skip the sync and compare first time
|
||||||
|
|
||||||
|
@ -210,7 +210,12 @@ void Jit::GenerateFixedCode(JitOptions &jo) {
|
||||||
SetJumpTarget(bail);
|
SetJumpTarget(bail);
|
||||||
SetJumpTarget(bailCoreState);
|
SetJumpTarget(bailCoreState);
|
||||||
|
|
||||||
CMP(32, M(&coreState), Imm32(0));
|
if (RipAccessible((const void *)&coreState)) {
|
||||||
|
CMP(32, M(&coreState), Imm32(0));
|
||||||
|
} else {
|
||||||
|
MOV(PTRBITS, R(RAX), ImmPtr((const void *)&coreState));
|
||||||
|
CMP(32, MatR(RAX), Imm32(0));
|
||||||
|
}
|
||||||
J_CC(CC_Z, outerLoop, true);
|
J_CC(CC_Z, outerLoop, true);
|
||||||
|
|
||||||
SetJumpTarget(badCoreState);
|
SetJumpTarget(badCoreState);
|
||||||
|
|
|
@ -658,8 +658,6 @@ void Jit::Comp_Jump(MIPSOpcode op) {
|
||||||
js.compiling = false;
|
js.compiling = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static u32 savedPC;
|
|
||||||
|
|
||||||
void Jit::Comp_JumpReg(MIPSOpcode op)
|
void Jit::Comp_JumpReg(MIPSOpcode op)
|
||||||
{
|
{
|
||||||
CONDITIONAL_LOG;
|
CONDITIONAL_LOG;
|
||||||
|
@ -725,21 +723,18 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
|
||||||
MOV(32, R(EAX), gpr.R(rs));
|
MOV(32, R(EAX), gpr.R(rs));
|
||||||
}
|
}
|
||||||
FlushAll();
|
FlushAll();
|
||||||
}
|
} else {
|
||||||
else
|
|
||||||
{
|
|
||||||
// Latch destination now - save it in memory.
|
// Latch destination now - save it in memory.
|
||||||
gpr.MapReg(rs, true, false);
|
gpr.MapReg(rs, true, false);
|
||||||
MOV(32, M(&savedPC), gpr.R(rs));
|
MOV(32, MIPSSTATE_VAR(savedPC), gpr.R(rs));
|
||||||
if (andLink)
|
if (andLink)
|
||||||
gpr.SetImm(rd, GetCompilerPC() + 8);
|
gpr.SetImm(rd, GetCompilerPC() + 8);
|
||||||
CompileDelaySlot(DELAYSLOT_NICE);
|
CompileDelaySlot(DELAYSLOT_NICE);
|
||||||
MOV(32, R(EAX), M(&savedPC));
|
MOV(32, R(EAX), MIPSSTATE_VAR(savedPC));
|
||||||
FlushAll();
|
FlushAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (op & 0x3f)
|
switch (op & 0x3f) {
|
||||||
{
|
|
||||||
case 8: //jr
|
case 8: //jr
|
||||||
break;
|
break;
|
||||||
case 9: //jalr
|
case 9: //jalr
|
||||||
|
|
|
@ -98,8 +98,6 @@ void Jit::Comp_FPU3op(MIPSOpcode op) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static u32 MEMORY_ALIGNED16(ssLoadStoreTemp);
|
|
||||||
|
|
||||||
void Jit::Comp_FPULS(MIPSOpcode op) {
|
void Jit::Comp_FPULS(MIPSOpcode op) {
|
||||||
CONDITIONAL_DISABLE;
|
CONDITIONAL_DISABLE;
|
||||||
s32 offset = _IMM16;
|
s32 offset = _IMM16;
|
||||||
|
@ -137,8 +135,8 @@ void Jit::Comp_FPULS(MIPSOpcode op) {
|
||||||
MOVSS(dest, fpr.RX(ft));
|
MOVSS(dest, fpr.RX(ft));
|
||||||
if (safe.PrepareSlowWrite())
|
if (safe.PrepareSlowWrite())
|
||||||
{
|
{
|
||||||
MOVSS(M(&ssLoadStoreTemp), fpr.RX(ft));
|
MOVSS(MIPSSTATE_VAR(temp), fpr.RX(ft));
|
||||||
safe.DoSlowWrite(safeMemFuncs.writeU32, M(&ssLoadStoreTemp));
|
safe.DoSlowWrite(safeMemFuncs.writeU32, MIPSSTATE_VAR(temp));
|
||||||
}
|
}
|
||||||
safe.Finish();
|
safe.Finish();
|
||||||
|
|
||||||
|
|
|
@ -32,7 +32,8 @@ int Jit::Replace_fabsf() {
|
||||||
fpr.SpillLock(0, 12);
|
fpr.SpillLock(0, 12);
|
||||||
fpr.MapReg(0, false, true);
|
fpr.MapReg(0, false, true);
|
||||||
MOVSS(fpr.RX(0), fpr.R(12));
|
MOVSS(fpr.RX(0), fpr.R(12));
|
||||||
ANDPS(fpr.RX(0), M(&ssNoSignMask));
|
MOV(PTRBITS, R(RAX), ImmPtr(&ssNoSignMask));
|
||||||
|
ANDPS(fpr.RX(0), MatR(RAX));
|
||||||
fpr.ReleaseSpillLocks();
|
fpr.ReleaseSpillLocks();
|
||||||
return 4; // Number of instructions in the MIPS function
|
return 4; // Number of instructions in the MIPS function
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,6 +15,9 @@
|
||||||
// Official git repository and contact information can be found at
|
// Official git repository and contact information can be found at
|
||||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||||
|
|
||||||
|
// Table 13.10 in http://agner.org/optimize/optimizing_assembly.pdf is cool - generate constants with
|
||||||
|
// short instruction sequences. Surprisingly many are possible.
|
||||||
|
|
||||||
#include "ppsspp_config.h"
|
#include "ppsspp_config.h"
|
||||||
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
|
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
|
||||||
|
|
||||||
|
@ -66,9 +69,6 @@ const u32 MEMORY_ALIGNED16( noSignMask[4] ) = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFF
|
||||||
const u32 MEMORY_ALIGNED16( signBitAll[4] ) = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
|
const u32 MEMORY_ALIGNED16( signBitAll[4] ) = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
|
||||||
const u32 MEMORY_ALIGNED16( signBitLower[4] ) = {0x80000000, 0, 0, 0};
|
const u32 MEMORY_ALIGNED16( signBitLower[4] ) = {0x80000000, 0, 0, 0};
|
||||||
const float MEMORY_ALIGNED16( oneOneOneOne[4] ) = {1.0f, 1.0f, 1.0f, 1.0f};
|
const float MEMORY_ALIGNED16( oneOneOneOne[4] ) = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||||
const u32 MEMORY_ALIGNED16( solidOnes[4] ) = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
|
|
||||||
const u32 MEMORY_ALIGNED16( lowOnes[4] ) = {0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000};
|
|
||||||
const u32 MEMORY_ALIGNED16( lowZeroes[4] ) = {0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
|
|
||||||
const u32 MEMORY_ALIGNED16( fourinfnan[4] ) = {0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};
|
const u32 MEMORY_ALIGNED16( fourinfnan[4] ) = {0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};
|
||||||
const float MEMORY_ALIGNED16( identityMatrix[4][4]) = { { 1.0f, 0, 0, 0 }, { 0, 1.0f, 0, 0 }, { 0, 0, 1.0f, 0 }, { 0, 0, 0, 1.0f} };
|
const float MEMORY_ALIGNED16( identityMatrix[4][4]) = { { 1.0f, 0, 0, 0 }, { 0, 1.0f, 0, 0 }, { 0, 0, 1.0f, 0 }, { 0, 0, 0, 1.0f} };
|
||||||
|
|
||||||
|
@ -176,13 +176,15 @@ void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
|
||||||
ANDNPS(XMM0, fpr.V(vregs[i]));
|
ANDNPS(XMM0, fpr.V(vregs[i]));
|
||||||
|
|
||||||
// Retain a NAN in XMM0 (must be second operand.)
|
// Retain a NAN in XMM0 (must be second operand.)
|
||||||
MOVSS(fpr.VX(vregs[i]), M(&one));
|
MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
|
||||||
|
MOVSS(fpr.VX(vregs[i]), MatR(TEMPREG));
|
||||||
MINSS(fpr.VX(vregs[i]), R(XMM0));
|
MINSS(fpr.VX(vregs[i]), R(XMM0));
|
||||||
} else if (sat == 3) {
|
} else if (sat == 3) {
|
||||||
fpr.MapRegV(vregs[i], MAP_DIRTY);
|
fpr.MapRegV(vregs[i], MAP_DIRTY);
|
||||||
|
|
||||||
// Check for < -1.0f, but careful of NANs.
|
// Check for < -1.0f, but careful of NANs.
|
||||||
MOVSS(XMM1, M(&minus_one));
|
MOV(PTRBITS, R(TEMPREG), ImmPtr(&minus_one));
|
||||||
|
MOVSS(XMM1, MatR(TEMPREG));
|
||||||
MOVSS(R(XMM0), fpr.VX(vregs[i]));
|
MOVSS(R(XMM0), fpr.VX(vregs[i]));
|
||||||
CMPLESS(XMM0, R(XMM1));
|
CMPLESS(XMM0, R(XMM1));
|
||||||
// If it was NOT less, the three ops below do nothing.
|
// If it was NOT less, the three ops below do nothing.
|
||||||
|
@ -192,7 +194,8 @@ void Jit::ApplyPrefixD(const u8 *vregs, VectorSize sz) {
|
||||||
ORPS(XMM0, R(XMM1));
|
ORPS(XMM0, R(XMM1));
|
||||||
|
|
||||||
// Retain a NAN in XMM0 (must be second operand.)
|
// Retain a NAN in XMM0 (must be second operand.)
|
||||||
MOVSS(fpr.VX(vregs[i]), M(&one));
|
MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
|
||||||
|
MOVSS(fpr.VX(vregs[i]), MatR(TEMPREG));
|
||||||
MINSS(fpr.VX(vregs[i]), R(XMM0));
|
MINSS(fpr.VX(vregs[i]), R(XMM0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -236,12 +239,10 @@ void Jit::Comp_SV(MIPSOpcode op) {
|
||||||
JitSafeMem safe(this, rs, imm);
|
JitSafeMem safe(this, rs, imm);
|
||||||
safe.SetFar();
|
safe.SetFar();
|
||||||
OpArg src;
|
OpArg src;
|
||||||
if (safe.PrepareRead(src, 4))
|
if (safe.PrepareRead(src, 4)) {
|
||||||
{
|
|
||||||
MOVSS(fpr.VX(vt), safe.NextFastAddress(0));
|
MOVSS(fpr.VX(vt), safe.NextFastAddress(0));
|
||||||
}
|
}
|
||||||
if (safe.PrepareSlowRead(safeMemFuncs.readU32))
|
if (safe.PrepareSlowRead(safeMemFuncs.readU32)) {
|
||||||
{
|
|
||||||
MOVD_xmm(fpr.VX(vt), R(EAX));
|
MOVD_xmm(fpr.VX(vt), R(EAX));
|
||||||
}
|
}
|
||||||
safe.Finish();
|
safe.Finish();
|
||||||
|
@ -260,14 +261,12 @@ void Jit::Comp_SV(MIPSOpcode op) {
|
||||||
JitSafeMem safe(this, rs, imm);
|
JitSafeMem safe(this, rs, imm);
|
||||||
safe.SetFar();
|
safe.SetFar();
|
||||||
OpArg dest;
|
OpArg dest;
|
||||||
if (safe.PrepareWrite(dest, 4))
|
if (safe.PrepareWrite(dest, 4)) {
|
||||||
{
|
|
||||||
MOVSS(safe.NextFastAddress(0), fpr.VX(vt));
|
MOVSS(safe.NextFastAddress(0), fpr.VX(vt));
|
||||||
}
|
}
|
||||||
if (safe.PrepareSlowWrite())
|
if (safe.PrepareSlowWrite()) {
|
||||||
{
|
MOVSS(MIPSSTATE_VAR(temp), fpr.VX(vt));
|
||||||
MOVSS(M(&ssLoadStoreTemp), fpr.VX(vt));
|
safe.DoSlowWrite(safeMemFuncs.writeU32, MIPSSTATE_VAR(temp), 0);
|
||||||
safe.DoSlowWrite(safeMemFuncs.writeU32, M(&ssLoadStoreTemp), 0);
|
|
||||||
}
|
}
|
||||||
safe.Finish();
|
safe.Finish();
|
||||||
|
|
||||||
|
@ -453,9 +452,9 @@ void Jit::Comp_SVQ(MIPSOpcode op) {
|
||||||
if (safe.PrepareSlowWrite()) {
|
if (safe.PrepareSlowWrite()) {
|
||||||
MOVAPS(XMM0, fpr.VS(vregs));
|
MOVAPS(XMM0, fpr.VS(vregs));
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
MOVSS(M(&ssLoadStoreTemp), XMM0);
|
MOVSS(MIPSSTATE_VAR(temp), XMM0);
|
||||||
SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 3, 2, 1));
|
SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 3, 2, 1));
|
||||||
safe.DoSlowWrite(safeMemFuncs.writeU32, M(&ssLoadStoreTemp), i * 4);
|
safe.DoSlowWrite(safeMemFuncs.writeU32, MIPSSTATE_VAR(temp), i * 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
safe.Finish();
|
safe.Finish();
|
||||||
|
@ -476,8 +475,8 @@ void Jit::Comp_SVQ(MIPSOpcode op) {
|
||||||
}
|
}
|
||||||
if (safe.PrepareSlowWrite()) {
|
if (safe.PrepareSlowWrite()) {
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
MOVSS(M(&ssLoadStoreTemp), fpr.VX(vregs[i]));
|
MOVSS(MIPSSTATE_VAR(temp), fpr.VX(vregs[i]));
|
||||||
safe.DoSlowWrite(safeMemFuncs.writeU32, M(&ssLoadStoreTemp), i * 4);
|
safe.DoSlowWrite(safeMemFuncs.writeU32, MIPSSTATE_VAR(temp), i * 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
safe.Finish();
|
safe.Finish();
|
||||||
|
@ -508,7 +507,12 @@ void Jit::Comp_VVectorInit(MIPSOpcode op) {
|
||||||
if (type == 6) {
|
if (type == 6) {
|
||||||
XORPS(fpr.VSX(dregs), fpr.VS(dregs));
|
XORPS(fpr.VSX(dregs), fpr.VS(dregs));
|
||||||
} else if (type == 7) {
|
} else if (type == 7) {
|
||||||
MOVAPS(fpr.VSX(dregs), M(&oneOneOneOne));
|
if (RipAccessible(&oneOneOneOne)) {
|
||||||
|
MOVAPS(fpr.VSX(dregs), M(&oneOneOneOne));
|
||||||
|
} else {
|
||||||
|
MOV(PTRBITS, R(TEMPREG), ImmPtr(&oneOneOneOne));
|
||||||
|
MOVAPS(fpr.VSX(dregs), MatR(TEMPREG));
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
DISABLE;
|
DISABLE;
|
||||||
}
|
}
|
||||||
|
@ -522,7 +526,12 @@ void Jit::Comp_VVectorInit(MIPSOpcode op) {
|
||||||
XORPS(XMM0, R(XMM0));
|
XORPS(XMM0, R(XMM0));
|
||||||
break;
|
break;
|
||||||
case 7: // v=ones; break; //vone
|
case 7: // v=ones; break; //vone
|
||||||
MOVSS(XMM0, M(&one));
|
if (RipAccessible(&one)) {
|
||||||
|
MOVSS(XMM0, M(&one));
|
||||||
|
} else {
|
||||||
|
MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
|
||||||
|
MOVSS(XMM0, MatR(TEMPREG));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
DISABLE;
|
DISABLE;
|
||||||
|
@ -558,7 +567,12 @@ void Jit::Comp_VIdt(MIPSOpcode op) {
|
||||||
}
|
}
|
||||||
|
|
||||||
XORPS(XMM0, R(XMM0));
|
XORPS(XMM0, R(XMM0));
|
||||||
MOVSS(XMM1, M(&one));
|
if (RipAccessible(&one)) {
|
||||||
|
MOVSS(XMM1, M(&one));
|
||||||
|
} else {
|
||||||
|
MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
|
||||||
|
MOVSS(XMM1, MatR(TEMPREG));
|
||||||
|
}
|
||||||
fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
|
fpr.MapRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
|
||||||
switch (sz) {
|
switch (sz) {
|
||||||
case V_Pair:
|
case V_Pair:
|
||||||
|
@ -1426,13 +1440,16 @@ void Jit::Comp_Vcmp(MIPSOpcode op) {
|
||||||
|
|
||||||
// Finalize the comparison for ES/NS.
|
// Finalize the comparison for ES/NS.
|
||||||
if (cond == VC_ES || cond == VC_NS) {
|
if (cond == VC_ES || cond == VC_NS) {
|
||||||
ANDPS(XMM0, M(&fourinfnan));
|
MOV(PTRBITS, R(TEMPREG), ImmPtr(&fourinfnan));
|
||||||
PCMPEQD(XMM0, M(&fourinfnan)); // Integer comparison
|
ANDPS(XMM0, MatR(TEMPREG));
|
||||||
|
PCMPEQD(XMM0, MatR(TEMPREG)); // Integer comparison
|
||||||
// It's inversed below for NS.
|
// It's inversed below for NS.
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inverse) {
|
if (inverse) {
|
||||||
XORPS(XMM0, M(&solidOnes));
|
// The canonical way to generate a bunch of ones, see https://stackoverflow.com/questions/35085059/what-are-the-best-instruction-sequences-to-generate-vector-constants-on-the-fly
|
||||||
|
PCMPEQW(XMM1, R(XMM1));
|
||||||
|
XORPS(XMM0, R(XMM1));
|
||||||
}
|
}
|
||||||
ANDPS(XMM0, M(vcmpMask[n - 1]));
|
ANDPS(XMM0, M(vcmpMask[n - 1]));
|
||||||
MOVAPS(M(vcmpResult), XMM0);
|
MOVAPS(M(vcmpResult), XMM0);
|
||||||
|
@ -1451,8 +1468,9 @@ void Jit::Comp_Vcmp(MIPSOpcode op) {
|
||||||
} else {
|
} else {
|
||||||
// Finalize the comparison for ES/NS.
|
// Finalize the comparison for ES/NS.
|
||||||
if (cond == VC_ES || cond == VC_NS) {
|
if (cond == VC_ES || cond == VC_NS) {
|
||||||
ANDPS(XMM0, M(&fourinfnan));
|
MOV(PTRBITS, R(TEMPREG), ImmPtr(&fourinfnan));
|
||||||
PCMPEQD(XMM0, M(&fourinfnan)); // Integer comparison
|
ANDPS(XMM0, MatR(TEMPREG));
|
||||||
|
PCMPEQD(XMM0, MatR(TEMPREG)); // Integer comparison
|
||||||
// It's inversed below for NS.
|
// It's inversed below for NS.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1983,26 +2001,22 @@ void Jit::Comp_Vocp(MIPSOpcode op) {
|
||||||
fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
|
fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
|
||||||
|
|
||||||
X64Reg tempxregs[4];
|
X64Reg tempxregs[4];
|
||||||
for (int i = 0; i < n; ++i)
|
for (int i = 0; i < n; ++i) {
|
||||||
{
|
if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs)) {
|
||||||
if (!IsOverlapSafeAllowS(dregs[i], i, n, sregs))
|
|
||||||
{
|
|
||||||
int reg = fpr.GetTempV();
|
int reg = fpr.GetTempV();
|
||||||
fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
|
fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
|
||||||
fpr.SpillLockV(reg);
|
fpr.SpillLockV(reg);
|
||||||
tempxregs[i] = fpr.VX(reg);
|
tempxregs[i] = fpr.VX(reg);
|
||||||
}
|
} else {
|
||||||
else
|
|
||||||
{
|
|
||||||
fpr.MapRegV(dregs[i], dregs[i] == sregs[i] ? MAP_DIRTY : MAP_NOINIT);
|
fpr.MapRegV(dregs[i], dregs[i] == sregs[i] ? MAP_DIRTY : MAP_NOINIT);
|
||||||
fpr.SpillLockV(dregs[i]);
|
fpr.SpillLockV(dregs[i]);
|
||||||
tempxregs[i] = fpr.VX(dregs[i]);
|
tempxregs[i] = fpr.VX(dregs[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MOVSS(XMM1, M(&one));
|
MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
|
||||||
for (int i = 0; i < n; ++i)
|
MOVSS(XMM1, MatR(TEMPREG));
|
||||||
{
|
for (int i = 0; i < n; ++i) {
|
||||||
MOVSS(XMM0, R(XMM1));
|
MOVSS(XMM0, R(XMM1));
|
||||||
SUBSS(XMM0, fpr.V(sregs[i]));
|
SUBSS(XMM0, fpr.V(sregs[i]));
|
||||||
MOVSS(tempxregs[i], R(XMM0));
|
MOVSS(tempxregs[i], R(XMM0));
|
||||||
|
@ -2274,13 +2288,23 @@ void Jit::Comp_VV2Op(MIPSOpcode op) {
|
||||||
MINSS(tempxregs[i], R(XMM0));
|
MINSS(tempxregs[i], R(XMM0));
|
||||||
break;
|
break;
|
||||||
case 16: // d[i] = 1.0f / s[i]; break; //vrcp
|
case 16: // d[i] = 1.0f / s[i]; break; //vrcp
|
||||||
MOVSS(XMM0, M(&one));
|
if (RipAccessible(&one)) {
|
||||||
|
MOVSS(XMM0, M(&one));
|
||||||
|
} else {
|
||||||
|
MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
|
||||||
|
MOVSS(XMM0, MatR(TEMPREG));
|
||||||
|
}
|
||||||
DIVSS(XMM0, fpr.V(sregs[i]));
|
DIVSS(XMM0, fpr.V(sregs[i]));
|
||||||
MOVSS(tempxregs[i], R(XMM0));
|
MOVSS(tempxregs[i], R(XMM0));
|
||||||
break;
|
break;
|
||||||
case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
|
case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
|
||||||
SQRTSS(XMM0, fpr.V(sregs[i]));
|
SQRTSS(XMM0, fpr.V(sregs[i]));
|
||||||
MOVSS(tempxregs[i], M(&one));
|
if (RipAccessible(&one)) {
|
||||||
|
MOVSS(tempxregs[i], M(&one));
|
||||||
|
} else {
|
||||||
|
MOV(PTRBITS, R(TEMPREG), ImmPtr(&one));
|
||||||
|
MOVSS(tempxregs[i], MatR(TEMPREG));
|
||||||
|
}
|
||||||
DIVSS(tempxregs[i], R(XMM0));
|
DIVSS(tempxregs[i], R(XMM0));
|
||||||
break;
|
break;
|
||||||
case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
|
case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
|
||||||
|
@ -2306,7 +2330,9 @@ void Jit::Comp_VV2Op(MIPSOpcode op) {
|
||||||
MOVSS(tempxregs[i], M(&sincostemp[0]));
|
MOVSS(tempxregs[i], M(&sincostemp[0]));
|
||||||
break;
|
break;
|
||||||
case 24: // d[i] = -1.0f / s[i]; break; // vnrcp
|
case 24: // d[i] = -1.0f / s[i]; break; // vnrcp
|
||||||
MOVSS(XMM0, M(&minus_one));
|
// Rare so let's not bother checking for RipAccessible.
|
||||||
|
MOV(PTRBITS, R(TEMPREG), ImmPtr(&minus_one));
|
||||||
|
MOVSS(XMM0, MatR(TEMPREG));
|
||||||
DIVSS(XMM0, fpr.V(sregs[i]));
|
DIVSS(XMM0, fpr.V(sregs[i]));
|
||||||
MOVSS(tempxregs[i], R(XMM0));
|
MOVSS(tempxregs[i], R(XMM0));
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -374,7 +374,12 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b) {
|
||||||
// If we're rewinding, CORE_NEXTFRAME should not cause a rewind.
|
// If we're rewinding, CORE_NEXTFRAME should not cause a rewind.
|
||||||
// It doesn't really matter either way if we're not rewinding.
|
// It doesn't really matter either way if we're not rewinding.
|
||||||
// CORE_RUNNING is <= CORE_NEXTFRAME.
|
// CORE_RUNNING is <= CORE_NEXTFRAME.
|
||||||
CMP(32, M(&coreState), Imm32(CORE_NEXTFRAME));
|
if (RipAccessible((const void *)&coreState)) {  // Test the ADDRESS of coreState (not its value) for RIP-relative reach.
|
||||||
|
CMP(32, M(&coreState), Imm32(CORE_NEXTFRAME));
|
||||||
|
} else {
|
||||||
|
MOV(PTRBITS, R(RAX), ImmPtr((const void *)&coreState));
|
||||||
|
CMP(32, MatR(RAX), Imm32(CORE_NEXTFRAME));
|
||||||
|
}
|
||||||
FixupBranch skipCheck = J_CC(CC_LE);
|
FixupBranch skipCheck = J_CC(CC_LE);
|
||||||
if (js.afterOp & JitState::AFTER_REWIND_PC_BAD_STATE)
|
if (js.afterOp & JitState::AFTER_REWIND_PC_BAD_STATE)
|
||||||
MOV(32, MIPSSTATE_VAR(pc), Imm32(GetCompilerPC()));
|
MOV(32, MIPSSTATE_VAR(pc), Imm32(GetCompilerPC()));
|
||||||
|
|
Loading…
Add table
Reference in a new issue