From 281ab5f9cb9553375d5a020b13d48e3dcf38aa32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 9 Oct 2014 20:01:47 +0200 Subject: [PATCH 1/3] Sync x64 emitter to Dolphin's. --- Common/x64Emitter.cpp | 296 +++++++++++++++++++++++++----------------- Common/x64Emitter.h | 237 +++++++++++++++++---------------- 2 files changed, 301 insertions(+), 232 deletions(-) diff --git a/Common/x64Emitter.cpp b/Common/x64Emitter.cpp index 4151ef1518..f454296470 100644 --- a/Common/x64Emitter.cpp +++ b/Common/x64Emitter.cpp @@ -23,6 +23,11 @@ #include "MemoryUtil.h" #include "MsgHandler.h" +#define PRIx64 "llx" + +// Minimize the diff against Dolphin +#define DYNA_REC JIT + namespace Gen { @@ -32,7 +37,7 @@ struct NormalOpDef u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, ext; }; -static const NormalOpDef nops[11] = +static const NormalOpDef nops[11] = { {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0}, //ADD {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 2}, //ADC @@ -54,30 +59,30 @@ static const NormalOpDef nops[11] = enum NormalSSEOps { - sseCMP = 0xC2, - sseADD = 0x58, //ADD - sseSUB = 0x5C, //SUB - sseAND = 0x54, //AND - sseANDN = 0x55, //ANDN - sseOR = 0x56, - sseXOR = 0x57, - sseMUL = 0x59, //MUL, - sseDIV = 0x5E, //DIV - sseMIN = 0x5D, //MIN - sseMAX = 0x5F, //MAX - sseCOMIS = 0x2F, //COMIS - sseUCOMIS = 0x2E, //UCOMIS - sseSQRT = 0x51, //SQRT - sseRSQRT = 0x52, //RSQRT (NO DOUBLE PRECISION!!!) + sseCMP = 0xC2, + sseADD = 0x58, //ADD + sseSUB = 0x5C, //SUB + sseAND = 0x54, //AND + sseANDN = 0x55, //ANDN + sseOR = 0x56, + sseXOR = 0x57, + sseMUL = 0x59, //MUL + sseDIV = 0x5E, //DIV + sseMIN = 0x5D, //MIN + sseMAX = 0x5F, //MAX + sseCOMIS = 0x2F, //COMIS + sseUCOMIS = 0x2E, //UCOMIS + sseSQRT = 0x51, //SQRT + sseRSQRT = 0x52, //RSQRT (NO DOUBLE PRECISION!!!) sseMOVAPfromRM = 0x28, //MOVAP from RM - sseMOVAPtoRM = 0x29, //MOVAP to RM - sseMOVUPfromRM = 0x10, //MOVUP from RM - sseMOVUPtoRM = 0x11, //MOVUP to RM + sseMOVAPtoRM = 0x29, //MOVAP to RM + sseMOVUPfromRM = 0x10, //MOVUP from RM sseMOVDQfromRM = 0x6F, sseMOVDQtoRM = 0x7F, - sseMASKMOVDQU = 0xF7, - sseLDDQU = 0xF0, - sseSHUF = 0xC6, + sseMOVUPtoRM = 0x11, //MOVUP to RM + sseMASKMOVDQU = 0xF7, + sseLDDQU = 0xF0, + sseSHUF = 0xC6, sseMOVNTDQ = 0xE7, sseMOVNTP = 0x2B, }; @@ -128,9 +133,9 @@ const u8 *XEmitter::AlignCodePage() return code; } -void XEmitter::WriteModRM(int mod, int rm, int reg) +void XEmitter::WriteModRM(int mod, int reg, int rm) { - Write8((u8)((mod << 6) | ((rm & 7) << 3) | (reg & 7))); + Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7))); } void XEmitter::WriteSIB(int scale, int index, int base) @@ -148,32 +153,66 @@ void OpArg::WriteRex(XEmitter *emit, int opBits, int bits, int customOp) const if (indexReg & 8) op |= 2; if (offsetOrBaseReg & 8) op |= 1; //TODO investigate if this is dangerous if (op != 0x40 || - (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) || - (opBits == 8 && (customOp & 0x10c) == 4)) { + (bits == 8 && (offsetOrBaseReg & 0x10c) == 4) || + (opBits == 8 && (customOp & 0x10c) == 4)) { emit->Write8(op); - _dbg_assert_(JIT, (offsetOrBaseReg & 0x100) == 0 || bits != 8); - _dbg_assert_(JIT, (customOp & 0x100) == 0 || opBits != 8); + _dbg_assert_(DYNA_REC, (offsetOrBaseReg & 0x100) == 0 || bits != 8); + _dbg_assert_(DYNA_REC, (customOp & 0x100) == 0 || opBits != 8); } else { - _dbg_assert_(JIT, (offsetOrBaseReg & 0x10c) == 0 || - (offsetOrBaseReg & 0x10c) == 0x104 || - bits != 8); - _dbg_assert_(JIT, (customOp & 0x10c) == 0 || - (customOp & 0x10c) == 0x104 || - opBits != 8); + _dbg_assert_(DYNA_REC, (offsetOrBaseReg & 0x10c) == 0 || + (offsetOrBaseReg & 0x10c) == 0x104 || + bits != 8); + _dbg_assert_(DYNA_REC, (customOp & 0x10c) == 0 || + (customOp & 0x10c) == 0x104 || + opBits != 8); } #else - _dbg_assert_(JIT, opBits != 64); - _dbg_assert_(JIT, (customOp & 8) == 0 || customOp == -1); - _dbg_assert_(JIT, (indexReg & 8) == 0); - _dbg_assert_(JIT, (offsetOrBaseReg & 8) == 0); - _dbg_assert_(JIT, opBits != 8 || (customOp & 0x10c) != 4 || customOp == -1); - _dbg_assert_(JIT, scale == SCALE_ATREG || bits != 8 || (offsetOrBaseReg & 0x10c) != 4); + _dbg_assert_(DYNA_REC, opBits != 64); + _dbg_assert_(DYNA_REC, (customOp & 8) == 0 || customOp == -1); + _dbg_assert_(DYNA_REC, (indexReg & 8) == 0); + _dbg_assert_(DYNA_REC, (offsetOrBaseReg & 8) == 0); + _dbg_assert_(DYNA_REC, opBits != 8 || (customOp & 0x10c) != 4 || customOp == -1); + _dbg_assert_(DYNA_REC, bits != 8 || (offsetOrBaseReg & 0x10c) != 4); #endif } +void OpArg::WriteVex(XEmitter* emit, int size, int packed, Gen::X64Reg regOp1, Gen::X64Reg regOp2) const +{ + int R = !(regOp1 & 8); + int X = !(indexReg & 8); + int B = !(offsetOrBaseReg & 8); + + // not so sure about this one... + int W = 0; + + // aka map_select in AMD manuals + // only support VEX opcode map 1 for now (analog to secondary opcode map) + int mmmmm = 1; + + int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf); + int L = size == 256; + int pp = (packed << 1) | (size == 64); + + // do we need any VEX fields that only appear in the three-byte form? + if (X == 1 && B == 1 && W == 0 && mmmmm == 1) + { + u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 1) | pp; + emit->Write8(0xC5); + emit->Write8(RvvvvLpp); + } + else + { + u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm; + u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 1) | pp; + emit->Write8(0xC4); + emit->Write8(RXBmmmmm); + emit->Write8(WvvvvLpp); + } +} + void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, - bool warn_64bit_offset) const + bool warn_64bit_offset) const { if (_operandReg == 0xff) _operandReg = (X64Reg)this->operandReg; @@ -191,10 +230,10 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, #ifdef _M_X64 u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes; s64 distance = (s64)offset - (s64)ripAddr; - _assert_msg_(JIT, (distance < 0x80000000LL + _assert_msg_(DYNA_REC, (distance < 0x80000000LL && distance >= -0x80000000LL) || !warn_64bit_offset, - "WriteRest: op out of range (0x%llx uses 0x%llx)", + "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")", ripAddr, offset); s32 offs = (s32)distance; emit->Write32((u32)offs); @@ -248,7 +287,7 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, SIB = true; } - if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4)) + if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4)) { SIB = true; ireg = _offsetOrBaseReg; @@ -273,7 +312,7 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, int oreg = _offsetOrBaseReg; if (SIB) oreg = 4; - + // TODO(ector): WTF is this if about? I don't remember writing it :-) //if (RIP) // oreg = 5; @@ -286,7 +325,7 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, int ss; switch (scale) { - case SCALE_NONE: _offsetOrBaseReg = 4; ss = 0; break; //RSP + case SCALE_NONE: _offsetOrBaseReg = 4; ss = 0; break; //RSP case SCALE_1: ss = 0; break; case SCALE_2: ss = 1; break; case SCALE_4: ss = 2; break; @@ -295,7 +334,7 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, case SCALE_NOBASE_4: ss = 2; break; case SCALE_NOBASE_8: ss = 3; break; case SCALE_ATREG: ss = 0; break; - default: _assert_msg_(JIT, 0, "Invalid scale for SIB byte"); ss = 0; break; + default: _assert_msg_(DYNA_REC, 0, "Invalid scale for SIB byte"); ss = 0; break; } emit->Write8((u8)((ss << 6) | ((ireg&7)<<3) | (_offsetOrBaseReg&7))); } @@ -317,7 +356,7 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, // B = base register# upper bit void XEmitter::Rex(int w, int r, int x, int b) { - w = w ? 1 : 0; + w = w ? 1 : 0; r = r ? 1 : 0; x = x ? 1 : 0; b = b ? 1 : 0; @@ -332,7 +371,7 @@ void XEmitter::JMP(const u8 *addr, bool force5Bytes) if (!force5Bytes) { s64 distance = (s64)(fn - ((u64)code + 2)); - _assert_msg_(JIT, distance >= -0x80 && distance < 0x80, + _assert_msg_(DYNA_REC, distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); //8 bits will do Write8(0xEB); @@ -342,7 +381,7 @@ void XEmitter::JMP(const u8 *addr, bool force5Bytes) { s64 distance = (s64)(fn - ((u64)code + 5)); - _assert_msg_(JIT, distance >= -0x80000000LL + _assert_msg_(DYNA_REC, distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); Write8(0xE9); @@ -353,7 +392,7 @@ void XEmitter::JMP(const u8 *addr, bool force5Bytes) void XEmitter::JMPptr(const OpArg &arg2) { OpArg arg = arg2; - if (arg.IsImm()) _assert_msg_(JIT, 0, "JMPptr - Imm argument"); + if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "JMPptr - Imm argument"); arg.operandReg = 4; arg.WriteRex(this, 0, 0); Write8(0xFF); @@ -370,7 +409,7 @@ void XEmitter::JMPself() void XEmitter::CALLptr(OpArg arg) { - if (arg.IsImm()) _assert_msg_(JIT, 0, "CALLptr - Imm argument"); + if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "CALLptr - Imm argument"); arg.operandReg = 2; arg.WriteRex(this, 0, 0); Write8(0xFF); @@ -380,7 +419,7 @@ void XEmitter::CALLptr(OpArg arg) void XEmitter::CALL(const void *fnptr) { u64 distance = u64(fnptr) - (u64(code) + 5); - _assert_msg_(JIT, distance < 0x0000000080000000ULL + _assert_msg_(DYNA_REC, distance < 0x0000000080000000ULL || distance >= 0xFFFFFFFF80000000ULL, "CALL out of range (%p calls %p)", code, fnptr); Write8(0xE8); @@ -432,7 +471,7 @@ void XEmitter::J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes) if (!force5Bytes) { s64 distance = (s64)(fn - ((u64)code + 2)); - _assert_msg_(JIT, distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); + _assert_msg_(DYNA_REC, distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); //8 bits will do Write8(0x70 + conditionCode); Write8((u8)(s8)distance); @@ -440,7 +479,7 @@ void XEmitter::J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes) else { s64 distance = (s64)(fn - ((u64)code + 6)); - _assert_msg_(JIT, distance >= -0x80000000LL + _assert_msg_(DYNA_REC, distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); Write8(0x0F); @@ -454,13 +493,13 @@ void XEmitter::SetJumpTarget(const FixupBranch &branch) if (branch.type == 0) { s64 distance = (s64)(code - branch.ptr); - _assert_msg_(JIT, distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); + _assert_msg_(DYNA_REC, distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); branch.ptr[-1] = (u8)(s8)distance; } else if (branch.type == 1) { s64 distance = (s64)(code - branch.ptr); - _assert_msg_(JIT, distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); + _assert_msg_(DYNA_REC, distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); ((s32*)branch.ptr)[-1] = (s32)distance; } } @@ -491,9 +530,7 @@ void XEmitter::DEC(int bits, OpArg arg) //Single byte opcodes //There is no PUSHAD/POPAD in 64-bit mode. -void XEmitter::INT3() { - Write8(0xCC); -} +void XEmitter::INT3() {Write8(0xCC);} void XEmitter::RET() {Write8(0xC3);} void XEmitter::RET_FAST() {Write8(0xF3); Write8(0xC3);} //two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a ret @@ -515,7 +552,7 @@ void XEmitter::NOP(int count) } break; } -} +} void XEmitter::PAUSE() {Write8(0xF3); NOP();} //use in tight spinloops for energy saving on some cpu void XEmitter::CLC() {Write8(0xF8);} //clear carry @@ -577,8 +614,8 @@ void XEmitter::CBW(int bits) void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);} void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);} -void XEmitter::PUSH(int bits, const OpArg ®) -{ +void XEmitter::PUSH(int bits, const OpArg ®) +{ if (reg.IsSimpleReg()) PUSH(reg.GetSimpleReg()); else if (reg.IsImm()) @@ -599,7 +636,7 @@ void XEmitter::PUSH(int bits, const OpArg ®) Write32((u32)reg.offset); break; default: - _assert_msg_(JIT, 0, "PUSH - Bad imm bits"); + _assert_msg_(DYNA_REC, 0, "PUSH - Bad imm bits"); break; } } @@ -614,7 +651,7 @@ void XEmitter::PUSH(int bits, const OpArg ®) } void XEmitter::POP(int /*bits*/, const OpArg ®) -{ +{ if (reg.IsSimpleReg()) POP(reg.GetSimpleReg()); else @@ -637,7 +674,7 @@ void XEmitter::BSWAP(int bits, X64Reg reg) } else { - _assert_msg_(JIT, 0, "BSWAP - Wrong number of bits"); + _assert_msg_(DYNA_REC, 0, "BSWAP - Wrong number of bits"); } } @@ -651,7 +688,7 @@ void XEmitter::UD2() void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) { - if (arg.IsImm()) _assert_msg_(JIT, 0, "PREFETCH - Imm argument"); + if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "PREFETCH - Imm argument");; arg.operandReg = (u8)level; arg.WriteRex(this, 0, 0); Write8(0x0F); @@ -661,7 +698,7 @@ void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) void XEmitter::SETcc(CCFlags flag, OpArg dest) { - if (dest.IsImm()) _assert_msg_(JIT, 0, "SETcc - Imm argument"); + if (dest.IsImm()) _assert_msg_(DYNA_REC, 0, "SETcc - Imm argument"); dest.operandReg = 0; dest.WriteRex(this, 0, 0); Write8(0x0F); @@ -671,7 +708,7 @@ void XEmitter::SETcc(CCFlags flag, OpArg dest) void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) { - if (src.IsImm()) _assert_msg_(JIT, 0, "CMOVcc - Imm argument"); + if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "CMOVcc - Imm argument"); src.operandReg = dest; src.WriteRex(this, bits, bits); Write8(0x0F); @@ -681,7 +718,7 @@ void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) { - if (src.IsImm()) _assert_msg_(JIT, 0, "WriteMulDivType - Imm argument"); + if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "WriteMulDivType - Imm argument"); src.operandReg = ext; if (bits == 16) Write8(0x66); src.WriteRex(this, bits, bits); @@ -705,7 +742,7 @@ void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);} void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2) { - if (src.IsImm()) _assert_msg_(JIT, 0, "WriteBitSearchType - Imm argument"); + if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "WriteBitSearchType - Imm argument"); src.operandReg = (u8)dest; if (bits == 16) Write8(0x66); src.WriteRex(this, bits, bits); @@ -716,7 +753,7 @@ void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2) void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src) { - if (bits <= 16) _assert_msg_(JIT, 0, "MOVNTI - bits<=16"); + if (bits <= 16) _assert_msg_(DYNA_REC, 0, "MOVNTI - bits<=16"); WriteBitSearchType(bits, src, dest, 0xC3); } @@ -725,7 +762,7 @@ void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,de void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) { - if (src.IsImm()) _assert_msg_(JIT, 0, "MOVSX - Imm argument"); + if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "MOVSX - Imm argument"); if (dbits == sbits) { MOV(dbits, R(dest), src); return; @@ -756,7 +793,7 @@ void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) { - if (src.IsImm()) _assert_msg_(JIT, 0, "MOVZX - Imm argument"); + if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "MOVZX - Imm argument"); if (dbits == sbits) { MOV(dbits, R(dest), src); return; @@ -775,6 +812,10 @@ void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) Write8(0x0F); Write8(0xB7); } + else if (sbits == 32 && dbits == 64) + { + Write8(0x8B); + } else { Crash(); @@ -785,7 +826,7 @@ void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) void XEmitter::LEA(int bits, X64Reg dest, OpArg src) { - if (src.IsImm()) _assert_msg_(JIT, 0, "LEA - Imm argument"); + if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "LEA - Imm argument"); src.operandReg = (u8)dest; if (bits == 16) Write8(0x66); //TODO: performance warning src.WriteRex(this, bits, bits); @@ -799,11 +840,11 @@ void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext) bool writeImm = false; if (dest.IsImm()) { - _assert_msg_(JIT, 0, "WriteShift - can't shift imms"); + _assert_msg_(DYNA_REC, 0, "WriteShift - can't shift imms"); } if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) { - _assert_msg_(JIT, 0, "WriteShift - illegal argument"); + _assert_msg_(DYNA_REC, 0, "WriteShift - illegal argument"); } dest.operandReg = ext; if (bits == 16) Write8(0x66); @@ -846,11 +887,11 @@ void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext) { if (dest.IsImm()) { - _assert_msg_(JIT, 0, "WriteBitTest - can't test imms"); + _assert_msg_(DYNA_REC, 0, "WriteBitTest - can't test imms"); } if ((index.IsImm() && index.GetImmBits() != 8)) { - _assert_msg_(JIT, 0, "WriteBitTest - illegal argument"); + _assert_msg_(DYNA_REC, 0, "WriteBitTest - illegal argument"); } if (bits == 16) Write8(0x66); if (index.IsImm()) @@ -879,15 +920,15 @@ void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift) { if (dest.IsImm()) { - _assert_msg_(JIT, 0, "SHRD - can't use imms as destination"); + _assert_msg_(DYNA_REC, 0, "SHRD - can't use imms as destination"); } if (!src.IsSimpleReg()) { - _assert_msg_(JIT, 0, "SHRD - must use simple register as source"); + _assert_msg_(DYNA_REC, 0, "SHRD - must use simple register as source"); } if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) { - _assert_msg_(JIT, 0, "SHRD - illegal shift"); + _assert_msg_(DYNA_REC, 0, "SHRD - illegal shift"); } if (bits == 16) Write8(0x66); X64Reg operand = src.GetSimpleReg(); @@ -909,15 +950,15 @@ void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift) { if (dest.IsImm()) { - _assert_msg_(JIT, 0, "SHLD - can't use imms as destination"); + _assert_msg_(DYNA_REC, 0, "SHLD - can't use imms as destination"); } if (!src.IsSimpleReg()) { - _assert_msg_(JIT, 0, "SHLD - must use simple register as source"); + _assert_msg_(DYNA_REC, 0, "SHLD - must use simple register as source"); } if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) { - _assert_msg_(JIT, 0, "SHLD - illegal shift"); + _assert_msg_(DYNA_REC, 0, "SHLD - illegal shift"); } if (bits == 16) Write8(0x66); X64Reg operand = src.GetSimpleReg(); @@ -952,7 +993,7 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o X64Reg _operandReg = (X64Reg)this->operandReg; if (IsImm()) { - _assert_msg_(JIT, 0, "WriteNormalOp - Imm argument, wrong order"); + _assert_msg_(DYNA_REC, 0, "WriteNormalOp - Imm argument, wrong order"); } if (bits == 16) @@ -967,24 +1008,24 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o if (!toRM) { - _assert_msg_(JIT, 0, "WriteNormalOp - Writing to Imm (!toRM)"); + _assert_msg_(DYNA_REC, 0, "WriteNormalOp - Writing to Imm (!toRM)"); } - if (operand.scale == SCALE_IMM8 && bits == 8) + if (operand.scale == SCALE_IMM8 && bits == 8) { emit->Write8(nops[op].imm8); immToWrite = 8; } else if ((operand.scale == SCALE_IMM16 && bits == 16) || - (operand.scale == SCALE_IMM32 && bits == 32) || - (operand.scale == SCALE_IMM32 && bits == 64)) + (operand.scale == SCALE_IMM32 && bits == 32) || + (operand.scale == SCALE_IMM32 && bits == 64)) { emit->Write8(nops[op].imm32); immToWrite = bits == 16 ? 16 : 32; } else if ((operand.scale == SCALE_IMM8 && bits == 16) || - (operand.scale == SCALE_IMM8 && bits == 32) || - (operand.scale == SCALE_IMM8 && bits == 64)) + (operand.scale == SCALE_IMM8 && bits == 32) || + (operand.scale == SCALE_IMM8 && bits == 64)) { emit->Write8(nops[op].simm8); immToWrite = 8; @@ -997,11 +1038,11 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o emit->Write64((u64)operand.offset); return; } - _assert_msg_(JIT, 0, "WriteNormalOp - Only MOV can take 64-bit imm"); + _assert_msg_(DYNA_REC, 0, "WriteNormalOp - Only MOV can take 64-bit imm"); } else { - _assert_msg_(JIT, 0, "WriteNormalOp - Unhandled case"); + _assert_msg_(DYNA_REC, 0, "WriteNormalOp - Unhandled case"); } _operandReg = (X64Reg)nops[op].ext; //pass extension in REG of ModRM } @@ -1036,7 +1077,7 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o emit->Write32((u32)operand.offset); break; default: - _assert_msg_(JIT, 0, "WriteNormalOp - Unhandled case"); + _assert_msg_(DYNA_REC, 0, "WriteNormalOp - Unhandled case"); } } @@ -1045,7 +1086,7 @@ void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg if (a1.IsImm()) { //Booh! Can't write to an imm - _assert_msg_(JIT, 0, "WriteNormalOp - a1 cannot be imm"); + _assert_msg_(DYNA_REC, 0, "WriteNormalOp - a1 cannot be imm"); return; } if (a2.IsImm()) @@ -1072,11 +1113,11 @@ void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(t void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmAND, a1, a2);} void XEmitter::OR (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmOR , a1, a2);} void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXOR, a1, a2);} -void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2) +void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2) { #ifdef _DEBUG - _assert_msg_(JIT, !a1.IsSimpleReg() || !a2.IsSimpleReg() || a1.GetSimpleReg() != a2.GetSimpleReg(), "Redundant MOV @ %p - bug in JIT?", - code); + _assert_msg_(DYNA_REC, !a1.IsSimpleReg() || !a2.IsSimpleReg() || a1.GetSimpleReg() != a2.GetSimpleReg(), "Redundant MOV @ %p - bug in DYNA_REC?", + code); #endif WriteNormalOp(this, bits, nrmMOV, a1, a2); } @@ -1087,16 +1128,16 @@ void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(t void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) { if (bits == 8) { - _assert_msg_(JIT, 0, "IMUL - illegal bit size!"); + _assert_msg_(DYNA_REC, 0, "IMUL - illegal bit size!"); return; } if (a1.IsImm()) { - _assert_msg_(JIT, 0, "IMUL - second arg cannot be imm!"); + _assert_msg_(DYNA_REC, 0, "IMUL - second arg cannot be imm!"); return; } if (!a2.IsImm()) { - _assert_msg_(JIT, 0, "IMUL - third arg must be imm!"); + _assert_msg_(DYNA_REC, 0, "IMUL - third arg must be imm!"); return; } @@ -1118,7 +1159,7 @@ void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) a1.WriteRest(this, 4, regOp); Write32((u32)a2.offset); } else { - _assert_msg_(JIT, 0, "IMUL - unhandled case!"); + _assert_msg_(DYNA_REC, 0, "IMUL - unhandled case!"); } } } @@ -1126,7 +1167,7 @@ void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a) { if (bits == 8) { - _assert_msg_(JIT, 0, "IMUL - illegal bit size!"); + _assert_msg_(DYNA_REC, 0, "IMUL - illegal bit size!"); return; } if (a.IsImm()) @@ -1160,7 +1201,7 @@ void XEmitter::WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg a void XEmitter::WriteSSEOp2(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes) { if (size == 64 && packed) - Write8(0x66); //this time, override goes upwards + Write8(0x66); //this time, override goes upwards if (!packed) Write8(size == 64 ? 0xF2 : 0xF3); arg.operandReg = regOp; @@ -1171,6 +1212,18 @@ void XEmitter::WriteSSEOp2(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg.WriteRest(this, extrabytes); } +void XEmitter::WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes) +{ + WriteAVXOp(size, sseOp, packed, regOp, X64Reg::INVALID_REG, arg, extrabytes); +} + +void XEmitter::WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +{ + arg.WriteVex(this, size, packed, regOp1, regOp2); + Write8(sseOp); + arg.WriteRest(this, extrabytes, regOp1); +} + void XEmitter::MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x6E, true, dest, arg, 0);} void XEmitter::MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(64, 0x7E, true, src, arg, 0);} @@ -1218,8 +1271,8 @@ void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) { void XEmitter::WriteMXCSR(OpArg arg, int ext) { - if (arg.IsImm() || arg.IsSimpleReg()) - _assert_msg_(JIT, 0, "MXCSR - invalid operand"); + if (arg.IsImm() || arg.IsSimpleReg()) + _assert_msg_(DYNA_REC, 0, "MXCSR - invalid operand"); arg.operandReg = ext; arg.WriteRex(this, 0, 0); @@ -1278,8 +1331,8 @@ void XEmitter::MAXPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMAX, true, re void XEmitter::SQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseSQRT, true, regOp, arg);} void XEmitter::SQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseSQRT, true, regOp, arg);} void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseRSQRT, true, regOp, arg);} -void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(32, sseSHUF, true, regOp, arg,1); Write8(shuffle);} -void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(64, sseSHUF, true, regOp, arg,1); Write8(shuffle);} +void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(32, sseSHUF, true, regOp, arg,1); Write8(shuffle);} +void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(64, sseSHUF, true, regOp, arg,1); Write8(shuffle);} void XEmitter::COMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseCOMIS, true, regOp, arg);} //weird that these should be packed void XEmitter::COMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseCOMIS, true, regOp, arg);} //ordered @@ -1287,13 +1340,13 @@ void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseUCOMIS, true, void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseUCOMIS, true, regOp, arg);} void XEmitter::MOVAPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMOVAPfromRM, true, regOp, arg);} -void XEmitter::MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(32, sseMOVAPtoRM, true, regOp, arg);} -void XEmitter::MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMOVUPfromRM, true, regOp, arg);} -void XEmitter::MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(32, sseMOVUPtoRM, true, regOp, arg);} - void XEmitter::MOVAPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMOVAPfromRM, true, regOp, arg);} +void XEmitter::MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(32, sseMOVAPtoRM, true, regOp, arg);} void XEmitter::MOVAPD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVAPtoRM, true, regOp, arg);} + +void XEmitter::MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMOVUPfromRM, true, regOp, arg);} void XEmitter::MOVUPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMOVUPfromRM, true, regOp, arg);} +void XEmitter::MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(32, sseMOVUPtoRM, true, regOp, arg);} void XEmitter::MOVUPD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVUPtoRM, true, regOp, arg);} void XEmitter::MOVDQA(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMOVDQfromRM, true, regOp, arg);} @@ -1311,7 +1364,7 @@ void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5A, true, reg void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5A, false, regOp, arg);} void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0x5A, false, regOp, arg);} -void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0xF2, false, regOp, arg);} +void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x2D, false, regOp, arg);} void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0xE6, false, regOp, arg);} void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0x5B, true, regOp, arg);} @@ -1339,7 +1392,7 @@ void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(32, 0x15, true, dest void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x14, true, dest, arg);} void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x15, true, dest, arg);} -void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) +void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) { if (cpu_info.bSSE3) { @@ -1356,7 +1409,7 @@ void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) //There are a few more left -// Also some integer instrucitons are missing +// Also some integer instructions are missing void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x6B, true, dest, arg);} void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x63, true, dest, arg);} //void PACKUSDW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x66, true, dest, arg);} // WRONG @@ -1515,8 +1568,8 @@ void XEmitter::PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x64, true, dest void XEmitter::PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x65, true, dest, arg);} void XEmitter::PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x66, true, dest, arg);} -void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) { WriteSSEOp(64, 0xC5, true, dest, arg); Write8(subreg); } -void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) { WriteSSEOp(64, 0xC4, true, dest, arg); Write8(subreg); } +void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(64, 0xC5, true, dest, arg); Write8(subreg);} +void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(64, 0xC4, true, dest, arg); Write8(subreg);} void XEmitter::PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF5, true, dest, arg); } void XEmitter::PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF6, true, dest, arg);} @@ -1531,6 +1584,13 @@ void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xD7, true, d void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(64, 0x70, true, regOp, arg, 1); Write8(shuffle);} void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(64, 0x70, false, regOp, arg, 1); Write8(shuffle);} +// VEX +void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseADD, false, regOp1, regOp2, arg);} +void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseSUB, false, regOp1, regOp2, arg);} +void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseMUL, false, regOp1, regOp2, arg);} +void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseDIV, false, regOp1, regOp2, arg);} +void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseSQRT, false, regOp1, regOp2, arg);} + // Prefixes void XEmitter::LOCK() { Write8(0xF0); } diff --git a/Common/x64Emitter.h b/Common/x64Emitter.h index cf739c7a30..2b163ff52a 100644 --- a/Common/x64Emitter.h +++ b/Common/x64Emitter.h @@ -22,10 +22,6 @@ #include "Common.h" -#if !defined(_M_IX86) && !defined(_M_X64) -#error "Don't build this on arm." -#endif - namespace Gen { @@ -33,7 +29,7 @@ enum X64Reg { EAX = 0, EBX = 3, ECX = 1, EDX = 2, ESI = 6, EDI = 7, EBP = 5, ESP = 4, - + RAX = 0, RBX = 3, RCX = 1, RDX = 2, RSI = 6, RDI = 7, RBP = 5, RSP = 4, R8 = 8, R9 = 9, R10 = 10,R11 = 11, @@ -46,9 +42,12 @@ enum X64Reg AX = 0, BX = 3, CX = 1, DX = 2, SI = 6, DI = 7, BP = 5, SP = 4, - XMM0=0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM0=0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, + YMM0=0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15, + INVALID_REG = 0xFFFFFFFF }; @@ -59,7 +58,7 @@ enum CCFlags CC_B = 2, CC_C = 2, CC_NAE = 2, CC_NB = 3, CC_NC = 3, CC_AE = 3, CC_Z = 4, CC_E = 4, - CC_NZ = 5, CC_NE = 5, + CC_NZ = 5, CC_NE = 5, CC_BE = 6, CC_NA = 6, CC_NBE = 7, CC_A = 7, CC_S = 8, @@ -111,8 +110,7 @@ enum NormalOp { nrmXCHG, }; -enum -{ +enum { CMP_EQ = 0, CMP_LT = 1, CMP_LE = 2, @@ -125,6 +123,7 @@ enum class XEmitter; +// RIP addressing does not benefit from micro op fusion on Core arch struct OpArg { OpArg() {} // dummy op arg, used for storage @@ -134,10 +133,11 @@ struct OpArg scale = (u8)_scale; offsetOrBaseReg = (u16)rmReg; indexReg = (u16)scaledReg; - //if scale == 0 never mind offseting + //if scale == 0 never mind offsetting offset = _offset; } void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const; + void WriteVex(XEmitter* emit, int size, int packed, Gen::X64Reg regOp1, X64Reg regOp2) const; void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=(X64Reg)0xFF, bool warn_64bit_offset = true) const; void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits); // This one is public - must be written to @@ -148,6 +148,8 @@ struct OpArg bool IsImm() const {return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || scale == SCALE_IMM64;} bool IsSimpleReg() const {return scale == SCALE_NONE;} bool IsSimpleReg(X64Reg reg) const { + if (!IsSimpleReg()) + return false; return GetSimpleReg() == reg; } @@ -186,16 +188,17 @@ struct OpArg void IncreaseOffset(int sz) { offset += sz; } + private: u8 scale; u16 offsetOrBaseReg; u16 indexReg; }; -inline OpArg M(const void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);} +inline OpArg M(void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);} template inline OpArg M(const T *ptr) {return OpArg((u64)(const void *)ptr, (int)SCALE_RIP);} -inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);} +inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);} inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);} inline OpArg MDisp(X64Reg value, int offset) { return OpArg((u32)offset, SCALE_ATREG, value); @@ -224,11 +227,11 @@ inline OpArg SImmAuto(s32 imm) { } #ifdef _M_X64 -inline OpArg ImmPtr(const void *imm) {return Imm64((u64)imm);} +inline OpArg ImmPtr(const void* imm) {return Imm64((u64)imm);} #else -inline OpArg ImmPtr(const void *imm) {return Imm32((u32)imm);} +inline OpArg ImmPtr(const void* imm) {return Imm32((u32)imm);} #endif -inline u32 PtrOffset(const void *ptr, const void *base) { +inline u32 PtrOffset(const void* ptr, const void* base) { #ifdef _M_X64 s64 distance = (s64)ptr-(s64)base; if (distance >= 0x80000000LL || @@ -253,6 +256,18 @@ struct FixupBranch int type; //0 = 8bit 1 = 32bit }; +enum SSECompare +{ + EQ = 0, + LT, + LE, + UNORD, + NEQ, + NLT, + NLE, + ORD, +}; + typedef const u8* JumpTarget; class XEmitter @@ -271,15 +286,12 @@ private: void WriteMXCSR(OpArg arg, int ext); void WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0); void WriteSSEOp2(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0); + void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0); + void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); protected: - inline void Write8(u8 value) { - //if (value == 0xcc) { - // value = 0xcc; // set breakpoint here to find where mysterious 0xcc are written - //} - *code++ = value; - } + inline void Write8(u8 value) {*code++ = value;} inline void Write16(u16 value) {*(u16*)code = (value); code += 2;} inline void Write32(u32 value) {*(u32*)code = (value); code += 4;} inline void Write64(u64 value) {*(u64*)code = (value); code += 8;} @@ -301,7 +313,7 @@ public: u8 *GetWritableCodePtr(); // Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU - // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr., + // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr., // INC and DEC are slow on Intel Core, but not on AMD. They create a // false flag dependency because they only update a subset of the flags. // XCHG is SLOW and should be avoided. @@ -390,7 +402,7 @@ public: void DIV(int bits, OpArg src); void IDIV(int bits, OpArg src); - // Shift + // Shift void ROL(int bits, OpArg dest, OpArg shift); void ROR(int bits, OpArg dest, OpArg shift); void RCL(int bits, OpArg dest, OpArg shift); @@ -445,7 +457,7 @@ public: // Sign/zero extension void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src); //automatically uses MOVSXD if necessary - void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src); + void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src); // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64) void STMXCSR(OpArg memloc); @@ -459,25 +471,33 @@ public: void FWAIT(); // SSE/SSE2: Floating point arithmetic - void ADDSS(X64Reg regOp, OpArg arg); - void ADDSD(X64Reg regOp, OpArg arg); - void SUBSS(X64Reg regOp, OpArg arg); - void SUBSD(X64Reg regOp, OpArg arg); - void MULSS(X64Reg regOp, OpArg arg); - void MULSD(X64Reg regOp, OpArg arg); - void DIVSS(X64Reg regOp, OpArg arg); - void DIVSD(X64Reg regOp, OpArg arg); - void MINSS(X64Reg regOp, OpArg arg); - void MINSD(X64Reg regOp, OpArg arg); - void MAXSS(X64Reg regOp, OpArg arg); - void MAXSD(X64Reg regOp, OpArg arg); - void SQRTSS(X64Reg regOp, OpArg arg); - void SQRTSD(X64Reg regOp, OpArg arg); + void ADDSS(X64Reg regOp, OpArg arg); + void ADDSD(X64Reg regOp, OpArg arg); + void SUBSS(X64Reg regOp, OpArg arg); + void SUBSD(X64Reg regOp, OpArg arg); + void MULSS(X64Reg regOp, OpArg arg); + void MULSD(X64Reg regOp, OpArg arg); + void DIVSS(X64Reg regOp, OpArg arg); + void DIVSD(X64Reg regOp, OpArg arg); + void MINSS(X64Reg regOp, OpArg arg); + void MINSD(X64Reg regOp, OpArg arg); + void MAXSS(X64Reg regOp, OpArg arg); + void MAXSD(X64Reg regOp, OpArg arg); + void SQRTSS(X64Reg regOp, OpArg arg); + void SQRTSD(X64Reg regOp, OpArg arg); void RSQRTSS(X64Reg regOp, OpArg arg); // SSE/SSE2: Floating point bitwise (yes) - void CMPSS(X64Reg regOp, OpArg arg, u8 compare); - void CMPSD(X64Reg regOp, OpArg arg, u8 compare); + void CMPSS(X64Reg regOp, OpArg arg, u8 compare); + void CMPSD(X64Reg regOp, OpArg arg, u8 compare); + void ANDSS(X64Reg regOp, OpArg arg); + void ANDSD(X64Reg regOp, OpArg arg); + void ANDNSS(X64Reg regOp, OpArg arg); + void ANDNSD(X64Reg regOp, OpArg arg); + void ORSS(X64Reg regOp, OpArg arg); + void ORSD(X64Reg regOp, OpArg arg); + void XORSS(X64Reg regOp, OpArg arg); + void XORSD(X64Reg regOp, OpArg arg); inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); } inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); } @@ -487,24 +507,12 @@ public: inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); } inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); } - - // I don't think these exist - /* - void ANDSD(X64Reg regOp, OpArg arg); - void ANDNSS(X64Reg regOp, OpArg arg); - void ANDNSD(X64Reg regOp, OpArg arg); - void ORSS(X64Reg regOp, OpArg arg); - void ORSD(X64Reg regOp, OpArg arg); - void XORSS(X64Reg regOp, OpArg arg); - void XORSD(X64Reg regOp, OpArg arg); - */ - // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) - void ADDPS(X64Reg regOp, OpArg arg); - void ADDPD(X64Reg regOp, OpArg arg); - void SUBPS(X64Reg regOp, OpArg arg); - void SUBPD(X64Reg regOp, OpArg arg); - void CMPPS(X64Reg regOp, OpArg arg, u8 compare); + void ADDPS(X64Reg regOp, OpArg arg); + void ADDPD(X64Reg regOp, OpArg arg); + void SUBPS(X64Reg regOp, OpArg arg); + void SUBPD(X64Reg regOp, OpArg arg); + void CMPPS(X64Reg regOp, OpArg arg, u8 compare); void CMPPD(X64Reg regOp, OpArg arg, u8 compare); void MULPS(X64Reg regOp, OpArg arg); void MULPD(X64Reg regOp, OpArg arg); @@ -519,8 +527,8 @@ public: void RSQRTPS(X64Reg regOp, OpArg arg); // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double) - void ANDPS(X64Reg regOp, OpArg arg); - void ANDPD(X64Reg regOp, OpArg arg); + void ANDPS(X64Reg regOp, OpArg arg); + void ANDPD(X64Reg regOp, OpArg arg); void ANDNPS(X64Reg regOp, OpArg arg); void ANDNPD(X64Reg regOp, OpArg arg); void ORPS(X64Reg regOp, OpArg arg); @@ -529,9 +537,9 @@ public: void XORPD(X64Reg regOp, OpArg arg); // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation. - void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle); - void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle); - + void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle); + void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle); + // SSE/SSE2: Useful alternative to shuffle in some cases. void MOVDDUP(X64Reg regOp, OpArg arg); @@ -549,18 +557,17 @@ public: void UCOMISS(X64Reg regOp, OpArg arg); void UCOMISD(X64Reg regOp, OpArg arg); - // SSE/SSE2: Moves. Use the right data type for your data to avoid slight penalties on some CPUs. - - // Singles + // SSE/SSE2: Moves. Use the right data type for your data, in most cases. void MOVAPS(X64Reg regOp, OpArg arg); - void MOVAPS(OpArg arg, X64Reg regOp); - void MOVUPS(X64Reg regOp, OpArg arg); - void MOVUPS(OpArg arg, X64Reg regOp); - // Doubles void MOVAPD(X64Reg regOp, OpArg arg); + void MOVAPS(OpArg arg, X64Reg regOp); void MOVAPD(OpArg arg, X64Reg regOp); + + void MOVUPS(X64Reg regOp, OpArg arg); void MOVUPD(X64Reg regOp, OpArg arg); + void MOVUPS(OpArg arg, X64Reg regOp); void MOVUPD(OpArg arg, X64Reg regOp); + // Integers (NOTE: untested - I added these then it turned out I didn't have a use for them after all). void MOVDQA(X64Reg regOp, OpArg arg); void MOVDQA(OpArg arg, X64Reg regOp); @@ -596,11 +603,11 @@ public: void CVTDQ2PS(X64Reg regOp, OpArg arg); void CVTPS2DQ(X64Reg regOp, OpArg arg); + void CVTTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! + void CVTTPS2DQ(X64Reg regOp, OpArg arg); void CVTSI2SS(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! void CVTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! - void CVTTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! void CVTTSD2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! - void CVTTPS2DQ(X64Reg regOp, OpArg arg); void CVTTPD2DQ(X64Reg xregdest, OpArg arg); // SSE2: Packed integer instructions @@ -621,57 +628,57 @@ public: void PMOVZXWD(X64Reg dest, const OpArg &arg); void PAND(X64Reg dest, OpArg arg); - void PANDN(X64Reg dest, OpArg arg); - void PXOR(X64Reg dest, OpArg arg); - void POR(X64Reg dest, OpArg arg); + void PANDN(X64Reg dest, OpArg arg); + void PXOR(X64Reg dest, OpArg arg); + void POR(X64Reg dest, OpArg arg); void PADDB(X64Reg dest, OpArg arg); - void PADDW(X64Reg dest, OpArg arg); - void PADDD(X64Reg dest, OpArg arg); - void PADDQ(X64Reg dest, OpArg arg); + void PADDW(X64Reg dest, OpArg arg); + void PADDD(X64Reg dest, OpArg arg); + void PADDQ(X64Reg dest, OpArg arg); - void PADDSB(X64Reg dest, OpArg arg); - void PADDSW(X64Reg dest, OpArg arg); - void PADDUSB(X64Reg dest, OpArg arg); - void PADDUSW(X64Reg dest, OpArg arg); + void PADDSB(X64Reg dest, OpArg arg); + void PADDSW(X64Reg dest, OpArg arg); + void PADDUSB(X64Reg dest, OpArg arg); + void PADDUSW(X64Reg dest, OpArg arg); - void PSUBB(X64Reg dest, OpArg arg); - void PSUBW(X64Reg dest, OpArg arg); - void PSUBD(X64Reg dest, OpArg arg); - void PSUBQ(X64Reg dest, OpArg arg); + void PSUBB(X64Reg dest, OpArg arg); + void PSUBW(X64Reg dest, OpArg arg); + void PSUBD(X64Reg dest, OpArg arg); + void PSUBQ(X64Reg dest, OpArg arg); - void PSUBSB(X64Reg dest, OpArg arg); - void PSUBSW(X64Reg dest, OpArg arg); - void PSUBUSB(X64Reg dest, OpArg arg); - void PSUBUSW(X64Reg dest, OpArg arg); + void PSUBSB(X64Reg dest, OpArg arg); + void PSUBSW(X64Reg dest, OpArg arg); + void PSUBUSB(X64Reg dest, OpArg arg); + void PSUBUSW(X64Reg dest, OpArg arg); - void PAVGB(X64Reg dest, OpArg arg); - void PAVGW(X64Reg dest, OpArg arg); + void PAVGB(X64Reg dest, OpArg arg); + void PAVGW(X64Reg dest, OpArg arg); - void PCMPEQB(X64Reg dest, OpArg arg); - void PCMPEQW(X64Reg dest, OpArg arg); - void PCMPEQD(X64Reg dest, OpArg arg); + void PCMPEQB(X64Reg dest, OpArg arg); + void PCMPEQW(X64Reg dest, OpArg arg); + void PCMPEQD(X64Reg dest, OpArg arg); - void PCMPGTB(X64Reg dest, OpArg arg); - void PCMPGTW(X64Reg dest, OpArg arg); - void PCMPGTD(X64Reg dest, OpArg arg); + void PCMPGTB(X64Reg dest, OpArg arg); + void PCMPGTW(X64Reg dest, OpArg arg); + void PCMPGTD(X64Reg dest, OpArg arg); void PEXTRW(X64Reg dest, OpArg arg, u8 subreg); void PINSRW(X64Reg dest, OpArg arg, u8 subreg); - void PMADDWD(X64Reg dest, OpArg arg); - void PSADBW(X64Reg dest, OpArg arg); + void PMADDWD(X64Reg dest, OpArg arg); + void PSADBW(X64Reg dest, OpArg arg); - void PMAXSW(X64Reg dest, OpArg arg); - void PMAXUB(X64Reg dest, OpArg arg); - void PMINSW(X64Reg dest, OpArg arg); - void PMINUB(X64Reg dest, OpArg arg); + void PMAXSW(X64Reg dest, OpArg arg); + void PMAXUB(X64Reg dest, OpArg arg); + void PMINSW(X64Reg dest, OpArg arg); + void PMINUB(X64Reg dest, OpArg arg); // SSE4 has PMAXSB and PMINSB and PMAXUW and PMINUW too if we need them. - + void PMOVMSKB(X64Reg dest, OpArg arg); + void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle); void PSHUFB(X64Reg dest, OpArg arg); - void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle); void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle); void PSRLW(X64Reg reg, int shift); @@ -688,13 +695,19 @@ public: void PSRAW(X64Reg reg, int shift); void PSRAD(X64Reg reg, int shift); + // AVX + void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void RTDSC(); // Utility functions // The difference between this and CALL is that this aligns the stack // where appropriate. void ABI_CallFunction(const void *func); - template void ABI_CallFunction(T (*func)()) { ABI_CallFunction((const void *)func); @@ -703,10 +716,9 @@ public: void ABI_CallFunction(const u8 *func) { ABI_CallFunction((const void *)func); } - void ABI_CallFunctionC16(const void *func, u16 param1); void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2); - + // These only support u32 parameters, but that's enough for a lot of uses. // These will destroy the 1 or 2 first "parameter regs". void ABI_CallFunctionC(const void *func, u32 param1); @@ -783,8 +795,7 @@ public: // Call this when shutting down. Don't rely on the destructor, even though it'll do the job. void FreeCodeSpace(); - bool IsInSpace(const u8 *ptr) const - { + bool IsInSpace(const u8 *ptr) const { return ptr >= region && ptr < region + region_size; } @@ -792,13 +803,11 @@ public: // Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()). void WriteProtect(); - void ResetCodePtr() - { + void ResetCodePtr() { SetCodePtr(region); } - size_t GetSpaceLeft() const - { + size_t GetSpaceLeft() const { return region_size - (GetCodePtr() - region); } From 3b1476c8ecb59982b0349cd85e75a0a2777cd777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 9 Oct 2014 21:38:25 +0200 Subject: [PATCH 2/3] MIPSTables: Annotate fp and hi/lo in/out more accurately than just "other" Some typo fixes --- Core/Debugger/DisassemblyManager.cpp | 4 +- Core/MIPS/MIPS.h | 10 +-- Core/MIPS/MIPSAnalyst.cpp | 8 +- Core/MIPS/MIPSAnalyst.h | 2 +- Core/MIPS/MIPSInt.cpp | 36 +++------ Core/MIPS/MIPSTables.cpp | 106 +++++++++++++-------------- Core/MIPS/MIPSTables.h | 89 ++++++++++++---------- Windows/Debugger/CtrlDisAsmView.cpp | 2 +- 8 files changed, 124 insertions(+), 133 deletions(-) diff --git a/Core/Debugger/DisassemblyManager.cpp b/Core/Debugger/DisassemblyManager.cpp index 874207b2f6..fa0203878d 100644 --- a/Core/Debugger/DisassemblyManager.cpp +++ b/Core/Debugger/DisassemblyManager.cpp @@ -772,7 +772,7 @@ bool DisassemblyMacro::disassemble(u32 address, DisassemblyLineInfo& dest, bool dest.params = buffer; dest.info.hasRelevantAddress = true; - dest.info.releventAddress = immediate; + dest.info.relevantAddress = immediate; break; case MACRO_MEMORYIMM: dest.name = name; @@ -792,7 +792,7 @@ bool DisassemblyMacro::disassemble(u32 address, DisassemblyLineInfo& dest, bool dest.info.dataSize = dataSize; dest.info.hasRelevantAddress = true; - dest.info.releventAddress = immediate; + dest.info.relevantAddress = immediate; break; default: return false; diff --git a/Core/MIPS/MIPS.h b/Core/MIPS/MIPS.h index 4a69dcd345..61ccc639ff 100644 --- a/Core/MIPS/MIPS.h +++ b/Core/MIPS/MIPS.h @@ -26,8 +26,7 @@ class PointerWrap; typedef Memory::Opcode MIPSOpcode; -enum MIPSGPReg -{ +enum MIPSGPReg { MIPS_REG_ZERO=0, MIPS_REG_COMPILER_SCRATCH=1, @@ -65,17 +64,16 @@ enum MIPSGPReg MIPS_REG_FP=30, MIPS_REG_RA=31, - MIPS_REG_INVALID=-1, - // Not real regs, just for convenience/jit mapping. MIPS_REG_HI = 32, MIPS_REG_LO = 33, MIPS_REG_FPCOND = 34, MIPS_REG_VFPUCC = 35, + + MIPS_REG_INVALID=-1, }; -enum -{ +enum { VFPU_CTRL_SPREFIX, VFPU_CTRL_TPREFIX, VFPU_CTRL_DPREFIX, diff --git a/Core/MIPS/MIPSAnalyst.cpp b/Core/MIPS/MIPSAnalyst.cpp index 626931a650..9dc3ea4cee 100644 --- a/Core/MIPS/MIPSAnalyst.cpp +++ b/Core/MIPS/MIPSAnalyst.cpp @@ -1204,19 +1204,19 @@ skip: case 0x20: // add case 0x21: // addu info.hasRelevantAddress = true; - info.releventAddress = cpu->GetRegValue(0,MIPS_GET_RS(op))+cpu->GetRegValue(0,MIPS_GET_RT(op)); + info.relevantAddress = cpu->GetRegValue(0,MIPS_GET_RS(op))+cpu->GetRegValue(0,MIPS_GET_RT(op)); break; case 0x22: // sub case 0x23: // subu info.hasRelevantAddress = true; - info.releventAddress = cpu->GetRegValue(0,MIPS_GET_RS(op))-cpu->GetRegValue(0,MIPS_GET_RT(op)); + info.relevantAddress = cpu->GetRegValue(0,MIPS_GET_RS(op))-cpu->GetRegValue(0,MIPS_GET_RT(op)); break; } break; case 0x08: // addi case 0x09: // adiu info.hasRelevantAddress = true; - info.releventAddress = cpu->GetRegValue(0,MIPS_GET_RS(op))+((s16)(op & 0xFFFF)); + info.relevantAddress = cpu->GetRegValue(0,MIPS_GET_RS(op))+((s16)(op & 0xFFFF)); break; } @@ -1323,7 +1323,7 @@ skip: info.dataAddress = rs + imm16; info.hasRelevantAddress = true; - info.releventAddress = info.dataAddress; + info.relevantAddress = info.dataAddress; } return info; diff --git a/Core/MIPS/MIPSAnalyst.h b/Core/MIPS/MIPSAnalyst.h index f296e88a66..4eaefbb91b 100644 --- a/Core/MIPS/MIPSAnalyst.h +++ b/Core/MIPS/MIPSAnalyst.h @@ -154,7 +154,7 @@ namespace MIPSAnalyst u32 dataAddress; bool hasRelevantAddress; - u32 releventAddress; + u32 relevantAddress; } MipsOpcodeInfo; MipsOpcodeInfo GetOpcodeInfo(DebugInterface* cpu, u32 address); diff --git a/Core/MIPS/MIPSInt.cpp b/Core/MIPS/MIPSInt.cpp index 330f29a054..792f85a0ae 100644 --- a/Core/MIPS/MIPSInt.cpp +++ b/Core/MIPS/MIPSInt.cpp @@ -74,29 +74,13 @@ int MIPS_SingleStep() #else MIPSOpcode op = Memory::Read_Opcode_JIT(mipsr4k.pc); #endif - /* - // Choke on VFPU - MIPSInfo info = MIPSGetInfo(op); - if (info & IS_VFPU) - { - if (!Core_IsStepping() && !GetAsyncKeyState(VK_LSHIFT)) - { - Core_EnableStepping(true); - return; - } - }*/ - - if (mipsr4k.inDelaySlot) - { + if (mipsr4k.inDelaySlot) { MIPSInterpret(op); - if (mipsr4k.inDelaySlot) - { + if (mipsr4k.inDelaySlot) { mipsr4k.pc = mipsr4k.nextPC; mipsr4k.inDelaySlot = false; } - } - else - { + } else { MIPSInterpret(op); } return 1; @@ -872,14 +856,12 @@ namespace MIPSInt int pos = _POS; // Don't change $zr. - if (rt == 0) - { + if (rt == 0) { PC += 4; return; } - switch (op & 0x3f) - { + switch (op & 0x3f) { case 0x0: //ext { int size = _SIZE + 1; @@ -1025,10 +1007,10 @@ namespace MIPSInt switch (op & 0x3f) { - case 0: F(fd) = F(fs) + F(ft); break; //add - case 1: F(fd) = F(fs) - F(ft); break; //sub - case 2: F(fd) = F(fs) * F(ft); break; //mul - case 3: F(fd) = F(fs) / F(ft); break; //div + case 0: F(fd) = F(fs) + F(ft); break; // add.s + case 1: F(fd) = F(fs) - F(ft); break; // sub.s + case 2: F(fd) = F(fs) * F(ft); break; // mul.s + case 3: F(fd) = F(fs) / F(ft); break; // div.s default: _dbg_assert_msg_(CPU,0,"Trying to interpret FPU3Op instruction that can't be interpreted"); break; diff --git a/Core/MIPS/MIPSTables.cpp b/Core/MIPS/MIPSTables.cpp index fe5e269904..ff03a09fbc 100644 --- a/Core/MIPS/MIPSTables.cpp +++ b/Core/MIPS/MIPSTables.cpp @@ -31,8 +31,7 @@ #include "JitCommon/JitCommon.h" -enum MipsEncoding -{ +enum MipsEncoding { Imme, Spec, Spe2, @@ -66,8 +65,7 @@ enum MipsEncoding Inval = -2, }; -struct MIPSInstruction -{ +struct MIPSInstruction { MipsEncoding altEncoding; const char *name; MIPSComp::MIPSCompileFunc compile; @@ -152,7 +150,7 @@ const MIPSInstruction tableImmediate[64] = // xxxxxx ..... ..... ............... INVALID, INVALID, INSTR("swr", &Jit::Comp_ITypeMem, Dis_ITypeMem, Int_ITypeMem, IN_IMM16|IN_RS_ADDR|IN_RT|OUT_MEM|MEMTYPE_WORD), - INSTR("cache", &Jit::Comp_Cache, Dis_Cache, Int_Cache, IN_MEM|IN_IMM16|IN_RS_ADDR|IN_OTHER|OUT_OTHER), + INSTR("cache", &Jit::Comp_Cache, Dis_Cache, Int_Cache, IN_MEM|IN_IMM16|IN_RS_ADDR), //48 INSTR("ll", &Jit::Comp_Generic, Dis_Generic, Int_StoreSync, IN_MEM|IN_IMM16|IN_RS_ADDR|OUT_RT|OUT_OTHER|MEMTYPE_WORD), INSTR("lwc1", &Jit::Comp_FPULS, Dis_FPULS, Int_FPULS, IN_MEM|IN_IMM16|IN_RS_ADDR|OUT_OTHER|MEMTYPE_FLOAT), @@ -198,22 +196,22 @@ const MIPSInstruction tableSpecial[64] = // 000000 ..... ..... ..... ..... xxxxx INSTR("sync", &Jit::Comp_DoNothing, Dis_Generic, Int_Sync, 0), //16 - INSTR("mfhi", &Jit::Comp_MulDivType, Dis_FromHiloTransfer, Int_MulDivType, OUT_RD|IN_OTHER), - INSTR("mthi", &Jit::Comp_MulDivType, Dis_ToHiloTransfer, Int_MulDivType, IN_RS|OUT_OTHER), - INSTR("mflo", &Jit::Comp_MulDivType, Dis_FromHiloTransfer, Int_MulDivType, OUT_RD|IN_OTHER), - INSTR("mtlo", &Jit::Comp_MulDivType, Dis_ToHiloTransfer, Int_MulDivType, IN_RS|OUT_OTHER), + INSTR("mfhi", &Jit::Comp_MulDivType, Dis_FromHiloTransfer, Int_MulDivType, OUT_RD|IN_HI), + INSTR("mthi", &Jit::Comp_MulDivType, Dis_ToHiloTransfer, Int_MulDivType, IN_RS|OUT_HI), + INSTR("mflo", &Jit::Comp_MulDivType, Dis_FromHiloTransfer, Int_MulDivType, OUT_RD|IN_LO), + INSTR("mtlo", &Jit::Comp_MulDivType, Dis_ToHiloTransfer, Int_MulDivType, IN_RS|OUT_LO), INVALID, INVALID, INSTR("clz", &Jit::Comp_RType2, Dis_RType2, Int_RType2, OUT_RD|IN_RS), INSTR("clo", &Jit::Comp_RType2, Dis_RType2, Int_RType2, OUT_RD|IN_RS), //24 - INSTR("mult", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_OTHER), - INSTR("multu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_OTHER), - INSTR("div", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_OTHER), - INSTR("divu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_OTHER), - INSTR("madd", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_OTHER|OUT_OTHER), - INSTR("maddu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_OTHER|OUT_OTHER), + INSTR("mult", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_HI|OUT_LO), + INSTR("multu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_HI|OUT_LO), + INSTR("div", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_HI|OUT_LO), + INSTR("divu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|OUT_HI|OUT_LO), + INSTR("madd", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_HI|IN_LO|OUT_HI|OUT_LO), + INSTR("maddu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_HI|IN_LO|OUT_HI|OUT_LO), INVALID, INVALID, @@ -234,8 +232,8 @@ const MIPSInstruction tableSpecial[64] = // 000000 ..... ..... ..... ..... xxxxx INSTR("sltu", &Jit::Comp_RType3, Dis_RType3, Int_RType3, IN_RS|IN_RT|OUT_RD), INSTR("max", &Jit::Comp_RType3, Dis_RType3, Int_RType3, IN_RS|IN_RT|OUT_RD), INSTR("min", &Jit::Comp_RType3, Dis_RType3, Int_RType3, IN_RS|IN_RT|OUT_RD), - INSTR("msub", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_OTHER|OUT_OTHER), - INSTR("msubu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_OTHER|OUT_OTHER), + INSTR("msub", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_HI|IN_LO|OUT_HI|OUT_LO), + INSTR("msubu", &Jit::Comp_MulDivType, Dis_MulDivType, Int_MulDivType, IN_RS|IN_RT|IN_HI|IN_LO|OUT_HI|OUT_LO), //48 INSTR("tge", &Jit::Comp_Generic, Dis_RType3, 0, 0), @@ -262,9 +260,9 @@ const MIPSInstruction tableSpecial2[64] = // 011100 ..... ..... ..... ..... xxxx INVALID_X_8, //32 INVALID, INVALID, INVALID, INVALID, - INSTR("mfic", &Jit::Comp_Generic, Dis_Generic, Int_Special2, 0), + INSTR("mfic", &Jit::Comp_Generic, Dis_Generic, Int_Special2, OUT_OTHER), INVALID, - INSTR("mtic", &Jit::Comp_Generic, Dis_Generic, Int_Special2, 0), + INSTR("mtic", &Jit::Comp_Generic, Dis_Generic, Int_Special2, OUT_OTHER), INVALID, //40 INVALID_X_8, @@ -369,11 +367,11 @@ const MIPSInstruction tableCop2BC2[4] = // 010010 01000 ...xx ................ const MIPSInstruction tableCop0[32] = // 010000 xxxxx ..... ................ { - INSTR("mfc0", &Jit::Comp_Generic, Dis_Generic, 0, OUT_RT), + INSTR("mfc0", &Jit::Comp_Generic, Dis_Generic, 0, OUT_RT), // unused INVALID, INVALID, INVALID, - INSTR("mtc0", &Jit::Comp_Generic, Dis_Generic, 0, IN_RT), + INSTR("mtc0", &Jit::Comp_Generic, Dis_Generic, 0, IN_RT), // unused INVALID, INVALID, INVALID, @@ -423,11 +421,11 @@ const MIPSInstruction tableCop0CO[64] = // 010000 1.... ..... ..... ..... xxxxxx const MIPSInstruction tableCop1[32] = // 010001 xxxxx ..... ..... ........... { - INSTR("mfc1", &Jit::Comp_mxc1, Dis_mxc1, Int_mxc1, IN_OTHER|OUT_RT), + INSTR("mfc1", &Jit::Comp_mxc1, Dis_mxc1, Int_mxc1, IN_FS|OUT_RT), INVALID, INSTR("cfc1", &Jit::Comp_mxc1, Dis_mxc1, Int_mxc1, IN_OTHER|IN_FPUFLAG|OUT_RT), INVALID, - INSTR("mtc1", &Jit::Comp_mxc1, Dis_mxc1, Int_mxc1, IN_RT|OUT_OTHER), + INSTR("mtc1", &Jit::Comp_mxc1, Dis_mxc1, Int_mxc1, IN_RT|OUT_FS), INVALID, INSTR("ctc1", &Jit::Comp_mxc1, Dis_mxc1, Int_mxc1, IN_RT|OUT_FPUFLAG|OUT_OTHER), INVALID, @@ -455,20 +453,20 @@ const MIPSInstruction tableCop1BC[32] = // 010001 01000 xxxxx ................ const MIPSInstruction tableCop1S[64] = // 010001 10000 ..... ..... ..... xxxxxx { - INSTR("add.s", &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, IN_OTHER|OUT_OTHER), - INSTR("sub.s", &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, IN_OTHER|OUT_OTHER), - INSTR("mul.s", &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, IN_OTHER|OUT_OTHER), - INSTR("div.s", &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, IN_OTHER|OUT_OTHER), - INSTR("sqrt.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER), - INSTR("abs.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER), - INSTR("mov.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER), - INSTR("neg.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER), + INSTR("add.s", &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, OUT_FD|IN_FS|IN_FT), + INSTR("sub.s", &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, OUT_FD|IN_FS|IN_FT), + INSTR("mul.s", &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, OUT_FD|IN_FS|IN_FT), + INSTR("div.s", &Jit::Comp_FPU3op, Dis_FPU3op, Int_FPU3op, OUT_FD|IN_FS|IN_FT), + INSTR("sqrt.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS), + INSTR("abs.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS), + INSTR("mov.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS), + INSTR("neg.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS), //8 INVALID, INVALID, INVALID, INVALID, - INSTR("round.w.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER), - INSTR("trunc.w.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER), - INSTR("ceil.w.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER), - INSTR("floor.w.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER), + INSTR("round.w.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS), + INSTR("trunc.w.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS), + INSTR("ceil.w.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS), + INSTR("floor.w.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS), //16 INVALID_X_8, //24 @@ -476,29 +474,29 @@ const MIPSInstruction tableCop1S[64] = // 010001 10000 ..... ..... ..... xxxxxx //32 INVALID, INVALID, INVALID, INVALID, //36 - INSTR("cvt.w.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER), + INSTR("cvt.w.s", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS), INVALID, INSTR("dis.int", &Jit::Comp_Generic, Dis_Generic, Int_Interrupt, 0), INVALID, //40 INVALID_X_8, //48 - 010001 10000 ..... ..... ..... 11xxxx - INSTR("c.f", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.un", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.eq", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.ueq", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.olt", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.ult", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.ole", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.ule", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.sf", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.ngle",&Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.seq", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.ngl", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.lt", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.nge", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.le", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), - INSTR("c.ngt", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_OTHER|OUT_FPUFLAG), + INSTR("c.f", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, OUT_FPUFLAG), + INSTR("c.un", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), + INSTR("c.eq", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), + INSTR("c.ueq", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), + INSTR("c.olt", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), + INSTR("c.ult", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), + INSTR("c.ole", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), + INSTR("c.ule", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), + INSTR("c.sf", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, OUT_FPUFLAG), + INSTR("c.ngle",&Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), + INSTR("c.seq", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), + INSTR("c.ngl", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), + INSTR("c.lt", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), + INSTR("c.nge", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), + INSTR("c.le", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), + INSTR("c.ngt", &Jit::Comp_FPUComp, Dis_FPUComp, Int_FPUComp, IN_FS|IN_FT|OUT_FPUFLAG), }; const MIPSInstruction tableCop1W[64] = // 010001 10100 ..... ..... ..... xxxxxx @@ -511,7 +509,7 @@ const MIPSInstruction tableCop1W[64] = // 010001 10100 ..... ..... ..... xxxxxx //24 INVALID_X_8, //32 - INSTR("cvt.s.w", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, IN_OTHER|OUT_OTHER), + INSTR("cvt.s.w", &Jit::Comp_FPU2op, Dis_FPU2op, Int_FPU2op, OUT_FD|IN_FS), INVALID, INVALID, INVALID, //36 INVALID, @@ -890,8 +888,6 @@ const MIPSInstruction *mipsTables[NumEncodings] = 0, }; - - //arm encoding table //const MIPSInstruction mipsinstructions[] = //{ diff --git a/Core/MIPS/MIPSTables.h b/Core/MIPS/MIPSTables.h index b05e04e199..1f70366eb2 100644 --- a/Core/MIPS/MIPSTables.h +++ b/Core/MIPS/MIPSTables.h @@ -25,14 +25,14 @@ struct MIPSInfo { value = 0; } - explicit MIPSInfo(u32 v) : value(v) { + explicit MIPSInfo(u64 v) : value(v) { } - u32 operator & (const u32 &arg) const { + u64 operator & (const u32 &arg) const { return value & arg; } - u32 value; + u64 value; }; #define CONDTYPE_MASK 0x00000007 @@ -49,44 +49,59 @@ struct MIPSInfo { // as long as the other flags are checked, // there is no way to misinterpret these // as CONDTYPE_X -#define MEMTYPE_MASK 0x00000007 -#define MEMTYPE_BYTE 0x00000001 -#define MEMTYPE_HWORD 0x00000002 -#define MEMTYPE_WORD 0x00000003 -#define MEMTYPE_FLOAT 0x00000004 -#define MEMTYPE_VQUAD 0x00000005 +#define MEMTYPE_MASK 0x00000007ULL +#define MEMTYPE_BYTE 0x00000001ULL +#define MEMTYPE_HWORD 0x00000002ULL +#define MEMTYPE_WORD 0x00000003ULL +#define MEMTYPE_FLOAT 0x00000004ULL +#define MEMTYPE_VQUAD 0x00000005ULL -#define IS_CONDMOVE 0x00000008 -#define DELAYSLOT 0x00000010 -#define BAD_INSTRUCTION 0x00000020 -#define LIKELY 0x00000040 -#define IS_CONDBRANCH 0x00000080 -#define IS_JUMP 0x00000100 +#define IS_CONDMOVE 0x00000008ULL +#define DELAYSLOT 0x00000010ULL +#define BAD_INSTRUCTION 0x00000020ULL +#define LIKELY 0x00000040ULL +#define IS_CONDBRANCH 0x00000080ULL +#define IS_JUMP 0x00000100ULL -#define IN_RS 0x00000200 -#define IN_RS_ADDR (0x00000400 | IN_RS) -#define IN_RS_SHIFT (0x00000800 | IN_RS) -#define IN_RT 0x00001000 -#define IN_SA 0x00002000 -#define IN_IMM16 0x00004000 -#define IN_IMM26 0x00008000 -#define IN_MEM 0x00010000 -#define IN_OTHER 0x00020000 -#define IN_FPUFLAG 0x00040000 -#define IN_VFPU_CC 0x00080000 +#define IN_RS 0x00000200ULL +#define IN_RS_ADDR (0x00000400ULL | IN_RS) +#define IN_RS_SHIFT (0x00000800ULL | IN_RS) +#define IN_RT 0x00001000ULL +#define IN_SA 0x00002000ULL +#define IN_IMM16 0x00004000ULL +#define IN_IMM26 0x00008000ULL +#define IN_MEM 0x00010000ULL +#define IN_OTHER 0x00020000ULL +#define IN_FPUFLAG 0x00040000ULL +#define IN_VFPU_CC 0x00080000ULL -#define OUT_RT 0x00100000 -#define OUT_RD 0x00200000 -#define OUT_RA 0x00400000 -#define OUT_MEM 0x00800000 -#define OUT_OTHER 0x01000000 -#define OUT_FPUFLAG 0x02000000 -#define OUT_VFPU_CC 0x04000000 -#define OUT_EAT_PREFIX 0x08000000 +#define OUT_RT 0x00100000ULL +#define OUT_RD 0x00200000ULL +#define OUT_RA 0x00400000ULL +#define OUT_MEM 0x00800000ULL +#define OUT_OTHER 0x01000000ULL +#define OUT_FPUFLAG 0x02000000ULL +#define OUT_VFPU_CC 0x04000000ULL +#define OUT_EAT_PREFIX 0x08000000ULL -#define VFPU_NO_PREFIX 0x10000000 -#define IS_VFPU 0x20000000 -#define IS_FPU 0x40000000 +#define VFPU_NO_PREFIX 0x10000000ULL +#define IS_VFPU 0x20000000ULL +#define IS_FPU 0x40000000ULL + +#define IN_FS 0x000100000000ULL +#define IN_FT 0x000200000000ULL +#define IN_LO 0x000400000000ULL +#define IN_HI 0x000800000000ULL + +#define OUT_FD 0x001000000000ULL +#define OUT_FS 0x002000000000ULL +#define OUT_LO 0x004000000000ULL +#define OUT_HI 0x008000000000ULL + +#define IN_VS 0x010000000000ULL +#define IN_VT 0x020000000000ULL + +#define OUT_VD 0x100000000000ULL #ifndef CDECL #define CDECL diff --git a/Windows/Debugger/CtrlDisAsmView.cpp b/Windows/Debugger/CtrlDisAsmView.cpp index ec90a340e4..3fe114c135 100644 --- a/Windows/Debugger/CtrlDisAsmView.cpp +++ b/Windows/Debugger/CtrlDisAsmView.cpp @@ -643,7 +643,7 @@ void CtrlDisAsmView::followBranch() } else if (line.info.hasRelevantAddress) { // well, not exactly a branch, but we can do something anyway - SendMessage(GetParent(wnd),WM_DEB_GOTOHEXEDIT,line.info.releventAddress,0); + SendMessage(GetParent(wnd),WM_DEB_GOTOHEXEDIT,line.info.relevantAddress,0); SetFocus(wnd); } } else if (line.type == DISTYPE_DATA) From 7bde97606919f8a012b5991645903e21d906daf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 10 Oct 2014 20:41:00 +0200 Subject: [PATCH 3/3] Merge x64 emitter from a newer Dolphin version. This one can generate slightly smaller code by exploiting some EAX-only encoding and various other short forms, and adds support for many newer CPU instructions. --- Common/CPUDetect.cpp | 43 ++ Common/CPUDetect.h | 5 + Common/x64Emitter.cpp | 1086 ++++++++++++++++++++++++++--------------- Common/x64Emitter.h | 233 ++++++--- 4 files changed, 912 insertions(+), 455 deletions(-) diff --git a/Common/CPUDetect.cpp b/Common/CPUDetect.cpp index cb2e72eb23..188fbdaf28 100644 --- a/Common/CPUDetect.cpp +++ b/Common/CPUDetect.cpp @@ -49,6 +49,17 @@ void do_cpuid(u32 regs[4], u32 cpuid_leaf) { #ifdef _M_SSE #include + +#define _XCR_XFEATURE_ENABLED_MASK 0 +static unsigned long long _xgetbv(unsigned int index) +{ + unsigned int eax, edx; + __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); + return ((unsigned long long)edx << 32) | eax; +} + +#else +#define _XCR_XFEATURE_ENABLED_MASK 0 #endif #if defined __FreeBSD__ @@ -172,6 +183,38 @@ void CPUInfo::Detect() { bFMA = true; } if ((cpu_id[2] >> 25) & 1) bAES = true; + + if ((cpu_id[3] >> 24) & 1) + { + // We can use FXSAVE. + bFXSR = true; + } + + // AVX support requires 3 separate checks: + // - Is the AVX bit set in CPUID? + // - Is the XSAVE bit set in CPUID? + // - XGETBV result has the XCR bit set. + if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) + { + if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) + { + bAVX = true; + if ((cpu_id[2] >> 12) & 1) + bFMA = true; + } + } + + if (max_std_fn >= 7) + { + do_cpuid(cpu_id, 0x00000007); + // careful; we can't enable AVX2 unless the XSAVE/XGETBV checks above passed + if ((cpu_id[1] >> 5) & 1) + bAVX2 = bAVX; + if ((cpu_id[1] >> 3) & 1) + bBMI1 = true; + if ((cpu_id[1] >> 8) & 1) + bBMI2 = true; + } } if (max_ex_fn >= 0x80000004) { // Extract brand string diff --git a/Common/CPUDetect.h b/Common/CPUDetect.h index 04c615b412..091e8f9713 100644 --- a/Common/CPUDetect.h +++ b/Common/CPUDetect.h @@ -56,10 +56,15 @@ struct CPUInfo { bool bLZCNT; bool bSSE4A; bool bAVX; + bool bAVX2; bool bFMA; bool bAES; bool bLAHFSAHF64; bool bLongMode; + bool bBMI1; + bool bBMI2; + bool bMOVBE; + bool bFXSR; // ARM specific CPUInfo bool bSwp; diff --git a/Common/x64Emitter.cpp b/Common/x64Emitter.cpp index f454296470..c4455067e4 100644 --- a/Common/x64Emitter.cpp +++ b/Common/x64Emitter.cpp @@ -34,27 +34,28 @@ namespace Gen // TODO(ector): Add EAX special casing, for ever so slightly smaller code. struct NormalOpDef { - u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, ext; + u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext; }; -static const NormalOpDef nops[11] = +// 0xCC is code for invalid combination of immediates +static const NormalOpDef normalops[11] = { - {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0}, //ADD - {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 2}, //ADC + {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, //ADD + {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, //ADC - {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 5}, //SUB - {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 3}, //SBB + {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, //SUB + {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, //SBB - {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 4}, //AND - {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 1}, //OR + {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, //AND + {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, //OR - {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 6}, //XOR - {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0}, //MOV + {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, //XOR + {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, //MOV - {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0}, //TEST (to == from) - {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 7}, //CMP + {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, //TEST (to == from) + {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, //CMP - {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 7}, //XCHG + {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, //XCHG }; enum NormalSSEOps @@ -76,10 +77,16 @@ enum NormalSSEOps sseRSQRT = 0x52, //RSQRT (NO DOUBLE PRECISION!!!) sseMOVAPfromRM = 0x28, //MOVAP from RM sseMOVAPtoRM = 0x29, //MOVAP to RM - sseMOVUPfromRM = 0x10, //MOVUP from RM + sseMOVUPfromRM = 0x10, //MOVUP from RM + sseMOVUPtoRM = 0x11, //MOVUP to RM + sseMOVLPDfromRM= 0x12, + sseMOVLPDtoRM = 0x13, + sseMOVHPDfromRM= 0x16, + sseMOVHPDtoRM = 0x17, + sseMOVHLPS = 0x12, + sseMOVLHPS = 0x16, sseMOVDQfromRM = 0x6F, sseMOVDQtoRM = 0x7F, - sseMOVUPtoRM = 0x11, //MOVUP to RM sseMASKMOVDQU = 0xF7, sseLDDQU = 0xF0, sseSHUF = 0xC6, @@ -133,6 +140,14 @@ const u8 *XEmitter::AlignCodePage() return code; } +// This operation modifies flags; check to see the flags are locked. +// If the flags are locked, we should immediately and loudly fail before +// causing a subtle JIT bug. +void XEmitter::CheckFlags() +{ + _assert_msg_(DYNA_REC, !flags_locked, "Attempt to modify flags while flags locked!"); +} + void XEmitter::WriteModRM(int mod, int reg, int rm) { Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7))); @@ -148,51 +163,42 @@ void OpArg::WriteRex(XEmitter *emit, int opBits, int bits, int customOp) const if (customOp == -1) customOp = operandReg; #ifdef _M_X64 u8 op = 0x40; + // REX.W (whether operation is a 64-bit operation) if (opBits == 64) op |= 8; + // REX.R (whether ModR/M reg field refers to R8-R15. if (customOp & 8) op |= 4; + // REX.X (whether ModR/M SIB index field refers to R8-R15) if (indexReg & 8) op |= 2; - if (offsetOrBaseReg & 8) op |= 1; //TODO investigate if this is dangerous + // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15) + if (offsetOrBaseReg & 8) op |= 1; + // Write REX if wr have REX bits to write, or if the operation accesses + // SIL, DIL, BPL, or SPL. if (op != 0x40 || - (bits == 8 && (offsetOrBaseReg & 0x10c) == 4) || - (opBits == 8 && (customOp & 0x10c) == 4)) { + (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) || + (opBits == 8 && (customOp & 0x10c) == 4)) + { emit->Write8(op); - _dbg_assert_(DYNA_REC, (offsetOrBaseReg & 0x100) == 0 || bits != 8); - _dbg_assert_(DYNA_REC, (customOp & 0x100) == 0 || opBits != 8); - } else { - _dbg_assert_(DYNA_REC, (offsetOrBaseReg & 0x10c) == 0 || - (offsetOrBaseReg & 0x10c) == 0x104 || - bits != 8); - _dbg_assert_(DYNA_REC, (customOp & 0x10c) == 0 || - (customOp & 0x10c) == 0x104 || - opBits != 8); + // Check the operation doesn't access AH, BH, CH, or DH. + _dbg_assert_(DYNA_REC, (offsetOrBaseReg & 0x100) == 0); + _dbg_assert_(DYNA_REC, (customOp & 0x100) == 0); } - #else _dbg_assert_(DYNA_REC, opBits != 64); _dbg_assert_(DYNA_REC, (customOp & 8) == 0 || customOp == -1); _dbg_assert_(DYNA_REC, (indexReg & 8) == 0); _dbg_assert_(DYNA_REC, (offsetOrBaseReg & 8) == 0); _dbg_assert_(DYNA_REC, opBits != 8 || (customOp & 0x10c) != 4 || customOp == -1); - _dbg_assert_(DYNA_REC, bits != 8 || (offsetOrBaseReg & 0x10c) != 4); + _dbg_assert_(DYNA_REC, scale == SCALE_ATREG || bits != 8 || (offsetOrBaseReg & 0x10c) != 4); #endif } -void OpArg::WriteVex(XEmitter* emit, int size, int packed, Gen::X64Reg regOp1, Gen::X64Reg regOp2) const +void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W) const { int R = !(regOp1 & 8); int X = !(indexReg & 8); int B = !(offsetOrBaseReg & 8); - // not so sure about this one... - int W = 0; - - // aka map_select in AMD manuals - // only support VEX opcode map 1 for now (analog to secondary opcode map) - int mmmmm = 1; - int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf); - int L = size == 256; - int pp = (packed << 1) | (size == 64); // do we need any VEX fields that only appear in the three-byte form? if (X == 1 && B == 1 && W == 0 && mmmmm == 1) @@ -214,7 +220,7 @@ void OpArg::WriteVex(XEmitter* emit, int size, int packed, Gen::X64Reg regOp1, G void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, bool warn_64bit_offset) const { - if (_operandReg == 0xff) + if (_operandReg == INVALID_REG) _operandReg = (X64Reg)this->operandReg; int mod = 0; int ireg = indexReg; @@ -225,16 +231,17 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, { // Oh, RIP addressing. _offsetOrBaseReg = 5; - emit->WriteModRM(0, _operandReg&7, 5); + emit->WriteModRM(0, _operandReg, _offsetOrBaseReg); //TODO : add some checks #ifdef _M_X64 u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes; s64 distance = (s64)offset - (s64)ripAddr; - _assert_msg_(DYNA_REC, (distance < 0x80000000LL - && distance >= -0x80000000LL) || - !warn_64bit_offset, - "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")", - ripAddr, offset); + _assert_msg_(DYNA_REC, + (distance < 0x80000000LL && + distance >= -0x80000000LL) || + !warn_64bit_offset, + "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")", + ripAddr, offset); s32 offs = (s32)distance; emit->Write32((u32)offs); #else @@ -349,7 +356,6 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, } } - // W = operand extended width (1 if 64-bit) // R = register# upper bit // X = scale amnt upper bit @@ -381,9 +387,9 @@ void XEmitter::JMP(const u8 *addr, bool force5Bytes) { s64 distance = (s64)(fn - ((u64)code + 5)); - _assert_msg_(DYNA_REC, distance >= -0x80000000LL - && distance < 0x80000000LL, - "Jump target too far away, needs indirect register"); + _assert_msg_(DYNA_REC, + distance >= -0x80000000LL && distance < 0x80000000LL, + "Jump target too far away, needs indirect register"); Write8(0xE9); Write32((u32)(s32)distance); } @@ -419,9 +425,10 @@ void XEmitter::CALLptr(OpArg arg) void XEmitter::CALL(const void *fnptr) { u64 distance = u64(fnptr) - (u64(code) + 5); - _assert_msg_(DYNA_REC, distance < 0x0000000080000000ULL - || distance >= 0xFFFFFFFF80000000ULL, - "CALL out of range (%p calls %p)", code, fnptr); + _assert_msg_(DYNA_REC, + distance < 0x0000000080000000ULL || + distance >= 0xFFFFFFFF80000000ULL, + "CALL out of range (%p calls %p)", code, fnptr); Write8(0xE8); Write32(u32(distance)); } @@ -465,27 +472,25 @@ FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes) return branch; } -void XEmitter::J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes) +void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes) { u64 fn = (u64)addr; - if (!force5Bytes) + s64 distance = (s64)(fn - ((u64)code + 2)); + if (distance < -0x80 || distance >= 0x80 || force5bytes) { - s64 distance = (s64)(fn - ((u64)code + 2)); - _assert_msg_(DYNA_REC, distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); - //8 bits will do - Write8(0x70 + conditionCode); - Write8((u8)(s8)distance); - } - else - { - s64 distance = (s64)(fn - ((u64)code + 6)); - _assert_msg_(DYNA_REC, distance >= -0x80000000LL - && distance < 0x80000000LL, - "Jump target too far away, needs indirect register"); + distance = (s64)(fn - ((u64)code + 6)); + _assert_msg_(DYNA_REC, + distance >= -0x80000000LL && distance < 0x80000000LL, + "Jump target too far away, needs indirect register"); Write8(0x0F); Write8(0x80 + conditionCode); Write32((u32)(s32)distance); } + else + { + Write8(0x70 + conditionCode); + Write8((u8)(s8)distance); + } } void XEmitter::SetJumpTarget(const FixupBranch &branch) @@ -534,30 +539,71 @@ void XEmitter::INT3() {Write8(0xCC);} void XEmitter::RET() {Write8(0xC3);} void XEmitter::RET_FAST() {Write8(0xF3); Write8(0xC3);} //two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a ret -void XEmitter::NOP(int count) +// The first sign of decadence: optimized NOPs. +void XEmitter::NOP(size_t size) { - // TODO: look up the fastest nop sleds for various sizes - int i; - switch (count) { - case 1: - Write8(0x90); - break; - case 2: - Write8(0x66); - Write8(0x90); - break; - default: - for (i = 0; i < count; i++) { + _dbg_assert_(DYNA_REC, (int)size > 0); + while (true) + { + switch (size) + { + case 0: + return; + case 1: Write8(0x90); + return; + case 2: + Write8(0x66); Write8(0x90); + return; + case 3: + Write8(0x0F); Write8(0x1F); Write8(0x00); + return; + case 4: + Write8(0x0F); Write8(0x1F); Write8(0x40); Write8(0x00); + return; + case 5: + Write8(0x0F); Write8(0x1F); Write8(0x44); Write8(0x00); + Write8(0x00); + return; + case 6: + Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x44); + Write8(0x00); Write8(0x00); + return; + case 7: + Write8(0x0F); Write8(0x1F); Write8(0x80); Write8(0x00); + Write8(0x00); Write8(0x00); Write8(0x00); + return; + case 8: + Write8(0x0F); Write8(0x1F); Write8(0x84); Write8(0x00); + Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00); + return; + case 9: + Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x84); + Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00); + Write8(0x00); + return; + case 10: + Write8(0x66); Write8(0x66); Write8(0x0F); Write8(0x1F); + Write8(0x84); Write8(0x00); Write8(0x00); Write8(0x00); + Write8(0x00); Write8(0x00); + return; + default: + // Even though x86 instructions are allowed to be up to 15 bytes long, + // AMD advises against using NOPs longer than 11 bytes because they + // carry a performance penalty on CPUs older than AMD family 16h. + Write8(0x66); Write8(0x66); Write8(0x66); Write8(0x0F); + Write8(0x1F); Write8(0x84); Write8(0x00); Write8(0x00); + Write8(0x00); Write8(0x00); Write8(0x00); + size -= 11; + continue; } - break; } } void XEmitter::PAUSE() {Write8(0xF3); NOP();} //use in tight spinloops for energy saving on some cpu -void XEmitter::CLC() {Write8(0xF8);} //clear carry -void XEmitter::CMC() {Write8(0xF5);} //flip carry -void XEmitter::STC() {Write8(0xF9);} //set carry +void XEmitter::CLC() {CheckFlags(); Write8(0xF8);} //clear carry +void XEmitter::CMC() {CheckFlags(); Write8(0xF5);} //flip carry +void XEmitter::STC() {CheckFlags(); Write8(0xF9);} //set carry //TODO: xchg ah, al ??? void XEmitter::XCHG_AHAL() @@ -569,10 +615,10 @@ void XEmitter::XCHG_AHAL() //These two can not be executed on early Intel 64-bit CPU:s, only on AMD! void XEmitter::LAHF() {Write8(0x9F);} -void XEmitter::SAHF() {Write8(0x9E);} +void XEmitter::SAHF() {CheckFlags(); Write8(0x9E);} void XEmitter::PUSHF() {Write8(0x9C);} -void XEmitter::POPF() {Write8(0x9D);} +void XEmitter::POPF() {CheckFlags(); Write8(0x9D);} void XEmitter::LFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xE8);} void XEmitter::MFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF0);} @@ -580,14 +626,16 @@ void XEmitter::SFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF8);} void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg) { - if (bits == 16) {Write8(0x66);} + if (bits == 16) + Write8(0x66); Rex(bits == 64, 0, 0, (int)reg >> 3); Write8(byte + ((int)reg & 7)); } void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) { - if (bits == 16) {Write8(0x66);} + if (bits == 16) + Write8(0x66); Rex(bits==64, 0, 0, (int)reg >> 3); Write8(byte1); Write8(byte2 + ((int)reg & 7)); @@ -595,14 +643,16 @@ void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) void XEmitter::CWD(int bits) { - if (bits == 16) {Write8(0x66);} + if (bits == 16) + Write8(0x66); Rex(bits == 64, 0, 0, 0); Write8(0x99); } void XEmitter::CBW(int bits) { - if (bits == 8) {Write8(0x66);} + if (bits == 8) + Write8(0x66); Rex(bits == 32, 0, 0, 0); Write8(0x98); } @@ -655,7 +705,7 @@ void XEmitter::POP(int /*bits*/, const OpArg ®) if (reg.IsSimpleReg()) POP(reg.GetSimpleReg()); else - INT3(); + _assert_msg_(DYNA_REC, 0, "POP - Unsupported encoding"); } void XEmitter::BSWAP(int bits, X64Reg reg) @@ -688,7 +738,7 @@ void XEmitter::UD2() void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) { - if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "PREFETCH - Imm argument");; + _assert_msg_(DYNA_REC, !arg.IsImm(), "PREFETCH - Imm argument"); arg.operandReg = (u8)level; arg.WriteRex(this, 0, 0); Write8(0x0F); @@ -698,9 +748,9 @@ void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) void XEmitter::SETcc(CCFlags flag, OpArg dest) { - if (dest.IsImm()) _assert_msg_(DYNA_REC, 0, "SETcc - Imm argument"); + _assert_msg_(DYNA_REC, !dest.IsImm(), "SETcc - Imm argument"); dest.operandReg = 0; - dest.WriteRex(this, 0, 0); + dest.WriteRex(this, 0, 8); Write8(0x0F); Write8(0x90 + (u8)flag); dest.WriteRest(this); @@ -708,7 +758,10 @@ void XEmitter::SETcc(CCFlags flag, OpArg dest) void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) { - if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "CMOVcc - Imm argument"); + _assert_msg_(DYNA_REC, !src.IsImm(), "CMOVcc - Imm argument"); + _assert_msg_(DYNA_REC, bits != 8, "CMOVcc - 8 bits unsupported"); + if (bits == 16) + Write8(0x66); src.operandReg = dest; src.WriteRex(this, bits, bits); Write8(0x0F); @@ -718,10 +771,12 @@ void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) { - if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "WriteMulDivType - Imm argument"); + _assert_msg_(DYNA_REC, !src.IsImm(), "WriteMulDivType - Imm argument"); + CheckFlags(); src.operandReg = ext; - if (bits == 16) Write8(0x66); - src.WriteRex(this, bits, bits); + if (bits == 16) + Write8(0x66); + src.WriteRex(this, bits, bits, 0); if (bits == 8) { Write8(0xF6); @@ -740,11 +795,15 @@ void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);} void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);} void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);} -void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2) +void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) { - if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "WriteBitSearchType - Imm argument"); + _assert_msg_(DYNA_REC, !src.IsImm(), "WriteBitSearchType - Imm argument"); + CheckFlags(); src.operandReg = (u8)dest; - if (bits == 16) Write8(0x66); + if (bits == 16) + Write8(0x66); + if (rep) + Write8(0xF3); src.WriteRex(this, bits, bits); Write8(0x0F); Write8(byte2); @@ -753,22 +812,40 @@ void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2) void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src) { - if (bits <= 16) _assert_msg_(DYNA_REC, 0, "MOVNTI - bits<=16"); + if (bits <= 16) + _assert_msg_(DYNA_REC, 0, "MOVNTI - bits<=16"); WriteBitSearchType(bits, src, dest, 0xC3); } void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit +void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src) +{ + CheckFlags(); + if (!cpu_info.bBMI1) + PanicAlert("Trying to use BMI1 on a system that doesn't support it. Bad programmer."); + WriteBitSearchType(bits, dest, src, 0xBC, true); +} +void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src) +{ + CheckFlags(); + if (!cpu_info.bLZCNT) + PanicAlert("Trying to use LZCNT on a system that doesn't support it. Bad programmer."); + WriteBitSearchType(bits, dest, src, 0xBD, true); +} + void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) { - if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "MOVSX - Imm argument"); - if (dbits == sbits) { + _assert_msg_(DYNA_REC, !src.IsImm(), "MOVSX - Imm argument"); + if (dbits == sbits) + { MOV(dbits, R(dest), src); return; } src.operandReg = (u8)dest; - if (dbits == 16) Write8(0x66); + if (dbits == 16) + Write8(0x66); src.WriteRex(this, dbits, sbits); if (sbits == 8) { @@ -793,13 +870,15 @@ void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) { - if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "MOVZX - Imm argument"); - if (dbits == sbits) { + _assert_msg_(DYNA_REC, !src.IsImm(), "MOVZX - Imm argument"); + if (dbits == sbits) + { MOV(dbits, R(dest), src); return; } src.operandReg = (u8)dest; - if (dbits == 16) Write8(0x66); + if (dbits == 16) + Write8(0x66); //the 32bit result is automatically zero extended to 64bit src.WriteRex(this, dbits == 64 ? 32 : dbits, sbits); if (sbits == 8) @@ -818,25 +897,59 @@ void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) } else { - Crash(); + _assert_msg_(DYNA_REC, 0, "MOVZX - Invalid size"); } src.WriteRest(this); } +void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src) +{ + _assert_msg_(DYNA_REC, cpu_info.bMOVBE, "Generating MOVBE on a system that does not support it."); + if (bits == 8) + { + MOV(bits, dest, src); + return; + } + + if (bits == 16) + Write8(0x66); + + if (dest.IsSimpleReg()) + { + _assert_msg_(DYNA_REC, !src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem"); + src.WriteRex(this, bits, bits, dest.GetSimpleReg()); + Write8(0x0F); Write8(0x38); Write8(0xF0); + src.WriteRest(this, 0, dest.GetSimpleReg()); + } + else if (src.IsSimpleReg()) + { + _assert_msg_(DYNA_REC, !dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem"); + dest.WriteRex(this, bits, bits, src.GetSimpleReg()); + Write8(0x0F); Write8(0x38); Write8(0xF1); + dest.WriteRest(this, 0, src.GetSimpleReg()); + } + else + { + _assert_msg_(DYNA_REC, 0, "MOVBE: Not loading or storing to mem"); + } +} + void XEmitter::LEA(int bits, X64Reg dest, OpArg src) { - if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "LEA - Imm argument"); + _assert_msg_(DYNA_REC, !src.IsImm(), "LEA - Imm argument"); src.operandReg = (u8)dest; - if (bits == 16) Write8(0x66); //TODO: performance warning + if (bits == 16) + Write8(0x66); //TODO: performance warning src.WriteRex(this, bits, bits); Write8(0x8D); - src.WriteRest(this, 0, (X64Reg)0xFF, bits == 64); + src.WriteRest(this, 0, INVALID_REG, bits == 64); } //shift can be either imm8 or cl void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext) { + CheckFlags(); bool writeImm = false; if (dest.IsImm()) { @@ -847,7 +960,8 @@ void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext) _assert_msg_(DYNA_REC, 0, "WriteShift - illegal argument"); } dest.operandReg = ext; - if (bits == 16) Write8(0x66); + if (bits == 16) + Write8(0x66); dest.WriteRex(this, bits, bits, 0); if (shift.GetImmBits() == 8) { @@ -885,6 +999,7 @@ void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, sh // index can be either imm8 or register, don't use memory destination because it's slow void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext) { + CheckFlags(); if (dest.IsImm()) { _assert_msg_(DYNA_REC, 0, "WriteBitTest - can't test imms"); @@ -893,7 +1008,8 @@ void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext) { _assert_msg_(DYNA_REC, 0, "WriteBitTest - illegal argument"); } - if (bits == 16) Write8(0x66); + if (bits == 16) + Write8(0x66); if (index.IsImm()) { dest.WriteRex(this, bits, bits); @@ -918,6 +1034,7 @@ void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, //shift can be either imm8 or cl void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift) { + CheckFlags(); if (dest.IsImm()) { _assert_msg_(DYNA_REC, 0, "SHRD - can't use imms as destination"); @@ -930,7 +1047,8 @@ void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift) { _assert_msg_(DYNA_REC, 0, "SHRD - illegal shift"); } - if (bits == 16) Write8(0x66); + if (bits == 16) + Write8(0x66); X64Reg operand = src.GetSimpleReg(); dest.WriteRex(this, bits, bits, operand); if (shift.GetImmBits() == 8) @@ -948,6 +1066,7 @@ void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift) void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift) { + CheckFlags(); if (dest.IsImm()) { _assert_msg_(DYNA_REC, 0, "SHLD - can't use imms as destination"); @@ -960,7 +1079,8 @@ void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift) { _assert_msg_(DYNA_REC, 0, "SHLD - illegal shift"); } - if (bits == 16) Write8(0x66); + if (bits == 16) + Write8(0x66); X64Reg operand = src.GetSimpleReg(); dest.WriteRex(this, bits, bits, operand); if (shift.GetImmBits() == 8) @@ -990,7 +1110,7 @@ void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bit //operand can either be immediate or register void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const { - X64Reg _operandReg = (X64Reg)this->operandReg; + X64Reg _operandReg; if (IsImm()) { _assert_msg_(DYNA_REC, 0, "WriteNormalOp - Imm argument, wrong order"); @@ -1003,7 +1123,6 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o if (operand.IsImm()) { - _operandReg = (X64Reg)0; WriteRex(emit, bits, bits); if (!toRM) @@ -1013,26 +1132,81 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o if (operand.scale == SCALE_IMM8 && bits == 8) { - emit->Write8(nops[op].imm8); + // op al, imm8 + if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC) + { + emit->Write8(normalops[op].eaximm8); + emit->Write8((u8)operand.offset); + return; + } + // mov reg, imm8 + if (!scale && op == nrmMOV) + { + emit->Write8(0xB0 + (offsetOrBaseReg & 7)); + emit->Write8((u8)operand.offset); + return; + } + // op r/m8, imm8 + emit->Write8(normalops[op].imm8); immToWrite = 8; } else if ((operand.scale == SCALE_IMM16 && bits == 16) || (operand.scale == SCALE_IMM32 && bits == 32) || (operand.scale == SCALE_IMM32 && bits == 64)) { - emit->Write8(nops[op].imm32); - immToWrite = bits == 16 ? 16 : 32; + // Try to save immediate size if we can, but first check to see + // if the instruction supports simm8. + // op r/m, imm8 + if (normalops[op].simm8 != 0xCC && + ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) || + (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset))) + { + emit->Write8(normalops[op].simm8); + immToWrite = 8; + } + else + { + // mov reg, imm + if (!scale && op == nrmMOV && bits != 64) + { + emit->Write8(0xB8 + (offsetOrBaseReg & 7)); + if (bits == 16) + emit->Write16((u16)operand.offset); + else + emit->Write32((u32)operand.offset); + return; + } + // op eax, imm + if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC) + { + emit->Write8(normalops[op].eaximm32); + if (bits == 16) + emit->Write16((u16)operand.offset); + else + emit->Write32((u32)operand.offset); + return; + } + // op r/m, imm + emit->Write8(normalops[op].imm32); + immToWrite = bits == 16 ? 16 : 32; + } } else if ((operand.scale == SCALE_IMM8 && bits == 16) || (operand.scale == SCALE_IMM8 && bits == 32) || (operand.scale == SCALE_IMM8 && bits == 64)) { - emit->Write8(nops[op].simm8); + // op r/m, imm8 + emit->Write8(normalops[op].simm8); immToWrite = 8; } else if (operand.scale == SCALE_IMM64 && bits == 64) { - if (op == nrmMOV) + if (scale) + { + _assert_msg_(DYNA_REC, 0, "WriteNormalOp - MOV with 64-bit imm requres register destination"); + } + // mov reg64, imm64 + else if (op == nrmMOV) { emit->Write8(0xB8 + (offsetOrBaseReg & 7)); emit->Write64((u64)operand.offset); @@ -1044,25 +1218,24 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o { _assert_msg_(DYNA_REC, 0, "WriteNormalOp - Unhandled case"); } - _operandReg = (X64Reg)nops[op].ext; //pass extension in REG of ModRM + _operandReg = (X64Reg)normalops[op].ext; //pass extension in REG of ModRM } else { _operandReg = (X64Reg)operand.offsetOrBaseReg; WriteRex(emit, bits, bits, _operandReg); - // mem/reg or reg/reg op + // op r/m, reg if (toRM) { - emit->Write8(bits == 8 ? nops[op].toRm8 : nops[op].toRm32); - // _assert_msg_(DYNA_REC, code[-1] != 0xCC, "ARGH4"); + emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32); } + // op reg, r/m else { - emit->Write8(bits == 8 ? nops[op].fromRm8 : nops[op].fromRm32); - // _assert_msg_(DYNA_REC, code[-1] != 0xCC, "ARGH5"); + emit->Write8(bits == 8 ? normalops[op].fromRm8 : normalops[op].fromRm32); } } - WriteRest(emit, immToWrite>>3, _operandReg); + WriteRest(emit, immToWrite >> 3, _operandReg); switch (immToWrite) { case 0: @@ -1101,40 +1274,44 @@ void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg } else { + _assert_msg_(DYNA_REC, a2.IsSimpleReg() || a2.IsImm(), "WriteNormalOp - a1 and a2 cannot both be memory"); a1.WriteNormalOp(emit, true, op, a2, bits); } } } -void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmADD, a1, a2);} -void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmADC, a1, a2);} -void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmSUB, a1, a2);} -void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmSBB, a1, a2);} -void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmAND, a1, a2);} -void XEmitter::OR (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmOR , a1, a2);} -void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXOR, a1, a2);} +void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);} +void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);} +void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);} +void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);} +void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);} +void XEmitter::OR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);} +void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);} void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2) { -#ifdef _DEBUG - _assert_msg_(DYNA_REC, !a1.IsSimpleReg() || !a2.IsSimpleReg() || a1.GetSimpleReg() != a2.GetSimpleReg(), "Redundant MOV @ %p - bug in DYNA_REC?", - code); -#endif + if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg()) + ERROR_LOG(DYNA_REC, "Redundant MOV @ %p - bug in JIT?", code); WriteNormalOp(this, bits, nrmMOV, a1, a2); } -void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmTEST, a1, a2);} -void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmCMP, a1, a2);} +void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);} +void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);} void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);} void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) { - if (bits == 8) { + CheckFlags(); + if (bits == 8) + { _assert_msg_(DYNA_REC, 0, "IMUL - illegal bit size!"); return; } - if (a1.IsImm()) { + + if (a1.IsImm()) + { _assert_msg_(DYNA_REC, 0, "IMUL - second arg cannot be imm!"); return; } + if (!a2.IsImm()) { _assert_msg_(DYNA_REC, 0, "IMUL - third arg must be imm!"); @@ -1145,20 +1322,29 @@ void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) Write8(0x66); a1.WriteRex(this, bits, bits, regOp); - if (a2.GetImmBits() == 8) { + if (a2.GetImmBits() == 8 || + (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) || + (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset)) + { Write8(0x6B); a1.WriteRest(this, 1, regOp); Write8((u8)a2.offset); - } else { + } + else + { Write8(0x69); - if (a2.GetImmBits() == 16 && bits == 16) { + if (a2.GetImmBits() == 16 && bits == 16) + { a1.WriteRest(this, 2, regOp); Write16((u16)a2.offset); - } else if (a2.GetImmBits() == 32 && - (bits == 32 || bits == 64)) { - a1.WriteRest(this, 4, regOp); - Write32((u32)a2.offset); - } else { + } + else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64)) + { + a1.WriteRest(this, 4, regOp); + Write32((u32)a2.offset); + } + else + { _assert_msg_(DYNA_REC, 0, "IMUL - unhandled case!"); } } @@ -1166,10 +1352,13 @@ void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a) { - if (bits == 8) { + CheckFlags(); + if (bits == 8) + { _assert_msg_(DYNA_REC, 0, "IMUL - illegal bit size!"); return; } + if (a.IsImm()) { IMUL(bits, regOp, R(regOp), a) ; @@ -1185,49 +1374,92 @@ void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a) } -void XEmitter::WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes) +void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) { - if (size == 64 && packed) - Write8(0x66); //this time, override goes upwards - if (!packed) - Write8(size == 64 ? 0xF2 : 0xF3); + if (opPrefix) + Write8(opPrefix); arg.operandReg = regOp; arg.WriteRex(this, 0, 0); Write8(0x0F); - Write8(sseOp); + if (op > 0xFF) + Write8((op >> 8) & 0xFF); + Write8(op & 0xFF); arg.WriteRest(this, extrabytes); } -void XEmitter::WriteSSEOp2(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes) +void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) { - if (size == 64 && packed) - Write8(0x66); //this time, override goes upwards - if (!packed) - Write8(size == 64 ? 0xF2 : 0xF3); - arg.operandReg = regOp; - arg.WriteRex(this, 0, 0); - Write8(0x0F); - Write8(0x38); - Write8(sseOp); - arg.WriteRest(this, extrabytes); + WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes); } -void XEmitter::WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes) +static int GetVEXmmmmm(u16 op) { - WriteAVXOp(size, sseOp, packed, regOp, X64Reg::INVALID_REG, arg, extrabytes); + // Currently, only 0x38 and 0x3A are used as secondary escape byte. + if ((op >> 8) == 0x3A) + return 3; + else if ((op >> 8) == 0x38) + return 2; + else + return 1; } -void XEmitter::WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +static int GetVEXpp(u8 opPrefix) { - arg.WriteVex(this, size, packed, regOp1, regOp2); - Write8(sseOp); + if (opPrefix == 0x66) + return 1; + else if (opPrefix == 0xF3) + return 2; + else if (opPrefix == 0xF2) + return 3; + else + return 0; +} + +void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +{ + if (!cpu_info.bAVX) + PanicAlert("Trying to use AVX on a system that doesn't support it. Bad programmer."); + int mmmmm = GetVEXmmmmm(op); + int pp = GetVEXpp(opPrefix); + // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size here + arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm); + Write8(op & 0xFF); arg.WriteRest(this, extrabytes, regOp1); } -void XEmitter::MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x6E, true, dest, arg, 0);} -void XEmitter::MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(64, 0x7E, true, src, arg, 0);} +// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2 +void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +{ + if (size != 32 && size != 64) + PanicAlert("VEX GPR instructions only support 32-bit and 64-bit modes!"); + int mmmmm = GetVEXmmmmm(op); + int pp = GetVEXpp(opPrefix); + arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm, size == 64); + Write8(op & 0xFF); + arg.WriteRest(this, extrabytes, regOp1); +} -void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) { +void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +{ + CheckFlags(); + if (!cpu_info.bBMI1) + PanicAlert("Trying to use BMI1 on a system that doesn't support it. Bad programmer."); + WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); +} + +void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +{ + CheckFlags(); + if (!cpu_info.bBMI2) + PanicAlert("Trying to use BMI2 on a system that doesn't support it. Bad programmer."); + WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); +} + +void XEmitter::MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6E, dest, arg, 0);} +void XEmitter::MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(0x66, 0x7E, src, arg, 0);} + +void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) +{ #ifdef _M_X64 // Alternate encoding // This does not display correctly in MSVC's debugger, it thinks it's a MOVD @@ -1246,10 +1478,9 @@ void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) { #endif } -void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) { - if (arg.IsSimpleReg()) - PanicAlert("Emitter: MOVQ_xmm doesn't support single registers as destination"); - if (src > 7) +void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) +{ + if (src > 7 || arg.IsSimpleReg()) { // Alternate encoding // This does not display correctly in MSVC's debugger, it thinks it's a MOVD @@ -1259,7 +1490,9 @@ void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) { Write8(0x0f); Write8(0x7E); arg.WriteRest(this, 0); - } else { + } + else + { arg.operandReg = src; arg.WriteRex(this, 0, 0); Write8(0x66); @@ -1284,119 +1517,128 @@ void XEmitter::WriteMXCSR(OpArg arg, int ext) void XEmitter::STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);} void XEmitter::LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);} -void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVNTDQ, true, regOp, arg);} -void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(32, sseMOVNTP, true, regOp, arg);} -void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVNTP, true, regOp, arg);} +void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);} +void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);} +void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);} -void XEmitter::ADDSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseADD, false, regOp, arg);} -void XEmitter::ADDSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseADD, false, regOp, arg);} -void XEmitter::SUBSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseSUB, false, regOp, arg);} -void XEmitter::SUBSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseSUB, false, regOp, arg);} -void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(32, sseCMP, false, regOp, arg,1); Write8(compare);} -void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(64, sseCMP, false, regOp, arg,1); Write8(compare);} -void XEmitter::MULSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMUL, false, regOp, arg);} -void XEmitter::MULSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMUL, false, regOp, arg);} -void XEmitter::DIVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseDIV, false, regOp, arg);} -void XEmitter::DIVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseDIV, false, regOp, arg);} -void XEmitter::MINSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMIN, false, regOp, arg);} -void XEmitter::MINSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMIN, false, regOp, arg);} -void XEmitter::MAXSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMAX, false, regOp, arg);} -void XEmitter::MAXSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMAX, false, regOp, arg);} -void XEmitter::SQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseSQRT, false, regOp, arg);} -void XEmitter::SQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseSQRT, false, regOp, arg);} -void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseRSQRT, false, regOp, arg);} +void XEmitter::ADDSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);} +void XEmitter::ADDSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);} +void XEmitter::SUBSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);} +void XEmitter::SUBSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);} +void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::MULSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);} +void XEmitter::MULSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);} +void XEmitter::DIVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);} +void XEmitter::DIVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);} +void XEmitter::MINSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);} +void XEmitter::MINSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);} +void XEmitter::MAXSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);} +void XEmitter::MAXSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);} +void XEmitter::SQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);} +void XEmitter::SQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);} +void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);} -void XEmitter::ADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseADD, true, regOp, arg);} -void XEmitter::ADDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseADD, true, regOp, arg);} -void XEmitter::SUBPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseSUB, true, regOp, arg);} -void XEmitter::SUBPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseSUB, true, regOp, arg);} -void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(32, sseCMP, true, regOp, arg,1); Write8(compare);} -void XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(64, sseCMP, true, regOp, arg,1); Write8(compare);} -void XEmitter::ANDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseAND, true, regOp, arg);} -void XEmitter::ANDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseAND, true, regOp, arg);} -void XEmitter::ANDNPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseANDN, true, regOp, arg);} -void XEmitter::ANDNPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseANDN, true, regOp, arg);} -void XEmitter::ORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseOR, true, regOp, arg);} -void XEmitter::ORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseOR, true, regOp, arg);} -void XEmitter::XORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseXOR, true, regOp, arg);} -void XEmitter::XORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseXOR, true, regOp, arg);} -void XEmitter::MULPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMUL, true, regOp, arg);} -void XEmitter::MULPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMUL, true, regOp, arg);} -void XEmitter::DIVPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseDIV, true, regOp, arg);} -void XEmitter::DIVPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseDIV, true, regOp, arg);} -void XEmitter::MINPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMIN, true, regOp, arg);} -void XEmitter::MINPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMIN, true, regOp, arg);} -void XEmitter::MAXPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMAX, true, regOp, arg);} -void XEmitter::MAXPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMAX, true, regOp, arg);} -void XEmitter::SQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseSQRT, true, regOp, arg);} -void XEmitter::SQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseSQRT, true, regOp, arg);} -void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseRSQRT, true, regOp, arg);} -void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(32, sseSHUF, true, regOp, arg,1); Write8(shuffle);} -void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(64, sseSHUF, true, regOp, arg,1); Write8(shuffle);} +void XEmitter::ADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseADD, regOp, arg);} +void XEmitter::ADDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseADD, regOp, arg);} +void XEmitter::SUBPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);} +void XEmitter::SUBPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);} +void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::ANDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseAND, regOp, arg);} +void XEmitter::ANDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseAND, regOp, arg);} +void XEmitter::ANDNPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);} +void XEmitter::ANDNPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);} +void XEmitter::ORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseOR, regOp, arg);} +void XEmitter::ORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseOR, regOp, arg);} +void XEmitter::XORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);} +void XEmitter::XORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);} +void XEmitter::MULPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);} +void XEmitter::MULPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);} +void XEmitter::DIVPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);} +void XEmitter::DIVPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);} +void XEmitter::MINPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);} +void XEmitter::MINPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);} +void XEmitter::MAXPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);} +void XEmitter::MAXPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);} +void XEmitter::SQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);} +void XEmitter::SQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);} +void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);} +void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);} +void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);} -void XEmitter::COMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseCOMIS, true, regOp, arg);} //weird that these should be packed -void XEmitter::COMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseCOMIS, true, regOp, arg);} //ordered -void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseUCOMIS, true, regOp, arg);} //unordered -void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseUCOMIS, true, regOp, arg);} +void XEmitter::COMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed +void XEmitter::COMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered +void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered +void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);} -void XEmitter::MOVAPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMOVAPfromRM, true, regOp, arg);} -void XEmitter::MOVAPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMOVAPfromRM, true, regOp, arg);} -void XEmitter::MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(32, sseMOVAPtoRM, true, regOp, arg);} -void XEmitter::MOVAPD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVAPtoRM, true, regOp, arg);} +void XEmitter::MOVAPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);} +void XEmitter::MOVAPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);} +void XEmitter::MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);} +void XEmitter::MOVAPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);} -void XEmitter::MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMOVUPfromRM, true, regOp, arg);} -void XEmitter::MOVUPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMOVUPfromRM, true, regOp, arg);} -void XEmitter::MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(32, sseMOVUPtoRM, true, regOp, arg);} -void XEmitter::MOVUPD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVUPtoRM, true, regOp, arg);} +void XEmitter::MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVUPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);} +void XEmitter::MOVUPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);} -void XEmitter::MOVDQA(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMOVDQfromRM, true, regOp, arg);} -void XEmitter::MOVDQA(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVDQtoRM, true, regOp, arg);} -void XEmitter::MOVDQU(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMOVDQfromRM, false, regOp, arg);} -void XEmitter::MOVDQU(OpArg arg, X64Reg regOp) {WriteSSEOp(32, sseMOVDQtoRM, false, regOp, arg);} +void XEmitter::MOVDQA(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);} +void XEmitter::MOVDQA(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);} +void XEmitter::MOVDQU(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);} +void XEmitter::MOVDQU(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);} -void XEmitter::MOVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMOVUPfromRM, false, regOp, arg);} -void XEmitter::MOVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMOVUPfromRM, false, regOp, arg);} -void XEmitter::MOVSS(OpArg arg, X64Reg regOp) {WriteSSEOp(32, sseMOVUPtoRM, false, regOp, arg);} -void XEmitter::MOVSD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVUPtoRM, false, regOp, arg);} +void XEmitter::MOVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVSS(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);} +void XEmitter::MOVSD(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);} -void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0x5A, true, regOp, arg);} -void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5A, true, regOp, arg);} +void XEmitter::MOVLPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMOVLPDfromRM, regOp, arg);} +void XEmitter::MOVHPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMOVHPDfromRM, regOp, arg);} +void XEmitter::MOVLPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVLPDtoRM, regOp, arg);} +void XEmitter::MOVHPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVHPDtoRM, regOp, arg);} -void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5A, false, regOp, arg);} -void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0x5A, false, regOp, arg);} -void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x2D, false, regOp, arg);} +void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));} +void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));} -void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0xE6, false, regOp, arg);} -void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0x5B, true, regOp, arg);} -void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0xE6, false, regOp, arg);} -void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5B, true, regOp, arg);} +void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);} +void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);} -void XEmitter::CVTSI2SS(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2A, false, xregdest, arg);} -void XEmitter::CVTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2D, false, xregdest, arg);} -void XEmitter::CVTTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2C, false, xregdest, arg);} -void XEmitter::CVTTPS2DQ(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x5B, false, xregdest, arg);} -void XEmitter::CVTTSD2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(64, 0x2C, false, xregdest, arg);} -void XEmitter::CVTTPD2DQ(X64Reg xregdest, OpArg arg) {WriteSSEOp(64, 0xE6, true, xregdest, arg); } +void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);} +void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);} +void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);} +void XEmitter::CVTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);} +void XEmitter::CVTSI2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);} +void XEmitter::CVTSI2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);} -void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(64, sseMASKMOVDQU, true, dest, R(src));} +void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);} +void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);} +void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);} +void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);} -void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(32, 0x50, true, dest, arg);} -void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x50, true, dest, arg);} +void XEmitter::CVTTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);} +void XEmitter::CVTTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);} +void XEmitter::CVTTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);} +void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);} -void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(64, sseLDDQU, false, dest, arg);} // For integer data only +void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));} + +void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);} +void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);} + +void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only // THESE TWO ARE UNTESTED. -void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(32, 0x14, true, dest, arg);} -void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(32, 0x15, true, dest, arg);} +void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);} +void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);} -void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x14, true, dest, arg);} -void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x15, true, dest, arg);} +void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x14, dest, arg);} +void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x15, dest, arg);} void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) { if (cpu_info.bSSE3) { - WriteSSEOp(64, 0x12, false, regOp, arg); //SSE3 movddup + WriteSSEOp(0xF2, 0x12, regOp, arg); //SSE3 movddup } else { @@ -1410,101 +1652,69 @@ void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) //There are a few more left // Also some integer instructions are missing -void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x6B, true, dest, arg);} -void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x63, true, dest, arg);} -//void PACKUSDW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x66, true, dest, arg);} // WRONG -void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x67, true, dest, arg);} +void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x6B, dest, arg);} +void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x63, dest, arg);} +void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, arg);} -void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x60, true, dest, arg);} -void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x61, true, dest, arg);} -void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x62, true, dest, arg);} -//void PUNPCKLQDQ(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x60, true, dest, arg);} +void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);} +void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);} +void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x62, dest, arg);} -void XEmitter::PMOVSXBW(X64Reg dest, const OpArg &arg) { - if (!cpu_info.bSSE4_1) { - PanicAlert("Trying to use PMOVSXBW on a system that doesn't support it. Bad programmer."); - } - WriteSSEOp2(64, 0x20, true, dest, arg); -} - -void XEmitter::PMOVSXBD(X64Reg dest, const OpArg &arg) { - if (!cpu_info.bSSE4_1) { - PanicAlert("Trying to use PMOVSXBD on a system that doesn't support it. Bad programmer."); - } - WriteSSEOp2(64, 0x21, true, dest, arg); -} - -void XEmitter::PMOVSXWD(X64Reg dest, const OpArg &arg) { - if (!cpu_info.bSSE4_1) { - PanicAlert("Trying to use PMOVSXWD on a system that doesn't support it. Bad programmer."); - } - WriteSSEOp2(64, 0x23, true, dest, arg); -} - -void XEmitter::PMOVZXBW(X64Reg dest, const OpArg &arg) { - if (!cpu_info.bSSE4_1) { - PanicAlert("Trying to use PMOVSXBW on a system that doesn't support it. Bad programmer."); - } - WriteSSEOp2(64, 0x30, true, dest, arg); -} - -void XEmitter::PMOVZXBD(X64Reg dest, const OpArg &arg) { - if (!cpu_info.bSSE4_1) { - PanicAlert("Trying to use PMOVSXBD on a system that doesn't support it. Bad programmer."); - } - WriteSSEOp2(64, 0x31, true, dest, arg); -} - -void XEmitter::PMOVZXWD(X64Reg dest, const OpArg &arg) { - if (!cpu_info.bSSE4_1) { - PanicAlert("Trying to use PMOVSXWD on a system that doesn't support it. Bad programmer."); - } - WriteSSEOp2(64, 0x33, true, dest, arg); -} - -void XEmitter::PSRLW(X64Reg reg, int shift) { - WriteSSEOp(64, 0x71, true, (X64Reg)2, R(reg)); +void XEmitter::PSRLW(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg)); Write8(shift); } -void XEmitter::PSRLD(X64Reg reg, int shift) { - WriteSSEOp(64, 0x72, true, (X64Reg)2, R(reg)); +void XEmitter::PSRLD(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg)); Write8(shift); } -void XEmitter::PSRLQ(X64Reg reg, int shift) { - WriteSSEOp(64, 0x73, true, (X64Reg)2, R(reg)); +void XEmitter::PSRLQ(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg)); Write8(shift); } -void XEmitter::PSLLW(X64Reg reg, int shift) { - WriteSSEOp(64, 0x71, true, (X64Reg)6, R(reg)); +void XEmitter::PSRLQ(X64Reg reg, OpArg arg) +{ + WriteSSEOp(0x66, 0xd3, reg, arg); +} + +void XEmitter::PSRLDQ(X64Reg reg, int shift) { + WriteSSEOp(0x66, 0x73, (X64Reg)3, R(reg)); Write8(shift); } -void XEmitter::PSLLD(X64Reg reg, int shift) { - WriteSSEOp(64, 0x72, true, (X64Reg)6, R(reg)); +void XEmitter::PSLLW(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg)); Write8(shift); } -void XEmitter::PSLLQ(X64Reg reg, int shift) { - WriteSSEOp(64, 0x73, true, (X64Reg)6, R(reg)); +void XEmitter::PSLLD(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg)); + Write8(shift); +} + +void XEmitter::PSLLQ(X64Reg reg, int shift) +{ + WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg)); Write8(shift); } void XEmitter::PSLLDQ(X64Reg reg, int shift) { - WriteSSEOp(64, 0x73, true, (X64Reg)7, R(reg)); - Write8(shift); -} - -void XEmitter::PSRLDQ(X64Reg reg, int shift) { - WriteSSEOp(64, 0x73, true, (X64Reg)3, R(reg)); + WriteSSEOp(0x66, 0x73, (X64Reg)7, R(reg)); Write8(shift); } // WARNING not REX compatible -void XEmitter::PSRAW(X64Reg reg, int shift) { +void XEmitter::PSRAW(X64Reg reg, int shift) +{ if (reg > 7) PanicAlert("The PSRAW-emitter does not support regs above 7"); Write8(0x66); @@ -1515,7 +1725,8 @@ void XEmitter::PSRAW(X64Reg reg, int shift) { } // WARNING not REX compatible -void XEmitter::PSRAD(X64Reg reg, int shift) { +void XEmitter::PSRAD(X64Reg reg, int shift) +{ if (reg > 7) PanicAlert("The PSRAD-emitter does not support regs above 7"); Write8(0x66); @@ -1525,83 +1736,163 @@ void XEmitter::PSRAD(X64Reg reg, int shift) { Write8(shift); } -void XEmitter::PSHUFB(X64Reg dest, OpArg arg) { - if (!cpu_info.bSSSE3) { - PanicAlert("Trying to use PSHUFB on a system that doesn't support it. Bad programmer."); - } - WriteSSEOp2(64, 0x00, true, dest, arg); +void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) +{ + if (!cpu_info.bSSSE3) + PanicAlert("Trying to use SSSE3 on a system that doesn't support it. Bad programmer."); + WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); } -void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDB, true, dest, arg);} -void XEmitter::PANDN(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDF, true, dest, arg);} -void XEmitter::PXOR(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xEF, true, dest, arg);} -void XEmitter::POR(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xEB, true, dest, arg);} +void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) +{ + if (!cpu_info.bSSE4_1) + PanicAlert("Trying to use SSE4.1 on a system that doesn't support it. Bad programmer."); + WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); +} -void XEmitter::PADDB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xFC, true, dest, arg);} -void XEmitter::PADDW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xFD, true, dest, arg);} -void XEmitter::PADDD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xFE, true, dest, arg);} -void XEmitter::PADDQ(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xD4, true, dest, arg);} +void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);} +void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);} +void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);} -void XEmitter::PADDSB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xEC, true, dest, arg);} -void XEmitter::PADDSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xED, true, dest, arg);} -void XEmitter::PADDUSB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDC, true, dest, arg);} -void XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDD, true, dest, arg);} +void XEmitter::PMOVSXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);} +void XEmitter::PMOVSXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);} +void XEmitter::PMOVSXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);} +void XEmitter::PMOVSXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);} +void XEmitter::PMOVSXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);} +void XEmitter::PMOVSXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);} +void XEmitter::PMOVZXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);} +void XEmitter::PMOVZXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);} +void XEmitter::PMOVZXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);} +void XEmitter::PMOVZXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);} +void XEmitter::PMOVZXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);} +void XEmitter::PMOVZXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);} -void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF8, true, dest, arg);} -void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF9, true, dest, arg);} -void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xFA, true, dest, arg);} -void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDB, true, dest, arg);} +void XEmitter::PBLENDVB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);} +void XEmitter::BLENDVPS(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);} +void XEmitter::BLENDVPD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);} -void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xE8, true, dest, arg);} -void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xE9, true, dest, arg);} -void XEmitter::PSUBUSB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xD8, true, dest, arg);} -void XEmitter::PSUBUSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xD9, true, dest, arg);} +void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDB, dest, arg);} +void XEmitter::PANDN(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDF, dest, arg);} +void XEmitter::PXOR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEF, dest, arg);} +void XEmitter::POR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEB, dest, arg);} -void XEmitter::PAVGB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xE0, true, dest, arg);} -void XEmitter::PAVGW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xE3, true, dest, arg);} +void XEmitter::PADDB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFC, dest, arg);} +void XEmitter::PADDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFD, dest, arg);} +void XEmitter::PADDD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFE, dest, arg);} +void XEmitter::PADDQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD4, dest, arg);} -void XEmitter::PCMPEQB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x74, true, dest, arg);} -void XEmitter::PCMPEQW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x75, true, dest, arg);} -void XEmitter::PCMPEQD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x76, true, dest, arg);} +void XEmitter::PADDSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEC, dest, arg);} +void XEmitter::PADDSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xED, dest, arg);} +void XEmitter::PADDUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDC, dest, arg);} +void XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDD, dest, arg);} -void XEmitter::PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x64, true, dest, arg);} -void XEmitter::PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x65, true, dest, arg);} -void XEmitter::PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x66, true, dest, arg);} +void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF8, dest, arg);} +void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF9, dest, arg);} +void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFA, dest, arg);} +void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFB, dest, arg);} -void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(64, 0xC5, true, dest, arg); Write8(subreg);} -void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(64, 0xC4, true, dest, arg); Write8(subreg);} +void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE8, dest, arg);} +void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE9, dest, arg);} +void XEmitter::PSUBUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD8, dest, arg);} +void XEmitter::PSUBUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD9, dest, arg);} -void XEmitter::PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF5, true, dest, arg); } -void XEmitter::PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF6, true, dest, arg);} +void XEmitter::PAVGB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE0, dest, arg);} +void XEmitter::PAVGW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE3, dest, arg);} -void XEmitter::PMAXSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xEE, true, dest, arg); } -void XEmitter::PMAXUB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDE, true, dest, arg); } -void XEmitter::PMINSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xEA, true, dest, arg); } -void XEmitter::PMINUB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDA, true, dest, arg); } +void XEmitter::PCMPEQB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x74, dest, arg);} +void XEmitter::PCMPEQW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x75, dest, arg);} +void XEmitter::PCMPEQD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x76, dest, arg);} -void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xD7, true, dest, arg); } +void XEmitter::PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x64, dest, arg);} +void XEmitter::PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x65, dest, arg);} +void XEmitter::PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x66, dest, arg);} -void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(64, 0x70, true, regOp, arg, 1); Write8(shuffle);} -void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(64, 0x70, false, regOp, arg, 1); Write8(shuffle);} +void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg); Write8(subreg);} +void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg); Write8(subreg);} + +void XEmitter::PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF5, dest, arg); } +void XEmitter::PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF6, dest, arg);} + +void XEmitter::PMAXSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEE, dest, arg); } +void XEmitter::PMAXUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDE, dest, arg); } +void XEmitter::PMINSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEA, dest, arg); } +void XEmitter::PMINUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDA, dest, arg); } + +void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD7, dest, arg); } +void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);} +void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);} +void XEmitter::PSHUFHW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);} // VEX -void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseADD, false, regOp1, regOp2, arg);} -void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseSUB, false, regOp1, regOp2, arg);} -void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseMUL, false, regOp1, regOp2, arg);} -void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseDIV, false, regOp1, regOp2, arg);} -void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseSQRT, false, regOp1, regOp2, arg);} +void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);} +void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);} +void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);} +void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);} +void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);} +void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);} +void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);} +void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);} +void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);} +void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg);} +void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg);} +void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg);} +void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg);} +void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);} +void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);} +void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);} + +void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} +void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} +void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} +void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} +void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} +void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} // Prefixes void XEmitter::LOCK() { Write8(0xF0); } void XEmitter::REP() { Write8(0xF3); } void XEmitter::REPNE() { Write8(0xF2); } +void XEmitter::FSOverride() { Write8(0x64); } +void XEmitter::GSOverride() { Write8(0x65); } -void XEmitter::FWAIT() { +void XEmitter::FWAIT() +{ Write8(0x9B); } -void XEmitter::RTDSC() { Write8(0x0F); Write8(0x31); } +// TODO: make this more generic +void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg) +{ + int mf = 0; + _assert_msg_(DYNA_REC, !(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction"); + switch (bits) + { + case 32: mf = 0; break; + case 64: mf = 4; break; + case 80: mf = 2; break; + default: _assert_msg_(DYNA_REC, 0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)"); + } + Write8(0xd9 | mf); + // x87 instructions use the reg field of the ModR/M byte as opcode: + if (bits == 80) + op = op_80b; + arg.WriteRest(this, 0, (X64Reg) op); +} + +void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} +void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} +void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);} +void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); } + +void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); } void XCodeBlock::AllocCodeSpace(int size) { region_size = size; @@ -1625,5 +1916,4 @@ void XCodeBlock::WriteProtect() { WriteProtectMemory(region, region_size, true); } -} // Gen - +} diff --git a/Common/x64Emitter.h b/Common/x64Emitter.h index 2b163ff52a..3af96eea84 100644 --- a/Common/x64Emitter.h +++ b/Common/x64Emitter.h @@ -22,6 +22,10 @@ #include "Common.h" +#ifdef _M_X64 +#define _ARCH_64 +#endif + namespace Gen { @@ -55,10 +59,10 @@ enum CCFlags { CC_O = 0, CC_NO = 1, - CC_B = 2, CC_C = 2, CC_NAE = 2, - CC_NB = 3, CC_NC = 3, CC_AE = 3, + CC_B = 2, CC_C = 2, CC_NAE = 2, + CC_NB = 3, CC_NC = 3, CC_AE = 3, CC_Z = 4, CC_E = 4, - CC_NZ = 5, CC_NE = 5, + CC_NZ = 5, CC_NE = 5, CC_BE = 6, CC_NA = 6, CC_NBE = 7, CC_A = 7, CC_S = 8, @@ -121,6 +125,16 @@ enum { CMP_ORD = 7, }; +enum FloatOp { + floatLD = 0, + floatST = 2, + floatSTP = 3, + floatLD80 = 5, + floatSTP80 = 7, + + floatINVALID = -1, +}; + class XEmitter; // RIP addressing does not benefit from micro op fusion on Core arch @@ -136,9 +150,15 @@ struct OpArg //if scale == 0 never mind offsetting offset = _offset; } + bool operator==(OpArg b) + { + return operandReg == b.operandReg && scale == b.scale && offsetOrBaseReg == b.offsetOrBaseReg && + indexReg == b.indexReg && offset == b.offset; + } void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const; - void WriteVex(XEmitter* emit, int size, int packed, Gen::X64Reg regOp1, X64Reg regOp2) const; - void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=(X64Reg)0xFF, bool warn_64bit_offset = true) const; + void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const; + void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const; + void WriteFloatModRM(XEmitter *emit, FloatOp op); void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits); // This one is public - must be written to u64 offset; // use RIP-relative as much as possible - 64-bit immediates are not available. @@ -147,7 +167,8 @@ struct OpArg void WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const; bool IsImm() const {return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || scale == SCALE_IMM64;} bool IsSimpleReg() const {return scale == SCALE_NONE;} - bool IsSimpleReg(X64Reg reg) const { + bool IsSimpleReg(X64Reg reg) const + { if (!IsSimpleReg()) return false; return GetSimpleReg() == reg; @@ -195,26 +216,35 @@ private: u16 indexReg; }; -inline OpArg M(void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);} +inline OpArg M(const void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);} template inline OpArg M(const T *ptr) {return OpArg((u64)(const void *)ptr, (int)SCALE_RIP);} -inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);} +inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);} inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);} -inline OpArg MDisp(X64Reg value, int offset) { + +inline OpArg MDisp(X64Reg value, int offset) +{ return OpArg((u32)offset, SCALE_ATREG, value); } -inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) { + +inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) +{ return OpArg(offset, scale, base, scaled); } -inline OpArg MScaled(X64Reg scaled, int scale, int offset) { + +inline OpArg MScaled(X64Reg scaled, int scale, int offset) +{ if (scale == SCALE_1) return OpArg(offset, SCALE_ATREG, scaled); else return OpArg(offset, scale | 0x20, RAX, scaled); } -inline OpArg MRegSum(X64Reg base, X64Reg offset) { + +inline OpArg MRegSum(X64Reg base, X64Reg offset) +{ return MComplex(base, offset, 1, 0); } + inline OpArg Imm8 (u8 imm) {return OpArg(imm, SCALE_IMM8);} inline OpArg Imm16(u16 imm) {return OpArg(imm, SCALE_IMM16);} //rarely used inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);} @@ -226,19 +256,23 @@ inline OpArg SImmAuto(s32 imm) { return OpArg(imm, (imm >= 128 || imm < -128) ? SCALE_IMM32 : SCALE_IMM8); } -#ifdef _M_X64 +#ifdef _ARCH_64 inline OpArg ImmPtr(const void* imm) {return Imm64((u64)imm);} #else inline OpArg ImmPtr(const void* imm) {return Imm32((u32)imm);} #endif -inline u32 PtrOffset(const void* ptr, const void* base) { -#ifdef _M_X64 + +inline u32 PtrOffset(const void* ptr, const void* base) +{ +#ifdef _ARCH_64 s64 distance = (s64)ptr-(s64)base; if (distance >= 0x80000000LL || - distance < -0x80000000LL) { - _assert_msg_(JIT, 0, "pointer offset out of range"); + distance < -0x80000000LL) + { + _assert_msg_(DYNA_REC, 0, "pointer offset out of range"); return 0; } + return (u32)distance; #else return (u32)ptr-(u32)base; @@ -275,21 +309,31 @@ class XEmitter friend struct OpArg; // for Write8 etc private: u8 *code; + bool flags_locked; + + void CheckFlags(); void Rex(int w, int r, int x, int b); void WriteSimple1Byte(int bits, u8 byte, X64Reg reg); void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg); void WriteMulDivType(int bits, OpArg src, int ext); - void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2); + void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); void WriteShift(int bits, OpArg dest, OpArg &shift, int ext); void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext); void WriteMXCSR(OpArg arg, int ext); - void WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteSSEOp2(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteAVXOp(int size, u8 sseOp, bool packed, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); + void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); + void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); + void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); + void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); + void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); + void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); + void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); + void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); + void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg); void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); + void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); + protected: inline void Write8(u8 value) {*code++ = value;} inline void Write16(u16 value) {*(u16*)code = (value); code += 2;} @@ -297,8 +341,8 @@ protected: inline void Write64(u64 value) {*(u64*)code = (value); code += 8;} public: - XEmitter() { code = NULL; } - XEmitter(u8 *code_ptr) { code = code_ptr; } + XEmitter() { code = nullptr; flags_locked = false; } + XEmitter(u8 *code_ptr) { code = code_ptr; flags_locked = false; } virtual ~XEmitter() {} void WriteModRM(int mod, int rm, int reg); @@ -312,6 +356,9 @@ public: const u8 *GetCodePtr() const; u8 *GetWritableCodePtr(); + void LockFlags() { flags_locked = true; } + void UnlockFlags() { flags_locked = false; } + // Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr., // INC and DEC are slow on Intel Core, but not on AMD. They create a @@ -322,7 +369,7 @@ public: void INT3(); // Do nothing - void NOP(int count = 1); //nop padding - TODO: fast nop slides, for amd and intel (check their manuals) + void NOP(size_t count = 1); // Save energy in wait-loops on P4 only. Probably not too useful. void PAUSE(); @@ -459,6 +506,14 @@ public: void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src); //automatically uses MOVSXD if necessary void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src); + // Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE. + void MOVBE(int dbits, const OpArg& dest, const OpArg& src); + + // Available only on AMD >= Phenom or Intel >= Haswell + void LZCNT(int bits, X64Reg dest, OpArg src); + // Note: this one is actually part of BMI1 + void TZCNT(int bits, X64Reg dest, OpArg src); + // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64) void STMXCSR(OpArg memloc); void LDMXCSR(OpArg memloc); @@ -467,7 +522,31 @@ public: void LOCK(); void REP(); void REPNE(); + void FSOverride(); + void GSOverride(); + // x87 + enum x87StatusWordBits { + x87_InvalidOperation = 0x1, + x87_DenormalizedOperand = 0x2, + x87_DivisionByZero = 0x4, + x87_Overflow = 0x8, + x87_Underflow = 0x10, + x87_Precision = 0x20, + x87_StackFault = 0x40, + x87_ErrorSummary = 0x80, + x87_C0 = 0x100, + x87_C1 = 0x200, + x87_C2 = 0x400, + x87_TopOfStack = 0x2000 | 0x1000 | 0x800, + x87_C3 = 0x4000, + x87_FPUBusy = 0x8000, + }; + + void FLD(int bits, OpArg src); + void FST(int bits, OpArg dest); + void FSTP(int bits, OpArg dest); + void FNSTSW_AX(); void FWAIT(); // SSE/SSE2: Floating point arithmetic @@ -490,14 +569,6 @@ public: // SSE/SSE2: Floating point bitwise (yes) void CMPSS(X64Reg regOp, OpArg arg, u8 compare); void CMPSD(X64Reg regOp, OpArg arg, u8 compare); - void ANDSS(X64Reg regOp, OpArg arg); - void ANDSD(X64Reg regOp, OpArg arg); - void ANDNSS(X64Reg regOp, OpArg arg); - void ANDNSD(X64Reg regOp, OpArg arg); - void ORSS(X64Reg regOp, OpArg arg); - void ORSD(X64Reg regOp, OpArg arg); - void XORSS(X64Reg regOp, OpArg arg); - void XORSD(X64Reg regOp, OpArg arg); inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); } inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); } @@ -543,11 +614,8 @@ public: // SSE/SSE2: Useful alternative to shuffle in some cases. void MOVDDUP(X64Reg regOp, OpArg arg); - // THESE TWO ARE NEW AND UNTESTED void UNPCKLPS(X64Reg dest, OpArg src); void UNPCKHPS(X64Reg dest, OpArg src); - - // These are OK. void UNPCKLPD(X64Reg dest, OpArg src); void UNPCKHPD(X64Reg dest, OpArg src); @@ -568,7 +636,6 @@ public: void MOVUPS(OpArg arg, X64Reg regOp); void MOVUPD(OpArg arg, X64Reg regOp); - // Integers (NOTE: untested - I added these then it turned out I didn't have a use for them after all). void MOVDQA(X64Reg regOp, OpArg arg); void MOVDQA(OpArg arg, X64Reg regOp); void MOVDQU(X64Reg regOp, OpArg arg); @@ -579,6 +646,14 @@ public: void MOVSS(OpArg arg, X64Reg regOp); void MOVSD(OpArg arg, X64Reg regOp); + void MOVLPD(X64Reg regOp, OpArg arg); + void MOVHPD(X64Reg regOp, OpArg arg); + void MOVLPD(OpArg arg, X64Reg regOp); + void MOVHPD(OpArg arg, X64Reg regOp); + + void MOVHLPS(X64Reg regOp1, X64Reg regOp2); + void MOVLHPS(X64Reg regOp1, X64Reg regOp2); + void MOVD_xmm(X64Reg dest, const OpArg &arg); void MOVQ_xmm(X64Reg dest, OpArg arg); void MOVD_xmm(const OpArg &arg, X64Reg src); @@ -596,37 +671,34 @@ public: void CVTPS2PD(X64Reg dest, OpArg src); void CVTPD2PS(X64Reg dest, OpArg src); void CVTSS2SD(X64Reg dest, OpArg src); + void CVTSI2SS(X64Reg dest, OpArg src); void CVTSD2SS(X64Reg dest, OpArg src); - void CVTSD2SI(X64Reg dest, OpArg src); + void CVTSI2SD(X64Reg dest, OpArg src); void CVTDQ2PD(X64Reg regOp, OpArg arg); void CVTPD2DQ(X64Reg regOp, OpArg arg); void CVTDQ2PS(X64Reg regOp, OpArg arg); void CVTPS2DQ(X64Reg regOp, OpArg arg); - void CVTTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! void CVTTPS2DQ(X64Reg regOp, OpArg arg); - void CVTSI2SS(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! - void CVTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! - void CVTTSD2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! - void CVTTPD2DQ(X64Reg xregdest, OpArg arg); + void CVTTPD2DQ(X64Reg regOp, OpArg arg); + + // Destinations are X64 regs (rax, rbx, ...) for these instructions. + void CVTSS2SI(X64Reg xregdest, OpArg src); + void CVTSD2SI(X64Reg xregdest, OpArg src); + void CVTTSS2SI(X64Reg xregdest, OpArg arg); + void CVTTSD2SI(X64Reg xregdest, OpArg arg); // SSE2: Packed integer instructions void PACKSSDW(X64Reg dest, OpArg arg); void PACKSSWB(X64Reg dest, OpArg arg); - //void PACKUSDW(X64Reg dest, OpArg arg); + void PACKUSDW(X64Reg dest, OpArg arg); void PACKUSWB(X64Reg dest, OpArg arg); void PUNPCKLBW(X64Reg dest, const OpArg &arg); void PUNPCKLWD(X64Reg dest, const OpArg &arg); void PUNPCKLDQ(X64Reg dest, const OpArg &arg); - void PMOVSXBW(X64Reg dest, const OpArg &arg); - void PMOVSXBD(X64Reg dest, const OpArg &arg); - void PMOVSXWD(X64Reg dest, const OpArg &arg); - void PMOVZXBW(X64Reg dest, const OpArg &arg); - void PMOVZXBD(X64Reg dest, const OpArg &arg); - void PMOVZXWD(X64Reg dest, const OpArg &arg); - + void PTEST(X64Reg dest, OpArg arg); void PAND(X64Reg dest, OpArg arg); void PANDN(X64Reg dest, OpArg arg); void PXOR(X64Reg dest, OpArg arg); @@ -680,29 +752,75 @@ public: void PSHUFB(X64Reg dest, OpArg arg); void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle); + void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle); void PSRLW(X64Reg reg, int shift); void PSRLD(X64Reg reg, int shift); void PSRLQ(X64Reg reg, int shift); + void PSRLQ(X64Reg reg, OpArg arg); + void PSRLDQ(X64Reg reg, int shift); void PSLLW(X64Reg reg, int shift); void PSLLD(X64Reg reg, int shift); void PSLLQ(X64Reg reg, int shift); - - void PSRLDQ(X64Reg reg, int shift); void PSLLDQ(X64Reg reg, int shift); void PSRAW(X64Reg reg, int shift); void PSRAD(X64Reg reg, int shift); + // SSE4: data type conversions + void PMOVSXBW(X64Reg dest, OpArg arg); + void PMOVSXBD(X64Reg dest, OpArg arg); + void PMOVSXBQ(X64Reg dest, OpArg arg); + void PMOVSXWD(X64Reg dest, OpArg arg); + void PMOVSXWQ(X64Reg dest, OpArg arg); + void PMOVSXDQ(X64Reg dest, OpArg arg); + void PMOVZXBW(X64Reg dest, OpArg arg); + void PMOVZXBD(X64Reg dest, OpArg arg); + void PMOVZXBQ(X64Reg dest, OpArg arg); + void PMOVZXWD(X64Reg dest, OpArg arg); + void PMOVZXWQ(X64Reg dest, OpArg arg); + void PMOVZXDQ(X64Reg dest, OpArg arg); + + // SSE4: variable blend instructions (xmm0 implicit argument) + void PBLENDVB(X64Reg dest, OpArg arg); + void BLENDVPS(X64Reg dest, OpArg arg); + void BLENDVPD(X64Reg dest, OpArg arg); + // AVX void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle); + void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void RTDSC(); + // VEX GPR instructions + void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate); + void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void BLSR(int bits, X64Reg regOp, OpArg arg); + void BLSMSK(int bits, X64Reg regOp, OpArg arg); + void BLSI(int bits, X64Reg regOp, OpArg arg); + void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + + void RDTSC(); // Utility functions // The difference between this and CALL is that this aligns the stack @@ -719,6 +837,7 @@ public: void ABI_CallFunctionC16(const void *func, u16 param1); void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2); + // These only support u32 parameters, but that's enough for a lot of uses. // These will destroy the 1 or 2 first "parameter regs". void ABI_CallFunctionC(const void *func, u32 param1); @@ -736,8 +855,8 @@ public: void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2); // Pass a register as a parameter. - void ABI_CallFunctionR(const void *func, Gen::X64Reg reg1); - void ABI_CallFunctionRR(const void *func, Gen::X64Reg reg1, Gen::X64Reg reg2); + void ABI_CallFunctionR(const void *func, X64Reg reg1); + void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2); template void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) { @@ -822,4 +941,4 @@ public: } // namespace -#endif // _DOLPHIN_INTEL_CODEGEN_ +#endif