diff --git a/Common/ArmEmitter.cpp b/Common/ArmEmitter.cpp index 9362315f73..cdda435929 100644 --- a/Common/ArmEmitter.cpp +++ b/Common/ArmEmitter.cpp @@ -1613,7 +1613,7 @@ void ARMXEmitter::VADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) bool register_quad = Vd >= Q0; if (Size & F_32) - Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD0 << 4) | (register_quad << 6) | EncodeVm(Vm)); + Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD << 8) | (register_quad << 6) | EncodeVm(Vm)); /* 0xD in bits 11:8; same numeric value as the previous (0xD0 << 4), written like the (0x8 << 8) form below */ else Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \ | (0x8 << 8) | (register_quad << 6) | EncodeVm(Vm)); diff --git a/ext/disarm.cpp b/ext/disarm.cpp index 166ff9858f..1cfc747d28 100644 --- a/ext/disarm.cpp +++ b/ext/disarm.cpp @@ -92,26 +92,39 @@ static const char *CCFlagsStr[] = { }; int GetVd(uint32_t op, bool quad = false, bool dbl = false) { + int val; /* decoded Vd register number; halved below when quad is set */ if (!quad && !dbl) { - return ((op >> 22) & 1) | ((op >> 11) & 0x1E); + val = ((op >> 22) & 1) | ((op >> 11) & 0x1E); + } else { /* quad or dbl: bits 15:12 form the low nibble, bit 22 becomes bit 4 */ + val = ((op >> 18) & 0x10) | ((op >> 12) & 0xF); } - return 0; + if (quad) + val >>= 1; + return val; } int GetVn(uint32_t op, bool quad = false, bool dbl = false) { + int val; /* decoded Vn register number; halved below when quad is set */ if (!quad && !dbl) { - return ((op >> 7) & 1) | ((op >> 15) & 0x1E); - } else if (dbl) { - return ((op >> 16) & 0xF) | ((op >> 3) & 0x10); + val = ((op >> 7) & 1) | ((op >> 15) & 0x1E); + } else { /* quad or dbl: bits 19:16 form the low nibble, bit 7 becomes bit 4 */ + val = ((op >> 16) & 0xF) | ((op >> 3) & 0x10); } - return 0; + if (quad) + val >>= 1; + return val; } int GetVm(uint32_t op, bool quad = false, bool dbl = false) { + int val; /* decoded Vm register number; halved below when quad is set */ if (!quad && !dbl) { - return ((op >> 5) & 1) | ((op << 1) & 0x1E); + val = ((op >> 5) & 1) | ((op << 1) & 0x1E); + } else { /* quad or dbl: bits 3:0 form the low nibble, bit 5 becomes bit 4 */ + val = ((op >> 1) & 0x10) | (op & 0xF); } - return 0; + if (quad) + val >>= 1; + return val; } @@ -214,12 +227,16 @@ bool DisasmVFP(uint32_t op, char *text) { int part2 = ((op >> 9) & 0x7) ; int part3 = ((op >> 20) & 0x3) ; if (part3 == 3 && part2 == 5 && part1 == 0x1D && (op & (1<<6))) { - // 
VMOV + // VMOV, VCMP int vn = GetVn(op); if (vn != 1 && vn != 3) { int vm = GetVm(op); int vd = GetVd(op); - sprintf(text, "VMOV%s s%i, s%i", cond, vd, vm); + + const char *name = "VMOV"; + if (op & 0x40000) /* bit 18 set: print a compare mnemonic instead; bit 7 then picks VCMPE over VCMP */ + name = (op & 0x80) ? "VCMPE" : "VCMP"; + sprintf(text, "%s%s s%i, s%i", name, cond, vd, vm); return true; } } @@ -232,13 +249,18 @@ bool DisasmVFP(uint32_t op, char *text) { int opc1 = (op >> 20) & 0xFB; int opc2 = (op >> 4) & 0xAC; for (int i = 0; i < 16; i++) { - if (ArmGen::VFPOps[i][0].opc1 == opc1 && ArmGen::VFPOps[i][0].opc2 == opc2) { + // Table entries whose opc2 has bit 3 clear are matched with bit 3 of the decoded opc2 masked off. + int fixed_opc2 = opc2; + if (!(ArmGen::VFPOps[i][0].opc2 & 0x8)) + fixed_opc2 &= 0xA7; + if (ArmGen::VFPOps[i][0].opc1 == opc1 && ArmGen::VFPOps[i][0].opc2 == fixed_opc2) { opnum = i; break; } } if (opnum < 0) return false; + char c = double_reg ? 'd' : 's'; /* register-name prefix used for every operand printed below */ switch (opnum) { case 8: case 10: @@ -251,8 +273,9 @@ bool DisasmVFP(uint32_t op, char *text) { int vd = GetVd(op, quad_reg, double_reg); int vn = GetVn(op, quad_reg, true); int vm = GetVm(op, quad_reg, double_reg); - if (opnum == 8 && vn == 0x11) opnum += 3; - sprintf(text, "%s%s s%i, s%i", ArmGen::VFPOpNames[opnum], cond, vd, vm); + if (opnum == 8 && vn == 0x11) + opnum += 3; + sprintf(text, "%s%s %c%i, %c%i", ArmGen::VFPOpNames[opnum], cond, c, vd, c, vm); return true; } default: @@ -261,7 +284,7 @@ bool DisasmVFP(uint32_t op, char *text) { int vd = GetVd(op, quad_reg, double_reg); int vn = GetVn(op, quad_reg, double_reg); int vm = GetVm(op, quad_reg, double_reg); - sprintf(text, "%s%s s%i, s%i, s%i", ArmGen::VFPOpNames[opnum], cond, vd, vn, vm); + sprintf(text, "%s%s %c%i, %c%i, %c%i", ArmGen::VFPOpNames[opnum], cond, c, vd, c, vn, c, vm); return true; } } @@ -272,13 +295,122 @@ bool DisasmVFP(uint32_t op, char *text) { #endif return false; } + +static const char *GetSizeString(int sz) { /* Maps size code 0..3 to the element width in bits as text ("8".."64"); any other value yields "(err)". */ + switch (sz) { + case 0: + return "8"; + case 1: + return "16"; + case 2: + return "32"; + case 3: + return "64"; + default: + return "(err)"; + } +} + 
+static const char *GetISizeString(int sz) { /* Maps size code 0..3 to an integer element suffix ("i8".."i64"); any other value yields "(err)". */ + switch (sz) { + case 0: + return "i8"; + case 1: + return "i16"; + case 2: + return "i32"; + case 3: + return "i64"; + default: + return "(err)"; + } +} + +static int GetRegCount(int type) { /* Maps the 4-bit type field of VLD1/VST1 (multiple single elements) to the number of D registers; 0 for any other type. */ + switch (type) { + case 7: return 1; + case 10: return 2; + case 6: return 3; + case 2: return 4; /* four registers are encoded as type 0b0010; 0b0100 is a VLD3/VST3 encoding */ + default: + return 0; + } +} + +// VLD1 / VST1 +static bool DisasmNeonLDST(uint32_t op, char *text) { /* Formats a VLD1/VST1 as "V{LD,ST}1.<bits> {dA[-dB]}, [rN]" into text; always returns true. */ + bool load = (op >> 21) & 1; + int Rn = (op >> 16) & 0xF; + int Rm = (op & 0xF); /* decoded but not printed */ + int Vd = GetVd(op, false, true); + int sz = (op >> 6) & 3; + int regCount = GetRegCount((op >> 8) & 0xF); + + int startReg = Vd; + int endReg = Vd + regCount - 1; + + if (startReg == endReg) + sprintf(text, "V%s1.%s {d%i}, [r%i]", load ? "LD" : "ST", GetSizeString(sz), startReg, Rn); + else + sprintf(text, "V%s1.%s {d%i-d%i}, [r%i]", load ? "LD" : "ST", GetSizeString(sz), startReg, endReg, Rn); + + return true; +} +static bool DisasmNeonF3(uint32_t op, char *text) { /* Placeholder: writes a fixed marker string and reports success. */ + sprintf(text, "NEON F3"); + return true; +} +static bool DisasmNeonF2F3(uint32_t op, char *text) { /* Formats the three-register ops recognised below; text keeps the "NEON F2" marker when the pattern check fails. Always returns true. */ + sprintf(text, "NEON F2"); + if (((op >> 20) & 0xFFC) == 0xF20 || ((op >> 20) & 0xFFC) == 0xF30) { + bool quad = ((op >> 6) & 1); + int size = (op >> 20) & 3; + int type = (op >> 8) & 0xF; + char r = quad ? 
'q' : 'd'; + const char *opname = "(unk)"; /* mnemonic stem; stays "(unk)" when bits 27:20 match none of the cases */ + switch ((op >> 20) & 0xFF) { + case 0x20: + if (op & 0x10) + opname = "MLA"; + else + opname = "ADD"; + break; + case 0x22: + if (op & 0x10) + opname = "MLS"; + else + opname = "ADD"; + break; + case 0x31: + if (op & 0x100) + opname = "MLS"; + else + opname = "SUB"; + break; + case 0x30: + opname = "MUL"; + break; + } + const char *szname = GetISizeString(size); + if (type == 0xD) /* type 0xD is printed with the f32 suffix instead of the integer one */ + szname = "f32"; + sprintf(text, "V%s.%s %c%i, %c%i, %c%i", opname, szname, r, GetVd(op, quad, true), r, GetVn(op, quad, true), r, GetVm(op, quad, true)); + } + return true; +} - - - - +static bool DisasmNeon(uint32_t op, char *text) { /* Dispatches on the top byte: 0xF4 to the load/store formatter, 0xF2 and 0xF3 to the three-register formatter; returns false for anything else. */ + switch (op >> 24) { + case 0xF4: + return DisasmNeonLDST(op, text); + case 0xF2: + case 0xF3: + return DisasmNeonF2F3(op, text); + } + return false; +} @@ -595,7 +727,15 @@ instr_disassemble(word instr, address addr, pDisOptions opts) { break; } case 3: - /* SWP or MRS/MSR or data processing */ + if (instr >> 24 == 0xF3) { /* top byte 0xF3: try the NEON formatter before the regular decode */ + if (!DisasmNeon(instr, result.text)) { + goto lUndefined; + break; + } + result.undefined = 0; + return &result; + } + /* SWP or MRS/MSR or data processing */ // hrydgard addition: MOVW/MOVT if ((instr & 0x0FF00000) == 0x03000000) { mnemonic = "MOVW"; @@ -657,7 +797,15 @@ lMaybeLDRHetc: } #endif case 2: - /* data processing */ + if (instr >> 24 == 0xF2) { /* top byte 0xF2: try the NEON formatter before the regular decode */ + if (!DisasmNeon(instr, result.text)) { + goto lUndefined; + break; + } + result.undefined = 0; + return &result; + } + /* data processing */ { word op21 = instr&(15<<21); if ((op21==(2<<21) || (op21==(4<<21))) /* ADD or SUB */ && ((instr&(RNbits+Ibit+Sbit))==RN(15)+Ibit) /* imm, no S */ @@ -692,7 +840,15 @@ lMaybeLDRHetc: } break; case 4: - case 5: + if (instr >> 24 == 0xF4) { /* top byte 0xF4: try the NEON load/store formatter first */ + if (!DisasmNeon(instr, result.text)) { + goto lUndefined; + break; + } + result.undefined = 0; + return &result; + } /* any other case-4 instruction falls through to the shared handling below */ + case 5: case 6: case 7: /* undefined or STR/LDR */ @@ -733,12 +889,10 @@ lMaybeLDRHetc: case 13: case 14: // FPU { - char text[128]; - if 
(!DisasmVFP(instr, text)) { + if (!DisasmVFP(instr, result.text)) { /* format straight into result.text; the temporary buffer and strcpy are removed */ goto lUndefined; break; } - strcpy(result.text, text); result.undefined = 0; return &result; } diff --git a/unittest/UnitTest.cpp b/unittest/UnitTest.cpp index a724cc68ce..afe8c864ca 100644 --- a/unittest/UnitTest.cpp +++ b/unittest/UnitTest.cpp @@ -31,6 +31,7 @@ #include #include "base/NativeApp.h" +#include "Common/CPUDetect.h" #include "Common/ArmEmitter.h" #include "ext/disarm.h" #include "math/math_util.h" @@ -260,14 +261,30 @@ bool TestArmEmitter() { RET(CheckLast(emitter, "ed4c2a09 VSTR s5, [r12, #-36]")); emitter.VADD(S1, S2, S3); RET(CheckLast(emitter, "ee710a21 VADD s1, s2, s3")); + emitter.VADD(D1, D2, D3); + RET(CheckLast(emitter, "ee321b03 VADD d1, d2, d3")); + emitter.VSUB(S1, S2, S3); + RET(CheckLast(emitter, "ee710a61 VSUB s1, s2, s3")); emitter.VMUL(S7, S8, S9); RET(CheckLast(emitter, "ee643a24 VMUL s7, s8, s9")); + emitter.VMUL(S0, S5, S10); + RET(CheckLast(emitter, "ee220a85 VMUL s0, s5, s10")); + emitter.VNMUL(S7, S8, S9); + RET(CheckLast(emitter, "ee643a64 VNMUL s7, s8, s9")); emitter.VMLA(S7, S8, S9); RET(CheckLast(emitter, "ee443a24 VMLA s7, s8, s9")); emitter.VNMLA(S7, S8, S9); RET(CheckLast(emitter, "ee543a64 VNMLA s7, s8, s9")); + emitter.VNMLS(S7, S8, S9); + RET(CheckLast(emitter, "ee543a24 VNMLS s7, s8, s9")); emitter.VABS(S1, S2); RET(CheckLast(emitter, "eef00ac1 VABS s1, s2")); + emitter.VMOV(S1, S2); + RET(CheckLast(emitter, "eef00a41 VMOV s1, s2")); + emitter.VCMP(S1, S2); + RET(CheckLast(emitter, "eef40a41 VCMP s1, s2")); + emitter.VCMPE(S1, S2); + RET(CheckLast(emitter, "eef40ac1 VCMPE s1, s2")); emitter.VSQRT(S1, S2); RET(CheckLast(emitter, "eef10ac1 VSQRT s1, s2")); emitter.VDIV(S1, S2, S3); @@ -289,6 +306,44 @@ bool TestArmEmitter() { emitter.VMOV(S3, S6); RET(CheckLast(emitter, "eef01a43 VMOV s3, s6")); + + /* + // These are only implemented in the neon-vfpu branch. will cherrypick later. 
+ emitter.VMOV_imm(I_32, R0, VIMM___x___x, 0xF3); + emitter.VMOV_imm(I_8, R0, VIMMxxxxxxxx, 0xF3); + emitter.VMOV_immf(Q0, 1.0f); + RET(CheckLast(emitter, "eebd0a60 VMOV Q0, 1.0")); + emitter.VMOV_immf(Q0, -1.0f); + emitter.VBIC_imm(I_32, R0, VIMM___x___x, 0xF3); + emitter.VMVN_imm(I_32, R0, VIMM___x___x, 0xF3); + emitter.VPADD(F_32, D0, D0, D0); + emitter.VMOV(Q14, Q2); + */ + + emitter.VMOV(S3, S6); + RET(CheckLast(emitter, "eef01a43 VMOV s3, s6")); + emitter.VLD1(I_32, D19, R3, 2, ALIGN_NONE, R_PC); + RET(CheckLast(emitter, "f4633a8f VLD1.32 {d19-d20}, [r3]")); + emitter.VST1(I_32, D23, R9, 1, ALIGN_NONE, R_PC); + RET(CheckLast(emitter, "f449778f VST1.32 {d23}, [r9]")); + emitter.VADD(I_8, D3, D4, D19); + RET(CheckLast(emitter, "f2043823 VADD.i8 d3, d4, d19")); + emitter.VADD(I_32, D3, D4, D19); + RET(CheckLast(emitter, "f2243823 VADD.i32 d3, d4, d19")); + emitter.VADD(F_32, D3, D4, D19); + RET(CheckLast(emitter, "f2043d23 VADD.f32 d3, d4, d19")); + emitter.VSUB(I_16, Q5, Q6, Q15); + RET(CheckLast(emitter, "f31ca86e VSUB.i16 q5, q6, q15")); + emitter.VMUL(F_32, Q1, Q2, Q3); + RET(CheckLast(emitter, "f3042d56 VMUL.f32 q1, q2, q3")); + emitter.VADD(F_32, Q1, Q2, Q3); + RET(CheckLast(emitter, "f2042d46 VADD.f32 q1, q2, q3")); + emitter.VMLA(F_32, Q1, Q2, Q3); + RET(CheckLast(emitter, "f2042d56 VMLA.f32 q1, q2, q3")); + emitter.VMLS(F_32, Q1, Q2, Q3); + RET(CheckLast(emitter, "f2242d56 VMLS.f32 q1, q2, q3")); + emitter.VMLS(I_16, Q1, Q2, Q3); + RET(CheckLast(emitter, "f3142946 VMLS.i16 q1, q2, q3")); return true; } @@ -313,13 +368,16 @@ bool TestParsers() { return true; } -int main(int argc, const char *argv[]) -{ +int main(int argc, const char *argv[]) { + cpu_info.bNEON = true; /* NOTE(review): presumably forced on so the emitter accepts NEON/VFP ops on the host -- confirm */ + cpu_info.bVFP = true; + cpu_info.bVFPv3 = true; + cpu_info.bVFPv4 = true; g_Config.bEnableLogging = true; - TestAsin(); + //TestAsin(); //TestSinCos(); - //TestArmEmitter(); - TestMathUtil(); - TestParsers(); + TestArmEmitter(); + //TestMathUtil(); + //TestParsers(); return 0; }