diff --git a/Common/Arm64Emitter.cpp b/Common/Arm64Emitter.cpp
index 24becd38b2..542ece192e 100644
--- a/Common/Arm64Emitter.cpp
+++ b/Common/Arm64Emitter.cpp
@@ -2052,7 +2052,7 @@ void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type,
 		(size == 128 ? (1 << 23) : 0) | (opc << 22) | (encoded_imm << 10) | (Rn << 5) | Rt);
 }
 
-void ARM64FloatEmitter::Emit2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+void ARM64FloatEmitter::EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
 	_assert_msg_(DYNA_REC, !IsQuad(Rd), "%s only supports double and single registers!", __FUNCTION__);
 	Rd = DecodeReg(Rd);
@@ -2683,55 +2683,55 @@ void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn)
 // Scalar - 2 Source
 void ARM64FloatEmitter::FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
-	Emit2Source(0, 0, IsDouble(Rd), 2, Rd, Rn, Rm);
+	EmitScalar2Source(0, 0, IsDouble(Rd), 2, Rd, Rn, Rm);
 }
 void ARM64FloatEmitter::FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
-	Emit2Source(0, 0, IsDouble(Rd), 0, Rd, Rn, Rm);
+	EmitScalar2Source(0, 0, IsDouble(Rd), 0, Rd, Rn, Rm);
 }
 void ARM64FloatEmitter::FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
-	Emit2Source(0, 0, IsDouble(Rd), 3, Rd, Rn, Rm);
+	EmitScalar2Source(0, 0, IsDouble(Rd), 3, Rd, Rn, Rm);
 }
 void ARM64FloatEmitter::FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
-	Emit2Source(0, 0, IsDouble(Rd), 1, Rd, Rn, Rm);
+	EmitScalar2Source(0, 0, IsDouble(Rd), 1, Rd, Rn, Rm);
 }
 void ARM64FloatEmitter::FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
-	Emit2Source(0, 0, IsDouble(Rd), 4, Rd, Rn, Rm);
+	EmitScalar2Source(0, 0, IsDouble(Rd), 4, Rd, Rn, Rm);
 }
 void ARM64FloatEmitter::FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
-	Emit2Source(0, 0, IsDouble(Rd), 5, Rd, Rn, Rm);
+	EmitScalar2Source(0, 0, IsDouble(Rd), 5, Rd, Rn, Rm);
 }
 void ARM64FloatEmitter::FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
-	Emit2Source(0, 0, IsDouble(Rd), 6, Rd, Rn, Rm);
+	EmitScalar2Source(0, 0, IsDouble(Rd), 6, Rd, Rn, Rm);
 }
 void ARM64FloatEmitter::FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
-	Emit2Source(0, 0, IsDouble(Rd), 7, Rd, Rn, Rm);
+	EmitScalar2Source(0, 0, IsDouble(Rd), 7, Rd, Rn, Rm);
 }
 void ARM64FloatEmitter::FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
-	Emit2Source(0, 0, IsDouble(Rd), 8, Rd, Rn, Rm);
+	EmitScalar2Source(0, 0, IsDouble(Rd), 8, Rd, Rn, Rm);
 }
 
 void ARM64FloatEmitter::FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
-	Emit3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 0);
+	EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 0);
 }
 void ARM64FloatEmitter::FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
-	Emit3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 1);
+	EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 1);
 }
 void ARM64FloatEmitter::FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
-	Emit3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 2);
+	EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 2);
 }
 void ARM64FloatEmitter::FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
-	Emit3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 3);
+	EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 3);
 }
 
-void ARM64FloatEmitter::Emit3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode) {
+void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode) {
 	int type = isDouble ? 1 : 0;
 	Rd = DecodeReg(Rd);
 	Rn = DecodeReg(Rn);
@@ -3235,20 +3235,32 @@ void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8
 
 	bool L = false;
 	bool H = false;
-
-	if (size == 32)
-	{
+	if (size == 32) {
 		L = index & 1;
 		H = (index >> 1) & 1;
-	}
-	else if (size == 64)
-	{
+	} else if (size == 64) {
 		H = index == 1;
 	}
 
 	EmitVectorxElement(0, 2 | (size >> 6), L, 0x9, H, Rd, Rn, Rm);
 }
 
+void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index)
+{
+	_assert_msg_(DYNA_REC, size == 32 || size == 64, "%s only supports 32bit or 64bit size!", __FUNCTION__);
+
+	bool L = false;
+	bool H = false;
+	if (size == 32) {
+		L = index & 1;
+		H = (index >> 1) & 1;
+	} else if (size == 64) {
+		H = index == 1;
+	}
+
+	EmitVectorxElement(0, 2 | (size >> 6), L, 1, H, Rd, Rn, Rm);
+}
+
 void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
 {
 	for (auto it : registers)
@@ -3271,10 +3283,12 @@ void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mas
 void ARM64XEmitter::ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 {
 	unsigned int n, imm_s, imm_r;
+	if (!Is64Bit(Rn))
+		imm &= 0xFFFFFFFF;
 	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
 		AND(Rd, Rn, imm_r, imm_s, n);
 	} else {
-		_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
+		_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
 		AND(Rd, Rn, scratch);
 	}
@@ -3285,7 +3299,7 @@ void ARM64XEmitter::ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
 		ORR(Rd, Rn, imm_r, imm_s, n);
 	} else {
-		_assert_msg_(JIT, scratch != INVALID_REG, "ORRI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
+		_assert_msg_(JIT, scratch != INVALID_REG, "ORRI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
 		ORR(Rd, Rn, scratch);
 	}
@@ -3296,7 +3310,7 @@ void ARM64XEmitter::EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
 		EOR(Rd, Rn, imm_r, imm_s, n);
 	} else {
-		_assert_msg_(JIT, scratch != INVALID_REG, "EORI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
+		_assert_msg_(JIT, scratch != INVALID_REG, "EORI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
 		EOR(Rd, Rn, scratch);
 	}
@@ -3307,7 +3321,7 @@ void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
 		ANDS(Rd, Rn, imm_r, imm_s, n);
 	} else {
-		_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
+		_assert_msg_(JIT, scratch != INVALID_REG, "ANDSI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
 		ANDS(Rd, Rn, scratch);
 	}
@@ -3319,7 +3333,7 @@ void ARM64XEmitter::ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 	if (IsImmArithmetic(imm, &val, &shift)) {
 		ADD(Rd, Rn, val, shift);
 	} else {
-		_assert_msg_(JIT, scratch != INVALID_REG, "ADDI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
+		_assert_msg_(JIT, scratch != INVALID_REG, "ADDI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
 		ADD(Rd, Rn, scratch);
 	}
@@ -3331,7 +3345,7 @@ void ARM64XEmitter::SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
 	if (IsImmArithmetic(imm, &val, &shift)) {
 		SUB(Rd, Rn, val, shift);
 	} else {
-		_assert_msg_(JIT, scratch != INVALID_REG, "SUBI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
+		_assert_msg_(JIT, scratch != INVALID_REG, "SUBI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
 		SUB(Rd, Rn, scratch);
 	}
@@ -3343,7 +3357,7 @@ void ARM64XEmitter::CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
 	if (IsImmArithmetic(imm, &val, &shift)) {
 		CMP(Rn, val, shift);
 	} else {
-		_assert_msg_(JIT, scratch != INVALID_REG, "CMPI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
+		_assert_msg_(JIT, scratch != INVALID_REG, "CMPI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
 		MOVI2R(scratch, imm);
 		CMP(Rn, scratch);
 	}
diff --git a/Common/Arm64Emitter.h b/Common/Arm64Emitter.h
index c388b8ad7a..6954f77985 100644
--- a/Common/Arm64Emitter.h
+++ b/Common/Arm64Emitter.h
@@ -872,6 +872,7 @@ public:
 
 	// vector x indexed element
 	void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
+	void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
 
 	void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
 
@@ -885,7 +886,7 @@ private:
 
 	// Emitting functions
 	void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
-	void Emit2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
 	void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
 	void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
 	void Emit2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
@@ -904,7 +905,7 @@ private:
 	void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
 	void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
 	void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);
-	void Emit3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
+	void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
 };
 
 class ARM64CodeBlock : public CodeBlock
diff --git a/Core/Util/DisArm64.cpp b/Core/Util/DisArm64.cpp
index 38530ec9d6..10e1181274 100644
--- a/Core/Util/DisArm64.cpp
+++ b/Core/Util/DisArm64.cpp
@@ -405,8 +405,19 @@ static void DataProcessingRegister(uint32_t w, uint64_t addr, Instruction *instr
 	}
 }
 
+inline bool GetQ(uint32_t w) { return (w >> 30) & 1; }
+inline bool GetU(uint32_t w) { return (w >> 29) & 1; }
+const char *GetArrangement(bool Q, bool sz) {
+	if (Q == 0 && sz == 0) return "2s";
+	else if (Q == 1 && sz == 0) return "4s";
+	else if (Q == 1 && sz == 1) return "2d";
+	else return "ERROR";
+}
 // (w >> 25) & 0xF == 7
 static void FPandASIMD1(uint32_t w, uint64_t addr, Instruction *instr) {
+	int Rd = w & 0x1f;
+	int Rn = (w >> 5) & 0x1f;
+	int Rm = (w >> 16) & 0x1f;
 	if (((w >> 21) & 0x4F9) == 0x71) {
 		switch ((w >> 10) & 3) {
 		case 1: case 3:
@@ -430,13 +441,38 @@ static void FPandASIMD1(uint32_t w, uint64_t addr, Instruction *instr) {
 		}
 	} else if (((w >> 21) & 0x4F8) == 0x78) {
 		if ((w >> 10) & 1) {
-			if (((w >> 19) & 0xf) == 0) {
+			if (((w >> 19) & 0xf) == 0) {
 				snprintf(instr->text, sizeof(instr->text), "(asimd modified immediate %08x)", w);
 			} else {
 				snprintf(instr->text, sizeof(instr->text), "(asimd shift-by-immediate %08x)", w);
 			}
 		} else {
-			snprintf(instr->text, sizeof(instr->text), "(asimd vector x indexed elem %08x)", w);
+			bool Q = GetQ(w);
+			bool U = GetU(w);
+			int size = (w >> 22) & 3;
+			bool L = (w >> 21) & 1;
+			bool M = (w >> 20) & 1;
+			bool H = (w >> 11) & 1;
+			int opcode = (w >> 12) & 0xf;
+			if (size & 0x2) {
+				const char *opname = 0;
+				switch (opcode) {
+				case 1: opname = "fmla"; break;
+				case 5: opname = "fmls"; break;
+				case 9: opname = "fmul"; break;
+				}
+				int index;
+				if ((size & 1) == 0) {
+					index = (H << 1) | L;
+				} else {
+					index = H;
+				}
+				char r = Q ? 'q' : 'd';
+				const char *arrangement = GetArrangement(Q, size & 1);
+				snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, %c%d.%s[%d]", opname, r, Rd, r, Rn, r, Rm, arrangement, index);
+			} else {
+				snprintf(instr->text, sizeof(instr->text), "(asimd vector x indexed elem %08x)", w);
+			}
 		}
 	} else {
 bail:
@@ -516,10 +552,12 @@ static void FPandASIMD2(uint32_t w, uint64_t addr, Instruction *instr) {
 		} else if (((w >> 10) & 3) == 2) {
 			int opc = (w >> 12) & 0xf;
 			const char *opnames[9] = { "fmul", "fdiv", "fadd", "fsub", "fmax", "fmin", "fmaxnm", "fminnm", "fnmul" };
-			char r = 's';  // TODO: Support doubles too
+			char r = ((w >> 22) & 1) ? 'd' : 's';
 			snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, %c%d", opnames[opc], r, Rd, r, Rn, r, Rm);
 		} else if (((w >> 10) & 3) == 3) {
-			snprintf(instr->text, sizeof(instr->text), "(float cond select %08x)", w);
+			char fr = ((w >> 22) & 1) ? 'd' : 's';
+			int cond = (w >> 12) & 0xf;
+			snprintf(instr->text, sizeof(instr->text), "fcsel %c%d, %c%d, %c%d, %s", fr, Rd, fr, Rn, fr, Rm, condnames[cond]);
 		}
 	} else if (((w >> 21) & 0x2F8) == 0xF8) {
 		int opcode = ((w >> 15) & 1) | ((w >> 20) & 2);
diff --git a/unittest/TestArm64Emitter.cpp b/unittest/TestArm64Emitter.cpp
index fa26083afa..055f884e7c 100644
--- a/unittest/TestArm64Emitter.cpp
+++ b/unittest/TestArm64Emitter.cpp
@@ -39,6 +39,12 @@ bool TestArm64Emitter() {
 	//emitter.EXTR(W1, W3, 0, 7);
 	//RET(CheckLast(emitter, "53033061 extr w1, w3, w7"));
 
+	fp.FMUL(32, Q0, Q1, Q2, 3);
+	RET(CheckLast(emitter, "4fa29820 fmul q0, q1, q2.4s[3]"));  // A real disasm says fmla v0.2s, v1.2s, v2.s[1] but I think our way is more readable
+	fp.FMLA(32, D0, D1, D2, 1);
+	RET(CheckLast(emitter, "1e222c20 fmla d0, d1, d2.2s[1]"));
+	fp.FCSEL(S0, S1, S2, CC_CS);
+	RET(CheckLast(emitter, "1e222c20 fcsel s0, s1, s2, cs"));
 	float value = 1.0;
 	uint8_t imm8;
 	FPImm8FromFloat(value, &imm8);
@@ -46,7 +52,7 @@ bool TestArm64Emitter() {
 	RET(CheckLast(emitter, "1e2e1007 fmov s7, #1.000000"));
 	FPImm8FromFloat(-value, &imm8);
 	fp.FMOV(S7, imm8);
-	RET(CheckLast(emitter, "1e2e1007 fmov s7, #-1.000000"));
+	RET(CheckLast(emitter, "0fa21020 fmov s7, #-1.000000"));
 	fp.FMADD(S1, S2, S3, S4);
 	RET(CheckLast(emitter, "1f031041 fmadd s1, s2, s3, s4"));
 	fp.FNMSUB(D1, D2, D3, D4);
@@ -151,7 +157,7 @@ bool TestArm64Emitter() {
 	RET(CheckLast(emitter, "1e2020e8 fcmp s7, #0.0"));
 	fp.FCMP(D7, D3);
 	RET(CheckLast(emitter, "1e6320e0 fcmp d7, d3"));
-	emitter.ORI2R(X1, X3, 0x3F, INVALID_REG);
+	emitter.ORRI2R(X1, X3, 0x3F, INVALID_REG);
 	RET(CheckLast(emitter, "b2401461 orr x1, x3, #0x3f"));
 	emitter.EORI2R(X1, X3, 0x3F0000003F0, INVALID_REG);
 	RET(CheckLast(emitter, "d21c1461 eor x1, x3, #0x3f0000003f0"));
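
Reviewer note, not part of the diff: the lane-index packing for the vector-x-indexed-element forms now lives in two places, the encode side (FMUL/FMLA in Arm64Emitter.cpp, which fold the index into the H and L bits passed to EmitVectorxElement) and the decode side (the new branch in FPandASIMD1 in DisArm64.cpp, which unpacks H and L back into an index). The standalone sketch below mirrors that mapping so the two sides can be sanity-checked against each other; EncodeLane and DecodeLane are hypothetical helper names used only for illustration, and the sketch assumes the same 32-bit and 64-bit element sizes the patch handles.

#include <cassert>
#include <cstdint>

// Illustrative sketch only: pack a lane index into the H:L fields the way the
// emitter does, and unpack it the way the disassembler does.
static void EncodeLane(uint8_t esize, uint8_t index, bool *H, bool *L) {
	*H = false;
	*L = false;
	if (esize == 32) {         // 32-bit lanes 0..3 -> H:L
		*L = index & 1;
		*H = (index >> 1) & 1;
	} else if (esize == 64) {  // 64-bit lanes 0..1 -> H
		*H = index == 1;
	}
}

static int DecodeLane(uint8_t esize, bool H, bool L) {
	// Mirrors the index recovery in the new FPandASIMD1 branch.
	return esize == 32 ? ((H << 1) | L) : (H ? 1 : 0);
}

int main() {
	for (uint8_t index = 0; index < 4; ++index) {
		bool H, L;
		EncodeLane(32, index, &H, &L);
		assert(DecodeLane(32, H, L) == index);  // encode/decode agree for 32-bit lanes
	}
	for (uint8_t index = 0; index < 2; ++index) {
		bool H, L;
		EncodeLane(64, index, &H, &L);
		assert(DecodeLane(64, H, L) == index);  // and for 64-bit lanes
	}
	return 0;
}

If the two sides ever drift apart, a round-trip check like the one above is a quicker way to spot it than comparing individual expected-encoding strings in TestArm64Emitter.cpp.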