// Patch summary: adds VFPOps table rows and ARMXEmitter wrappers for VNMLA, VNMLS and VNMUL (renumbering the later table indices to match), declares them in ArmEmitter.h, and makes Jit::Comp_FPU3op emit a single VNMUL when a mul.s is followed by a neg.s whose bits 6..10 and bits 11..15 both equal fd.
diff --git a/Common/ArmEmitter.cpp b/Common/ArmEmitter.cpp index 302e56b0fd..681dc4f7b9 100644 --- a/Common/ArmEmitter.cpp +++ b/Common/ArmEmitter.cpp @@ -906,24 +906,30 @@ struct VFPEnc // Double/single, Neon const VFPEnc VFPOps[][2] = { {{0xE0, 0xA0}, {0x20, 0xD1}}, // 0: VMLA - {{0xE0, 0xA4}, {0x22, 0xD1}}, // 1: VMLS - {{0xE3, 0xA0}, {0x20, 0xD0}}, // 2: VADD - {{0xE3, 0xA4}, {0x22, 0xD0}}, // 3: VSUB - {{0xE2, 0xA0}, {0x30, 0xD1}}, // 4: VMUL - {{0xEB, 0xAC}, { -1 /* 0x3B */, -1 /* 0x70 */}}, // 5: VABS(Vn(0x0) used for encoding) - {{0xE8, 0xA0}, { -1, -1}}, // 6: VDIV - {{0xEB, 0xA4}, { -1 /* 0x3B */, -1 /* 0x78 */}}, // 7: VNEG(Vn(0x1) used for encoding) - {{0xEB, 0xAC}, { -1, -1}}, // 8: VSQRT (Vn(0x1) used for encoding) - {{0xEB, 0xA4}, { -1, -1}}, // 9: VCMP (Vn(0x4 | #0 ? 1 : 0) used for encoding) - {{0xEB, 0xAC}, { -1, -1}}, // 10: VCMPE (Vn(0x4 | #0 ? 1 : 0) used for encoding) - {{ -1, -1}, {0x3B, 0x30}}, // 11: VABSi + {{0xE1, 0xA4}, { -1, -1}}, // 1: VNMLA + {{0xE0, 0xA4}, {0x22, 0xD1}}, // 2: VMLS + {{0xE1, 0xA0}, { -1, -1}}, // 3: VNMLS + {{0xE3, 0xA0}, {0x20, 0xD0}}, // 4: VADD + {{0xE3, 0xA4}, {0x22, 0xD0}}, // 5: VSUB + {{0xE2, 0xA0}, {0x30, 0xD1}}, // 6: VMUL + {{0xE2, 0xA4}, { -1, -1}}, // 7: VNMUL + {{0xEB, 0xAC}, { -1 /* 0x3B */, -1 /* 0x70 */}}, // 8: VABS(Vn(0x0) used for encoding) + {{0xE8, 0xA0}, { -1, -1}}, // 9: VDIV + {{0xEB, 0xA4}, { -1 /* 0x3B */, -1 /* 0x78 */}}, // 10: VNEG(Vn(0x1) used for encoding) + {{0xEB, 0xAC}, { -1, -1}}, // 11: VSQRT (Vn(0x1) used for encoding) + {{0xEB, 0xA4}, { -1, -1}}, // 12: VCMP (Vn(0x4 | #0 ? 1 : 0) used for encoding) + {{0xEB, 0xAC}, { -1, -1}}, // 13: VCMPE (Vn(0x4 | #0 ? 
1 : 0) used for encoding) + {{ -1, -1}, {0x3B, 0x30}}, // 14: VABSi }; const char *VFPOpNames[] = { "VMLA", + "VNMLA", "VMLS", + "VNMLS", "VADD", "VSUB", "VMUL", + "VNMUL", "VABS", "VDIV", "VNEG", @@ -993,18 +999,21 @@ void ARMXEmitter::WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm) Write32(cond | (enc.opc1 << 20) | VnEnc | VdEnc | (enc.opc2 << 4) | (quad_reg << 6) | (double_reg << 8) | VmEnc); } void ARMXEmitter::VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(0, Vd, Vn, Vm); } -void ARMXEmitter::VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(1, Vd, Vn, Vm); } -void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(2, Vd, Vn, Vm); } -void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(3, Vd, Vn, Vm); } -void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(4, Vd, Vn, Vm); } -void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(5, Vd, D0, Vm); } -void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(6, Vd, Vn, Vm); } -void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(7, Vd, D1, Vm); } -void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(8, Vd, D1, Vm); } -void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(9, Vd, D4, Vm); } -void ARMXEmitter::VCMPE(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(10, Vd, D4, Vm); } -void ARMXEmitter::VCMP(ARMReg Vd){ WriteVFPDataOp(9, Vd, D5, D0); } -void ARMXEmitter::VCMPE(ARMReg Vd){ WriteVFPDataOp(10, Vd, D5, D0); } +void ARMXEmitter::VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(1, Vd, Vn, Vm); } +void ARMXEmitter::VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(2, Vd, Vn, Vm); } +void ARMXEmitter::VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(3, Vd, Vn, Vm); } +void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(4, Vd, Vn, Vm); } +void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(5, Vd, Vn, Vm); } +void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, 
ARMReg Vm){ WriteVFPDataOp(6, Vd, Vn, Vm); } +void ARMXEmitter::VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(7, Vd, Vn, Vm); } +void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(8, Vd, D0, Vm); } +void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(9, Vd, Vn, Vm); } +void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(10, Vd, D1, Vm); } +void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(11, Vd, D1, Vm); } +void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(12, Vd, D4, Vm); } +void ARMXEmitter::VCMPE(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(13, Vd, D4, Vm); } +void ARMXEmitter::VCMP(ARMReg Vd){ WriteVFPDataOp(12, Vd, D5, D0); } +void ARMXEmitter::VCMPE(ARMReg Vd){ WriteVFPDataOp(13, Vd, D5, D0); } void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, s16 offset) { diff --git a/Common/ArmEmitter.h b/Common/ArmEmitter.h index 73ab938231..740057055f 100644 --- a/Common/ArmEmitter.h +++ b/Common/ArmEmitter.h @@ -542,6 +542,10 @@ public: // Compares against zero void VCMP(ARMReg Vd); void VCMPE(ARMReg Vd); + + void VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VSQRT(ARMReg Vd, ARMReg Vm); diff --git a/Core/MIPS/ARM/ArmCompFPU.cpp b/Core/MIPS/ARM/ArmCompFPU.cpp index 47a1f6620e..5ebf63d8c2 100644 --- a/Core/MIPS/ARM/ArmCompFPU.cpp +++ b/Core/MIPS/ARM/ArmCompFPU.cpp @@ -16,6 +16,7 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
#include "Core/Config.h" #include "Core/MIPS/MIPS.h" +#include "Core/MIPS/MIPSTables.h" #include "ArmJit.h" #include "ArmRegCache.h" @@ -52,7 +53,22 @@ void Jit::Comp_FPU3op(u32 op) { case 0: VADD(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) + F(ft); //add case 1: VSUB(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) - F(ft); //sub - case 2: VMUL(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) * F(ft); //mul + case 2: { //F(fd) = F(fs) * F(ft); //mul + u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4); + // Optimisation is possible if bits 6..10 of the next instruction (its destination field) equal fd + if (fd == ((nextOp>>6) & 0x1F)) { + // VMUL + VNEG -> VNMUL, applied only when bits 11..15 of the neg.s also equal fd + if (!strcmp(MIPSGetName(nextOp), "neg.s")) { + if (fd == ((nextOp>>11) & 0x1F)) { + VNMUL(fpr.R(fd), fpr.R(fs), fpr.R(ft)); + EatInstruction(nextOp); + } + return; // NOTE(review): when neg.s writes fd but its bits 11..15 differ, nothing is emitted for this mul; presumably the product is dead because neg.s overwrites fd, TODO confirm. Using return instead of break also skips any code after the switch, which is outside this hunk. + } + } + VMUL(fpr.R(fd), fpr.R(fs), fpr.R(ft)); + break; + } case 3: VDIV(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) / F(ft); //div default: DISABLE;