mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Armjit: Combine mul.s + neg.s to VNMUL. Implement VNMUL, VNMLA, VNMLS.
I had implemented mul.s + add/sub.s + add/sub.s -> VADD/VSUB + V(N)ML(A/S). Turns out it doesn't happen enough though (once or twice per game).
This commit is contained in:
parent
c2dcebf36d
commit
f21218c3f9
3 changed files with 53 additions and 24 deletions
|
@ -906,24 +906,30 @@ struct VFPEnc
|
|||
// Double/single, Neon
|
||||
const VFPEnc VFPOps[][2] = {
|
||||
{{0xE0, 0xA0}, {0x20, 0xD1}}, // 0: VMLA
|
||||
{{0xE0, 0xA4}, {0x22, 0xD1}}, // 1: VMLS
|
||||
{{0xE3, 0xA0}, {0x20, 0xD0}}, // 2: VADD
|
||||
{{0xE3, 0xA4}, {0x22, 0xD0}}, // 3: VSUB
|
||||
{{0xE2, 0xA0}, {0x30, 0xD1}}, // 4: VMUL
|
||||
{{0xEB, 0xAC}, { -1 /* 0x3B */, -1 /* 0x70 */}}, // 5: VABS(Vn(0x0) used for encoding)
|
||||
{{0xE8, 0xA0}, { -1, -1}}, // 6: VDIV
|
||||
{{0xEB, 0xA4}, { -1 /* 0x3B */, -1 /* 0x78 */}}, // 7: VNEG(Vn(0x1) used for encoding)
|
||||
{{0xEB, 0xAC}, { -1, -1}}, // 8: VSQRT (Vn(0x1) used for encoding)
|
||||
{{0xEB, 0xA4}, { -1, -1}}, // 9: VCMP (Vn(0x4 | #0 ? 1 : 0) used for encoding)
|
||||
{{0xEB, 0xAC}, { -1, -1}}, // 10: VCMPE (Vn(0x4 | #0 ? 1 : 0) used for encoding)
|
||||
{{ -1, -1}, {0x3B, 0x30}}, // 11: VABSi
|
||||
{{0xE1, 0xA4}, { -1, -1}}, // 1: VNMLA
|
||||
{{0xE0, 0xA4}, {0x22, 0xD1}}, // 2: VMLS
|
||||
{{0xE1, 0xA0}, { -1, -1}}, // 3: VNMLS
|
||||
{{0xE3, 0xA0}, {0x20, 0xD0}}, // 4: VADD
|
||||
{{0xE3, 0xA4}, {0x22, 0xD0}}, // 5: VSUB
|
||||
{{0xE2, 0xA0}, {0x30, 0xD1}}, // 6: VMUL
|
||||
{{0xE2, 0xA4}, { -1, -1}}, // 7: VNMUL
|
||||
{{0xEB, 0xAC}, { -1 /* 0x3B */, -1 /* 0x70 */}}, // 8: VABS(Vn(0x0) used for encoding)
|
||||
{{0xE8, 0xA0}, { -1, -1}}, // 9: VDIV
|
||||
{{0xEB, 0xA4}, { -1 /* 0x3B */, -1 /* 0x78 */}}, // 10: VNEG(Vn(0x1) used for encoding)
|
||||
{{0xEB, 0xAC}, { -1, -1}}, // 11: VSQRT (Vn(0x1) used for encoding)
|
||||
{{0xEB, 0xA4}, { -1, -1}}, // 12: VCMP (Vn(0x4 | #0 ? 1 : 0) used for encoding)
|
||||
{{0xEB, 0xAC}, { -1, -1}}, // 13: VCMPE (Vn(0x4 | #0 ? 1 : 0) used for encoding)
|
||||
{{ -1, -1}, {0x3B, 0x30}}, // 14: VABSi
|
||||
};
|
||||
const char *VFPOpNames[] = {
|
||||
"VMLA",
|
||||
"VNMLA",
|
||||
"VMLS",
|
||||
"VNMLS",
|
||||
"VADD",
|
||||
"VSUB",
|
||||
"VMUL",
|
||||
"VNMUL",
|
||||
"VABS",
|
||||
"VDIV",
|
||||
"VNEG",
|
||||
|
@ -993,18 +999,21 @@ void ARMXEmitter::WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm)
|
|||
Write32(cond | (enc.opc1 << 20) | VnEnc | VdEnc | (enc.opc2 << 4) | (quad_reg << 6) | (double_reg << 8) | VmEnc);
|
||||
}
|
||||
void ARMXEmitter::VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(0, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(1, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(2, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(3, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(4, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(5, Vd, D0, Vm); }
|
||||
void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(6, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(7, Vd, D1, Vm); }
|
||||
void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(8, Vd, D1, Vm); }
|
||||
void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(9, Vd, D4, Vm); }
|
||||
void ARMXEmitter::VCMPE(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(10, Vd, D4, Vm); }
|
||||
void ARMXEmitter::VCMP(ARMReg Vd){ WriteVFPDataOp(9, Vd, D5, D0); }
|
||||
void ARMXEmitter::VCMPE(ARMReg Vd){ WriteVFPDataOp(10, Vd, D5, D0); }
|
||||
void ARMXEmitter::VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(1, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(2, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(3, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(4, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(5, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(6, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(7, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(8, Vd, D0, Vm); }
|
||||
void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(9, Vd, Vn, Vm); }
|
||||
void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(10, Vd, D1, Vm); }
|
||||
void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(11, Vd, D1, Vm); }
|
||||
void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(12, Vd, D4, Vm); }
|
||||
void ARMXEmitter::VCMPE(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(13, Vd, D4, Vm); }
|
||||
void ARMXEmitter::VCMP(ARMReg Vd){ WriteVFPDataOp(12, Vd, D5, D0); }
|
||||
void ARMXEmitter::VCMPE(ARMReg Vd){ WriteVFPDataOp(13, Vd, D5, D0); }
|
||||
|
||||
void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, s16 offset)
|
||||
{
|
||||
|
|
|
@ -542,6 +542,10 @@ public:
|
|||
// Compares against zero
|
||||
void VCMP(ARMReg Vd);
|
||||
void VCMPE(ARMReg Vd);
|
||||
|
||||
void VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VSQRT(ARMReg Vd, ARMReg Vm);
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
#include "Core/Config.h"
|
||||
#include "Core/MIPS/MIPS.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
|
||||
#include "ArmJit.h"
|
||||
#include "ArmRegCache.h"
|
||||
|
@ -52,7 +53,22 @@ void Jit::Comp_FPU3op(u32 op)
|
|||
{
|
||||
case 0: VADD(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) + F(ft); //add
|
||||
case 1: VSUB(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) - F(ft); //sub
|
||||
case 2: VMUL(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) * F(ft); //mul
|
||||
case 2: { //F(fd) = F(fs) * F(ft); //mul
|
||||
u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||
// Optimise possible if destination is the same
|
||||
if (fd == ((nextOp>>6) & 0x1F)) {
|
||||
// VMUL + VNEG -> VNMUL
|
||||
if (!strcmp(MIPSGetName(nextOp), "neg.s")) {
|
||||
if (fd == ((nextOp>>11) & 0x1F)) {
|
||||
VNMUL(fpr.R(fd), fpr.R(fs), fpr.R(ft));
|
||||
EatInstruction(nextOp);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
VMUL(fpr.R(fd), fpr.R(fs), fpr.R(ft));
|
||||
break;
|
||||
}
|
||||
case 3: VDIV(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) / F(ft); //div
|
||||
default:
|
||||
DISABLE;
|
||||
|
|
Loading…
Add table
Reference in a new issue