Mirror of https://github.com/hrydgard/ppsspp.git, synced 2025-04-02 11:01:50 -04:00

Merge pull request #7843 from unknownbrackets/arm64-micro

Update ARM64 emitter from Dolphin, minor flushing tweaks

Commit 511d8bd3d4: 5 changed files with 486 additions and 86 deletions
@@ -394,7 +394,7 @@ static const u32 LogicalEnc[][2] = {
 };

 // Load/Store Exclusive
-static u32 LoadStoreExcEnc[][5] = {
+static const u32 LoadStoreExcEnc[][5] = {
 	{0, 0, 0, 0, 0}, // STXRB
 	{0, 0, 0, 0, 1}, // STLXRB
 	{0, 0, 1, 0, 0}, // LDXRB
@@ -434,11 +434,11 @@ void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr
 	bool b64Bit = Is64Bit(Rt);
 	s64 distance = (s64)ptr - (s64)m_code;

-	_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, (int)distance);
+	_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %llx", __FUNCTION__, distance);

 	distance >>= 2;

-	_assert_msg_(DYNA_REC, distance >= -0x40000 && distance <= 0x3FFFF, "%s: Received too large distance: %lx", __FUNCTION__, (int)distance);
+	_assert_msg_(DYNA_REC, distance >= -0x40000 && distance <= 0x3FFFF, "%s: Received too large distance: %llx", __FUNCTION__, distance);

 	Rt = DecodeReg(Rt);
 	Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | \
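Note on the change above (context, not part of the diff): the old messages pushed an s64 through an (int) cast and printed it with %lx, which both truncates and mismatches the argument type. A fully portable spelling would use the <cinttypes> macros and avoid the %lx/%llx split entirely; a minimal sketch, not code from this commit:

	#include <cinttypes>
	#include <cstdio>

	// Prints a 64-bit branch distance without truncation or a
	// platform-dependent length modifier.
	static void ReportDistance(int64_t distance) {
		printf("distance = 0x%" PRIx64 "\n", (uint64_t)distance);
	}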
@@ -744,8 +744,9 @@ void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 i
 		(immr << 16) | (imms << 10) | (Rn << 5) | Rd);
 }

-void ARM64XEmitter::EncodeLoadStorePair(u32 op, bool V, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
+void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
 {
+	bool b64Bit = Is64Bit(Rt);
 	u32 type_encode = 0;

 	switch (type) {
@@ -758,30 +759,18 @@ void ARM64XEmitter::EncodeLoadStorePair(u32 op, bool V, u32 load, IndexType type
 	case INDEX_PRE:
 		type_encode = 3;
 		break;
 	case INDEX_SIGNED:
 		_assert_msg_(DYNA_REC, false, "%s doesn't support INDEX_SIGNED!", __FUNCTION__);
 		break;
 	}

-	// ASIMD
-	if (V) {
-		if (b64Bit) {
-			op |= 2;
-			imm >>= 3;
-		}
-		else
-		{
-			imm >>= 2;
-			if (IsQuad(Rt)) {
-				op = 2;
-				if (type_encode == 2)
-					imm >>= 2;
-			} else if (IsDouble(Rt)) {
-				op = 1;
-				if (type_encode == 2)
-					imm >>= 1;
-			} else {
-				op = 0;
-			}
-		}
-	} else {
-		bool b64Bit = Is64Bit(Rt);
+	if (b64Bit) {
+		op |= 2;
+		imm >>= 3;
+	} else {
+		imm >>= 2;
+	}
-	}

 	_assert_msg_(JIT, imm >= -64 && imm <= 63, "%s recieved too large imm: %d", imm);
@@ -790,10 +779,9 @@ void ARM64XEmitter::EncodeLoadStorePair(u32 op, bool V, u32 load, IndexType type
 	Rt2 = DecodeReg(Rt2);
 	Rn = DecodeReg(Rn);

-	Write32((op << 30) | (5 << 27) | (V << 26) | (type_encode << 23) | (load << 22) | \
+	Write32((op << 30) | (5 << 27) | (type_encode << 23) | (load << 22) | \
 		(((uint32_t)imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
 }

 void ARM64XEmitter::EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm)
 {
 	Rd = DecodeReg(Rd);
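Note (context, not part of the diff): LDP/STP encode a signed 7-bit immediate scaled by the register size, which is what the imm >>= 2 / imm >>= 3 shifts and the -64..63 assert above implement. A sketch of the equivalent byte-offset check for 64-bit GPRs, under that reading:

	// For LDP/STP of X registers the byte offset must be a multiple of 8
	// in [-512, 504]; after >> 3 it fits the signed 7-bit imm field.
	static bool IsValidPairOffset64(int32_t offset) {
		return (offset & 7) == 0 && offset >= -512 && offset <= 504;
	}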
@@ -849,21 +837,21 @@ void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch)
 		Not = true;
 	case 0: // CBZ
 	{
-		_assert_msg_(DYNA_REC, IsInRangeImm19(distance), "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, (int)distance);
+		_assert_msg_(DYNA_REC, IsInRangeImm19(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);
 		bool b64Bit = Is64Bit(branch.reg);
 		ARM64Reg reg = DecodeReg(branch.reg);
 		inst = (b64Bit << 31) | (0x1A << 25) | (Not << 24) | (MaskImm19(distance) << 5) | reg;
 	}
 	break;
 	case 2: // B (conditional)
-		_assert_msg_(DYNA_REC, IsInRangeImm19(distance), "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance);
+		_assert_msg_(DYNA_REC, IsInRangeImm19(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);
 		inst = (0x2A << 25) | (MaskImm19(distance) << 5) | branch.cond;
 		break;
 	case 4: // TBNZ
 		Not = true;
 	case 3: // TBZ
 	{
-		_assert_msg_(DYNA_REC, IsInRangeImm14(distance), "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, (int)distance);
+		_assert_msg_(DYNA_REC, IsInRangeImm14(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);
 		ARM64Reg reg = DecodeReg(branch.reg);
 		inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | ((branch.bit & 0x1F) << 19) | (MaskImm14(distance) << 5) | reg;
 	}

@@ -961,7 +949,7 @@ void ARM64XEmitter::B(CCFlags cond, const void* ptr)

 	distance >>= 2;

-	_assert_msg_(DYNA_REC, IsInRangeImm19(distance), "%s: Received too large distance: %p->%p %lld %llx", __FUNCTION__, m_code, ptr, distance, distance);
+	_assert_msg_(DYNA_REC, IsInRangeImm19(distance), "%s: Received too large distance: %p->%p %ld %lx", __FUNCTION__, m_code, ptr, distance, distance);
 	Write32((0x54 << 24) | (MaskImm19(distance) << 5) | cond);
 }

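Note (context, not part of the diff): the range asserts correspond to the AArch64 branch encodings. A 19-bit signed word offset (B.cond, CBZ/CBNZ) reaches 2^18 * 4 bytes, about plus or minus 1 MiB; the 14-bit field of TBZ/TBNZ reaches only about plus or minus 32 KiB. Plausible definitions, matching the explicit bounds asserted earlier in this file:

	static bool IsInRangeImm19(s64 distance) { return distance >= -0x40000 && distance <= 0x3FFFF; }
	static bool IsInRangeImm14(s64 distance) { return distance >= -0x2000 && distance <= 0x1FFF; }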
@@ -1517,6 +1505,21 @@ void ARM64XEmitter::UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
 {
 	EncodeBitfieldMOVInst(2, Rd, Rn, immr, imms);
 }
+
+void ARM64XEmitter::BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
+{
+	u32 size = Is64Bit(Rn) ? 64 : 32;
+	_assert_msg_(DYNA_REC, (lsb + width) <= size, "%s passed lsb %d and width %d which is greater than the register size!",
+		__FUNCTION__, lsb, width);
+	EncodeBitfieldMOVInst(1, Rd, Rn, (size - lsb) % size, width - 1);
+}
+void ARM64XEmitter::UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
+{
+	u32 size = Is64Bit(Rn) ? 64 : 32;
+	_assert_msg_(DYNA_REC, (lsb + width) <= size, "%s passed lsb %d and width %d which is greater than the register size!",
+		__FUNCTION__, lsb, width);
+	EncodeBitfieldMOVInst(2, Rd, Rn, (size - lsb) % size, width - 1);
+}
 void ARM64XEmitter::EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift) {
 	bool sf = Is64Bit(Rd);
 	bool N = sf;
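The lsb/width arguments map onto the BFM/UBFM immediates through the standard aliases: immr = (size - lsb) % size and imms = width - 1. A worked example, assuming a 64-bit destination: BFI(X0, X1, 8, 16) encodes immr = (64 - 8) % 64 = 56 and imms = 15, placing bits 0..15 of X1 into bits 8..23 of X0 while leaving the rest of X0 intact.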
@@ -1564,15 +1567,15 @@ void ARM64XEmitter::PRFM(ARM64Reg Rt, u32 imm)
 // Load/Store pair
 void ARM64XEmitter::LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
 {
-	EncodeLoadStorePair(0, false, 1, type, Rt, Rt2, Rn, imm);
+	EncodeLoadStorePair(0, 1, type, Rt, Rt2, Rn, imm);
 }
 void ARM64XEmitter::LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
 {
-	EncodeLoadStorePair(1, false, 1, type, Rt, Rt2, Rn, imm);
+	EncodeLoadStorePair(1, 1, type, Rt, Rt2, Rn, imm);
 }
 void ARM64XEmitter::STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
 {
-	EncodeLoadStorePair(0, false, 0, type, Rt, Rt2, Rn, imm);
+	EncodeLoadStorePair(0, 0, type, Rt, Rt2, Rn, imm);
 }

 // Load/Store Exclusive
@@ -2344,6 +2347,27 @@ void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opc
 		(encoded_size << 10) | (Rn << 5) | Rt);
 }

+void ARM64FloatEmitter::EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
+{
+	bool quad = IsQuad(Rt);
+	u32 encoded_size = 0;
+
+	if (size == 16)
+		encoded_size = 1;
+	else if (size == 32)
+		encoded_size = 2;
+	else if (size == 64)
+		encoded_size = 3;
+
+	Rt = DecodeReg(Rt);
+	Rn = DecodeReg(Rn);
+	Rm = DecodeReg(Rm);
+
+	Write32((quad << 30) | (0x19 << 23) | (L << 22) | (Rm << 16) | (opcode << 12) | \
+		(encoded_size << 10) | (Rn << 5) | Rt);
+
+}
+
 void ARM64FloatEmitter::EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
 {
 	_assert_msg_(DYNA_REC, !IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);
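Note (context, not part of the diff): in the post-indexed form of the ASIMD load/store multiple structures encoding, an Rm field of 31 (the SP encoding) selects an immediate writeback of the whole transfer size rather than a register increment. That is presumably why the new LD1/ST1 overloads in the header below default Rm to SP.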
@@ -2376,6 +2400,99 @@ void ARM64FloatEmitter::EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM
 	Write32((size << 30) | (0xF << 26) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt);
 }

+void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
+{
+	u32 type_encode = 0;
+	u32 opc = 0;
+
+	switch (type)
+	{
+	case INDEX_SIGNED:
+		type_encode = 2;
+		break;
+	case INDEX_POST:
+		type_encode = 1;
+		break;
+	case INDEX_PRE:
+		type_encode = 3;
+		break;
+	case INDEX_UNSIGNED:
+		_assert_msg_(DYNA_REC, false, "%s doesn't support INDEX_UNSIGNED!", __FUNCTION__);
+		break;
+	}
+
+	if (size == 128)
+	{
+		_assert_msg_(DYNA_REC, !(imm & 0xF), "%s received invalid offset 0x%x!", __FUNCTION__, imm);
+		opc = 2;
+		imm >>= 4;
+	}
+	else if (size == 64)
+	{
+		_assert_msg_(DYNA_REC, !(imm & 0x7), "%s received invalid offset 0x%x!", __FUNCTION__, imm);
+		opc = 1;
+		imm >>= 3;
+	}
+	else if (size == 32)
+	{
+		_assert_msg_(DYNA_REC, !(imm & 0x3), "%s received invalid offset 0x%x!", __FUNCTION__, imm);
+		opc = 0;
+		imm >>= 2;
+	}
+
+	Rt = DecodeReg(Rt);
+	Rt2 = DecodeReg(Rt2);
+	Rn = DecodeReg(Rn);
+
+	Write32((opc << 30) | (0xB << 26) | (type_encode << 23) | (load << 22) | \
+		((imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
+
+}
+
+void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
+{
+	_assert_msg_(DYNA_REC, Rm.GetType() == ArithOption::TYPE_EXTENDEDREG, "%s must contain an extended reg as Rm!", __FUNCTION__);
+
+	u32 encoded_size = 0;
+	u32 encoded_op = 0;
+
+	if (size == 8)
+	{
+		encoded_size = 0;
+		encoded_op = 0;
+	}
+	else if (size == 16)
+	{
+		encoded_size = 1;
+		encoded_op = 0;
+	}
+	else if (size == 32)
+	{
+		encoded_size = 2;
+		encoded_op = 0;
+	}
+	else if (size == 64)
+	{
+		encoded_size = 3;
+		encoded_op = 0;
+	}
+	else if (size == 128)
+	{
+		encoded_size = 0;
+		encoded_op = 2;
+	}
+
+	if (load)
+		encoded_op |= 1;
+
+	Rt = DecodeReg(Rt);
+	Rn = DecodeReg(Rn);
+	ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg());
+
+	Write32((encoded_size << 30) | (encoded_op << 22) | (0x1E1 << 21) | (decoded_Rm << 16) | \
+		Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);
+}
+
 void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
 {
 	EmitLoadStoreImmediate(size, 1, type, Rt, Rn, imm);
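The SIMD pair encoder above scales its signed 7-bit immediate by the element size, so byte offsets must be multiples of 4, 8, or 16 for S, D, and Q registers respectively. A hypothetical call (fp being an ARM64FloatEmitter instance, X0 an address register):

	// Store Q0 and Q1 to [X0 + 32]. 32 is 16-byte aligned, so it passes
	// the (imm & 0xF) assert and encodes as imm7 = 32 >> 4 = 2.
	fp.STP(128, INDEX_SIGNED, Q0, Q1, X0, 32);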
@@ -2565,6 +2682,18 @@ void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn)
 {
 	EmitLoadStoreSingleStructure(1, 0, 6, 0, size >> 4, Rt, Rn);
 }
+void ARM64FloatEmitter::LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn)
+{
+	EmitLoadStoreSingleStructure(1, 1, 6, 0, size >> 4, Rt, Rn);
+}
+void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitLoadStoreSingleStructure(1, 0, 6, 0, size >> 4, Rt, Rn, Rm);
+}
+void ARM64FloatEmitter::LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitLoadStoreSingleStructure(1, 1, 6, 0, size >> 4, Rt, Rn, Rm);
+}

 void ARM64FloatEmitter::ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn)
 {

@@ -2687,6 +2816,22 @@ void ARM64FloatEmitter::LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)
 		opcode = 2;
 	EmitLoadStoreMultipleStructure(size, 1, opcode, Rt, Rn);
 }
+void ARM64FloatEmitter::LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
+{
+	_assert_msg_(DYNA_REC, !(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);
+	_assert_msg_(DYNA_REC, type == INDEX_POST, "%s only supports post indexing!", __FUNCTION__);
+
+	u32 opcode = 0;
+	if (count == 1)
+		opcode = 7;
+	else if (count == 2)
+		opcode = 0xA;
+	else if (count == 3)
+		opcode = 6;
+	else if (count == 4)
+		opcode = 2;
+	EmitLoadStoreMultipleStructurePost(size, 1, opcode, Rt, Rn, Rm);
+}
 void ARM64FloatEmitter::ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)
 {
 	_assert_msg_(DYNA_REC, !(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);

@@ -2701,6 +2846,22 @@ void ARM64FloatEmitter::ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)
 		opcode = 2;
 	EmitLoadStoreMultipleStructure(size, 0, opcode, Rt, Rn);
 }
+void ARM64FloatEmitter::ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
+{
+	_assert_msg_(DYNA_REC, !(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);
+	_assert_msg_(DYNA_REC, type == INDEX_POST, "%s only supports post indexing!", __FUNCTION__);
+
+	u32 opcode = 0;
+	if (count == 1)
+		opcode = 7;
+	else if (count == 2)
+		opcode = 0xA;
+	else if (count == 3)
+		opcode = 6;
+	else if (count == 4)
+		opcode = 2;
+	EmitLoadStoreMultipleStructurePost(size, 0, opcode, Rt, Rn, Rm);
+}

 // Scalar - 1 Source
 void ARM64FloatEmitter::FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top)
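Note (context, not part of the diff): the count-to-opcode table used by LD1/ST1 above matches the architectural "load/store multiple structures" opcodes: 0b0111 for one register, 0b1010 for two, 0b0110 for three, and 0b0010 for four, hence the constants 7, 0xA, 6 and 2.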
@@ -2709,7 +2870,6 @@ void ARM64FloatEmitter::FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top)
 		EmitScalar1Source(0, 0, IsDouble(Rd), 0, Rd, Rn);
 	} else {
-		_assert_msg_(JIT, !IsQuad(Rd) && !IsQuad(Rn), "FMOV can't move to/from quads");
 		int type = 0;
 		int rmode = 0;
 		int opcode = 6;
 		int sf = 0;
@@ -2728,6 +2888,26 @@ void ARM64FloatEmitter::FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top)
 	}
 }

+// Loadstore paired
+void ARM64FloatEmitter::LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
+{
+	EncodeLoadStorePair(size, true, type, Rt, Rt2, Rn, imm);
+}
+void ARM64FloatEmitter::STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
+{
+	EncodeLoadStorePair(size, false, type, Rt, Rt2, Rn, imm);
+}
+
+// Loadstore register offset
+void ARM64FloatEmitter::STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
+{
+	EncodeLoadStoreRegisterOffset(size, false, Rt, Rn, Rm);
+}
+void ARM64FloatEmitter::LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
+{
+	EncodeLoadStoreRegisterOffset(size, true, Rt, Rn, Rm);
+}
+
 void ARM64FloatEmitter::FABS(ARM64Reg Rd, ARM64Reg Rn)
 {
 	EmitScalar1Source(0, 0, IsDouble(Rd), 1, Rd, Rn);
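These wrappers replace the old float LDP/STP, which routed through the integer encoder with V = true (their removal appears further down in this diff). Callers now pass an explicit size, as in the vertex decoder hunk at the end of this commit:

	// Old call:  fp.LDP(INDEX_SIGNED, Q8, Q9, scratchReg64, 0);
	// New call:  fp.LDP(128, INDEX_SIGNED, Q8, Q9, scratchReg64, 0);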
@@ -2854,13 +3034,25 @@ void ARM64FloatEmitter::FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
 	EmitThreeSame(0, size >> 6, 0x1A, Rd, Rn, Rm);
 }
+void ARM64FloatEmitter::FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EmitThreeSame(0, size >> 6, 0b11110, Rd, Rn, Rm);
+}
 void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
 	EmitThreeSame(0, size >> 6, 0x19, Rd, Rn, Rm);
 }
-void ARM64FloatEmitter::FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn, bool source_upper)
+void ARM64FloatEmitter::FMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 {
-	Emit2RegMisc(source_upper, 0, size >> 6, 0x17, Rd, Rn);
+	EmitThreeSame(0, 2 | size >> 6, 0b11110, Rd, Rn, Rm);
 }
+void ARM64FloatEmitter::FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(false, 0, size >> 6, 0x17, Rd, Rn);
+}
+void ARM64FloatEmitter::FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(true, 0, size >> 6, 0x17, Rd, Rn);
+}
 void ARM64FloatEmitter::FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
 {
@@ -3254,6 +3446,47 @@ void ARM64FloatEmitter::ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
 }

 // Shift by immediate
+void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
+{
+	SSHLL(src_size, Rd, Rn, shift, false);
+}
+void ARM64FloatEmitter::SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
+{
+	SSHLL(src_size, Rd, Rn, shift, true);
+}
+void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
+{
+	SHRN(dest_size, Rd, Rn, shift, false);
+}
+void ARM64FloatEmitter::SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
+{
+	SHRN(dest_size, Rd, Rn, shift, true);
+}
+void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
+{
+	USHLL(src_size, Rd, Rn, shift, false);
+}
+void ARM64FloatEmitter::USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
+{
+	USHLL(src_size, Rd, Rn, shift, true);
+}
+void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	SXTL(src_size, Rd, Rn, false);
+}
+void ARM64FloatEmitter::SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	SXTL(src_size, Rd, Rn, true);
+}
+void ARM64FloatEmitter::UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	UXTL(src_size, Rd, Rn, false);
+}
+void ARM64FloatEmitter::UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	UXTL(src_size, Rd, Rn, true);
+}
+
 void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
 {
 	_assert_msg_(DYNA_REC, shift < src_size, "%s shift amount must less than the element size!", __FUNCTION__);

@@ -3296,7 +3529,7 @@ void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift,
 	EmitShiftImm(upper, 1, immh, immb, 0x14, Rd, Rn);
 }

-void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
+void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
 {
 	_assert_msg_(DYNA_REC, shift < dest_size, "%s shift amount must less than the element size!", __FUNCTION__);
 	u32 immh = 0;

@@ -3314,7 +3547,7 @@ void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
 	{
 		immh = 4 | ((shift >> 3) & 3);;
 	}
-	EmitShiftImm(IsQuad(Rd), 1, immh, immb, 0x10, Rd, Rn);
+	EmitShiftImm(upper, 1, immh, immb, 0x10, Rd, Rn);
 }

 void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper)
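Note (context, not part of the diff): the public wrappers above replace the bool upper parameters with explicit *2 variants, mirroring the architectural mnemonics (SSHLL2, UXTL2 and friends operate on the upper 64 bits of the source vector). SXTL/UXTL themselves are conventionally aliases of SSHLL/USHLL with a shift of zero.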
@@ -3360,32 +3593,160 @@ void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8
 	EmitVectorxElement(0, 2 | (size >> 6), L, 1, H, Rd, Rn, Rm);
 }

-void ARM64FloatEmitter::LDP(IndexType index_type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) {
-	m_emit->EncodeLoadStorePair(0, true, 1, index_type, Rt, Rt2, Rn, imm);
-}
-
-void ARM64FloatEmitter::STP(IndexType index_type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) {
-	m_emit->EncodeLoadStorePair(0, true, 0, index_type, Rt, Rt2, Rn, imm);
-}
-
 // TODO: According to the ABI, we really only need to save the bottom 64 bits of D8-D15.
-void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
+void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp)
 {
-	for (auto it : registers)
-		STR(128, INDEX_PRE, (ARM64Reg)(Q0 + it), SP, -16);
-}
+	bool bundled_loadstore = false;

-void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
-{
-	for (int i = 31; i >= 0; --i)
+	for (int i = 0; i < 32; ++i)
 	{
 		if (!registers[i])
 			continue;

-		if (ignore_mask[i])
-			m_emit->ADD(SP, SP, 16);
-		else
-			LDR(128, INDEX_POST, (ARM64Reg)(Q0 + i), SP, 16);
+		int count = 0;
+		while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
+		if (count > 1)
+		{
+			bundled_loadstore = true;
+			break;
+		}
 	}
+
+	if (bundled_loadstore && tmp != INVALID_REG)
+	{
+		int num_regs = registers.Count();
+		m_emit->SUB(SP, SP, num_regs * 16);
+		m_emit->ADD(tmp, SP, 0);
+		std::vector<ARM64Reg> island_regs;
+		for (int i = 0; i < 32; ++i)
+		{
+			if (!registers[i])
+				continue;
+
+			int count = 0;
+
+			// 0 = true
+			// 1 < 4 && registers[i + 1] true!
+			// 2 < 4 && registers[i + 2] true!
+			// 3 < 4 && registers[i + 3] true!
+			// 4 < 4 && registers[i + 4] false!
+			while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
+
+			if (count == 1)
+				island_regs.push_back((ARM64Reg)(Q0 + i));
+			else
+				ST1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), tmp);
+
+			i += count - 1;
+		}
+
+		// Handle island registers
+		std::vector<ARM64Reg> pair_regs;
+		for (auto& it : island_regs)
+		{
+			pair_regs.push_back(it);
+			if (pair_regs.size() == 2)
+			{
+				STP(128, INDEX_POST, pair_regs[0], pair_regs[1], tmp, 32);
+				pair_regs.clear();
+			}
+		}
+		if (pair_regs.size())
+			STR(128, INDEX_POST, pair_regs[0], tmp, 16);
+	}
+	else
+	{
+		std::vector<ARM64Reg> pair_regs;
+		for (auto it : registers)
+		{
+			pair_regs.push_back((ARM64Reg)(Q0 + it));
+			if (pair_regs.size() == 2)
+			{
+				STP(128, INDEX_PRE, pair_regs[0], pair_regs[1], SP, -32);
+				pair_regs.clear();
+			}
+		}
+		if (pair_regs.size())
+			STR(128, INDEX_PRE, pair_regs[0], SP, -16);
+	}
+}
+void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp)
+{
+	bool bundled_loadstore = false;
+	int num_regs = registers.Count();
+
+	for (int i = 0; i < 32; ++i)
+	{
+		if (!registers[i])
+			continue;
+
+		int count = 0;
+		while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
+		if (count > 1)
+		{
+			bundled_loadstore = true;
+			break;
+		}
+	}
+
+	if (bundled_loadstore && tmp != INVALID_REG)
+	{
+		// The temporary register is only used to indicate that we can use this code path
+		std::vector<ARM64Reg> island_regs;
+		for (int i = 0; i < 32; ++i)
+		{
+			if (!registers[i])
+				continue;
+
+			int count = 0;
+			while (++count < 4 && (i + count) < 32 && registers[i + count]) {}
+
+			if (count == 1)
+				island_regs.push_back((ARM64Reg)(Q0 + i));
+			else
+				LD1(64, count, INDEX_POST, (ARM64Reg)(Q0 + i), SP);
+
+			i += count - 1;
+		}
+
+		// Handle island registers
+		std::vector<ARM64Reg> pair_regs;
+		for (auto& it : island_regs)
+		{
+			pair_regs.push_back(it);
+			if (pair_regs.size() == 2)
+			{
+				LDP(128, INDEX_POST, pair_regs[0], pair_regs[1], SP, 32);
+				pair_regs.clear();
+			}
+		}
+		if (pair_regs.size())
+			LDR(128, INDEX_POST, pair_regs[0], SP, 16);
+	}
+	else
+	{
+		bool odd = num_regs % 2;
+		std::vector<ARM64Reg> pair_regs;
+		for (int i = 31; i >= 0; --i)
+		{
+			if (!registers[i])
+				continue;
+
+			if (odd)
+			{
+				// First load must be a regular LDR if odd
+				odd = false;
+				LDR(128, INDEX_POST, (ARM64Reg)(Q0 + i), SP, 16);
+			}
+			else
+			{
+				pair_regs.push_back((ARM64Reg)(Q0 + i));
+				if (pair_regs.size() == 2)
+				{
+					LDP(128, INDEX_POST, pair_regs[1], pair_regs[0], SP, 32);
+					pair_regs.clear();
+				}
+			}
+		}
+	}
+}
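The rewritten ABI_PushRegisters/ABI_PopRegisters scan the bitset for runs of up to four consecutive Q registers: runs become single ST1/LD1 multiple-structure transfers, leftover "island" registers are paired into STP/LDP, and an odd remainder falls back to STR/LDR. Stack use is unchanged at 16 bytes per register; only the instruction count shrinks. A worked example under those rules (hypothetical register set and tmp choice, not from the commit):

	// Q8-Q10 form a run of three; Q15 is an island. With tmp provided,
	// the bundled path emits roughly:
	//   SUB  SP, SP, #64              ; 4 regs * 16 bytes
	//   ADD  tmp, SP, #0
	//   ST1  {v8.2d-v10.2d}, [tmp], #48
	//   STR  Q15, [tmp], #16
	fp.ABI_PushRegisters(BitSet32((1 << 8) | (1 << 9) | (1 << 10) | (1 << 15)), X12);

The hunks that follow appear to be from the emitter's header.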
@@ -1,5 +1,5 @@
-// Copyright 2013 Dolphin Emulator Project
-// Licensed under GPLv2
+// Copyright 2015 Dolphin Emulator Project
+// Licensed under GPLv2+
 // Refer to the license.txt file included.

 #pragma once

@@ -139,7 +139,7 @@ enum IndexType
 	INDEX_UNSIGNED = 0,
 	INDEX_POST = 1,
 	INDEX_PRE = 2,
-	INDEX_SIGNED = INDEX_UNSIGNED // used in LDP/STP
+	INDEX_SIGNED = 3, // used in LDP/STP
 };

 enum ShiftAmount
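Note (context, not part of the diff): INDEX_SIGNED used to alias INDEX_UNSIGNED, so callers could not request the signed-offset pair form distinctly. Giving it its own value lets EncodeLoadStorePair map it to the signed-offset encoding (type_encode = 2), which the register cache's new STP-based flushing below depends on.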
@@ -309,7 +309,7 @@ public:
 	{
 		return m_type;
 	}
-	ARM64Reg GetReg()
+	ARM64Reg GetReg() const
 	{
 		return m_destReg;
 	}

@@ -367,7 +367,7 @@ private:
 	void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
 	void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
 	void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n);
-	void EncodeLoadStorePair(u32 op, bool V, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
+	void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
 	void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
 	void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
@@ -592,10 +592,8 @@ public:
 	void BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
 	void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
 	void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
-
-	void BFI(ARM64Reg Rd, ARM64Reg Rn, int lsb, int width) {
-		BFM(Rd, Rn, (64 - lsb) % (Is64Bit(Rd) ? 64 : 32), width - 1);
-	}
+	void BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
+	void UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);

 	// Extract register (ROR with two inputs, if same then faster on A67)
 	void EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift);

@@ -773,12 +771,25 @@ public:
 	void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
 	void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
 	void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
+	void LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
+	void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
+	void LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
 	void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
 	void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);

 	// Loadstore multiple structure
 	void LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
+	void LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);
 	void ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
+	void ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);
+
+	// Loadstore paired
+	void LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
+	void STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
+
+	// Loadstore register offset
+	void STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
+	void LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);

 	// Scalar - 1 Source
 	void FABS(ARM64Reg Rd, ARM64Reg Rn);
@@ -812,9 +823,12 @@ public:
 	void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
 	void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
 	void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
 	void FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
 	void FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
-	void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn, bool source_upper = false);
+	void FMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
+	void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
+	void FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn);
 	void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
 	void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
 	void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);

@@ -835,11 +849,11 @@ public:
 	void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
 	void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
 	void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
-	void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
 	void SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
 	void SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
 	void UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
 	void UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
+	void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
 	void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);

 	// Move

@@ -892,11 +906,16 @@ public:
 	void ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);

 	// Shift by immediate
-	void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper = false);
-	void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper = false);
+	void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
+	void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
+	void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
+	void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
 	void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
-	void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper = false);
-	void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper = false);
+	void SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
+	void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
+	void SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
+	void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
+	void UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);

 	// vector x indexed element
 	void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
@@ -905,12 +924,9 @@ public:
 	void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
 	void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG);

-	void LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
-	void STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
-
 	// ABI related
-	void ABI_PushRegisters(BitSet32 registers);
-	void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));
+	void ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);
+	void ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp = INVALID_REG);

 private:
 	ARM64XEmitter* m_emit;

@@ -934,11 +950,20 @@ private:
 	void EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
 	void EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
 	void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
+	void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
 	void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
 	void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
 	void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
 	void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);
 	void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
+	void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
+	void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
+
+	void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
+	void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
+	void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
+	void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
+	void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
 };

 class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>
@@ -956,4 +981,3 @@ private:
 	}
 };
 }
-
@@ -299,7 +299,7 @@ allocate:
 		goto allocate;
 	}

-	// Uh oh, we have all them spilllocked....
+	// Uh oh, we have all of them spilllocked....
 	ERROR_LOG_REPORT(JIT, "Out of spillable registers at PC %08x!!!", mips_->pc);
 	return INVALID_REG;
 }

@@ -226,7 +226,7 @@ allocate:
 		goto allocate;
 	}

-	// Uh oh, we have all them spilllocked....
+	// Uh oh, we have all of them spilllocked....
 	ERROR_LOG_REPORT(JIT, "Out of spillable registers at PC %08x!!!", mips_->pc);
 	return INVALID_REG;
 }
@@ -347,6 +347,10 @@ void Arm64RegCache::DiscardR(MIPSGPReg mipsReg) {
 ARM64Reg Arm64RegCache::ARM64RegForFlush(MIPSGPReg r) {
 	switch (mr[r].loc) {
 	case ML_IMM:
+		// Zero is super easy.
+		if (mr[r].imm == 0) {
+			return WZR;
+		}
 		// Could we get lucky? Check for an exact match in another armreg.
 		for (int i = 0; i < NUM_MIPSREG; ++i) {
 			if (mr[i].loc == ML_ARMREG_IMM && mr[i].imm == mr[r].imm) {
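Note (context, not part of the diff): AArch64 treats register number 31 as the zero register (WZR/XZR) in most data-processing and store operand positions, so a MIPS register known to hold zero can be flushed by storing WZR directly, with no instruction needed to materialize the constant.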
@@ -434,13 +438,24 @@ void Arm64RegCache::FlushAll() {
 	// Flush it first so we don't get it confused.
 	FlushR(MIPS_REG_LO);

+	// TODO: We could do a pass to allocate regs for imms for more paired stores.
+	// 1 because MIPS_REG_ZERO isn't flushable anyway.
 	// 31 because 30 and 31 are the last possible pair - MIPS_REG_FPCOND, etc. are too far away.
-	for (int i = 0; i < 31; i++) {
+	for (int i = 1; i < 31; i++) {
 		MIPSGPReg mreg1 = MIPSGPReg(i);
 		MIPSGPReg mreg2 = MIPSGPReg(i + 1);
 		ARM64Reg areg1 = ARM64RegForFlush(mreg1);
 		ARM64Reg areg2 = ARM64RegForFlush(mreg2);

+		// If either one doesn't have a reg yet, try flushing imms to scratch regs.
+		if (areg1 == INVALID_REG && IsImm(mreg1)) {
+			SetRegImm(SCRATCH1, GetImm(mreg1));
+			areg1 = SCRATCH1;
+		}
+		if (areg2 == INVALID_REG && IsImm(mreg2)) {
+			SetRegImm(SCRATCH2, GetImm(mreg2));
+			areg2 = SCRATCH2;
+		}
+
 		if (areg1 != INVALID_REG && areg2 != INVALID_REG) {
 			// We can use a paired store, awesome.
 			emit_->STP(INDEX_SIGNED, areg1, areg2, CTXREG, GetMipsRegOffset(mreg1));
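This is the "minor flushing tweaks" part of the commit: adjacent dirty MIPS registers are written back with one STP against the context register instead of two stores, and values still held as immediates are first materialized into scratch registers so they can join a pair. The effect, assuming the 32-bit GPRs sit contiguously in the context block:

	// Before: two stores                After: one paired store
	//   STR W19, [CTXREG, #off]           STP W19, W20, [CTXREG, #off]
	//   STR W20, [CTXREG, #off+4]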
@@ -307,8 +307,8 @@ void VertexDecoderJitCache::Jit_ApplyWeights() {
 		break;
 	default:
 		// Matrices 4+ need to be loaded from memory.
-		fp.LDP(INDEX_SIGNED, Q8, Q9, scratchReg64, 0);
-		fp.LDP(INDEX_SIGNED, Q10, Q11, scratchReg64, 2 * 16);
+		fp.LDP(128, INDEX_SIGNED, Q8, Q9, scratchReg64, 0);
+		fp.LDP(128, INDEX_SIGNED, Q10, Q11, scratchReg64, 2 * 16);
 		fp.FMLA(32, Q4, Q8, neonWeightRegsQ[i >> 2], i & 3);
 		fp.FMLA(32, Q5, Q9, neonWeightRegsQ[i >> 2], i & 3);
 		fp.FMLA(32, Q6, Q10, neonWeightRegsQ[i >> 2], i & 3);