mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
arm64jit: Try shifted MOVI in MOVI2FDUP().
Any penalty from the int/float or size change should be less than that of a GPR load.
This commit is contained in:
parent
a8493c0e19
commit
00e691d633
2 changed files with 102 additions and 0 deletions
|
@ -4204,6 +4204,14 @@ void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch, bo
|
|||
if (negate) {
|
||||
FNEG(32, Rd, Rd);
|
||||
}
|
||||
} else if (TryAnyMOVI(32, Rd, ival)) {
|
||||
if (negate) {
|
||||
FNEG(32, Rd, Rd);
|
||||
}
|
||||
} else if (TryAnyMOVI(32, Rd, ival ^ 0x80000000)) {
|
||||
if (!negate) {
|
||||
FNEG(32, Rd, Rd);
|
||||
}
|
||||
} else {
|
||||
_assert_msg_(scratch != INVALID_REG, "Failed to find a way to generate FP immediate %f without scratch", value);
|
||||
if (negate) {
|
||||
|
@ -4214,6 +4222,96 @@ void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch, bo
|
|||
}
|
||||
}
|
||||
|
||||
// Attempts to fill every `size`-bit element of Rd with elementValue using a single
// MOVI or MVNI instruction.
//
// size:         element width in bits (8, 16, 32 or 64).
// Rd:           destination vector register.
// elementValue: the value for one element; only the low `size` bits are examined.
//
// Returns true if an instruction was emitted.  Returns false, emitting nothing, when the
// value cannot be expressed as a modified immediate at this element size (or the size is
// not one of the handled widths).
bool ARM64FloatEmitter::TryMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) {
	if (size == 8) {
		// Can always do 8.
		MOVI(size, Rd, elementValue & 0xFF);
		return true;
	} else if (size == 16) {
		if ((elementValue & 0xFF00) == 0) {
			// 0x00XX
			MOVI(size, Rd, elementValue & 0xFF, 0);
			return true;
		} else if ((elementValue & 0x00FF) == 0) {
			// 0xXX00
			MOVI(size, Rd, (elementValue >> 8) & 0xFF, 8);
			return true;
		} else if ((elementValue & 0xFF00) == 0xFF00) {
			// 0xFFXX: MVNI writes the complement, so pass the inverted byte.
			MVNI(size, Rd, ~elementValue & 0xFF, 0);
			return true;
		} else if ((elementValue & 0x00FF) == 0x00FF) {
			// 0xXXFF
			MVNI(size, Rd, (~elementValue >> 8) & 0xFF, 8);
			return true;
		}

		return false;
	} else if (size == 32) {
		// One arbitrary byte at any byte position, all other bytes either 0x00 or 0xFF.
		for (int shift = 0; shift < 32; shift += 8) {
			// Unsigned literal: 0xFF << 24 would overflow a signed int.
			const uint32_t mask = ~(0xFFu << shift);
			if ((elementValue & mask) == 0) {
				MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift);
				return true;
			} else if ((elementValue & mask) == mask) {
				MVNI(size, Rd, (~elementValue >> shift) & 0xFF, shift);
				return true;
			}
		}

		// Maybe an MSL shift will work?  MSL shifts ones in below the byte instead of zeroes.
		for (int shift = 8; shift <= 16; shift += 8) {
			const uint32_t mask = ~(0xFFu << shift);
			// MOVI+MSL result: zeroes above the byte, ones below it.
			const uint32_t ones = (1u << shift) - 1;
			// MVNI+MSL result: ones above the byte, zeroes below it.
			const uint32_t notOnes = 0xFFFFFF00u << shift;
			if ((elementValue & mask) == ones) {
				MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift, true);
				return true;
			} else if ((elementValue & mask) == notOnes) {
				// As with the non-MSL MVNI above, the encoded byte must be the complement
				// of the byte we want to end up with.
				MVNI(size, Rd, (~elementValue >> shift) & 0xFF, shift, true);
				return true;
			}
		}

		return false;
	} else if (size == 64) {
		// Each byte must be all zeroes or all ones; collect one bit per byte.
		uint8_t imm8 = 0;
		for (int i = 0; i < 8; ++i) {
			const uint8_t byte = (elementValue >> (i * 8)) & 0xFF;
			if (byte != 0 && byte != 0xFF)
				return false;

			if (byte == 0xFF)
				imm8 |= 1 << i;
		}

		// Didn't run into any partial bytes, so size 64 is doable.
		// NOTE(review): this passes the packed per-byte mask, assuming MOVI(64, ...) expects
		// that form rather than the expanded 64-bit immediate - confirm against MOVI().
		MOVI(size, Rd, imm8);
		return true;
	}
	return false;
}
|
||||
|
||||
// Repeats the low `bits` bits of `element` across a full 64-bit pattern.
// Widths of 64 or more (or non-positive widths) return `element` unchanged.
static uint64_t RepeatElement64(uint64_t element, int bits) {
	if (bits <= 0 || bits >= 64)
		return element;

	const uint64_t low = element & ((1ULL << bits) - 1);
	uint64_t pattern = 0;
	for (int i = 0; i < 64; i += bits) {
		pattern |= low << i;
	}
	return pattern;
}

// Like TryMOVI(), but if the value can't be encoded at the requested element size, also
// tries the other element sizes (8, 16, 32, 64) that would produce the same register
// contents.
//
// size:         element width in bits of elementValue.
// Rd:           destination vector register.
// elementValue: the value for one element of `size` bits.
//
// Returns true if an instruction was emitted, false (emitting nothing) otherwise.
bool ARM64FloatEmitter::TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) {
	// Try the original size first in case that's more optimal.
	if (TryMOVI(size, Rd, elementValue))
		return true;

	// The 64-bit pattern that results from repeating the element.  Uses a 64-bit shift
	// (1 << 32 on an int is undefined) and steps by whole elements.
	const uint64_t value = RepeatElement64(elementValue, size);

	for (int attempt = 8; attempt <= 64; attempt += attempt) {
		// Original size was already attempted above.
		if (attempt == size)
			continue;

		// A different element width is only equivalent if repeating its low `attempt` bits
		// reproduces the whole pattern.  Without this, the 8-bit attempt (which can encode
		// any byte) would always "succeed" using just the low byte.
		if (RepeatElement64(value, attempt) != value)
			continue;

		if (TryMOVI(attempt, Rd, value))
			return true;
	}

	return false;
}
|
||||
|
||||
void ARM64XEmitter::SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
|
||||
u32 val;
|
||||
bool shift;
|
||||
|
|
|
@ -925,6 +925,10 @@ public:
|
|||
void ORR(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0);
|
||||
void BIC(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0);
|
||||
|
||||
// Emits a single MOVI/MVNI filling each element of `size` bits with value, if it can be
// encoded at that element size.  Returns true if emitted, false if nothing was emitted.
bool TryMOVI(u8 size, ARM64Reg Rd, uint64_t value);
|
||||
// Same as TryMOVI(), but tries the requested size first and then falls back to other
// element sizes.  Unclear if there's a penalty for using a different size.
|
||||
bool TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t value);
|
||||
|
||||
// One source
|
||||
void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue