Merge pull request #18227 from unknownbrackets/x86-ir-flush

x86jit: Flush floats together if possible
This commit is contained in:
Henrik Rydgård 2023-09-24 17:27:38 +02:00 committed by GitHub
commit 2ba63c65f2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 90 additions and 7 deletions

View file

@ -437,19 +437,21 @@ void Arm64IRRegCache::FlushAll(bool gprs, bool fprs) {
// Note: make sure not to change the registers when flushing:
// Branching code may expect the armreg to retain its value.
auto needsFlush = [&](IRReg i) {
if (mr[i].loc != MIPSLoc::MEM || mr[i].isStatic)
return false;
if (mr[i].nReg == -1 || !nr[mr[i].nReg].isDirty)
return false;
return true;
};
// Try to flush in pairs when possible.
for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) {
if (mr[i].loc == MIPSLoc::MEM || mr[i].loc == MIPSLoc::MEM || mr[i].isStatic || mr[i + 1].isStatic)
if (!needsFlush(i) || !needsFlush(i + 1))
continue;
// Ignore multilane regs. Could handle with more smartness...
if (mr[i].lane != -1 || mr[i + 1].lane != -1)
continue;
if (mr[i].nReg != -1 && !nr[mr[i].nReg].isDirty)
continue;
if (mr[i + 1].nReg != -1 && !nr[mr[i + 1].nReg].isDirty)
continue;
if (mr[i].loc == MIPSLoc::MEM || mr[i + 1].loc == MIPSLoc::MEM)
continue;
int offset = GetMipsRegOffset(i);

View file

@ -62,6 +62,20 @@ void X64JitBackend::CompIR_Basic(IRInst inst) {
regs_.Map(inst);
if (inst.constant == 0) {
XORPS(regs_.FX(inst.dest), regs_.F(inst.dest));
} else if (inst.constant == 0x7FFFFFFF) {
MOVSS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
} else if (inst.constant == 0x80000000) {
MOVSS(regs_.FX(inst.dest), M(constants.signBitAll)); // rip accessible
} else if (inst.constant == 0x7F800000) {
MOVSS(regs_.FX(inst.dest), M(constants.positiveInfinity)); // rip accessible
} else if (inst.constant == 0x7FC00000) {
MOVSS(regs_.FX(inst.dest), M(constants.qNAN)); // rip accessible
} else if (inst.constant == 0x3F800000) {
MOVSS(regs_.FX(inst.dest), M(constants.positiveOnes)); // rip accessible
} else if (inst.constant == 0xBF800000) {
MOVSS(regs_.FX(inst.dest), M(constants.negativeOnes)); // rip accessible
} else if (inst.constant == 0x4EFFFFFF) {
MOVSS(regs_.FX(inst.dest), M(constants.maxIntBelowAsFloat)); // rip accessible
} else {
MOV(32, R(SCRATCH1), Imm32(inst.constant));
MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));

View file

@ -147,6 +147,67 @@ void X64IRRegCache::FlushBeforeCall() {
#endif
}
// Flushes dirty native registers back to the MIPS context.
//
// Before delegating to the base implementation, runs of adjacent dirty
// single-lane float registers are packed into one XMM register and written
// with a single store (4 floats via MOVAPS, or 2 floats via MOVLPS).
//
// gprs / fprs are forwarded unchanged to IRNativeRegCacheBase::FlushAll.
// The float-combining pass only runs when fprs is true, since it both stores
// and discards float registers.
//
// Note: combining clobbers the high lanes of the XMM registers involved; only
// the low lane (the actual single float value) is preserved.
void X64IRRegCache::FlushAll(bool gprs, bool fprs) {
	// Note: make sure not to change the registers when flushing:
	// Branching code may expect the x64reg to retain its value.

	// True when the IR reg is held in a dirty, non-static native register.
	auto needsFlush = [&](IRReg i) {
		// A value that already lives in memory has nothing to write back.
		// (This was previously inverted as `!= MEM`, which contradicted the
		// FREG requirement below and made the combined flush unreachable.)
		if (mr[i].loc == MIPSLoc::MEM || mr[i].isStatic)
			return false;
		if (mr[i].nReg == -1 || !nr[mr[i].nReg].isDirty)
			return false;
		return true;
	};

	// True when the IR reg is a float mapped on its own (not a lane of a vector).
	auto isSingleFloat = [&](IRReg i) {
		return mr[i].lane == -1 && mr[i].loc == MIPSLoc::FREG;
	};

	// Sometimes, float/vector regs may be in separate regs in a sequence.
	// It's worth combining and flushing together.
	// Only do this when the caller asked for FPRs to be flushed.
	if (fprs) {
		for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) {
			if (!needsFlush(i) || !needsFlush(i + 1))
				continue;
			// GPRs are probably not worth it.  Merging Vec2s might be, but pretty uncommon.
			if (!isSingleFloat(i) || !isSingleFloat(i + 1))
				continue;

			X64Reg regs[4]{ INVALID_REG, INVALID_REG, INVALID_REG, INVALID_REG };
			regs[0] = FromNativeReg(mr[i + 0].nReg);
			regs[1] = FromNativeReg(mr[i + 1].nReg);

			// Bounds check comes first so needsFlush() never indexes past the end.
			bool flushVec4 = i + 3 < TOTAL_MAPPABLE_IRREGS && needsFlush(i + 2) && needsFlush(i + 3);
			// MOVAPS faults on unaligned addresses; (i & 3) == 0 is the same condition
			// the load/store helpers use to pick MOVAPS (presumably guaranteeing a
			// 16-byte aligned context slot - confirm against the context layout.)
			if (flushVec4 && isSingleFloat(i + 2) && isSingleFloat(i + 3) && (i & 3) == 0) {
				regs[2] = FromNativeReg(mr[i + 2].nReg);
				regs[3] = FromNativeReg(mr[i + 3].nReg);

				// Note that this doesn't change the low lane of any of these regs.
				// regs[1] = [ r1, r3, .. ], regs[0] = [ r0, r2, .. ], then
				// regs[0] = [ r0, r1, r2, r3 ], which is the in-memory order.
				emit_->UNPCKLPS(regs[1], ::R(regs[3]));
				emit_->UNPCKLPS(regs[0], ::R(regs[2]));
				emit_->UNPCKLPS(regs[0], ::R(regs[1]));
				emit_->MOVAPS(MDisp(CTXREG, -128 + GetMipsRegOffset(i)), regs[0]);

				for (int j = 0; j < 4; ++j)
					DiscardReg(i + j);
				// Skip the three extra regs just handled (the loop adds one more.)
				i += 3;
				continue;
			}

			// TODO: Maybe this isn't always worth doing.
			// regs[0] = [ r0, r1, .. ]; MOVLPS stores just the low 64 bits.
			emit_->UNPCKLPS(regs[0], ::R(regs[1]));
			emit_->MOVLPS(MDisp(CTXREG, -128 + GetMipsRegOffset(i)), regs[0]);

			DiscardReg(i);
			DiscardReg(i + 1);
			// Skip the second reg of the pair.
			++i;
		}
	}

	// Everything not handled above goes through the generic path.
	IRNativeRegCacheBase::FlushAll(gprs, fprs);
}
X64Reg X64IRRegCache::TryMapTempImm(IRReg r, X64Map flags) {
_dbg_assert_(IsValidGPR(r));
@ -353,6 +414,8 @@ void X64IRRegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
emit_->MOVSS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
else if (lanes == 2)
emit_->MOVLPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
else if (lanes == 4 && (first & 3) == 0)
emit_->MOVAPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
else if (lanes == 4)
emit_->MOVUPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
else
@ -381,6 +444,8 @@ void X64IRRegCache::StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
emit_->MOVSS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
else if (lanes == 2)
emit_->MOVLPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
else if (lanes == 4 && (first & 3) == 0)
emit_->MOVAPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
else if (lanes == 4)
emit_->MOVUPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
else

View file

@ -92,6 +92,8 @@ public:
void MapWithFlags(IRInst inst, X64IRJitConstants::X64Map destFlags, X64IRJitConstants::X64Map src1Flags = X64IRJitConstants::X64Map::NONE, X64IRJitConstants::X64Map src2Flags = X64IRJitConstants::X64Map::NONE);
// Note: may change the high lanes of single-register XMMs.
void FlushAll(bool gprs = true, bool fprs = true) override;
void FlushBeforeCall();
Gen::X64Reg GetAndLockTempGPR();