From decccf199ae9751ea19417d487404bd08a2ff7ce Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets"
Date: Sat, 23 Sep 2023 12:09:36 -0700
Subject: [PATCH] x86jit: Flush floats together if possible.

---
 Core/MIPS/ARM64/Arm64IRRegCache.cpp | 16 ++++----
 Core/MIPS/x86/X64IRRegCache.cpp     | 61 +++++++++++++++++++++++++++++
 Core/MIPS/x86/X64IRRegCache.h       |  2 +
 3 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/Core/MIPS/ARM64/Arm64IRRegCache.cpp b/Core/MIPS/ARM64/Arm64IRRegCache.cpp
index 0ce5422fd5..0420a808ce 100644
--- a/Core/MIPS/ARM64/Arm64IRRegCache.cpp
+++ b/Core/MIPS/ARM64/Arm64IRRegCache.cpp
@@ -437,19 +437,21 @@ void Arm64IRRegCache::FlushAll(bool gprs, bool fprs) {
 	// Note: make sure not to change the registers when flushing:
 	// Branching code may expect the armreg to retain its value.
 
+	auto needsFlush = [&](IRReg i) {
+		if (mr[i].loc == MIPSLoc::MEM || mr[i].isStatic) // Skip MEM (as the old check did) and static regs.
+			return false;
+		if (mr[i].nReg == -1 || !nr[mr[i].nReg].isDirty)
+			return false;
+		return true;
+	};
+
 	// Try to flush in pairs when possible.
 	for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) {
-		if (mr[i].loc == MIPSLoc::MEM || mr[i].loc == MIPSLoc::MEM || mr[i].isStatic || mr[i + 1].isStatic)
+		if (!needsFlush(i) || !needsFlush(i + 1))
 			continue;
 		// Ignore multilane regs. Could handle with more smartness...
 		if (mr[i].lane != -1 || mr[i + 1].lane != -1)
 			continue;
-		if (mr[i].nReg != -1 && !nr[mr[i].nReg].isDirty)
-			continue;
-		if (mr[i + 1].nReg != -1 && !nr[mr[i + 1].nReg].isDirty)
-			continue;
-		if (mr[i].loc == MIPSLoc::MEM || mr[i + 1].loc == MIPSLoc::MEM)
-			continue;
 
 		int offset = GetMipsRegOffset(i);
 
diff --git a/Core/MIPS/x86/X64IRRegCache.cpp b/Core/MIPS/x86/X64IRRegCache.cpp
index d3f554e32b..ee176546c8 100644
--- a/Core/MIPS/x86/X64IRRegCache.cpp
+++ b/Core/MIPS/x86/X64IRRegCache.cpp
@@ -147,6 +147,67 @@ void X64IRRegCache::FlushBeforeCall() {
 #endif
 }
 
+void X64IRRegCache::FlushAll(bool gprs, bool fprs) {
+	// Note: make sure not to change the registers when flushing:
+	// Branching code may expect the x64reg to retain its value.
+
+	auto needsFlush = [&](IRReg i) {
+		if (mr[i].loc == MIPSLoc::MEM || mr[i].isStatic) // Must not require MEM: isSingleFloat() below requires FREG.
+			return false;
+		if (mr[i].nReg == -1 || !nr[mr[i].nReg].isDirty)
+			return false;
+		return true;
+	};
+
+	auto isSingleFloat = [&](IRReg i) {
+		if (mr[i].lane != -1 || mr[i].loc != MIPSLoc::FREG)
+			return false;
+		return true;
+	};
+
+	// Sometimes, float/vector regs may be in separate regs in a sequence.
+	// It's worth combining and flushing together.
+	for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) {
+		if (!needsFlush(i) || !needsFlush(i + 1))
+			continue;
+		// GPRs are probably not worth it. Merging Vec2s might be, but pretty uncommon.
+		if (!isSingleFloat(i) || !isSingleFloat(i + 1))
+			continue;
+
+		X64Reg regs[4]{ INVALID_REG, INVALID_REG, INVALID_REG, INVALID_REG };
+		regs[0] = FromNativeReg(mr[i + 0].nReg);
+		regs[1] = FromNativeReg(mr[i + 1].nReg);
+
+		bool flushVec4 = i + 3 < TOTAL_MAPPABLE_IRREGS && needsFlush(i + 2) && needsFlush(i + 3);
+		if (flushVec4 && isSingleFloat(i + 2) && isSingleFloat(i + 3) && (i & 3) == 0) {
+			regs[2] = FromNativeReg(mr[i + 2].nReg);
+			regs[3] = FromNativeReg(mr[i + 3].nReg);
+
+			// Note that this doesn't change the low lane of any of these regs.
+			emit_->UNPCKLPS(regs[1], ::R(regs[3]));
+			emit_->UNPCKLPS(regs[0], ::R(regs[2]));
+			emit_->UNPCKLPS(regs[0], ::R(regs[1]));
+			emit_->MOVAPS(MDisp(CTXREG, -128 + GetMipsRegOffset(i)), regs[0]);
+
+			for (int j = 0; j < 4; ++j)
+				DiscardReg(i + j);
+			i += 3;
+			continue;
+		}
+
+		// TODO: Maybe this isn't always worth doing.
+		emit_->UNPCKLPS(regs[0], ::R(regs[1]));
+		emit_->MOVLPS(MDisp(CTXREG, -128 + GetMipsRegOffset(i)), regs[0]);
+
+		DiscardReg(i);
+		DiscardReg(i + 1);
+		++i;
+		continue;
+	}
+
+	IRNativeRegCacheBase::FlushAll(gprs, fprs);
+}
+
 X64Reg X64IRRegCache::TryMapTempImm(IRReg r, X64Map flags) {
 	_dbg_assert_(IsValidGPR(r));
 
diff --git a/Core/MIPS/x86/X64IRRegCache.h b/Core/MIPS/x86/X64IRRegCache.h
index 90e0259914..f33e4e8d89 100644
--- a/Core/MIPS/x86/X64IRRegCache.h
+++ b/Core/MIPS/x86/X64IRRegCache.h
@@ -92,6 +92,8 @@ public:
 
 	void MapWithFlags(IRInst inst, X64IRJitConstants::X64Map destFlags, X64IRJitConstants::X64Map src1Flags = X64IRJitConstants::X64Map::NONE, X64IRJitConstants::X64Map src2Flags = X64IRJitConstants::X64Map::NONE);
 
+	// Note: may change the high lanes of single-register XMMs.
+	void FlushAll(bool gprs = true, bool fprs = true) override;
 	void FlushBeforeCall();
 
 	Gen::X64Reg GetAndLockTempGPR();