From be938a850bcfee348855ee5314b29ecc7f6f1635 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Aug 2023 09:21:52 -0700 Subject: [PATCH 1/4] riscv: Remove FMul safety check. Let's just see if everything's right, this bloats multiplies a lot. Doesn't seem to impact perf a lot, though. --- Core/MIPS/RiscV/RiscVCompFPU.cpp | 38 +++----------------------------- 1 file changed, 3 insertions(+), 35 deletions(-) diff --git a/Core/MIPS/RiscV/RiscVCompFPU.cpp b/Core/MIPS/RiscV/RiscVCompFPU.cpp index e1581f3688..19655b8f65 100644 --- a/Core/MIPS/RiscV/RiscVCompFPU.cpp +++ b/Core/MIPS/RiscV/RiscVCompFPU.cpp @@ -50,41 +50,9 @@ void RiscVJitBackend::CompIR_FArith(IRInst inst) { case IROp::FMul: fpr.MapDirtyInIn(inst.dest, inst.src1, inst.src2); - // TODO: If FMUL consistently produces NAN across chip vendors, we can skip this. - // Luckily this does match the RISC-V canonical NAN. - if (inst.src1 != inst.src2) { - // These will output 0x80/0x01 if infinity, 0x10/0x80 if zero. - // We need to check if one is infinity and the other zero. - - // First, try inf * zero. - FCLASS(32, SCRATCH1, fpr.R(inst.src1)); - FCLASS(32, SCRATCH2, fpr.R(inst.src2)); - ANDI(R_RA, SCRATCH1, 0x81); - FixupBranch lhsNotInf = BEQ(R_RA, R_ZERO); - ANDI(R_RA, SCRATCH2, 0x18); - FixupBranch infZero = BNE(R_RA, R_ZERO); - - // Okay, what about the other order? - SetJumpTarget(lhsNotInf); - ANDI(R_RA, SCRATCH1, 0x18); - FixupBranch lhsNotZero = BEQ(R_RA, R_ZERO); - ANDI(R_RA, SCRATCH2, 0x81); - FixupBranch zeroInf = BNE(R_RA, R_ZERO); - - // Nope, all good. - SetJumpTarget(lhsNotZero); - FMUL(32, fpr.R(inst.dest), fpr.R(inst.src1), fpr.R(inst.src2)); - FixupBranch skip = J(); - - SetJumpTarget(infZero); - SetJumpTarget(zeroInf); - LI(SCRATCH1, 0x7FC00000); - FMV(FMv::W, FMv::X, fpr.R(inst.dest), SCRATCH1); - - SetJumpTarget(skip); - } else { - FMUL(32, fpr.R(inst.dest), fpr.R(inst.src1), fpr.R(inst.src2)); - } + // We'll assume everyone will make it such that 0 * infinity = NAN properly. + // See blame on this comment if that proves untrue. + FMUL(32, fpr.R(inst.dest), fpr.R(inst.src1), fpr.R(inst.src2)); break; case IROp::FDiv: From 7cc6c5fa6284ea89576732ff1a63ed676ffb4ae8 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Aug 2023 09:22:45 -0700 Subject: [PATCH 2/4] riscv: Fix load error w/o pointerify. --- Core/MIPS/RiscV/RiscVCompLoadStore.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Core/MIPS/RiscV/RiscVCompLoadStore.cpp b/Core/MIPS/RiscV/RiscVCompLoadStore.cpp index 0713016d6c..09ac714519 100644 --- a/Core/MIPS/RiscV/RiscVCompLoadStore.cpp +++ b/Core/MIPS/RiscV/RiscVCompLoadStore.cpp @@ -80,11 +80,8 @@ void RiscVJitBackend::CompIR_Load(IRInst inst) { SetScratch1ToSrc1Address(inst.src1); addrReg = SCRATCH1; } - // If they're the same, MapReg may subtract MEMBASEREG, so just mark dirty. - if (inst.dest == inst.src1) - gpr.MarkDirty(gpr.R(inst.dest), true); - else - gpr.MapReg(inst.dest, MIPSMap::NOINIT | MIPSMap::MARK_NORM32); + // With NOINIT, MapReg won't subtract MEMBASEREG even if dest == src1. + gpr.MapReg(inst.dest, MIPSMap::NOINIT | MIPSMap::MARK_NORM32); gpr.ReleaseSpillLock(inst.dest, inst.src1); s32 imm = AdjustForAddressOffset(&addrReg, inst.constant); From 41cddce1679b18f142634b291842b95684be168b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Aug 2023 09:28:34 -0700 Subject: [PATCH 3/4] TexCache: Encourage vectorization. This gets clang to vectorize on RISC-V V, although it looks suboptimal (probably faster than not using vector, though.) Also improves other platforms, but our specializations seem better. --- GPU/Common/TextureDecoder.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/GPU/Common/TextureDecoder.cpp b/GPU/Common/TextureDecoder.cpp index 32da2dffa9..da58590db8 100644 --- a/GPU/Common/TextureDecoder.cpp +++ b/GPU/Common/TextureDecoder.cpp @@ -243,8 +243,13 @@ static u32 QuickTexHashNonSSE(const void *checkp, u32 size) { if (((intptr_t)checkp & 0xf) == 0 && (size & 0x3f) == 0) { static const u16 cursor2_initial[8] = {0xc00bU, 0x9bd9U, 0x4b73U, 0xb651U, 0x4d9bU, 0x4309U, 0x0083U, 0x0001U}; union u32x4_u16x8 { +#if defined(__GNUC__) + uint32_t x32 __attribute__((vector_size(16))); + uint16_t x16 __attribute__((vector_size(16))); +#else u32 x32[4]; u16 x16[8]; +#endif }; u32x4_u16x8 cursor{}; u32x4_u16x8 cursor2; From e51eca9eee33b64436f400d1fbf584c01a458f55 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 13 Aug 2023 09:30:27 -0700 Subject: [PATCH 4/4] UI: Fix frame profiler. --- UI/EmuScreen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/UI/EmuScreen.cpp b/UI/EmuScreen.cpp index 66bb14a849..1149ad8545 100644 --- a/UI/EmuScreen.cpp +++ b/UI/EmuScreen.cpp @@ -1547,7 +1547,7 @@ void EmuScreen::renderUI() { } #ifdef USE_PROFILER - if (g_Config.bShowFrameProfiler && !invalid_) { + if ((DebugOverlay)g_Config.iDebugOverlay == DebugOverlay::FRAME_PROFILE && !invalid_) { DrawProfile(*ctx); } #endif