From 0e63b357b3d4aa8370a1c3e83f206e9c5a8d58ae Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets"
Date: Mon, 22 Nov 2021 15:59:41 -0800
Subject: [PATCH] softjit: Add dithering.

---
 GPU/Software/DrawPixel.cpp    | 71 +++++++++++++++++++++++------
 GPU/Software/DrawPixel.h      |  7 +++
 GPU/Software/DrawPixelX86.cpp | 84 ++++++++++++++++++++++++++++++++++-
 3 files changed, 147 insertions(+), 15 deletions(-)

diff --git a/GPU/Software/DrawPixel.cpp b/GPU/Software/DrawPixel.cpp
index a1395a3286..1a5e1af238 100644
--- a/GPU/Software/DrawPixel.cpp
+++ b/GPU/Software/DrawPixel.cpp
@@ -577,14 +577,13 @@ void PixelRegCache::Reset() {
 }
 
 void PixelRegCache::Release(PixelRegCache::Reg r, PixelRegCache::Type t, PixelRegCache::Purpose p) {
-	for (auto &reg : regs) {
-		if (reg.reg == r && reg.type == t) {
-			_assert_msg_(reg.locked > 0, "softjit Release() reg that isn't locked");
-			_assert_msg_(!reg.forceLocked, "softjit Release() reg that is force locked");
-			reg.purpose = p;
-			reg.locked--;
-			return;
-		}
+	RegStatus *status = FindReg(r, t);
+	if (status) {
+		_assert_msg_(status->locked > 0, "softjit Release() reg that isn't locked");
+		_assert_msg_(!status->forceLocked, "softjit Release() reg that is force locked");
+		status->purpose = p;
+		status->locked--;
+		return;
 	}
 
 	RegStatus newStatus;
@@ -595,12 +594,11 @@ void PixelRegCache::Release(PixelRegCache::Reg r, PixelRegCache::Type t, PixelRe
 }
 
 void PixelRegCache::Unlock(PixelRegCache::Reg r, PixelRegCache::Type t) {
-	for (auto &reg : regs) {
-		if (reg.reg == r && reg.type == t) {
-			_assert_msg_(reg.locked > 0, "softjit Unlock() reg that isn't locked");
-			reg.locked--;
-			return;
-		}
+	RegStatus *status = FindReg(r, t);
+	if (status) {
+		_assert_msg_(status->locked > 0, "softjit Unlock() reg that isn't locked");
+		status->locked--;
+		return;
 	}
 
 	_assert_msg_(false, "softjit Unlock() reg that isn't there");
@@ -664,4 +662,49 @@ void PixelRegCache::ForceLock(PixelRegCache::Purpose p, PixelRegCache::Type t, b
 	_assert_msg_(false, "softjit ForceLock() reg that isn't there");
 }
 
+void PixelRegCache::GrabReg(PixelRegCache::Reg r, PixelRegCache::Purpose p, PixelRegCache::Type t, bool &needsSwap, PixelRegCache::Reg swapReg) {
+	for (auto &reg : regs) {
+		if (reg.reg != r || reg.type != t)
+			continue;
+
+		// Easy version, it's free.
+		if (reg.locked == 0 && !reg.forceLocked) {
+			needsSwap = false;
+			reg.purpose = p;
+			reg.locked = 1;
+			return;
+		}
+
+		// Okay, we need to swap. Move the current status of r over to swapReg.
+		needsSwap = true;
+		RegStatus *swap = FindReg(swapReg, t);
+		if (swap) {
+			swap->purpose = reg.purpose;
+			swap->forceLocked = reg.forceLocked;
+			swap->locked = reg.locked;
+		}
+		RegStatus newStatus = reg;
+		newStatus.reg = swapReg;
+		reg.purpose = p;
+		reg.locked = 1;
+		reg.forceLocked = false;
+		// Append only once we're done with reg: push_back() may reallocate regs and invalidate it.
+		if (!swap)
+			regs.push_back(newStatus);
+		return;
+	}
+
+	_assert_msg_(false, "softjit GrabReg() reg that isn't there");
+}
+
+PixelRegCache::RegStatus *PixelRegCache::FindReg(PixelRegCache::Reg r, PixelRegCache::Type t) {
+	for (auto &reg : regs) {
+		if (reg.reg == r && reg.type == t) {
+			return &reg;
+		}
+	}
+
+	return nullptr;
+}
+
 };
diff --git a/GPU/Software/DrawPixel.h b/GPU/Software/DrawPixel.h
index d9b2e8891a..2bf396f2c1 100644
--- a/GPU/Software/DrawPixel.h
+++ b/GPU/Software/DrawPixel.h
@@ -106,7 +106,12 @@ struct PixelRegCache {
 	Reg Alloc(Purpose p, Type t);
 	void ForceLock(Purpose p, Type t, bool state = true);
 
+	// For getting a specific reg. WARNING: If it was locked, needsSwap is set and its old status moves to swapReg, so you have to check.
+	void GrabReg(Reg r, Purpose p, Type t, bool &needsSwap, Reg swapReg);
+
 private:
+	RegStatus *FindReg(Reg r, Type t);
+
 	std::vector<RegStatus> regs;
 };
 
@@ -156,6 +161,8 @@ private:
 	bool Jit_WriteStencilOnly(const PixelFuncID &id, PixelRegCache::Reg stencilReg);
 	bool Jit_DepthTest(const PixelFuncID &id);
 	bool Jit_WriteDepth(const PixelFuncID &id);
+	bool Jit_AlphaBlend(const PixelFuncID &id);
+	bool Jit_Dither(const PixelFuncID &id);
 
 	std::unordered_map cache_;
 	std::unordered_map addresses_;
diff --git a/GPU/Software/DrawPixelX86.cpp b/GPU/Software/DrawPixelX86.cpp
index bd10d666b5..e4609307ca 100644
--- a/GPU/Software/DrawPixelX86.cpp
+++ b/GPU/Software/DrawPixelX86.cpp
@@ -111,9 +111,11 @@ SingleFunc PixelJitCache::CompileSingle(const PixelFuncID &id) {
 		success = success && Jit_StencilAndDepthTest(id);
 	else if (!id.clearMode)
 		success = success && Jit_DepthTest(id);
-
 	success = success && Jit_WriteDepth(id);
 
+	success = success && Jit_AlphaBlend(id);
+	success = success && Jit_Dither(id);
+
 	// TODO: There's more...
 	success = false;
 
@@ -929,6 +931,86 @@ bool PixelJitCache::Jit_WriteDepth(const PixelFuncID &id) {
 	return true;
 }
 
+bool PixelJitCache::Jit_AlphaBlend(const PixelFuncID &id) {
+	if (!id.alphaBlend)
+		return true;
+
+	// TODO: Will need old color in some cases, too.
+	return false;
+}
+
+bool PixelJitCache::Jit_Dither(const PixelFuncID &id) {
+	if (!id.dithering)
+		return true;
+	// Adds the signed 4-bit dither matrix value for (x & 3, y & 3) to each of R, G, and B, saturating.
+	X64Reg gstateReg = GetGState();
+	X64Reg valueReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_GEN);
+
+	// Load the row dither matrix entry (will still need to get the X.) Mask all 32 bits of y for the index.
+	MOV(32, R(valueReg), R(argYReg));
+	AND(32, R(valueReg), Imm8(3));
+	MOVZX(32, 16, valueReg, MComplex(gstateReg, valueReg, 4, offsetof(GPUgstate, dithmtx)));
+	regCache_.Unlock(gstateReg, PixelRegCache::T_GEN);
+
+	// At this point, we're done with depth and y, so let's grab COLOR_OFF and lock it.
+	// Then we can modify x and throw it away too, which is our actual goal.
+	regCache_.Unlock(GetColorOff(id), PixelRegCache::T_GEN);
+	regCache_.ForceLock(PixelRegCache::COLOR_OFF, PixelRegCache::T_GEN);
+	regCache_.Release(argYReg, PixelRegCache::T_GEN);
+
+	AND(32, R(argXReg), Imm32(3));
+	SHL(32, R(argXReg), Imm8(2));
+
+	// Conveniently, this is ECX on Windows, but otherwise we need to swap it.
+	if (argXReg != RCX) {
+		bool needsSwap = false;
+		regCache_.GrabReg(RCX, PixelRegCache::TEMP1, PixelRegCache::T_GEN, needsSwap, argXReg);
+
+		if (needsSwap) {
+			XCHG(PTRBITS, R(RCX), R(argXReg));
+			if (valueReg == RCX)
+				valueReg = argXReg;
+		} else {
+			MOV(32, R(RCX), R(argXReg));
+			regCache_.Release(argXReg, PixelRegCache::T_GEN);
+		}
+	}
+
+	// Okay shift to the specific value to add.
+	SHR(32, R(valueReg), R(CL));
+	AND(16, R(valueReg), Imm16(0x000F));
+
+	// This will either be argXReg on Windows, or RCX we explicitly grabbed.
+	regCache_.Release(RCX, PixelRegCache::T_GEN);
+
+	// Now we need to make 0-7 positive, 8-F negative.. so sign extend the nibble to all 32 bits.
+	SHL(32, R(valueReg), Imm8(28));
+	SAR(32, R(valueReg), Imm8(28));
+
+	// Copy that value into a vec to add to the color.
+	X64Reg vecValueReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_VEC);
+	MOVD_xmm(vecValueReg, R(valueReg));
+	regCache_.Release(valueReg, PixelRegCache::T_GEN);
+
+	// Now we want to broadcast RGB in 16-bit, but keep A as 0.
+	// The second lane (in 16-bit) holds sign bits now, but MOVD zeroed the third lane, so use that for A.
+	// We use 16-bit because we need a signed add, but we also want to saturate.
+	PSHUFLW(vecValueReg, R(vecValueReg), _MM_SHUFFLE(2, 0, 0, 0));
+	// With that, now let's convert the color to 16 bit...
+	X64Reg zeroReg = regCache_.Alloc(PixelRegCache::TEMP1, PixelRegCache::T_VEC);
+	PXOR(zeroReg, R(zeroReg));
+	PUNPCKLBW(argColorReg, R(zeroReg));
+	regCache_.Release(zeroReg, PixelRegCache::T_VEC);
+	// And simply add the dither values.
+	PADDSW(argColorReg, R(vecValueReg));
+	regCache_.Release(vecValueReg, PixelRegCache::T_VEC);
+
+	// Now that we're done, put color back in 4x8-bit.
+	PACKUSWB(argColorReg, R(argColorReg));
+
+	return true;
+}
+
 };
 
 #endif