From 1f9dc3a5686bb79a45ac8f4360fa7684afcbea5e Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Fri, 26 Nov 2021 10:12:54 -0800 Subject: [PATCH] softjit: Precalculate write mask and dither. This is slightly abusing PixelFuncID, but the intent is to provide some memory that's easily accessible from the jit func, but still associated with that calculation (i.e. not global.) --- GPU/Software/DrawPixelX86.cpp | 29 +++++++++++++++++++++++++++ GPU/Software/FuncId.cpp | 37 +++++++++++++++++++++++++++++++++++ GPU/Software/FuncId.h | 14 +++++++++++++ 3 files changed, 80 insertions(+) diff --git a/GPU/Software/DrawPixelX86.cpp b/GPU/Software/DrawPixelX86.cpp index 4aaf012094..ad1ad8b25c 100644 --- a/GPU/Software/DrawPixelX86.cpp +++ b/GPU/Software/DrawPixelX86.cpp @@ -38,6 +38,9 @@ static const X64Reg argZReg = R8; static const X64Reg argFogReg = R9; static const X64Reg argColorReg = XMM4; +// Windows reserves space to save args, 1 xmm + 4 ints before the id. +static const OpArg mArgID = MDisp(RSP, 1 * 16 + 4 * PTRBITS / 8); + // Must save: RBX, RSP, RBP, RDI, RSI, R12-R15, XMM6-15 #else static const X64Reg argXReg = RDI; @@ -46,6 +49,9 @@ static const X64Reg argZReg = RDX; static const X64Reg argFogReg = RCX; static const X64Reg argColorReg = XMM0; +// Here we just have the return and padding to align RBP. +static const OpArg mArgID = MDisp(RSP, 16); + // Must save: RBX, RSP, RBP, R12-R15 #endif @@ -1293,14 +1299,18 @@ bool PixelJitCache::Jit_Dither(const PixelFuncID &id) { if (!id.dithering) return true; +#ifndef SOFTPIXEL_USE_CACHE X64Reg gstateReg = GetGState(); +#endif X64Reg valueReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_GEN); // Load the row dither matrix entry (will still need to get the X.) 
MOV(32, R(valueReg), R(argYReg)); AND(32, R(valueReg), Imm8(3)); +#ifndef SOFTPIXEL_USE_CACHE MOVZX(32, 16, valueReg, MComplex(gstateReg, valueReg, 4, offsetof(GPUgstate, dithmtx))); regCache_.Unlock(gstateReg, PixelRegCache::T_GEN); +#endif // At this point, we're done with depth and y, so let's grab COLOR_OFF and lock it. // Then we can modify x and throw it away too, which is our actual goal. @@ -1309,6 +1319,8 @@ bool PixelJitCache::Jit_Dither(const PixelFuncID &id) { regCache_.Release(argYReg, PixelRegCache::T_GEN); AND(32, R(argXReg), Imm32(3)); + +#ifndef SOFTPIXEL_USE_CACHE SHL(32, R(argXReg), Imm8(2)); // Conveniently, this is ECX on Windows, but otherwise we need to swap it. @@ -1337,6 +1349,16 @@ bool PixelJitCache::Jit_Dither(const PixelFuncID &id) { SHL(32, R(valueReg), Imm8(4)); MOVSX(32, 8, valueReg, R(valueReg)); SAR(8, R(valueReg), Imm8(4)); +#else + // Sum up (x + y * 4) * 2 + ditherMatrix offset to valueReg. + SHL(32, R(argXReg), Imm8(1)); + LEA(32, valueReg, MComplex(argXReg, valueReg, 8, offsetof(PixelFuncID, cached.ditherMatrix))); + + // Okay, now abuse argXReg to read the PixelFuncID pointer on the stack. + MOV(PTRBITS, R(argXReg), mArgID); + MOVSX(32, 16, valueReg, MRegSum(argXReg, valueReg)); + regCache_.Release(argXReg, PixelRegCache::T_GEN); +#endif // Copy that value into a vec to add to the color. X64Reg vecValueReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_VEC); @@ -1461,6 +1483,7 @@ bool PixelJitCache::Jit_WriteColor(const PixelFuncID &id) { // Note that we apply the write mask at the destination bit depth. 
X64Reg maskReg = INVALID_REG; if (id.applyColorWriteMask) { +#ifndef SOFTPIXEL_USE_CACHE X64Reg gstateReg = GetGState(); maskReg = regCache_.Alloc(PixelRegCache::TEMP3, PixelRegCache::T_GEN); @@ -1496,6 +1519,12 @@ bool PixelJitCache::Jit_WriteColor(const PixelFuncID &id) { OR(32, R(maskReg), Imm32(fixedKeepMask)); break; } +#else + maskReg = regCache_.Alloc(PixelRegCache::TEMP3, PixelRegCache::T_GEN); + // Load the pre-converted and combined write mask. + MOV(PTRBITS, R(maskReg), mArgID); + MOV(32, R(maskReg), MDisp(maskReg, offsetof(PixelFuncID, cached.colorWriteMask))); +#endif } // We've run out of regs, let's live without temp2 from here on. diff --git a/GPU/Software/FuncId.cpp b/GPU/Software/FuncId.cpp index 8c9b4b4e93..08a987b178 100644 --- a/GPU/Software/FuncId.cpp +++ b/GPU/Software/FuncId.cpp @@ -15,12 +15,17 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. +#include "Common/Data/Convert/ColorConv.h" #include "Common/StringUtils.h" #include "GPU/Software/FuncId.h" #include "GPU/GPUState.h" static_assert(sizeof(SamplerID) == sizeof(SamplerID::fullKey), "Bad sampler ID size"); +#ifdef SOFTPIXEL_USE_CACHE +static_assert(sizeof(PixelFuncID) == sizeof(PixelFuncID::fullKey) + sizeof(PixelFuncID::cached), "Bad pixel func ID size"); +#else static_assert(sizeof(PixelFuncID) == sizeof(PixelFuncID::fullKey), "Bad pixel func ID size"); +#endif void ComputePixelFuncID(PixelFuncID *id) { id->fullKey = 0; @@ -93,6 +98,38 @@ void ComputePixelFuncID(PixelFuncID *id) { id->applyLogicOp = gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY; id->applyFog = gstate.isFogEnabled() && !gstate.isModeThrough(); } + + // Cache some values for later convenience. 
+ if (id->dithering) { + for (int y = 0; y < 4; ++y) { + for (int x = 0; x < 4; ++x) + id->cached.ditherMatrix[y * 4 + x] = gstate.getDitherValue(x, y); + } + } + if (id->applyColorWriteMask) { + uint32_t mask = gstate.getColorMask(); + // This flag means stencil clear or stencil test, basically whether writing to stencil. + if (!id->stencilTest) + mask |= 0xFF000000; + + switch (id->fbFormat) { + case GE_FORMAT_565: + id->cached.colorWriteMask = RGBA8888ToRGB565(mask); + break; + + case GE_FORMAT_5551: + id->cached.colorWriteMask = RGBA8888ToRGBA5551(mask); + break; + + case GE_FORMAT_4444: + id->cached.colorWriteMask = RGBA8888ToRGBA4444(mask); + break; + + case GE_FORMAT_8888: + id->cached.colorWriteMask = mask; + break; + } + } } std::string DescribePixelFuncID(const PixelFuncID &id) { diff --git a/GPU/Software/FuncId.h b/GPU/Software/FuncId.h index f0fb419c58..565860f15d 100644 --- a/GPU/Software/FuncId.h +++ b/GPU/Software/FuncId.h @@ -23,10 +23,22 @@ #include "GPU/ge_constants.h" +#define SOFTPIXEL_USE_CACHE 1 + +#pragma pack(push, 1) + struct PixelFuncID { PixelFuncID() { } +#ifdef SOFTPIXEL_USE_CACHE + struct { + // Warning: these are not hashed or compared for equality. Just cached values. + uint32_t colorWriteMask{}; + int16_t ditherMatrix[16]{}; + } cached; +#endif + union { uint64_t fullKey{}; struct { @@ -120,6 +132,8 @@ struct PixelFuncID { } }; +#pragma pack(pop) + struct SamplerID { SamplerID() : fullKey(0) { }