From 953200c9954a88b84e5ddfec8892e7086c619ecc Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 20 Nov 2021 13:11:52 -0800 Subject: [PATCH 1/5] softgpu: Add func to calculate pixel func ID. This normalizes some things, and eventually can be used for a jit key. --- CMakeLists.txt | 2 + GPU/GPU.vcxproj | 2 + GPU/GPU.vcxproj.filters | 6 ++ GPU/Software/FuncId.cpp | 77 +++++++++++++++++ GPU/Software/FuncId.h | 128 ++++++++++++++++++++++++++++ GPU/Software/Sampler.h | 37 +------- UWP/GPU_UWP/GPU_UWP.vcxproj | 2 + UWP/GPU_UWP/GPU_UWP.vcxproj.filters | 2 + android/jni/Android.mk | 1 + libretro/Makefile.common | 1 + 10 files changed, 222 insertions(+), 36 deletions(-) create mode 100644 GPU/Software/FuncId.cpp create mode 100644 GPU/Software/FuncId.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 315433f51c..7ef38ef8c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1580,6 +1580,8 @@ set(GPU_SOURCES GPU/Math3D.h GPU/Software/Clipper.cpp GPU/Software/Clipper.h + GPU/Software/FuncId.cpp + GPU/Software/FuncId.h GPU/Software/Lighting.cpp GPU/Software/Lighting.h GPU/Software/Rasterizer.cpp diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 290a86c054..7392a733a1 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -454,6 +454,7 @@ + @@ -629,6 +630,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index ba8b72b871..23a81b6bcc 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -264,6 +264,9 @@ Common + + Software + @@ -530,5 +533,8 @@ Common + + Software + \ No newline at end of file diff --git a/GPU/Software/FuncId.cpp b/GPU/Software/FuncId.cpp new file mode 100644 index 0000000000..0bdde24f7d --- /dev/null +++ b/GPU/Software/FuncId.cpp @@ -0,0 +1,77 @@ +// Copyright (c) 2021- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. 
+ +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "GPU/Software/FuncId.h" +#include "GPU/GPUState.h" + +void ComputePixelFuncID(PixelFuncID *id) { + id->fullKey = 0; + + // TODO: Could this be minz > 0x0000 || maxz < 0xFFFF? Maybe unsafe, depending on verts... + id->applyDepthRange = !gstate.isModeThrough(); + // Dither happens even in clear mode. + id->dithering = gstate.isDitherEnabled(); + id->fbFormat = gstate.FrameBufFormat(); + id->useStandardStride = gstate.FrameBufStride() == 512 && gstate.DepthBufStride() == 512; + id->applyColorWriteMask = gstate.getColorMask() != 0; + + id->clearMode = gstate.isModeClear(); + if (id->clearMode) { + id->colorClear = gstate.isClearModeColorMask(); + id->stencilClear = gstate.isClearModeAlphaMask(); + id->depthClear = gstate.isClearModeDepthMask(); + } else { + id->colorTest = gstate.isColorTestEnabled() && gstate.getColorTestFunction() != GE_COMP_ALWAYS; + if (gstate.isStencilTestEnabled() && gstate.getStencilTestFunction() == GE_COMP_ALWAYS) { + // If stencil always passes, force off when we won't write any stencil bits. 
+ bool stencilWrite = (gstate.pmska & 0xFF) != 0xFF && gstate.FrameBufFormat() != GE_FORMAT_565; + if (gstate.isDepthTestEnabled() && gstate.getDepthTestFunction() != GE_COMP_ALWAYS) + id->stencilTest = stencilWrite && (gstate.getStencilOpZPass() != GE_STENCILOP_KEEP || gstate.getStencilOpZFail() != GE_STENCILOP_KEEP); + else + id->stencilTest = stencilWrite && gstate.getStencilOpZPass() != GE_STENCILOP_KEEP; + } else { + id->stencilTest = gstate.isStencilTestEnabled(); + } + id->depthWrite = gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled(); + + if (id->stencilTest) { + id->stencilTestFunc = gstate.getStencilTestFunction(); + id->stencilTestRef = gstate.getStencilTestRef() & gstate.getStencilTestMask(); + id->hasStencilTestMask = gstate.getStencilTestMask() != 0xFF; + id->sFail = gstate.getStencilOpSFail(); + id->zFail = gstate.isDepthTestEnabled() ? gstate.getStencilOpZFail() : GE_STENCILOP_KEEP; + id->zPass = gstate.getStencilOpZPass(); + } + + id->depthTestFunc = gstate.isDepthTestEnabled() ? gstate.getDepthTestFunction() : GE_COMP_ALWAYS; + id->alphaTestFunc = gstate.isAlphaTestEnabled() ? gstate.getAlphaTestFunction() : GE_COMP_ALWAYS; + if (id->alphaTestFunc != GE_COMP_ALWAYS) { + id->alphaTestRef = gstate.getAlphaTestRef() & gstate.getAlphaTestMask(); + id->hasAlphaTestMask = gstate.getAlphaTestMask() != 0xFF; + } + + id->alphaBlend = gstate.isAlphaBlendEnabled(); + if (id->alphaBlend) { + id->alphaBlendEq = gstate.getBlendEq(); + id->alphaBlendSrc = gstate.getBlendFuncA(); + id->alphaBlendDst = gstate.getBlendFuncB(); + } + + id->applyLogicOp = gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY; + id->applyFog = gstate.isFogEnabled() && !gstate.isModeThrough(); + } +} diff --git a/GPU/Software/FuncId.h b/GPU/Software/FuncId.h new file mode 100644 index 0000000000..6b7ed5b108 --- /dev/null +++ b/GPU/Software/FuncId.h @@ -0,0 +1,128 @@ +// Copyright (c) 2021- PPSSPP Project. 
+ +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include <cstdint> +#include <cstddef> +#include <functional> + +#include "GPU/ge_constants.h" + +struct PixelFuncID { + PixelFuncID() { + } + + union { + uint64_t fullKey{}; + struct { + bool clearMode : 1; + union { + bool colorTest : 1; + bool colorClear : 1; + }; + union { + bool stencilTest : 1; + bool stencilClear : 1; + }; + union { + bool depthWrite : 1; + bool depthClear : 1; + }; + bool applyDepthRange : 1; + // If alpha testing is disabled, set to GE_COMP_ALWAYS. + GEComparison alphaTestFunc : 3; + // If depth testing is disabled, set to GE_COMP_ALWAYS. + GEComparison depthTestFunc : 3; + GEComparison stencilTestFunc : 3; + GEBufferFormat fbFormat : 2; + // 16 bits before alphaTestRef. + uint8_t alphaTestRef : 8; + uint8_t stencilTestRef : 8; + // 32 bits before alphaBlend. + bool alphaBlend : 1; + GEBlendMode alphaBlendEq : 3; + GEBlendSrcFactor alphaBlendSrc : 4; + GEBlendDstFactor alphaBlendDst : 4; + // Meaning: alphaTestMask != 0xFF + bool hasAlphaTestMask : 1; + // Meaning: stencilTestMask != 0xFF + bool hasStencilTestMask : 1; + bool dithering : 1; + bool applyLogicOp : 1; + // 48 bits before applyFog. 
+ bool applyFog : 1; + // Meaning: fb_stride == 512 && z_stride == 512 + bool useStandardStride : 1; + // Meaning: maskRGB != 0 || maskA != 0 + bool applyColorWriteMask : 1; + GEStencilOp sFail : 3; + GEStencilOp zFail : 3; + GEStencilOp zPass : 3; + // 60 bits, 4 free. + }; + }; + + bool operator == (const PixelFuncID &other) const { + return fullKey == other.fullKey; + } +}; + +struct SamplerID { + SamplerID() : fullKey(0) { + } + + union { + uint32_t fullKey; + struct { + uint8_t texfmt : 4; + uint8_t clutfmt : 2; + uint8_t : 2; + bool swizzle : 1; + bool useSharedClut : 1; + bool hasClutMask : 1; + bool hasClutShift : 1; + bool hasClutOffset : 1; + bool hasInvalidPtr : 1; + bool linear : 1; + }; + }; + + bool operator == (const SamplerID &other) const { + return fullKey == other.fullKey; + } +}; + +namespace std { + +template <> +struct hash<PixelFuncID> { + std::size_t operator()(const PixelFuncID &k) const { + return hash<uint64_t>()(k.fullKey); + } +}; + +template <> +struct hash<SamplerID> { + std::size_t operator()(const SamplerID &k) const { + return hash<uint32_t>()(k.fullKey); + } +}; + +}; + +void ComputePixelFuncID(PixelFuncID *id); diff --git a/GPU/Software/Sampler.h b/GPU/Software/Sampler.h index 5d72cf2cd9..025b798252 100644 --- a/GPU/Software/Sampler.h +++ b/GPU/Software/Sampler.h @@ -32,42 +32,7 @@ #include "Common/FakeEmitter.h" #endif #include "GPU/Math3D.h" - -struct SamplerID { - SamplerID() : fullKey(0) { - } - - union { - u32 fullKey; - struct { - uint8_t texfmt : 4; - uint8_t clutfmt : 2; - uint8_t : 2; - bool swizzle : 1; - bool useSharedClut : 1; - bool hasClutMask : 1; - bool hasClutShift : 1; - bool hasClutOffset : 1; - bool hasInvalidPtr : 1; - bool linear : 1; - }; - }; - - bool operator == (const SamplerID &other) const { - return fullKey == other.fullKey; - } -}; - -namespace std { - -template <> -struct hash<SamplerID> { - std::size_t operator()(const SamplerID &k) const { - return hash<u32>()(k.fullKey); - } -}; - -}; +#include "GPU/Software/FuncId.h" namespace Sampler { diff --git 
a/UWP/GPU_UWP/GPU_UWP.vcxproj b/UWP/GPU_UWP/GPU_UWP.vcxproj index 60246c4892..5ca11e0a5c 100644 --- a/UWP/GPU_UWP/GPU_UWP.vcxproj +++ b/UWP/GPU_UWP/GPU_UWP.vcxproj @@ -426,6 +426,7 @@ + @@ -484,6 +485,7 @@ + diff --git a/UWP/GPU_UWP/GPU_UWP.vcxproj.filters b/UWP/GPU_UWP/GPU_UWP.vcxproj.filters index 3f1d5431cf..79a6701d04 100644 --- a/UWP/GPU_UWP/GPU_UWP.vcxproj.filters +++ b/UWP/GPU_UWP/GPU_UWP.vcxproj.filters @@ -46,6 +46,7 @@ + @@ -102,6 +103,7 @@ + diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 37f4314a98..ab190e8e1f 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -358,6 +358,7 @@ EXEC_AND_LIB_FILES := \ $(SRC)/GPU/GLES/FragmentTestCacheGLES.cpp.arm \ $(SRC)/GPU/GLES/TextureScalerGLES.cpp \ $(SRC)/GPU/Software/Clipper.cpp \ + $(SRC)/GPU/Software/FuncId.cpp \ $(SRC)/GPU/Software/Lighting.cpp \ $(SRC)/GPU/Software/Rasterizer.cpp.arm \ $(SRC)/GPU/Software/RasterizerRectangle.cpp.arm \ diff --git a/libretro/Makefile.common b/libretro/Makefile.common index 519c4f3812..38467b6017 100644 --- a/libretro/Makefile.common +++ b/libretro/Makefile.common @@ -348,6 +348,7 @@ SOURCES_CXX += \ $(GPUDIR)/GPUState.cpp \ $(GPUDIR)/Math3D.cpp \ $(GPUDIR)/Software/Clipper.cpp \ + $(GPUDIR)/Software/FuncId.cpp \ $(GPUDIR)/Software/Lighting.cpp \ $(GPUDIR)/Software/Rasterizer.cpp \ $(GPUDIR)/Software/RasterizerRectangle.cpp \ From f7a31c992d75061a383be3804e4f4db4a9ef0e4c Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 20 Nov 2021 14:22:55 -0800 Subject: [PATCH 2/5] softgpu: Use pixel func ID to draw pixels. This just reduces reliance on gstate directly, and should help keep things consistent. 
--- GPU/Software/FuncId.h | 20 +-- GPU/Software/Rasterizer.cpp | 242 +++++++++++++-------------- GPU/Software/Rasterizer.h | 7 +- GPU/Software/RasterizerRectangle.cpp | 17 +- 4 files changed, 145 insertions(+), 141 deletions(-) diff --git a/GPU/Software/FuncId.h b/GPU/Software/FuncId.h index 6b7ed5b108..2154a0c3d2 100644 --- a/GPU/Software/FuncId.h +++ b/GPU/Software/FuncId.h @@ -45,19 +45,19 @@ struct PixelFuncID { }; bool applyDepthRange : 1; // If alpha testing is disabled, set to GE_COMP_ALWAYS. - GEComparison alphaTestFunc : 3; + uint8_t alphaTestFunc : 3; // If depth testing is disabled, set to GE_COMP_ALWAYS. - GEComparison depthTestFunc : 3; - GEComparison stencilTestFunc : 3; - GEBufferFormat fbFormat : 2; + uint8_t depthTestFunc : 3; + uint8_t stencilTestFunc : 3; + uint8_t fbFormat : 2; // 16 bits before alphaTestRef. uint8_t alphaTestRef : 8; uint8_t stencilTestRef : 8; // 32 bits before alphaBlend. bool alphaBlend : 1; - GEBlendMode alphaBlendEq : 3; - GEBlendSrcFactor alphaBlendSrc : 4; - GEBlendDstFactor alphaBlendDst : 4; + uint8_t alphaBlendEq : 3; + uint8_t alphaBlendSrc : 4; + uint8_t alphaBlendDst : 4; // Meaning: alphaTestMask != 0xFF bool hasAlphaTestMask : 1; // Meaning: stencilTestMask != 0xFF @@ -70,9 +70,9 @@ struct PixelFuncID { bool useStandardStride : 1; // Meaning: maskRGB != 0 || maskA != 0 bool applyColorWriteMask : 1; - GEStencilOp sFail : 3; - GEStencilOp zFail : 3; - GEStencilOp zPass : 3; + uint8_t sFail : 3; + uint8_t zFail : 3; + uint8_t zPass : 3; // 60 bits, 4 free. 
}; }; diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index edf24ac7cc..f9c8ec9308 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -252,9 +252,8 @@ static inline void GetTextureCoordinates(const VertexData& v0, const VertexData& } // NOTE: These likely aren't endian safe -static inline u32 GetPixelColor(int x, int y) -{ - switch (gstate.FrameBufFormat()) { +static inline u32 GetPixelColor(GEBufferFormat fmt, int x, int y) { + switch (fmt) { case GE_FORMAT_565: return RGB565ToRGBA8888(fb.Get16(x, y, gstate.FrameBufStride())); @@ -274,9 +273,8 @@ static inline u32 GetPixelColor(int x, int y) return 0; } -static inline void SetPixelColor(int x, int y, u32 value) -{ - switch (gstate.FrameBufFormat()) { +static inline void SetPixelColor(GEBufferFormat fmt, int x, int y, u32 value) { + switch (fmt) { case GE_FORMAT_565: fb.Set16(x, y, gstate.FrameBufStride(), RGBA8888ToRGB565(value)); break; @@ -309,46 +307,46 @@ static inline void SetPixelDepth(int x, int y, u16 value) depthbuf.Set16(x, y, gstate.DepthBufStride(), value); } -static inline u8 GetPixelStencil(int x, int y) -{ - if (gstate.FrameBufFormat() == GE_FORMAT_565) { +static inline u8 GetPixelStencil(GEBufferFormat fmt, int x, int y) { + if (fmt == GE_FORMAT_565) { // Always treated as 0 for comparison purposes. return 0; - } else if (gstate.FrameBufFormat() == GE_FORMAT_5551) { + } else if (fmt == GE_FORMAT_5551) { return ((fb.Get16(x, y, gstate.FrameBufStride()) & 0x8000) != 0) ? 0xFF : 0; - } else if (gstate.FrameBufFormat() == GE_FORMAT_4444) { + } else if (fmt == GE_FORMAT_4444) { return Convert4To8(fb.Get16(x, y, gstate.FrameBufStride()) >> 12); } else { return fb.Get32(x, y, gstate.FrameBufStride()) >> 24; } } -static inline void SetPixelStencil(int x, int y, u8 value) -{ - // TODO: This seems like it maybe respects the alpha mask (at least in some scenarios?) 
- - if (gstate.FrameBufFormat() == GE_FORMAT_565) { +static inline void SetPixelStencil(GEBufferFormat fmt, int x, int y, u8 value) { + if (fmt == GE_FORMAT_565) { // Do nothing - } else if (gstate.FrameBufFormat() == GE_FORMAT_5551) { - u16 pixel = fb.Get16(x, y, gstate.FrameBufStride()) & ~0x8000; - pixel |= value != 0 ? 0x8000 : 0; - fb.Set16(x, y, gstate.FrameBufStride(), pixel); - } else if (gstate.FrameBufFormat() == GE_FORMAT_4444) { - u16 pixel = fb.Get16(x, y, gstate.FrameBufStride()) & ~0xF000; - pixel |= (u16)value << 12; + } else if (fmt == GE_FORMAT_5551) { + if ((gstate.getStencilWriteMask() & 0x80) == 0) { + u16 pixel = fb.Get16(x, y, gstate.FrameBufStride()) & ~0x8000; + pixel |= (value & 0x80) << 8; + fb.Set16(x, y, gstate.FrameBufStride(), pixel); + } + } else if (fmt == GE_FORMAT_4444) { + const u16 write_mask = (gstate.getStencilWriteMask() << 8) | 0x0FFF; + u16 pixel = fb.Get16(x, y, gstate.FrameBufStride()) & write_mask; + pixel |= ((u16)value << 8) & ~write_mask; fb.Set16(x, y, gstate.FrameBufStride(), pixel); } else { - u32 pixel = fb.Get32(x, y, gstate.FrameBufStride()) & ~0xFF000000; - pixel |= (u32)value << 24; + const u32 write_mask = (gstate.getStencilWriteMask() << 24) | 0x00FFFFFF; + u32 pixel = fb.Get32(x, y, gstate.FrameBufStride()) & write_mask; + pixel |= ((u32)value << 24) & ~write_mask; fb.Set32(x, y, gstate.FrameBufStride(), pixel); } } -static inline bool DepthTestPassed(int x, int y, u16 z) +static inline bool DepthTestPassed(GEComparison func, int x, int y, u16 z) { u16 reference_z = GetPixelDepth(x, y); - switch (gstate.getDepthTestFunction()) { + switch (func) { case GE_COMP_NEVER: return false; @@ -389,12 +387,11 @@ static inline bool IsRightSideOrFlatBottomLine(const Vec2& vertex, const Ve } } -static inline bool StencilTestPassed(u8 stencil) -{ - // TODO: Does the masking logic make any sense? 
- stencil &= gstate.getStencilTestMask(); - u8 ref = gstate.getStencilTestRef() & gstate.getStencilTestMask(); - switch (gstate.getStencilTestFunction()) { +static inline bool StencilTestPassed(const PixelFuncID &pixelID, u8 stencil) { + if (pixelID.hasStencilTestMask) + stencil &= gstate.getStencilTestMask(); + u8 ref = pixelID.stencilTestRef; + switch (GEComparison(pixelID.stencilTestFunc)) { case GE_COMP_NEVER: return false; @@ -422,36 +419,33 @@ static inline bool StencilTestPassed(u8 stencil) return true; } -static inline u8 ApplyStencilOp(int op, u8 old_stencil) { - // TODO: Apply mask to reference or old stencil? - u8 reference_stencil = gstate.getStencilTestRef(); // TODO: Apply mask? - const u8 write_mask = gstate.getStencilWriteMask(); - +static inline u8 ApplyStencilOp(GEBufferFormat fmt, GEStencilOp op, u8 old_stencil) { switch (op) { case GE_STENCILOP_KEEP: return old_stencil; case GE_STENCILOP_ZERO: - return old_stencil & write_mask; + return 0; case GE_STENCILOP_REPLACE: - return (reference_stencil & ~write_mask) | (old_stencil & write_mask); + // TODO: Apply mask to reference? 
+ return gstate.getStencilTestRef(); case GE_STENCILOP_INVERT: - return (~old_stencil & ~write_mask) | (old_stencil & write_mask); + return ~old_stencil; case GE_STENCILOP_INCR: - switch (gstate.FrameBufFormat()) { + switch (fmt) { case GE_FORMAT_8888: if (old_stencil != 0xFF) { - return ((old_stencil + 1) & ~write_mask) | (old_stencil & write_mask); + return old_stencil + 1; } return old_stencil; case GE_FORMAT_5551: - return ~write_mask | (old_stencil & write_mask); + return 0xFF; case GE_FORMAT_4444: if (old_stencil < 0xF0) { - return ((old_stencil + 0x10) & ~write_mask) | (old_stencil & write_mask); + return old_stencil + 0x10; } return old_stencil; default: @@ -460,14 +454,14 @@ static inline u8 ApplyStencilOp(int op, u8 old_stencil) { break; case GE_STENCILOP_DECR: - switch (gstate.FrameBufFormat()) { + switch (fmt) { case GE_FORMAT_4444: if (old_stencil >= 0x10) - return ((old_stencil - 0x10) & ~write_mask) | (old_stencil & write_mask); + return old_stencil - 0x10; break; default: if (old_stencil != 0) - return ((old_stencil - 1) & ~write_mask) | (old_stencil & write_mask); + return old_stencil - 1; return old_stencil; } break; @@ -651,13 +645,13 @@ static inline bool ColorTestPassed(const Vec3 &color) return true; } -static inline bool AlphaTestPassed(int alpha) +static inline bool AlphaTestPassed(const PixelFuncID &pixelID, int alpha) { - const u8 mask = gstate.getAlphaTestMask() & 0xFF; - const u8 ref = gstate.getAlphaTestRef() & mask; - alpha &= mask; + const u8 ref = pixelID.alphaTestRef; + if (pixelID.hasAlphaTestMask) + alpha &= gstate.getAlphaTestMask(); - switch (gstate.getAlphaTestFunction()) { + switch (GEComparison(pixelID.alphaTestFunc)) { case GE_COMP_NEVER: return false; @@ -685,9 +679,8 @@ static inline bool AlphaTestPassed(int alpha) return true; } -static inline Vec3 GetSourceFactor(const Vec4& source, const Vec4& dst) -{ - switch (gstate.getBlendFuncA()) { +static inline Vec3 GetSourceFactor(GEBlendSrcFactor factor, const Vec4 &source, 
const Vec4 &dst) { + switch (factor) { case GE_SRCBLEND_DSTCOLOR: return dst.rgb(); @@ -733,9 +726,8 @@ static inline Vec3 GetSourceFactor(const Vec4& source, const Vec4 } } -static inline Vec3 GetDestFactor(const Vec4& source, const Vec4& dst) -{ - switch (gstate.getBlendFuncB()) { +static inline Vec3 GetDestFactor(GEBlendDstFactor factor, const Vec4 &source, const Vec4 &dst) { + switch (factor) { case GE_DSTBLEND_SRCCOLOR: return source.rgb(); @@ -782,13 +774,13 @@ static inline Vec3 GetDestFactor(const Vec4& source, const Vec4& } // Removed inline here - it was never chosen to be inlined by the compiler anyway, too complex. -Vec3 AlphaBlendingResult(const Vec4 &source, const Vec4 &dst) +Vec3 AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4 &source, const Vec4 &dst) { // Note: These factors cannot go below 0, but they can go above 255 when doubling. - Vec3 srcfactor = GetSourceFactor(source, dst); - Vec3 dstfactor = GetDestFactor(source, dst); + Vec3 srcfactor = GetSourceFactor(GEBlendSrcFactor(pixelID.alphaBlendSrc), source, dst); + Vec3 dstfactor = GetDestFactor(GEBlendDstFactor(pixelID.alphaBlendDst), source, dst); - switch (gstate.getBlendEq()) { + switch (GEBlendMode(pixelID.alphaBlendEq)) { case GE_BLENDMODE_MUL_AND_ADD: { #if defined(_M_SSE) @@ -838,25 +830,25 @@ Vec3 AlphaBlendingResult(const Vec4 &source, const Vec4 &dst) ::abs(source.b() - dst.b())); default: - ERROR_LOG_REPORT(G3D, "Software: Unknown blend function %x", gstate.getBlendEq()); + ERROR_LOG_REPORT(G3D, "Software: Unknown blend function %x", pixelID.alphaBlendEq); return Vec3(); } } template -inline void DrawSinglePixel(const DrawingCoords &p, int z, u8 fog, const Vec4 &color_in) { +inline void DrawSinglePixel(const DrawingCoords &p, int z, u8 fog, const Vec4 &color_in, const PixelFuncID &pixelID) { Vec4 prim_color = color_in.Clamp(0, 255); // Depth range test - applied in clear mode, if not through mode. 
- if (!gstate.isModeThrough()) + if (pixelID.applyDepthRange) if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax()) return; - if (gstate.isAlphaTestEnabled() && !clearMode) - if (!AlphaTestPassed(prim_color.a())) + if (GEComparison(pixelID.alphaTestFunc) != GE_COMP_ALWAYS && !clearMode) + if (!AlphaTestPassed(pixelID, prim_color.a())) return; // Fog is applied prior to color test. - if (gstate.isFogEnabled() && !gstate.isModeThrough() && !clearMode) { + if (pixelID.applyFog && !clearMode) { Vec3 fogColor = Vec3::FromRGB(gstate.fogcolor); fogColor = (prim_color.rgb() * (int)fog + fogColor * (255 - (int)fog)) / 255; prim_color.r() = fogColor.r(); @@ -864,46 +856,48 @@ inline void DrawSinglePixel(const DrawingCoords &p, int z, u8 fog, const Vec4 dst = Vec4::FromRGBA(old_color); - Vec3 blended = AlphaBlendingResult(prim_color, dst); - if (gstate.isDitherEnabled()) { + Vec3 blended = AlphaBlendingResult(pixelID, prim_color, dst); + if (pixelID.dithering) { blended += Vec3::AssignToAll(gstate.getDitherValue(p.x, p.y)); } @@ -911,7 +905,7 @@ inline void DrawSinglePixel(const DrawingCoords &p, int z, u8 fog, const Vec4::AssignToAll(gstate.getDitherValue(p.x, p.y)); } @@ -925,7 +919,7 @@ inline void DrawSinglePixel(const DrawingCoords &p, int z, u8 fog, const Vec4 &color_in) { - DrawSinglePixel(p, z, fog, color_in); +void DrawSinglePixelNonClear(const DrawingCoords &p, u16 z, u8 fog, const Vec4 &color_in, const PixelFuncID &pixelID) { + DrawSinglePixel(p, z, fog, color_in, pixelID); } static inline void ApplyTexturing(Sampler::Funcs sampler, Vec4 &prim_color, float s, float t, int texlevel, int frac_texlevel, bool bilinear, u8 *texptr[], int texbufw[]) { @@ -1138,7 +1132,8 @@ template void DrawTriangleSlice( const VertexData& v0, const VertexData& v1, const VertexData& v2, int x1, int y1, int x2, int y2, - bool byY, int h1, int h2) + bool byY, int h1, int h2, + const PixelFuncID &pixelID) { Vec4 bias0 = 
Vec4::AssignToAll(IsRightSideOrFlatBottomLine(v0.screenpos.xy(), v1.screenpos.xy(), v2.screenpos.xy()) ? -1 : 0); Vec4 bias1 = Vec4::AssignToAll(IsRightSideOrFlatBottomLine(v1.screenpos.xy(), v2.screenpos.xy(), v0.screenpos.xy()) ? -1 : 0); @@ -1287,7 +1282,7 @@ void DrawTriangleSlice( subp.x = p.x + (i & 1); subp.y = p.y + (i / 2); - DrawSinglePixel(subp, z[i], fog[i], prim_color[i]); + DrawSinglePixel(subp, z[i], fog[i], prim_color[i], pixelID); } } } @@ -1323,37 +1318,40 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& int rangeY = (maxY - minY) / 32 + 1; int rangeX = (maxX - minX) / 32 + 1; + PixelFuncID pixelID; + ComputePixelFuncID(&pixelID); + const int MIN_LINES_PER_THREAD = 4; if (rangeY >= 12 && rangeX >= rangeY * 4) { if (gstate.isModeClear()) { auto bound = [&](int a, int b) -> void { - DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, false, a, b); + DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, false, a, b, pixelID); }; ParallelRangeLoop(&g_threadManager, bound, 0, rangeX, MIN_LINES_PER_THREAD); } else { auto bound = [&](int a, int b) -> void { - DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, false, a, b); + DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, false, a, b, pixelID); }; ParallelRangeLoop(&g_threadManager, bound, 0, rangeX, MIN_LINES_PER_THREAD); } } else if (rangeY >= 12 && rangeX >= 12) { if (gstate.isModeClear()) { auto bound = [&](int a, int b) -> void { - DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, a, b); + DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, a, b, pixelID); }; ParallelRangeLoop(&g_threadManager, bound, 0, rangeY, MIN_LINES_PER_THREAD); } else { auto bound = [&](int a, int b) -> void { - DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, a, b); + DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, a, b, pixelID); }; ParallelRangeLoop(&g_threadManager, bound, 0, rangeY, MIN_LINES_PER_THREAD); } } else { if 
(gstate.isModeClear()) { - DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, 0, rangeY); + DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, 0, rangeY, pixelID); } else { - DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, 0, rangeY); + DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, 0, rangeY, pixelID); } } } @@ -1373,11 +1371,11 @@ void DrawPoint(const VertexData &v0) if (pos.x < scissorTL.x || pos.y < scissorTL.y || pos.x > scissorBR.x || pos.y > scissorBR.y) return; - bool clearMode = gstate.isModeClear(); - Sampler::Funcs sampler = Sampler::GetFuncs(); + PixelFuncID pixelID; + ComputePixelFuncID(&pixelID); - if (gstate.isTextureMapEnabled() && !clearMode) { + if (gstate.isTextureMapEnabled() && !pixelID.clearMode) { int texbufw[8] = {0}; int maxTexLevel = gstate.getTextureMaxLevel(); @@ -1388,7 +1386,7 @@ void DrawPoint(const VertexData &v0) maxTexLevel = 0; } - if (gstate.isTextureMapEnabled() && !clearMode) { + if (gstate.isTextureMapEnabled() && !pixelID.clearMode) { GETextureFormat texfmt = gstate.getTextureFormat(); for (int i = 0; i <= maxTexLevel; i++) { u32 texaddr = gstate.getTextureAddress(i); @@ -1417,7 +1415,7 @@ void DrawPoint(const VertexData &v0) ApplyTexturing(sampler, prim_color, s, t, texLevel, texLevelFrac, bilinear, texptr, texbufw); } - if (!clearMode) + if (!pixelID.clearMode) prim_color += Vec4(sec_color, 0); ScreenCoords pprime = pos; @@ -1426,14 +1424,14 @@ void DrawPoint(const VertexData &v0) u16 z = pos.z; u8 fog = 255; - if (gstate.isFogEnabled() && !clearMode) { + if (gstate.isFogEnabled() && !pixelID.clearMode) { fog = ClampFogDepth(v0.fogdepth); } - if (clearMode) { - DrawSinglePixel(p, z, fog, prim_color); + if (pixelID.clearMode) { + DrawSinglePixel(p, z, fog, prim_color, pixelID); } else { - DrawSinglePixel(p, z, fog, prim_color); + DrawSinglePixel(p, z, fog, prim_color, pixelID); } } @@ -1626,7 +1624,9 @@ void DrawLine(const VertexData &v0, const VertexData &v1) // Allow drawing 
within a pixel's center. scissorBR.x += 15; scissorBR.y += 15; - bool clearMode = gstate.isModeClear(); + + PixelFuncID pixelID; + ComputePixelFuncID(&pixelID); int texbufw[8] = {0}; @@ -1638,7 +1638,7 @@ void DrawLine(const VertexData &v0, const VertexData &v1) maxTexLevel = 0; } - if (gstate.isTextureMapEnabled() && !clearMode) { + if (gstate.isTextureMapEnabled() && !pixelID.clearMode) { GETextureFormat texfmt = gstate.getTextureFormat(); for (int i = 0; i <= maxTexLevel; i++) { u32 texaddr = gstate.getTextureAddress(i); @@ -1667,7 +1667,7 @@ void DrawLine(const VertexData &v0, const VertexData &v1) } u8 fog = 255; - if (gstate.isFogEnabled() && !clearMode) { + if (gstate.isFogEnabled() && !pixelID.clearMode) { fog = ClampFogDepth((v0.fogdepth * (float)(steps - i) + v1.fogdepth * (float)i) / steps1); } @@ -1677,7 +1677,7 @@ void DrawLine(const VertexData &v0, const VertexData &v1) prim_color.a() = 0x7F; } - if (gstate.isTextureMapEnabled() && !clearMode) { + if (gstate.isTextureMapEnabled() && !pixelID.clearMode) { float s, s1; float t, t1; if (gstate.isModeThrough()) { @@ -1715,16 +1715,16 @@ void DrawLine(const VertexData &v0, const VertexData &v1) ApplyTexturing(sampler, prim_color, s, t, texLevel, texLevelFrac, texBilinear, texptr, texbufw); } - if (!clearMode) + if (!pixelID.clearMode) prim_color += Vec4(sec_color, 0); ScreenCoords pprime = ScreenCoords((int)x, (int)y, (int)z); DrawingCoords p = TransformUnit::ScreenToDrawing(pprime); - if (clearMode) { - DrawSinglePixel(p, z, fog, prim_color); + if (pixelID.clearMode) { + DrawSinglePixel(p, z, fog, prim_color, pixelID); } else { - DrawSinglePixel(p, z, fog, prim_color); + DrawSinglePixel(p, z, fog, prim_color, pixelID); } } @@ -1743,7 +1743,7 @@ bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer) u8 *row = buffer.GetData(); for (int y = gstate.getRegionY1(); y <= gstate.getRegionY2(); ++y) { for (int x = gstate.getRegionX1(); x <= gstate.getRegionX2(); ++x) { - row[x - gstate.getRegionX1()] = 
GetPixelStencil(x, y); + row[x - gstate.getRegionX1()] = GetPixelStencil(gstate.FrameBufFormat(), x, y); } row += w; } diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h index 1fd6407ce7..b9d9ac59ff 100644 --- a/GPU/Software/Rasterizer.h +++ b/GPU/Software/Rasterizer.h @@ -17,7 +17,8 @@ #pragma once -#include "TransformUnit.h" // for DrawingCoords +#include "GPU/Software/FuncId.h" +#include "GPU/Software/TransformUnit.h" // for DrawingCoords struct GPUDebugBuffer; @@ -33,8 +34,8 @@ bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer); bool GetCurrentTexture(GPUDebugBuffer &buffer, int level); // Shared functions with RasterizerRectangle.cpp -Vec3 AlphaBlendingResult(const Vec4 &source, const Vec4 &dst); -void DrawSinglePixelNonClear(const DrawingCoords &p, u16 z, u8 fog, const Vec4 &color_in); +Vec3 AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4 &source, const Vec4 &dst); +void DrawSinglePixelNonClear(const DrawingCoords &p, u16 z, u8 fog, const Vec4 &color_in, const PixelFuncID &pixelID); Vec4 GetTextureFunctionOutput(const Vec4& prim_color, const Vec4& texcolor); } // namespace Rasterizer diff --git a/GPU/Software/RasterizerRectangle.cpp b/GPU/Software/RasterizerRectangle.cpp index 5e4047a9ec..b29e776e14 100644 --- a/GPU/Software/RasterizerRectangle.cpp +++ b/GPU/Software/RasterizerRectangle.cpp @@ -29,14 +29,14 @@ extern bool currentDialogActive; namespace Rasterizer { // Through mode, with the specific Darkstalker settings. 
-inline void DrawSinglePixel5551(u16 *pixel, const u32 color_in) { +inline void DrawSinglePixel5551(u16 *pixel, const u32 color_in, const PixelFuncID &pixelID) { u32 new_color; if ((color_in >> 24) == 255) { new_color = color_in & 0xFFFFFF; } else { const u32 old_color = RGBA5551ToRGBA8888(*pixel); const Vec4 dst = Vec4::FromRGBA(old_color); - Vec3 blended = AlphaBlendingResult(Vec4::FromRGBA(color_in), dst); + Vec3 blended = AlphaBlendingResult(pixelID, Vec4::FromRGBA(color_in), dst); // ToRGB() always automatically clamps. new_color = blended.ToRGB(); } @@ -98,6 +98,9 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { bool isWhite = v1.color0 == Vec4(255, 255, 255, 255); + PixelFuncID pixelID; + ComputePixelFuncID(&pixelID); + constexpr int MIN_LINES_PER_THREAD = 32; if (gstate.isTextureMapEnabled()) { @@ -149,7 +152,7 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { for (int x = pos0.x; x < pos1.x; x++) { u32 tex_color = nearestFunc(s, t, texptr, texbufw, 0); if (tex_color & 0xFF000000) { - DrawSinglePixel5551(pixel, tex_color); + DrawSinglePixel5551(pixel, tex_color, pixelID); } s += ds; pixel++; @@ -168,7 +171,7 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0)); prim_color = ModulateRGBA(prim_color, tex_color); if (prim_color.a() > 0) { - DrawSinglePixel5551(pixel, prim_color.ToRGBA()); + DrawSinglePixel5551(pixel, prim_color.ToRGBA(), pixelID); } s += ds; pixel++; @@ -188,7 +191,7 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0)); prim_color = GetTextureFunctionOutput(prim_color, tex_color); DrawingCoords pos(x, y, z); - DrawSinglePixelNonClear(pos, (u16)z, 1.0f, prim_color); + DrawSinglePixelNonClear(pos, (u16)z, 1.0f, prim_color, pixelID); s += ds; } t += dt; @@ -221,7 +224,7 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { u16 *pixel = 
fb.Get16Ptr(pos0.x, y, gstate.FrameBufStride()); for (int x = pos0.x; x < pos1.x; x++) { Vec4 prim_color = v1.color0; - DrawSinglePixel5551(pixel, prim_color.ToRGBA()); + DrawSinglePixel5551(pixel, prim_color.ToRGBA(), pixelID); pixel++; } } @@ -232,7 +235,7 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { for (int x = pos0.x; x < pos1.x; x++) { Vec4 prim_color = v1.color0; DrawingCoords pos(x, y, z); - DrawSinglePixelNonClear(pos, (u16)z, fog, prim_color); + DrawSinglePixelNonClear(pos, (u16)z, fog, prim_color, pixelID); } } }, pos0.y, pos1.y, MIN_LINES_PER_THREAD); From 26378f9c89c1db8d0575948d61cbba76a678cf17 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 20 Nov 2021 14:45:38 -0800 Subject: [PATCH 3/5] softgpu: Specialize sprite based on pixel func ID. --- GPU/Software/RasterizerRectangle.cpp | 46 +++++++++++++++------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/GPU/Software/RasterizerRectangle.cpp b/GPU/Software/RasterizerRectangle.cpp index b29e776e14..47349791f1 100644 --- a/GPU/Software/RasterizerRectangle.cpp +++ b/GPU/Software/RasterizerRectangle.cpp @@ -130,19 +130,20 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { pos0.y = scissorTL.y; } - if (!gstate.isStencilTestEnabled() && - !gstate.isDepthTestEnabled() && - !gstate.isLogicOpEnabled() && - !gstate.isColorTestEnabled() && - !gstate.isDitherEnabled() && - gstate.isAlphaTestEnabled() && - gstate.getAlphaTestRef() == 0 && - gstate.getAlphaTestMask() == 0xFF && - gstate.isAlphaBlendEnabled() && + if (!pixelID.stencilTest && + pixelID.depthTestFunc == GE_COMP_ALWAYS && + !pixelID.applyLogicOp && + !pixelID.colorTest && + !pixelID.dithering && + // TODO: Safe? 
+ pixelID.alphaTestFunc != GE_COMP_ALWAYS && + pixelID.alphaTestRef == 0 && + !pixelID.hasAlphaTestMask && + pixelID.alphaBlend && gstate.isTextureAlphaUsed() && gstate.getTextureFunction() == GE_TEXFUNC_MODULATE && - gstate.getColorMask() == 0x000000 && - gstate.FrameBufFormat() == GE_FORMAT_5551) { + !pixelID.applyColorWriteMask && + pixelID.fbFormat == GE_FORMAT_5551) { if (isWhite) { ParallelRangeLoop(&g_threadManager, [=](int y1, int y2) { int t = t_start + (y1 - pos0.y) * dt; @@ -203,19 +204,20 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { if (pos1.y > scissorBR.y) pos1.y = scissorBR.y + 1; if (pos0.x < scissorTL.x) pos0.x = scissorTL.x; if (pos0.y < scissorTL.y) pos0.y = scissorTL.y; - if (!gstate.isStencilTestEnabled() && - !gstate.isDepthTestEnabled() && - !gstate.isLogicOpEnabled() && - !gstate.isColorTestEnabled() && - !gstate.isDitherEnabled() && - gstate.isAlphaTestEnabled() && - gstate.getAlphaTestRef() == 0 && - gstate.getAlphaTestMask() == 0xFF && - gstate.isAlphaBlendEnabled() && + if (!pixelID.stencilTest && + pixelID.depthTestFunc == GE_COMP_ALWAYS && + !pixelID.applyLogicOp && + !pixelID.colorTest && + !pixelID.dithering && + // TODO: Safe? + pixelID.alphaTestFunc != GE_COMP_ALWAYS && + pixelID.alphaTestRef == 0 && + !pixelID.hasAlphaTestMask && + pixelID.alphaBlend && gstate.isTextureAlphaUsed() && gstate.getTextureFunction() == GE_TEXFUNC_MODULATE && - gstate.getColorMask() == 0x000000 && - gstate.FrameBufFormat() == GE_FORMAT_5551) { + !pixelID.applyColorWriteMask && + pixelID.fbFormat == GE_FORMAT_5551) { if (v1.color0.a() == 0) return; From aa3786ed2132de18d3b4c2e2070ad8e974c3a76d Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 20 Nov 2021 14:52:51 -0800 Subject: [PATCH 4/5] softgpu: Force off alpha blend if uselessly on. This is a simple optimization to prevent some work games sometimes waste. 
--- GPU/Software/FuncId.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/GPU/Software/FuncId.cpp b/GPU/Software/FuncId.cpp index 0bdde24f7d..9af575359c 100644 --- a/GPU/Software/FuncId.cpp +++ b/GPU/Software/FuncId.cpp @@ -65,6 +65,13 @@ void ComputePixelFuncID(PixelFuncID *id) { } id->alphaBlend = gstate.isAlphaBlendEnabled(); + // Force it off if the factors are constant and don't blend. Some games use this... + if (id->alphaBlend && gstate.getBlendEq() == GE_BLENDMODE_MUL_AND_ADD) { + bool srcFixedOne = gstate.getBlendFuncA() == GE_SRCBLEND_FIXA && gstate.getFixA() == 0x00FFFFFF; + bool dstFixedZero = gstate.getBlendFuncB() == GE_DSTBLEND_FIXB && gstate.getFixB() == 0x00000000; + if (srcFixedOne && dstFixedZero) + id->alphaBlend = false; + } if (id->alphaBlend) { id->alphaBlendEq = gstate.getBlendEq(); id->alphaBlendSrc = gstate.getBlendFuncA(); From 9abf2a472595c669dbcf8d5c90cf8bb20d4891be Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 20 Nov 2021 18:53:51 -0800 Subject: [PATCH 5/5] softgpu: Confirm mask doesn't hit stencil REPLACE. --- GPU/Software/Rasterizer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index f9c8ec9308..9c3062f3df 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -428,7 +428,6 @@ static inline u8 ApplyStencilOp(GEBufferFormat fmt, GEStencilOp op, u8 old_stenc return 0; case GE_STENCILOP_REPLACE: - // TODO: Apply mask to reference? return gstate.getStencilTestRef(); case GE_STENCILOP_INVERT: