diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ef38ef8c8..e9debfd844 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1580,6 +1580,8 @@ set(GPU_SOURCES GPU/Math3D.h GPU/Software/Clipper.cpp GPU/Software/Clipper.h + GPU/Software/DrawPixel.cpp + GPU/Software/DrawPixel.h GPU/Software/FuncId.cpp GPU/Software/FuncId.h GPU/Software/Lighting.cpp diff --git a/GPU/GPU.vcxproj b/GPU/GPU.vcxproj index 7392a733a1..3360496097 100644 --- a/GPU/GPU.vcxproj +++ b/GPU/GPU.vcxproj @@ -453,6 +453,7 @@ + @@ -629,6 +630,7 @@ + diff --git a/GPU/GPU.vcxproj.filters b/GPU/GPU.vcxproj.filters index 23a81b6bcc..c8aaf5354c 100644 --- a/GPU/GPU.vcxproj.filters +++ b/GPU/GPU.vcxproj.filters @@ -267,6 +267,9 @@ Software + + Software + @@ -536,5 +539,8 @@ Software + + Software + \ No newline at end of file diff --git a/GPU/Software/DrawPixel.cpp b/GPU/Software/DrawPixel.cpp new file mode 100644 index 0000000000..126a690c50 --- /dev/null +++ b/GPU/Software/DrawPixel.cpp @@ -0,0 +1,467 @@ +// Copyright (c) 2013- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#include "Common/Data/Convert/ColorConv.h" +#include "GPU/GPUState.h" +#include "GPU/Software/DrawPixel.h" +#include "GPU/Software/FuncId.h" +#include "GPU/Software/Rasterizer.h" +#include "GPU/Software/SoftGpu.h" + +using namespace Math3D; + +namespace Rasterizer { + +void Init() { +} + +void Shutdown() { +} + +bool DescribeCodePtr(const u8 *ptr, std::string &name) { + return false; +} + +static inline u8 GetPixelStencil(GEBufferFormat fmt, int x, int y) { + if (fmt == GE_FORMAT_565) { + // Always treated as 0 for comparison purposes. + return 0; + } else if (fmt == GE_FORMAT_5551) { + return ((fb.Get16(x, y, gstate.FrameBufStride()) & 0x8000) != 0) ? 0xFF : 0; + } else if (fmt == GE_FORMAT_4444) { + return Convert4To8(fb.Get16(x, y, gstate.FrameBufStride()) >> 12); + } else { + return fb.Get32(x, y, gstate.FrameBufStride()) >> 24; + } +} + +static inline void SetPixelStencil(GEBufferFormat fmt, int x, int y, u8 value) { + if (fmt == GE_FORMAT_565) { + // Do nothing + } else if (fmt == GE_FORMAT_5551) { + if ((gstate.getStencilWriteMask() & 0x80) == 0) { + u16 pixel = fb.Get16(x, y, gstate.FrameBufStride()) & ~0x8000; + pixel |= (value & 0x80) << 8; + fb.Set16(x, y, gstate.FrameBufStride(), pixel); + } + } else if (fmt == GE_FORMAT_4444) { + const u16 write_mask = (gstate.getStencilWriteMask() << 8) | 0x0FFF; + u16 pixel = fb.Get16(x, y, gstate.FrameBufStride()) & write_mask; + pixel |= ((u16)value << 8) & ~write_mask; + fb.Set16(x, y, gstate.FrameBufStride(), pixel); + } else { + const u32 write_mask = (gstate.getStencilWriteMask() << 24) | 0x00FFFFFF; + u32 pixel = fb.Get32(x, y, gstate.FrameBufStride()) & write_mask; + pixel |= ((u32)value << 24) & ~write_mask; + fb.Set32(x, y, gstate.FrameBufStride(), pixel); + } +} + +static inline u16 GetPixelDepth(int x, int y) { + return depthbuf.Get16(x, y, gstate.DepthBufStride()); +} + +static inline void SetPixelDepth(int x, int y, u16 value) { + depthbuf.Set16(x, y, gstate.DepthBufStride(), value); +} + +// NOTE: These likely aren't endian safe +static inline u32 GetPixelColor(GEBufferFormat fmt, int x, int y) { + switch (fmt) { + case GE_FORMAT_565: + return RGB565ToRGBA8888(fb.Get16(x, y, gstate.FrameBufStride())); + + case GE_FORMAT_5551: + return RGBA5551ToRGBA8888(fb.Get16(x, y, gstate.FrameBufStride())); + + case GE_FORMAT_4444: + return RGBA4444ToRGBA8888(fb.Get16(x, y, gstate.FrameBufStride())); + + case GE_FORMAT_8888: + return fb.Get32(x, y, gstate.FrameBufStride()); + + default: + return 0; + } +} + +static inline void SetPixelColor(GEBufferFormat fmt, int x, int y, u32 value) { + switch (fmt) { + case GE_FORMAT_565: + fb.Set16(x, y, gstate.FrameBufStride(), RGBA8888ToRGB565(value)); + break; + + case GE_FORMAT_5551: + fb.Set16(x, y, gstate.FrameBufStride(), RGBA8888ToRGBA5551(value)); + break; + + case GE_FORMAT_4444: + fb.Set16(x, y, gstate.FrameBufStride(), RGBA8888ToRGBA4444(value)); + break; + + case GE_FORMAT_8888: + fb.Set32(x, y, gstate.FrameBufStride(), value); + break; + + default: + break; + } +} + +static inline bool AlphaTestPassed(const PixelFuncID &pixelID, int alpha) { + const u8 ref = pixelID.alphaTestRef; + if (pixelID.hasAlphaTestMask) + alpha &= gstate.getAlphaTestMask(); + + switch (GEComparison(pixelID.alphaTestFunc)) { + case GE_COMP_NEVER: + return false; + + case GE_COMP_ALWAYS: + return true; + + case GE_COMP_EQUAL: + return (alpha == ref); + + case GE_COMP_NOTEQUAL: + return (alpha != ref); + + case GE_COMP_LESS: + return (alpha < ref); + + case GE_COMP_LEQUAL: + return (alpha <= ref); + + case GE_COMP_GREATER: + return (alpha > ref); + + case GE_COMP_GEQUAL: + return (alpha >= ref); + } + return true; +} + +static inline bool ColorTestPassed(const Vec3 &color) { + const u32 mask = gstate.getColorTestMask(); + const u32 c = color.ToRGB() & mask; + const u32 ref = gstate.getColorTestRef() & mask; + switch (gstate.getColorTestFunction()) { + case GE_COMP_NEVER: + return false; + + case GE_COMP_ALWAYS: + return true; + + case GE_COMP_EQUAL: + return c == ref; + + case GE_COMP_NOTEQUAL: + return c != ref; + } + return true; +} + +static inline bool StencilTestPassed(const PixelFuncID &pixelID, u8 stencil) { + if (pixelID.hasStencilTestMask) + stencil &= gstate.getStencilTestMask(); + u8 ref = pixelID.stencilTestRef; + switch (GEComparison(pixelID.stencilTestFunc)) { + case GE_COMP_NEVER: + return false; + + case GE_COMP_ALWAYS: + return true; + + case GE_COMP_EQUAL: + return ref == stencil; + + case GE_COMP_NOTEQUAL: + return ref != stencil; + + case GE_COMP_LESS: + return ref < stencil; + + case GE_COMP_LEQUAL: + return ref <= stencil; + + case GE_COMP_GREATER: + return ref > stencil; + + case GE_COMP_GEQUAL: + return ref >= stencil; + } + return true; +} + +static inline u8 ApplyStencilOp(GEBufferFormat fmt, GEStencilOp op, u8 old_stencil) { + switch (op) { + case GE_STENCILOP_KEEP: + return old_stencil; + + case GE_STENCILOP_ZERO: + return 0; + + case GE_STENCILOP_REPLACE: + return gstate.getStencilTestRef(); + + case GE_STENCILOP_INVERT: + return ~old_stencil; + + case GE_STENCILOP_INCR: + switch (fmt) { + case GE_FORMAT_8888: + if (old_stencil != 0xFF) { + return old_stencil + 1; + } + return old_stencil; + case GE_FORMAT_5551: + return 0xFF; + case GE_FORMAT_4444: + if (old_stencil < 0xF0) { + return old_stencil + 0x10; + } + return old_stencil; + default: + return old_stencil; + } + break; + + case GE_STENCILOP_DECR: + switch (fmt) { + case GE_FORMAT_4444: + if (old_stencil >= 0x10) + return old_stencil - 0x10; + break; + default: + if (old_stencil != 0) + return old_stencil - 1; + return old_stencil; + } + break; + } + + return old_stencil; +} + +static inline bool DepthTestPassed(GEComparison func, int x, int y, u16 z) { + u16 reference_z = GetPixelDepth(x, y); + + switch (func) { + case GE_COMP_NEVER: + return false; + + case GE_COMP_ALWAYS: + return true; + + case GE_COMP_EQUAL: + return (z == reference_z); + + case GE_COMP_NOTEQUAL: + return (z != reference_z); + + case GE_COMP_LESS: + return (z < reference_z); + + case GE_COMP_LEQUAL: + return (z <= reference_z); + + case GE_COMP_GREATER: + return (z > reference_z); + + case GE_COMP_GEQUAL: + return (z >= reference_z); + + default: + return 0; + } +} + +static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) { + // All of the operations here intentionally preserve alpha/stencil. + switch (op) { + case GE_LOGIC_CLEAR: + new_color &= 0xFF000000; + break; + + case GE_LOGIC_AND: + new_color = new_color & (old_color | 0xFF000000); + break; + + case GE_LOGIC_AND_REVERSE: + new_color = new_color & (~old_color | 0xFF000000); + break; + + case GE_LOGIC_COPY: + // No change to new_color. + break; + + case GE_LOGIC_AND_INVERTED: + new_color = (~new_color & (old_color & 0x00FFFFFF)) | (new_color & 0xFF000000); + break; + + case GE_LOGIC_NOOP: + new_color = (old_color & 0x00FFFFFF) | (new_color & 0xFF000000); + break; + + case GE_LOGIC_XOR: + new_color = new_color ^ (old_color & 0x00FFFFFF); + break; + + case GE_LOGIC_OR: + new_color = new_color | (old_color & 0x00FFFFFF); + break; + + case GE_LOGIC_NOR: + new_color = (~(new_color | old_color) & 0x00FFFFFF) | (new_color & 0xFF000000); + break; + + case GE_LOGIC_EQUIV: + new_color = (~(new_color ^ old_color) & 0x00FFFFFF) | (new_color & 0xFF000000); + break; + + case GE_LOGIC_INVERTED: + new_color = (~old_color & 0x00FFFFFF) | (new_color & 0xFF000000); + break; + + case GE_LOGIC_OR_REVERSE: + new_color = new_color | (~old_color & 0x00FFFFFF); + break; + + case GE_LOGIC_COPY_INVERTED: + new_color = (~new_color & 0x00FFFFFF) | (new_color & 0xFF000000); + break; + + case GE_LOGIC_OR_INVERTED: + new_color = ((~new_color | old_color) & 0x00FFFFFF) | (new_color & 0xFF000000); + break; + + case GE_LOGIC_NAND: + new_color = (~(new_color & old_color) & 0x00FFFFFF) | (new_color & 0xFF000000); + break; + + case GE_LOGIC_SET: + new_color |= 0x00FFFFFF; + break; + } + + return new_color; +} + +template +inline void DrawSinglePixel(int x, int y, int z, int fog, const Vec4 &color_in, const PixelFuncID &pixelID) { + Vec4 prim_color = color_in.Clamp(0, 255); + // Depth range test - applied in clear mode, if not through mode. + if (pixelID.applyDepthRange) + if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax()) + return; + + if (GEComparison(pixelID.alphaTestFunc) != GE_COMP_ALWAYS && !clearMode) + if (!AlphaTestPassed(pixelID, prim_color.a())) + return; + + // Fog is applied prior to color test. + if (pixelID.applyFog && !clearMode) { + Vec3 fogColor = Vec3::FromRGB(gstate.fogcolor); + fogColor = (prim_color.rgb() * fog + fogColor * (255 - fog)) / 255; + prim_color.r() = fogColor.r(); + prim_color.g() = fogColor.g(); + prim_color.b() = fogColor.b(); + } + + if (pixelID.colorTest && !clearMode) + if (!ColorTestPassed(prim_color.rgb())) + return; + + // In clear mode, it uses the alpha color as stencil. + u8 stencil = clearMode ? prim_color.a() : GetPixelStencil(GEBufferFormat(pixelID.fbFormat), x, y); + if (clearMode) { + if (pixelID.depthClear) + SetPixelDepth(x, y, z); + } else if (pixelID.stencilTest) { + if (!StencilTestPassed(pixelID, stencil)) { + stencil = ApplyStencilOp(GEBufferFormat(pixelID.fbFormat), GEStencilOp(pixelID.sFail), stencil); + SetPixelStencil(GEBufferFormat(pixelID.fbFormat), x, y, stencil); + return; + } + + // Also apply depth at the same time. If disabled, same as passing. + if (pixelID.depthTestFunc != GE_COMP_ALWAYS && !DepthTestPassed(GEComparison(pixelID.depthTestFunc), x, y, z)) { + stencil = ApplyStencilOp(GEBufferFormat(pixelID.fbFormat), GEStencilOp(pixelID.zFail), stencil); + SetPixelStencil(GEBufferFormat(pixelID.fbFormat), x, y, stencil); + return; + } + + stencil = ApplyStencilOp(GEBufferFormat(pixelID.fbFormat), GEStencilOp(pixelID.zPass), stencil); + } else { + if (pixelID.depthTestFunc != GE_COMP_ALWAYS && !DepthTestPassed(GEComparison(pixelID.depthTestFunc), x, y, z)) { + return; + } + } + + if (pixelID.depthWrite && !clearMode) + SetPixelDepth(x, y, z); + + const u32 old_color = GetPixelColor(GEBufferFormat(pixelID.fbFormat), x, y); + u32 new_color; + + // Dithering happens before the logic op and regardless of framebuffer format or clear mode. + // We do it while alpha blending because it happens before clamping. + if (pixelID.alphaBlend && !clearMode) { + const Vec4 dst = Vec4::FromRGBA(old_color); + Vec3 blended = AlphaBlendingResult(pixelID, prim_color, dst); + if (pixelID.dithering) { + blended += Vec3::AssignToAll(gstate.getDitherValue(x, y)); + } + + // ToRGB() always automatically clamps. + new_color = blended.ToRGB(); + new_color |= stencil << 24; + } else { + if (pixelID.dithering) { + // We'll discard alpha anyway. + prim_color += Vec4::AssignToAll(gstate.getDitherValue(x, y)); + } + +#if defined(_M_SSE) + new_color = Vec3(prim_color.ivec).ToRGB(); + new_color |= stencil << 24; +#else + new_color = Vec4(prim_color.r(), prim_color.g(), prim_color.b(), stencil).ToRGBA(); +#endif + } + + // Logic ops are applied after blending (if blending is enabled.) + if (pixelID.applyLogicOp && !clearMode) { + // Logic ops don't affect stencil, which happens inside ApplyLogicOp. + new_color = ApplyLogicOp(gstate.getLogicOp(), old_color, new_color); + } + + if (clearMode) { + new_color = (new_color & ~gstate.getClearModeColorMask()) | (old_color & gstate.getClearModeColorMask()); + } + new_color = (new_color & ~gstate.getColorMask()) | (old_color & gstate.getColorMask()); + + SetPixelColor(GEBufferFormat(pixelID.fbFormat), x, y, new_color); +} + +SingleFunc GetSingleFunc(const PixelFuncID &id) { + if (id.clearMode) + return &DrawSinglePixel; + return &DrawSinglePixel; +} + +}; diff --git a/GPU/Software/DrawPixel.h b/GPU/Software/DrawPixel.h new file mode 100644 index 0000000000..26d50da2ce --- /dev/null +++ b/GPU/Software/DrawPixel.h @@ -0,0 +1,48 @@ +// Copyright (c) 2021- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + +#pragma once + +#include "ppsspp_config.h" + +#include +#include +#if PPSSPP_ARCH(ARM) +#include "Common/ArmEmitter.h" +#elif PPSSPP_ARCH(ARM64) +#include "Common/Arm64Emitter.h" +#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) +#include "Common/x64Emitter.h" +#elif PPSSPP_ARCH(MIPS) +#include "Common/MipsEmitter.h" +#else +#include "Common/FakeEmitter.h" +#endif +#include "GPU/Math3D.h" +#include "GPU/Software/FuncId.h" + +namespace Rasterizer { + +typedef void (*SingleFunc)(int x, int y, int z, int fog, const Math3D::Vec4 &color_in, const PixelFuncID &pixelID); +SingleFunc GetSingleFunc(const PixelFuncID &id); + +void Init(); +void Shutdown(); + +bool DescribeCodePtr(const u8 *ptr, std::string &name); + +}; diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 8cd6754f51..757a8eeff2 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -30,9 +30,10 @@ #include "GPU/GPUState.h" #include "GPU/Common/TextureDecoder.h" -#include "GPU/Software/SoftGpu.h" +#include "GPU/Software/DrawPixel.h" #include "GPU/Software/Rasterizer.h" #include "GPU/Software/Sampler.h" +#include "GPU/Software/SoftGpu.h" #if defined(_M_SSE) #include @@ -260,57 +261,6 @@ static inline void GetTextureCoordinates(const VertexData& v0, const VertexData& } } -// NOTE: These likely aren't endian safe -static inline u32 GetPixelColor(GEBufferFormat fmt, int x, int y) { - switch (fmt) { - case GE_FORMAT_565: - return RGB565ToRGBA8888(fb.Get16(x, y, gstate.FrameBufStride())); - - case GE_FORMAT_5551: - return RGBA5551ToRGBA8888(fb.Get16(x, y, gstate.FrameBufStride())); - - case GE_FORMAT_4444: - return RGBA4444ToRGBA8888(fb.Get16(x, y, gstate.FrameBufStride())); - - case GE_FORMAT_8888: - return fb.Get32(x, y, gstate.FrameBufStride()); - - case GE_FORMAT_INVALID: - case GE_FORMAT_DEPTH16: - _dbg_assert_msg_(false, "Software: invalid framebuf format."); - } - return 0; -} - -static inline void SetPixelColor(GEBufferFormat fmt, int x, int y, u32 value) { - switch (fmt) { - case GE_FORMAT_565: - fb.Set16(x, y, gstate.FrameBufStride(), RGBA8888ToRGB565(value)); - break; - - case GE_FORMAT_5551: - fb.Set16(x, y, gstate.FrameBufStride(), RGBA8888ToRGBA5551(value)); - break; - - case GE_FORMAT_4444: - fb.Set16(x, y, gstate.FrameBufStride(), RGBA8888ToRGBA4444(value)); - break; - - case GE_FORMAT_8888: - fb.Set32(x, y, gstate.FrameBufStride(), value); - break; - - case GE_FORMAT_INVALID: - case GE_FORMAT_DEPTH16: - _dbg_assert_msg_(false, "Software: invalid framebuf format."); - } -} - -static inline u16 GetPixelDepth(int x, int y) -{ - return depthbuf.Get16(x, y, gstate.DepthBufStride()); -} - static inline void SetPixelDepth(int x, int y, u16 value) { depthbuf.Set16(x, y, gstate.DepthBufStride(), value); @@ -329,62 +279,6 @@ static inline u8 GetPixelStencil(GEBufferFormat fmt, int x, int y) { } } -static inline void SetPixelStencil(GEBufferFormat fmt, int x, int y, u8 value) { - if (fmt == GE_FORMAT_565) { - // Do nothing - } else if (fmt == GE_FORMAT_5551) { - if ((gstate.getStencilWriteMask() & 0x80) == 0) { - u16 pixel = fb.Get16(x, y, gstate.FrameBufStride()) & ~0x8000; - pixel |= (value & 0x80) << 8; - fb.Set16(x, y, gstate.FrameBufStride(), pixel); - } - } else if (fmt == GE_FORMAT_4444) { - const u16 write_mask = (gstate.getStencilWriteMask() << 8) | 0x0FFF; - u16 pixel = fb.Get16(x, y, gstate.FrameBufStride()) & write_mask; - pixel |= ((u16)value << 8) & ~write_mask; - fb.Set16(x, y, gstate.FrameBufStride(), pixel); - } else { - const u32 write_mask = (gstate.getStencilWriteMask() << 24) | 0x00FFFFFF; - u32 pixel = fb.Get32(x, y, gstate.FrameBufStride()) & write_mask; - pixel |= ((u32)value << 24) & ~write_mask; - fb.Set32(x, y, gstate.FrameBufStride(), pixel); - } -} - -static inline bool DepthTestPassed(GEComparison func, int x, int y, u16 z) -{ - u16 reference_z = GetPixelDepth(x, y); - - switch (func) { - case GE_COMP_NEVER: - return false; - - case GE_COMP_ALWAYS: - return true; - - case GE_COMP_EQUAL: - return (z == reference_z); - - case GE_COMP_NOTEQUAL: - return (z != reference_z); - - case GE_COMP_LESS: - return (z < reference_z); - - case GE_COMP_LEQUAL: - return (z <= reference_z); - - case GE_COMP_GREATER: - return (z > reference_z); - - case GE_COMP_GEQUAL: - return (z >= reference_z); - - default: - return 0; - } -} - static inline bool IsRightSideOrFlatBottomLine(const Vec2& vertex, const Vec2& line1, const Vec2& line2) { if (line1.y == line2.y) { @@ -396,159 +290,6 @@ static inline bool IsRightSideOrFlatBottomLine(const Vec2& vertex, const Ve } } -static inline bool StencilTestPassed(const PixelFuncID &pixelID, u8 stencil) { - if (pixelID.hasStencilTestMask) - stencil &= gstate.getStencilTestMask(); - u8 ref = pixelID.stencilTestRef; - switch (GEComparison(pixelID.stencilTestFunc)) { - case GE_COMP_NEVER: - return false; - - case GE_COMP_ALWAYS: - return true; - - case GE_COMP_EQUAL: - return ref == stencil; - - case GE_COMP_NOTEQUAL: - return ref != stencil; - - case GE_COMP_LESS: - return ref < stencil; - - case GE_COMP_LEQUAL: - return ref <= stencil; - - case GE_COMP_GREATER: - return ref > stencil; - - case GE_COMP_GEQUAL: - return ref >= stencil; - } - return true; -} - -static inline u8 ApplyStencilOp(GEBufferFormat fmt, GEStencilOp op, u8 old_stencil) { - switch (op) { - case GE_STENCILOP_KEEP: - return old_stencil; - - case GE_STENCILOP_ZERO: - return 0; - - case GE_STENCILOP_REPLACE: - return gstate.getStencilTestRef(); - - case GE_STENCILOP_INVERT: - return ~old_stencil; - - case GE_STENCILOP_INCR: - switch (fmt) { - case GE_FORMAT_8888: - if (old_stencil != 0xFF) { - return old_stencil + 1; - } - return old_stencil; - case GE_FORMAT_5551: - return 0xFF; - case GE_FORMAT_4444: - if (old_stencil < 0xF0) { - return old_stencil + 0x10; - } - return old_stencil; - default: - return old_stencil; - } - break; - - case GE_STENCILOP_DECR: - switch (fmt) { - case GE_FORMAT_4444: - if (old_stencil >= 0x10) - return old_stencil - 0x10; - break; - default: - if (old_stencil != 0) - return old_stencil - 1; - return old_stencil; - } - break; - } - - return old_stencil; -} - -static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) { - // All of the operations here intentionally preserve alpha/stencil. - switch (op) { - case GE_LOGIC_CLEAR: - new_color &= 0xFF000000; - break; - - case GE_LOGIC_AND: - new_color = new_color & (old_color | 0xFF000000); - break; - - case GE_LOGIC_AND_REVERSE: - new_color = new_color & (~old_color | 0xFF000000); - break; - - case GE_LOGIC_COPY: - // No change to new_color. - break; - - case GE_LOGIC_AND_INVERTED: - new_color = (~new_color & (old_color & 0x00FFFFFF)) | (new_color & 0xFF000000); - break; - - case GE_LOGIC_NOOP: - new_color = (old_color & 0x00FFFFFF) | (new_color & 0xFF000000); - break; - - case GE_LOGIC_XOR: - new_color = new_color ^ (old_color & 0x00FFFFFF); - break; - - case GE_LOGIC_OR: - new_color = new_color | (old_color & 0x00FFFFFF); - break; - - case GE_LOGIC_NOR: - new_color = (~(new_color | old_color) & 0x00FFFFFF) | (new_color & 0xFF000000); - break; - - case GE_LOGIC_EQUIV: - new_color = (~(new_color ^ old_color) & 0x00FFFFFF) | (new_color & 0xFF000000); - break; - - case GE_LOGIC_INVERTED: - new_color = (~old_color & 0x00FFFFFF) | (new_color & 0xFF000000); - break; - - case GE_LOGIC_OR_REVERSE: - new_color = new_color | (~old_color & 0x00FFFFFF); - break; - - case GE_LOGIC_COPY_INVERTED: - new_color = (~new_color & 0x00FFFFFF) | (new_color & 0xFF000000); - break; - - case GE_LOGIC_OR_INVERTED: - new_color = ((~new_color | old_color) & 0x00FFFFFF) | (new_color & 0xFF000000); - break; - - case GE_LOGIC_NAND: - new_color = (~(new_color & old_color) & 0x00FFFFFF) | (new_color & 0xFF000000); - break; - - case GE_LOGIC_SET: - new_color |= 0x00FFFFFF; - break; - } - - return new_color; -} - Vec4 GetTextureFunctionOutput(const Vec4& prim_color, const Vec4& texcolor) { Vec3 out_rgb; @@ -628,65 +369,6 @@ Vec4 GetTextureFunctionOutput(const Vec4& prim_color, const Vec4& return Vec4(out_rgb.r(), out_rgb.g(), out_rgb.b(), out_a); } -static inline bool ColorTestPassed(const Vec3 &color) -{ - const u32 mask = gstate.getColorTestMask(); - const u32 c = color.ToRGB() & mask; - const u32 ref = gstate.getColorTestRef() & mask; - switch (gstate.getColorTestFunction()) { - case GE_COMP_NEVER: - return false; - - case GE_COMP_ALWAYS: - return true; - - case GE_COMP_EQUAL: - return c == ref; - - case GE_COMP_NOTEQUAL: - return c != ref; - - default: - ERROR_LOG_REPORT(G3D, "Software: Invalid colortest function: %d", gstate.getColorTestFunction()); - break; - } - return true; -} - -static inline bool AlphaTestPassed(const PixelFuncID &pixelID, int alpha) -{ - const u8 ref = pixelID.alphaTestRef; - if (pixelID.hasAlphaTestMask) - alpha &= gstate.getAlphaTestMask(); - - switch (GEComparison(pixelID.alphaTestFunc)) { - case GE_COMP_NEVER: - return false; - - case GE_COMP_ALWAYS: - return true; - - case GE_COMP_EQUAL: - return (alpha == ref); - - case GE_COMP_NOTEQUAL: - return (alpha != ref); - - case GE_COMP_LESS: - return (alpha < ref); - - case GE_COMP_LEQUAL: - return (alpha <= ref); - - case GE_COMP_GREATER: - return (alpha > ref); - - case GE_COMP_GEQUAL: - return (alpha >= ref); - } - return true; -} - static inline Vec3 GetSourceFactor(GEBlendSrcFactor factor, const Vec4 &source, const Vec4 &dst) { switch (factor) { case GE_SRCBLEND_DSTCOLOR: @@ -843,107 +525,6 @@ Vec3 AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4 &sourc } } -template -inline void DrawSinglePixel(const DrawingCoords &p, int z, u8 fog, const Vec4 &color_in, const PixelFuncID &pixelID) { - Vec4 prim_color = color_in.Clamp(0, 255); - // Depth range test - applied in clear mode, if not through mode. - if (pixelID.applyDepthRange) - if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax()) - return; - - if (GEComparison(pixelID.alphaTestFunc) != GE_COMP_ALWAYS && !clearMode) - if (!AlphaTestPassed(pixelID, prim_color.a())) - return; - - // Fog is applied prior to color test. - if (pixelID.applyFog && !clearMode) { - Vec3 fogColor = Vec3::FromRGB(gstate.fogcolor); - fogColor = (prim_color.rgb() * (int)fog + fogColor * (255 - (int)fog)) / 255; - prim_color.r() = fogColor.r(); - prim_color.g() = fogColor.g(); - prim_color.b() = fogColor.b(); - } - - if (pixelID.colorTest && !clearMode) - if (!ColorTestPassed(prim_color.rgb())) - return; - - // In clear mode, it uses the alpha color as stencil. - u8 stencil = clearMode ? prim_color.a() : GetPixelStencil(GEBufferFormat(pixelID.fbFormat), p.x, p.y); - if (clearMode) { - if (pixelID.depthClear) - SetPixelDepth(p.x, p.y, z); - } else if (pixelID.stencilTest) { - if (!StencilTestPassed(pixelID, stencil)) { - stencil = ApplyStencilOp(GEBufferFormat(pixelID.fbFormat), GEStencilOp(pixelID.sFail), stencil); - SetPixelStencil(GEBufferFormat(pixelID.fbFormat), p.x, p.y, stencil); - return; - } - - // Also apply depth at the same time. If disabled, same as passing. - if (pixelID.depthTestFunc != GE_COMP_ALWAYS && !DepthTestPassed(GEComparison(pixelID.depthTestFunc), p.x, p.y, z)) { - stencil = ApplyStencilOp(GEBufferFormat(pixelID.fbFormat), GEStencilOp(pixelID.zFail), stencil); - SetPixelStencil(GEBufferFormat(pixelID.fbFormat), p.x, p.y, stencil); - return; - } - - stencil = ApplyStencilOp(GEBufferFormat(pixelID.fbFormat), GEStencilOp(pixelID.zPass), stencil); - } else { - if (pixelID.depthTestFunc != GE_COMP_ALWAYS && !DepthTestPassed(GEComparison(pixelID.depthTestFunc), p.x, p.y, z)) { - return; - } - } - - if (pixelID.depthWrite && !clearMode) - SetPixelDepth(p.x, p.y, z); - - const u32 old_color = GetPixelColor(GEBufferFormat(pixelID.fbFormat), p.x, p.y); - u32 new_color; - - // Dithering happens before the logic op and regardless of framebuffer format or clear mode. - // We do it while alpha blending because it happens before clamping. - if (pixelID.alphaBlend && !clearMode) { - const Vec4 dst = Vec4::FromRGBA(old_color); - Vec3 blended = AlphaBlendingResult(pixelID, prim_color, dst); - if (pixelID.dithering) { - blended += Vec3::AssignToAll(gstate.getDitherValue(p.x, p.y)); - } - - // ToRGB() always automatically clamps. - new_color = blended.ToRGB(); - new_color |= stencil << 24; - } else { - if (pixelID.dithering) { - // We'll discard alpha anyway. - prim_color += Vec4::AssignToAll(gstate.getDitherValue(p.x, p.y)); - } - -#if defined(_M_SSE) - new_color = Vec3(prim_color.ivec).ToRGB(); - new_color |= stencil << 24; -#else - new_color = Vec4(prim_color.r(), prim_color.g(), prim_color.b(), stencil).ToRGBA(); -#endif - } - - // Logic ops are applied after blending (if blending is enabled.) - if (pixelID.applyLogicOp && !clearMode) { - // Logic ops don't affect stencil, which happens inside ApplyLogicOp. - new_color = ApplyLogicOp(gstate.getLogicOp(), old_color, new_color); - } - - if (clearMode) { - new_color = (new_color & ~gstate.getClearModeColorMask()) | (old_color & gstate.getClearModeColorMask()); - } - new_color = (new_color & ~gstate.getColorMask()) | (old_color & gstate.getColorMask()); - - SetPixelColor(GEBufferFormat(pixelID.fbFormat), p.x, p.y, new_color); -} - -void DrawSinglePixelNonClear(const DrawingCoords &p, u16 z, u8 fog, const Vec4 &color_in, const PixelFuncID &pixelID) { - DrawSinglePixel(p, z, fog, color_in, pixelID); -} - static inline void ApplyTexturing(Sampler::Funcs sampler, Vec4 &prim_color, float s, float t, int texlevel, int frac_texlevel, bool bilinear, u8 *texptr[], int texbufw[]) { int u[8] = {0}, v[8] = {0}; // 1.23.8 fixed point int frac_u[2], frac_v[2]; @@ -1141,7 +722,8 @@ void DrawTriangleSlice( const VertexData& v0, const VertexData& v1, const VertexData& v2, int x1, int y1, int x2, int y2, bool byY, int h1, int h2, - const PixelFuncID &pixelID) + const PixelFuncID &pixelID, + const Rasterizer::SingleFunc &drawPixel) { Vec4 bias0 = Vec4::AssignToAll(IsRightSideOrFlatBottomLine(v0.screenpos.xy(), v1.screenpos.xy(), v2.screenpos.xy()) ? -1 : 0); Vec4 bias1 = Vec4::AssignToAll(IsRightSideOrFlatBottomLine(v1.screenpos.xy(), v2.screenpos.xy(), v0.screenpos.xy()) ? -1 : 0); @@ -1290,7 +872,7 @@ void DrawTriangleSlice( subp.x = p.x + (i & 1); subp.y = p.y + (i / 2); - DrawSinglePixel(subp, z[i], fog[i], prim_color[i], pixelID); + drawPixel(subp.x, subp.y, z[i], fog[i], prim_color[i], pixelID); } } } @@ -1328,38 +910,39 @@ void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& PixelFuncID pixelID; ComputePixelFuncID(&pixelID); + Rasterizer::SingleFunc drawPixel = Rasterizer::GetSingleFunc(pixelID); const int MIN_LINES_PER_THREAD = 4; if (rangeY >= 12 && rangeX >= rangeY * 4) { if (gstate.isModeClear()) { auto bound = [&](int a, int b) -> void { - DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, false, a, b, pixelID); + DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, false, a, b, pixelID, drawPixel); }; ParallelRangeLoop(&g_threadManager, bound, 0, rangeX, MIN_LINES_PER_THREAD); } else { auto bound = [&](int a, int b) -> void { - DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, false, a, b, pixelID); + DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, false, a, b, pixelID, drawPixel); }; ParallelRangeLoop(&g_threadManager, bound, 0, rangeX, MIN_LINES_PER_THREAD); } } else if (rangeY >= 12 && rangeX >= 12) { if (gstate.isModeClear()) { auto bound = [&](int a, int b) -> void { - DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, a, b, pixelID); + DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, a, b, pixelID, drawPixel); }; ParallelRangeLoop(&g_threadManager, bound, 0, rangeY, MIN_LINES_PER_THREAD); } else { auto bound = [&](int a, int b) -> void { - DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, a, b, pixelID); + DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, a, b, pixelID, drawPixel); }; ParallelRangeLoop(&g_threadManager, bound, 0, rangeY, MIN_LINES_PER_THREAD); } } else { if (gstate.isModeClear()) { - DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, 0, rangeY, pixelID); + DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, 0, rangeY, pixelID, drawPixel); } else { - DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, 0, rangeY, pixelID); + DrawTriangleSlice(v0, v1, v2, minX, minY, maxX, maxY, true, 0, rangeY, pixelID, drawPixel); } } } @@ -1382,6 +965,7 @@ void DrawPoint(const VertexData &v0) Sampler::Funcs sampler = Sampler::GetFuncs(); PixelFuncID pixelID; ComputePixelFuncID(&pixelID); + Rasterizer::SingleFunc drawPixel = Rasterizer::GetSingleFunc(pixelID); if (gstate.isTextureMapEnabled() && !pixelID.clearMode) { int texbufw[8] = {0}; @@ -1436,11 +1020,7 @@ void DrawPoint(const VertexData &v0) fog = ClampFogDepth(v0.fogdepth); } - if (pixelID.clearMode) { - DrawSinglePixel(p, z, fog, prim_color, pixelID); - } else { - DrawSinglePixel(p, z, fog, prim_color, pixelID); - } + drawPixel(p.x, p.y, z, fog, prim_color, pixelID); } void ClearRectangle(const VertexData &v0, const VertexData &v1) @@ -1656,6 +1236,7 @@ void DrawLine(const VertexData &v0, const VertexData &v1) } Sampler::Funcs sampler = Sampler::GetFuncs(); + Rasterizer::SingleFunc drawPixel = Rasterizer::GetSingleFunc(pixelID); double x = a.x > b.x ? a.x - 1 : a.x; double y = a.y > b.y ? a.y - 1 : a.y; @@ -1729,11 +1310,7 @@ void DrawLine(const VertexData &v0, const VertexData &v1) ScreenCoords pprime = ScreenCoords((int)x, (int)y, (int)z); DrawingCoords p = TransformUnit::ScreenToDrawing(pprime); - if (pixelID.clearMode) { - DrawSinglePixel(p, z, fog, prim_color, pixelID); - } else { - DrawSinglePixel(p, z, fog, prim_color, pixelID); - } + drawPixel(p.x, p.y, z, fog, prim_color, pixelID); } x += xinc; diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h index b9d9ac59ff..0d7e2e91f4 100644 --- a/GPU/Software/Rasterizer.h +++ b/GPU/Software/Rasterizer.h @@ -35,7 +35,6 @@ bool GetCurrentTexture(GPUDebugBuffer &buffer, int level); // Shared functions with RasterizerRectangle.cpp Vec3 AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4 &source, const Vec4 &dst); -void DrawSinglePixelNonClear(const DrawingCoords &p, u16 z, u8 fog, const Vec4 &color_in, const PixelFuncID &pixelID); Vec4 GetTextureFunctionOutput(const Vec4& prim_color, const Vec4& texcolor); } // namespace Rasterizer diff --git a/GPU/Software/RasterizerRectangle.cpp b/GPU/Software/RasterizerRectangle.cpp index 47349791f1..2fb43d55c3 100644 --- a/GPU/Software/RasterizerRectangle.cpp +++ b/GPU/Software/RasterizerRectangle.cpp @@ -14,9 +14,10 @@ #include "GPU/GPUState.h" #include "GPU/Common/TextureCacheCommon.h" -#include "GPU/Software/SoftGpu.h" +#include "GPU/Software/DrawPixel.h" #include "GPU/Software/Rasterizer.h" #include "GPU/Software/Sampler.h" +#include "GPU/Software/SoftGpu.h" #if defined(_M_SSE) #include @@ -94,12 +95,13 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2(), 0); int z = pos0.z; - float fog = 1.0f; + int fog = 255; bool isWhite = v1.color0 == Vec4(255, 255, 255, 255); PixelFuncID pixelID; ComputePixelFuncID(&pixelID); + Rasterizer::SingleFunc drawPixel = Rasterizer::GetSingleFunc(pixelID); constexpr int MIN_LINES_PER_THREAD = 32; @@ -191,8 +193,7 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { Vec4 prim_color = v1.color0; Vec4 tex_color = Vec4::FromRGBA(nearestFunc(s, t, texptr, texbufw, 0)); prim_color = GetTextureFunctionOutput(prim_color, tex_color); - DrawingCoords pos(x, y, z); - DrawSinglePixelNonClear(pos, (u16)z, 1.0f, prim_color, pixelID); + drawPixel(x, y, z, 255, prim_color, pixelID); s += ds; } t += dt; @@ -236,8 +237,7 @@ void DrawSprite(const VertexData& v0, const VertexData& v1) { for (int y = y1; y < y2; y++) { for (int x = pos0.x; x < pos1.x; x++) { Vec4 prim_color = v1.color0; - DrawingCoords pos(x, y, z); - DrawSinglePixelNonClear(pos, (u16)z, fog, prim_color, pixelID); + drawPixel(x, y, z, fog, prim_color, pixelID); } } }, pos0.y, pos1.y, MIN_LINES_PER_THREAD); diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 16e7537171..c4e5a19511 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -36,6 +36,7 @@ #include "Common/Profiler/Profiler.h" #include "Common/GPU/thin3d.h" +#include "GPU/Software/DrawPixel.h" #include "GPU/Software/Rasterizer.h" #include "GPU/Software/Sampler.h" #include "GPU/Software/SoftGpu.h" @@ -66,6 +67,7 @@ SoftGPU::SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw) displayStride_ = 512; displayFormat_ = GE_FORMAT_8888; + Rasterizer::Init(); Sampler::Init(); drawEngine_ = new SoftwareDrawEngine(); drawEngine_->Init(); @@ -107,6 +109,7 @@ SoftGPU::~SoftGPU() { } Sampler::Shutdown(); + Rasterizer::Shutdown(); } void SoftGPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) { @@ -1008,5 +1011,9 @@ bool SoftGPU::DescribeCodePtr(const u8 *ptr, std::string &name) { name = "SamplerJit:" + subname; return true; } + if (Rasterizer::DescribeCodePtr(ptr, subname)) { + name = "RasterizerJit:" + subname; + return true; + } return false; } diff --git a/UWP/GPU_UWP/GPU_UWP.vcxproj b/UWP/GPU_UWP/GPU_UWP.vcxproj index 5ca11e0a5c..76458d8906 100644 --- a/UWP/GPU_UWP/GPU_UWP.vcxproj +++ b/UWP/GPU_UWP/GPU_UWP.vcxproj @@ -426,6 +426,7 @@ + @@ -485,6 +486,7 @@ + diff --git a/UWP/GPU_UWP/GPU_UWP.vcxproj.filters b/UWP/GPU_UWP/GPU_UWP.vcxproj.filters index 79a6701d04..591297863e 100644 --- a/UWP/GPU_UWP/GPU_UWP.vcxproj.filters +++ b/UWP/GPU_UWP/GPU_UWP.vcxproj.filters @@ -46,6 +46,7 @@ + @@ -103,6 +104,7 @@ + diff --git a/android/jni/Android.mk b/android/jni/Android.mk index ab190e8e1f..0e7cd944dd 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -358,6 +358,7 @@ EXEC_AND_LIB_FILES := \ $(SRC)/GPU/GLES/FragmentTestCacheGLES.cpp.arm \ $(SRC)/GPU/GLES/TextureScalerGLES.cpp \ $(SRC)/GPU/Software/Clipper.cpp \ + $(SRC)/GPU/Software/DrawPixel.cpp.arm \ $(SRC)/GPU/Software/FuncId.cpp \ $(SRC)/GPU/Software/Lighting.cpp \ $(SRC)/GPU/Software/Rasterizer.cpp.arm \ diff --git a/libretro/Makefile.common b/libretro/Makefile.common index 38467b6017..da86d6fece 100644 --- a/libretro/Makefile.common +++ b/libretro/Makefile.common @@ -348,6 +348,7 @@ SOURCES_CXX += \ $(GPUDIR)/GPUState.cpp \ $(GPUDIR)/Math3D.cpp \ $(GPUDIR)/Software/Clipper.cpp \ + $(GPUDIR)/Software/DrawPixel.cpp \ $(GPUDIR)/Software/FuncId.cpp \ $(GPUDIR)/Software/Lighting.cpp \ $(GPUDIR)/Software/Rasterizer.cpp \