From c47d7eab38e7dfc9ac610af9b5f2c4e4fb0504aa Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 24 Sep 2022 18:19:22 -0700 Subject: [PATCH] softgpu: Simplify 5551 blending fast path. Since it only supports multiply and add, let's just stick with that. --- GPU/Software/DrawPixel.cpp | 200 +++++++++++++++++++++++++++ GPU/Software/Rasterizer.cpp | 200 --------------------------- GPU/Software/Rasterizer.h | 3 - GPU/Software/RasterizerRectangle.cpp | 52 +++++-- 4 files changed, 244 insertions(+), 211 deletions(-) diff --git a/GPU/Software/DrawPixel.cpp b/GPU/Software/DrawPixel.cpp index 912af5c67a..f47ea6656a 100644 --- a/GPU/Software/DrawPixel.cpp +++ b/GPU/Software/DrawPixel.cpp @@ -400,6 +400,206 @@ static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) { return new_color; } +static inline Vec3 GetSourceFactor(PixelBlendFactor factor, const Vec4 &source, const Vec4 &dst, uint32_t fix) { + switch (factor) { + case PixelBlendFactor::OTHERCOLOR: + return dst.rgb(); + + case PixelBlendFactor::INVOTHERCOLOR: + return Vec3::AssignToAll(255) - dst.rgb(); + + case PixelBlendFactor::SRCALPHA: +#if defined(_M_SSE) + return Vec3(_mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3))); +#else + return Vec3::AssignToAll(source.a()); +#endif + + case PixelBlendFactor::INVSRCALPHA: +#if defined(_M_SSE) + return Vec3(_mm_sub_epi32(_mm_set1_epi32(255), _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3)))); +#else + return Vec3::AssignToAll(255 - source.a()); +#endif + + case PixelBlendFactor::DSTALPHA: + return Vec3::AssignToAll(dst.a()); + + case PixelBlendFactor::INVDSTALPHA: + return Vec3::AssignToAll(255 - dst.a()); + + case PixelBlendFactor::DOUBLESRCALPHA: + return Vec3::AssignToAll(2 * source.a()); + + case PixelBlendFactor::DOUBLEINVSRCALPHA: + return Vec3::AssignToAll(255 - std::min(2 * source.a(), 255)); + + case PixelBlendFactor::DOUBLEDSTALPHA: + return Vec3::AssignToAll(2 * dst.a()); + + case PixelBlendFactor::DOUBLEINVDSTALPHA: + 
return Vec3::AssignToAll(255 - std::min(2 * dst.a(), 255)); + + case PixelBlendFactor::FIX: + default: + // All other source factors (> 10) are treated as FIXA. + return Vec3::FromRGB(fix); + + case PixelBlendFactor::ZERO: + return Vec3::AssignToAll(0); + + case PixelBlendFactor::ONE: + return Vec3::AssignToAll(255); + } +} + +static inline Vec3 GetDestFactor(PixelBlendFactor factor, const Vec4 &source, const Vec4 &dst, uint32_t fix) { + switch (factor) { + case PixelBlendFactor::OTHERCOLOR: + return source.rgb(); + + case PixelBlendFactor::INVOTHERCOLOR: + return Vec3::AssignToAll(255) - source.rgb(); + + case PixelBlendFactor::SRCALPHA: +#if defined(_M_SSE) + return Vec3(_mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3))); +#else + return Vec3::AssignToAll(source.a()); +#endif + + case PixelBlendFactor::INVSRCALPHA: +#if defined(_M_SSE) + return Vec3(_mm_sub_epi32(_mm_set1_epi32(255), _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3)))); +#else + return Vec3::AssignToAll(255 - source.a()); +#endif + + case PixelBlendFactor::DSTALPHA: + return Vec3::AssignToAll(dst.a()); + + case PixelBlendFactor::INVDSTALPHA: + return Vec3::AssignToAll(255 - dst.a()); + + case PixelBlendFactor::DOUBLESRCALPHA: + return Vec3::AssignToAll(2 * source.a()); + + case PixelBlendFactor::DOUBLEINVSRCALPHA: + return Vec3::AssignToAll(255 - std::min(2 * source.a(), 255)); + + case PixelBlendFactor::DOUBLEDSTALPHA: + return Vec3::AssignToAll(2 * dst.a()); + + case PixelBlendFactor::DOUBLEINVDSTALPHA: + return Vec3::AssignToAll(255 - std::min(2 * dst.a(), 255)); + + case PixelBlendFactor::FIX: + default: + // All other dest factors (> 10) are treated as FIXB. + return Vec3::FromRGB(fix); + + case PixelBlendFactor::ZERO: + return Vec3::AssignToAll(0); + + case PixelBlendFactor::ONE: + return Vec3::AssignToAll(255); + } +} + +// Removed inline here - it was never chosen to be inlined by the compiler anyway, too complex. 
+static Vec3 AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4 &source, const Vec4 &dst) { + // Note: These factors cannot go below 0, but they can go above 255 when doubling. + Vec3 srcfactor = GetSourceFactor(pixelID.AlphaBlendSrc(), source, dst, pixelID.cached.alphaBlendSrc); + Vec3 dstfactor = GetDestFactor(pixelID.AlphaBlendDst(), source, dst, pixelID.cached.alphaBlendDst); + + switch (pixelID.AlphaBlendEq()) { + case GE_BLENDMODE_MUL_AND_ADD: + { +#if defined(_M_SSE) + // We switch to 16 bit to use mulhi, and we use 4 bits of decimal to make the 16 bit shift free. + const __m128i half = _mm_set1_epi16(1 << 3); + + const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half); + const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half); + const __m128i s = _mm_mulhi_epi16(srgb, sf); + + const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half); + const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half); + const __m128i d = _mm_mulhi_epi16(drgb, df); + + return Vec3(_mm_unpacklo_epi16(_mm_adds_epi16(s, d), _mm_setzero_si128())); +#else + static constexpr Vec3 half = Vec3::AssignToAll(1); + Vec3 lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024; + Vec3 rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024; + return lhs + rhs; +#endif + } + + case GE_BLENDMODE_MUL_AND_SUBTRACT: + { +#if defined(_M_SSE) + const __m128i half = _mm_set1_epi16(1 << 3); + + const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half); + const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half); + const __m128i s = _mm_mulhi_epi16(srgb, sf); + + const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half); + const __m128i df = 
_mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half); + const __m128i d = _mm_mulhi_epi16(drgb, df); + + return Vec3(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(s, d), _mm_setzero_si128()), _mm_setzero_si128())); +#else + static constexpr Vec3 half = Vec3::AssignToAll(1); + Vec3 lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024; + Vec3 rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024; + return lhs - rhs; +#endif + } + + case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE: + { +#if defined(_M_SSE) + const __m128i half = _mm_set1_epi16(1 << 3); + + const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half); + const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half); + const __m128i s = _mm_mulhi_epi16(srgb, sf); + + const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half); + const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half); + const __m128i d = _mm_mulhi_epi16(drgb, df); + + return Vec3(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(d, s), _mm_setzero_si128()), _mm_setzero_si128())); +#else + static constexpr Vec3 half = Vec3::AssignToAll(1); + Vec3 lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024; + Vec3 rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024; + return rhs - lhs; +#endif + } + + case GE_BLENDMODE_MIN: + return Vec3(std::min(source.r(), dst.r()), + std::min(source.g(), dst.g()), + std::min(source.b(), dst.b())); + + case GE_BLENDMODE_MAX: + return Vec3(std::max(source.r(), dst.r()), + std::max(source.g(), dst.g()), + std::max(source.b(), dst.b())); + + case GE_BLENDMODE_ABSDIFF: + return Vec3(::abs(source.r() - dst.r()), + ::abs(source.g() - dst.g()), + ::abs(source.b() - dst.b())); + + default: + return source.rgb(); + } +} + template void SOFTRAST_CALL DrawSinglePixel(int x, 
int y, int z, int fog, Vec4IntArg color_in, const PixelFuncID &pixelID) { Vec4 prim_color = Vec4(color_in).Clamp(0, 255); diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index c4eca9f93a..c1084f31d5 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -270,206 +270,6 @@ static inline bool IsRightSideOrFlatBottomLine(const Vec2& vertex, const Ve } } -static inline Vec3 GetSourceFactor(PixelBlendFactor factor, const Vec4 &source, const Vec4 &dst, uint32_t fix) { - switch (factor) { - case PixelBlendFactor::OTHERCOLOR: - return dst.rgb(); - - case PixelBlendFactor::INVOTHERCOLOR: - return Vec3::AssignToAll(255) - dst.rgb(); - - case PixelBlendFactor::SRCALPHA: -#if defined(_M_SSE) - return Vec3(_mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3))); -#else - return Vec3::AssignToAll(source.a()); -#endif - - case PixelBlendFactor::INVSRCALPHA: -#if defined(_M_SSE) - return Vec3(_mm_sub_epi32(_mm_set1_epi32(255), _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3)))); -#else - return Vec3::AssignToAll(255 - source.a()); -#endif - - case PixelBlendFactor::DSTALPHA: - return Vec3::AssignToAll(dst.a()); - - case PixelBlendFactor::INVDSTALPHA: - return Vec3::AssignToAll(255 - dst.a()); - - case PixelBlendFactor::DOUBLESRCALPHA: - return Vec3::AssignToAll(2 * source.a()); - - case PixelBlendFactor::DOUBLEINVSRCALPHA: - return Vec3::AssignToAll(255 - std::min(2 * source.a(), 255)); - - case PixelBlendFactor::DOUBLEDSTALPHA: - return Vec3::AssignToAll(2 * dst.a()); - - case PixelBlendFactor::DOUBLEINVDSTALPHA: - return Vec3::AssignToAll(255 - std::min(2 * dst.a(), 255)); - - case PixelBlendFactor::FIX: - default: - // All other dest factors (> 10) are treated as FIXA. 
- return Vec3::FromRGB(fix); - - case PixelBlendFactor::ZERO: - return Vec3::AssignToAll(0); - - case PixelBlendFactor::ONE: - return Vec3::AssignToAll(255); - } -} - -static inline Vec3 GetDestFactor(PixelBlendFactor factor, const Vec4 &source, const Vec4 &dst, uint32_t fix) { - switch (factor) { - case PixelBlendFactor::OTHERCOLOR: - return source.rgb(); - - case PixelBlendFactor::INVOTHERCOLOR: - return Vec3::AssignToAll(255) - source.rgb(); - - case PixelBlendFactor::SRCALPHA: -#if defined(_M_SSE) - return Vec3(_mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3))); -#else - return Vec3::AssignToAll(source.a()); -#endif - - case PixelBlendFactor::INVSRCALPHA: -#if defined(_M_SSE) - return Vec3(_mm_sub_epi32(_mm_set1_epi32(255), _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3)))); -#else - return Vec3::AssignToAll(255 - source.a()); -#endif - - case PixelBlendFactor::DSTALPHA: - return Vec3::AssignToAll(dst.a()); - - case PixelBlendFactor::INVDSTALPHA: - return Vec3::AssignToAll(255 - dst.a()); - - case PixelBlendFactor::DOUBLESRCALPHA: - return Vec3::AssignToAll(2 * source.a()); - - case PixelBlendFactor::DOUBLEINVSRCALPHA: - return Vec3::AssignToAll(255 - std::min(2 * source.a(), 255)); - - case PixelBlendFactor::DOUBLEDSTALPHA: - return Vec3::AssignToAll(2 * dst.a()); - - case PixelBlendFactor::DOUBLEINVDSTALPHA: - return Vec3::AssignToAll(255 - std::min(2 * dst.a(), 255)); - - case PixelBlendFactor::FIX: - default: - // All other dest factors (> 10) are treated as FIXB. - return Vec3::FromRGB(fix); - - case PixelBlendFactor::ZERO: - return Vec3::AssignToAll(0); - - case PixelBlendFactor::ONE: - return Vec3::AssignToAll(255); - } -} - -// Removed inline here - it was never chosen to be inlined by the compiler anyway, too complex. -Vec3 AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4 &source, const Vec4 &dst) { - // Note: These factors cannot go below 0, but they can go above 255 when doubling. 
- Vec3 srcfactor = GetSourceFactor(pixelID.AlphaBlendSrc(), source, dst, pixelID.cached.alphaBlendSrc); - Vec3 dstfactor = GetDestFactor(pixelID.AlphaBlendDst(), source, dst, pixelID.cached.alphaBlendDst); - - switch (pixelID.AlphaBlendEq()) { - case GE_BLENDMODE_MUL_AND_ADD: - { -#if defined(_M_SSE) - // We switch to 16 bit to use mulhi, and we use 4 bits of decimal to make the 16 bit shift free. - const __m128i half = _mm_set1_epi16(1 << 3); - - const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half); - const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half); - const __m128i s = _mm_mulhi_epi16(srgb, sf); - - const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half); - const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half); - const __m128i d = _mm_mulhi_epi16(drgb, df); - - return Vec3(_mm_unpacklo_epi16(_mm_adds_epi16(s, d), _mm_setzero_si128())); -#else - static constexpr Vec3 half = Vec3::AssignToAll(1); - Vec3 lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024; - Vec3 rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024; - return lhs + rhs; -#endif - } - - case GE_BLENDMODE_MUL_AND_SUBTRACT: - { -#if defined(_M_SSE) - const __m128i half = _mm_set1_epi16(1 << 3); - - const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half); - const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half); - const __m128i s = _mm_mulhi_epi16(srgb, sf); - - const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half); - const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half); - const __m128i d = _mm_mulhi_epi16(drgb, df); - - return Vec3(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(s, d), 
_mm_setzero_si128()), _mm_setzero_si128())); -#else - static constexpr Vec3 half = Vec3::AssignToAll(1); - Vec3 lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024; - Vec3 rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024; - return lhs - rhs; -#endif - } - - case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE: - { -#if defined(_M_SSE) - const __m128i half = _mm_set1_epi16(1 << 3); - - const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half); - const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half); - const __m128i s = _mm_mulhi_epi16(srgb, sf); - - const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half); - const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half); - const __m128i d = _mm_mulhi_epi16(drgb, df); - - return Vec3(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(d, s), _mm_setzero_si128()), _mm_setzero_si128())); -#else - static constexpr Vec3 half = Vec3::AssignToAll(1); - Vec3 lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024; - Vec3 rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024; - return rhs - lhs; -#endif - } - - case GE_BLENDMODE_MIN: - return Vec3(std::min(source.r(), dst.r()), - std::min(source.g(), dst.g()), - std::min(source.b(), dst.b())); - - case GE_BLENDMODE_MAX: - return Vec3(std::max(source.r(), dst.r()), - std::max(source.g(), dst.g()), - std::max(source.b(), dst.b())); - - case GE_BLENDMODE_ABSDIFF: - return Vec3(::abs(source.r() - dst.r()), - ::abs(source.g() - dst.g()), - ::abs(source.b() - dst.b())); - - default: - return source.rgb(); - } -} - static inline Vec4IntResult SOFTRAST_CALL ApplyTexturing(float s, float t, int x, int y, Vec4IntArg prim_color, int texlevel, int frac_texlevel, bool bilinear, const RasterizerState &state) { const u8 **tptr0 = const_cast(&state.texptr[texlevel]); const 
uint16_t *bufw0 = &state.texbufw[texlevel]; diff --git a/GPU/Software/Rasterizer.h b/GPU/Software/Rasterizer.h index 93e89f9596..6c350e222b 100644 --- a/GPU/Software/Rasterizer.h +++ b/GPU/Software/Rasterizer.h @@ -76,7 +76,4 @@ void ClearRectangle(const VertexData &v0, const VertexData &v1, const BinCoords bool GetCurrentTexture(GPUDebugBuffer &buffer, int level); -// Shared functions with RasterizerRectangle.cpp -Vec3 AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4 &source, const Vec4 &dst); - } // namespace Rasterizer diff --git a/GPU/Software/RasterizerRectangle.cpp b/GPU/Software/RasterizerRectangle.cpp index 0322ae185f..f312addea1 100644 --- a/GPU/Software/RasterizerRectangle.cpp +++ b/GPU/Software/RasterizerRectangle.cpp @@ -32,18 +32,54 @@ extern bool currentDialogActive; namespace Rasterizer { +// This is essentially AlphaBlendingResult() with fixed src.a / 1 - src.a factors and ADD equation. +// It allows us to skip round trips between 32-bit and 16-bit color values. +static uint32_t StandardAlphaBlend(uint32_t source, uint32_t dst) { +#if defined(_M_SSE) + const __m128i alpha = _mm_cvtsi32_si128(source >> 24); + // Keep the alpha lane of the srcfactor zero, so we keep dest alpha. + const __m128i srcfactor = _mm_shufflelo_epi16(alpha, _MM_SHUFFLE(1, 0, 0, 0)); + const __m128i dstfactor = _mm_sub_epi16(_mm_set1_epi16(255), srcfactor); + + const __m128i z = _mm_setzero_si128(); + const __m128i sourcevec = _mm_unpacklo_epi8(_mm_cvtsi32_si128(source), z); + const __m128i dstvec = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dst), z); + + // We switch to 16 bit to use mulhi, and we use 4 bits of decimal to make the 16 bit shift free. 
+ const __m128i half = _mm_set1_epi16(1 << 3); + + const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(sourcevec, 4), half); + const __m128i sf = _mm_add_epi16(_mm_slli_epi16(srcfactor, 4), half); + const __m128i s = _mm_mulhi_epi16(srgb, sf); + + const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(dstvec, 4), half); + const __m128i df = _mm_add_epi16(_mm_slli_epi16(dstfactor, 4), half); + const __m128i d = _mm_mulhi_epi16(drgb, df); + + const __m128i blended16 = _mm_adds_epi16(s, d); + return _mm_cvtsi128_si32(_mm_packus_epi16(blended16, blended16)); +#else + Vec3 srcfactor = Vec3::AssignToAll(source >> 24); + Vec3 dstfactor = Vec3::AssignToAll(255 - (source >> 24)); + + static constexpr Vec3 half = Vec3::AssignToAll(1); + Vec3 lhs = ((Vec3::FromRGB(source) * 2 + half) * (srcfactor * 2 + half)) / 1024; + Vec3 rhs = ((Vec3::FromRGB(dst) * 2 + half) * (dstfactor * 2 + half)) / 1024; + Vec3 blended = lhs + rhs; + + return clamp_u8(blended.r()) | (clamp_u8(blended.g()) << 8) | (clamp_u8(blended.b()) << 16); +#endif +} + // Through mode, with the specific Darkstalker settings. -inline void DrawSinglePixel5551(u16 *pixel, const u32 color_in, const PixelFuncID &pixelID) { +inline void DrawSinglePixel5551(u16 *pixel, const u32 color_in) { u32 new_color; // Because of this check, we only support src.a / 1-src.a blending. if ((color_in >> 24) == 255) { new_color = color_in & 0xFFFFFF; } else { const u32 old_color = RGBA5551ToRGBA8888(*pixel); - const Vec4 dst = Vec4::FromRGBA(old_color); - Vec3 blended = AlphaBlendingResult(pixelID, Vec4::FromRGBA(color_in), dst); - // ToRGB() always automatically clamps. - new_color = blended.ToRGB(); + new_color = StandardAlphaBlend(color_in, old_color); } new_color |= (*pixel & 0x8000) ? 
0xff000000 : 0x00000000; *pixel = RGBA8888ToRGBA5551(new_color); @@ -182,7 +218,7 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran for (int x = pos0.x; x < pos1.x; x++) { u32 tex_color = Vec4(fetchFunc(s, t, texptr, texbufw, 0, state.samplerID)).ToRGBA(); if (tex_color & 0xFF000000) { - DrawSinglePixel5551(pixel, tex_color, pixelID); + DrawSinglePixel5551(pixel, tex_color); } s += ds; pixel++; @@ -200,7 +236,7 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran Vec4 tex_color = fetchFunc(s, t, texptr, texbufw, 0, state.samplerID); prim_color = Vec4(ModulateRGBA(ToVec4IntArg(prim_color), ToVec4IntArg(tex_color), state.samplerID)); if (prim_color.a() > 0) { - DrawSinglePixel5551(pixel, prim_color.ToRGBA(), pixelID); + DrawSinglePixel5551(pixel, prim_color.ToRGBA()); } s += ds; pixel++; @@ -258,7 +294,7 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran for (int y = pos0.y; y < pos1.y; y++) { u16 *pixel = fb.Get16Ptr(pos0.x, y, pixelID.cached.framebufStride); for (int x = pos0.x; x < pos1.x; x++) { - DrawSinglePixel5551(pixel, v1.color0, pixelID); + DrawSinglePixel5551(pixel, v1.color0); pixel++; } }