softgpu: Simplify 5551 blending fast path.

Since it only supports multiply and add, let's just stick with that.
This commit is contained in:
Unknown W. Brackets 2022-09-24 18:19:22 -07:00
parent 1eeb4f0bcf
commit c47d7eab38
4 changed files with 244 additions and 211 deletions

View file

@ -400,6 +400,206 @@ static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) {
return new_color;
}
// Resolves the blend factor that the source color gets multiplied by.
//
// factor selects the mode, source is the incoming color, dst is the color already in the
// framebuffer, and fix is the fixed color (decoded with Vec3<int>::FromRGB) used by FIX.
// The doubled alpha modes are not clamped, so the returned components may exceed 255.
static inline Vec3<int> GetSourceFactor(PixelBlendFactor factor, const Vec4<int> &source, const Vec4<int> &dst, uint32_t fix) {
	switch (factor) {
	case PixelBlendFactor::ZERO:
		return Vec3<int>::AssignToAll(0);

	case PixelBlendFactor::ONE:
		return Vec3<int>::AssignToAll(255);

	case PixelBlendFactor::OTHERCOLOR:
		// For the source factor, the "other" color is the destination.
		return dst.rgb();

	case PixelBlendFactor::INVOTHERCOLOR:
		return Vec3<int>::AssignToAll(255) - dst.rgb();

	case PixelBlendFactor::SRCALPHA:
	{
#if defined(_M_SSE)
		// Broadcast the alpha lane into every lane.
		const __m128i alpha = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(alpha);
#else
		const int alpha = source.a();
		return Vec3<int>::AssignToAll(alpha);
#endif
	}

	case PixelBlendFactor::INVSRCALPHA:
	{
#if defined(_M_SSE)
		const __m128i alpha = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		const __m128i inverted = _mm_sub_epi32(_mm_set1_epi32(255), alpha);
		return Vec3<int>(inverted);
#else
		const int inverted = 255 - source.a();
		return Vec3<int>::AssignToAll(inverted);
#endif
	}

	case PixelBlendFactor::DSTALPHA:
	{
		const int alpha = dst.a();
		return Vec3<int>::AssignToAll(alpha);
	}

	case PixelBlendFactor::INVDSTALPHA:
	{
		const int inverted = 255 - dst.a();
		return Vec3<int>::AssignToAll(inverted);
	}

	case PixelBlendFactor::DOUBLESRCALPHA:
	{
		// Deliberately left unclamped.
		const int doubled = 2 * source.a();
		return Vec3<int>::AssignToAll(doubled);
	}

	case PixelBlendFactor::DOUBLEINVSRCALPHA:
	{
		// The doubled alpha is clamped before inverting, so this never goes negative.
		const int doubled = std::min(2 * source.a(), 255);
		return Vec3<int>::AssignToAll(255 - doubled);
	}

	case PixelBlendFactor::DOUBLEDSTALPHA:
	{
		const int doubled = 2 * dst.a();
		return Vec3<int>::AssignToAll(doubled);
	}

	case PixelBlendFactor::DOUBLEINVDSTALPHA:
	{
		const int doubled = std::min(2 * dst.a(), 255);
		return Vec3<int>::AssignToAll(255 - doubled);
	}

	case PixelBlendFactor::FIX:
	default:
		// Any factor without an explicit case above also resolves to the fixed color.
		return Vec3<int>::FromRGB(fix);
	}
}
// Resolves the blend factor that the destination color gets multiplied by.
//
// factor selects the mode, source is the incoming color, dst is the color already in the
// framebuffer, and fix is the fixed color (decoded with Vec3<int>::FromRGB) used by FIX.
// The doubled alpha modes are not clamped, so the returned components may exceed 255.
static inline Vec3<int> GetDestFactor(PixelBlendFactor factor, const Vec4<int> &source, const Vec4<int> &dst, uint32_t fix) {
	switch (factor) {
	case PixelBlendFactor::ZERO:
		return Vec3<int>::AssignToAll(0);

	case PixelBlendFactor::ONE:
		return Vec3<int>::AssignToAll(255);

	case PixelBlendFactor::OTHERCOLOR:
		// For the destination factor, the "other" color is the source.
		return source.rgb();

	case PixelBlendFactor::INVOTHERCOLOR:
		return Vec3<int>::AssignToAll(255) - source.rgb();

	case PixelBlendFactor::SRCALPHA:
	{
#if defined(_M_SSE)
		// Broadcast the alpha lane into every lane.
		const __m128i alpha = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(alpha);
#else
		const int alpha = source.a();
		return Vec3<int>::AssignToAll(alpha);
#endif
	}

	case PixelBlendFactor::INVSRCALPHA:
	{
#if defined(_M_SSE)
		const __m128i alpha = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		const __m128i inverted = _mm_sub_epi32(_mm_set1_epi32(255), alpha);
		return Vec3<int>(inverted);
#else
		const int inverted = 255 - source.a();
		return Vec3<int>::AssignToAll(inverted);
#endif
	}

	case PixelBlendFactor::DSTALPHA:
	{
		const int alpha = dst.a();
		return Vec3<int>::AssignToAll(alpha);
	}

	case PixelBlendFactor::INVDSTALPHA:
	{
		const int inverted = 255 - dst.a();
		return Vec3<int>::AssignToAll(inverted);
	}

	case PixelBlendFactor::DOUBLESRCALPHA:
	{
		// Deliberately left unclamped.
		const int doubled = 2 * source.a();
		return Vec3<int>::AssignToAll(doubled);
	}

	case PixelBlendFactor::DOUBLEINVSRCALPHA:
	{
		// The doubled alpha is clamped before inverting, so this never goes negative.
		const int doubled = std::min(2 * source.a(), 255);
		return Vec3<int>::AssignToAll(255 - doubled);
	}

	case PixelBlendFactor::DOUBLEDSTALPHA:
	{
		const int doubled = 2 * dst.a();
		return Vec3<int>::AssignToAll(doubled);
	}

	case PixelBlendFactor::DOUBLEINVDSTALPHA:
	{
		const int doubled = std::min(2 * dst.a(), 255);
		return Vec3<int>::AssignToAll(255 - doubled);
	}

	case PixelBlendFactor::FIX:
	default:
		// Any factor without an explicit case above also resolves to the fixed color.
		return Vec3<int>::FromRGB(fix);
	}
}
#if defined(_M_SSE)
// Computes one "color * factor" blend term for the MUL_AND_* equations (SSE path).
// Both arguments hold 32-bit lanes.  We switch to 16 bit to use mulhi, and we use 4 bits of
// decimal to make the 16 bit shift free.  The result holds 16-bit lanes.
static inline __m128i BlendFactorProductSSE(__m128i color, __m128i factor) {
	const __m128i half = _mm_set1_epi16(1 << 3);
	const __m128i c = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(color, color), 4), half);
	const __m128i f = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(factor, factor), 4), half);
	return _mm_mulhi_epi16(c, f);
}
#else
// Computes one "color * factor" blend term for the MUL_AND_* equations (scalar path).
// Each operand gets one extra bit of precision with a half added, then the product is scaled back.
static inline Vec3<int> BlendFactorProduct(Vec3<int> color, Vec3<int> factor) {
	static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);
	return ((color * 2 + half) * (factor * 2 + half)) / 1024;
}
#endif

// Blends source onto dst using the equation and factors selected by pixelID, returning RGB only.
//
// The MUL_AND_* equations combine source * srcfactor and dst * dstfactor; MIN, MAX and ABSDIFF
// work on the raw colors and ignore the factors.  Any other equation returns the source color.
// In the scalar path the subtract equations may return negative components (the SSE path clamps
// them at zero), and ADD may exceed 255, so the result should be clamped before it is stored.
//
// Removed inline here - it was never chosen to be inlined by the compiler anyway, too complex.
static Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &source, const Vec4<int> &dst) {
	// Note: These factors cannot go below 0, but they can go above 255 when doubling.
	Vec3<int> srcfactor = GetSourceFactor(pixelID.AlphaBlendSrc(), source, dst, pixelID.cached.alphaBlendSrc);
	Vec3<int> dstfactor = GetDestFactor(pixelID.AlphaBlendDst(), source, dst, pixelID.cached.alphaBlendDst);

	switch (pixelID.AlphaBlendEq()) {
	case GE_BLENDMODE_MUL_AND_ADD:
	{
#if defined(_M_SSE)
		const __m128i s = BlendFactorProductSSE(source.ivec, srcfactor.ivec);
		const __m128i d = BlendFactorProductSSE(dst.ivec, dstfactor.ivec);
		// Saturating add, then widen the low four 16-bit lanes back to 32 bit.
		return Vec3<int>(_mm_unpacklo_epi16(_mm_adds_epi16(s, d), _mm_setzero_si128()));
#else
		Vec3<int> lhs = BlendFactorProduct(source.rgb(), srcfactor);
		Vec3<int> rhs = BlendFactorProduct(dst.rgb(), dstfactor);
		return lhs + rhs;
#endif
	}

	case GE_BLENDMODE_MUL_AND_SUBTRACT:
	{
#if defined(_M_SSE)
		const __m128i s = BlendFactorProductSSE(source.ivec, srcfactor.ivec);
		const __m128i d = BlendFactorProductSSE(dst.ivec, dstfactor.ivec);
		// Clamp at zero before widening, since the unpack zero-extends.
		return Vec3<int>(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(s, d), _mm_setzero_si128()), _mm_setzero_si128()));
#else
		Vec3<int> lhs = BlendFactorProduct(source.rgb(), srcfactor);
		Vec3<int> rhs = BlendFactorProduct(dst.rgb(), dstfactor);
		return lhs - rhs;
#endif
	}

	case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
	{
#if defined(_M_SSE)
		const __m128i s = BlendFactorProductSSE(source.ivec, srcfactor.ivec);
		const __m128i d = BlendFactorProductSSE(dst.ivec, dstfactor.ivec);
		// Clamp at zero before widening, since the unpack zero-extends.
		return Vec3<int>(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(d, s), _mm_setzero_si128()), _mm_setzero_si128()));
#else
		Vec3<int> lhs = BlendFactorProduct(source.rgb(), srcfactor);
		Vec3<int> rhs = BlendFactorProduct(dst.rgb(), dstfactor);
		return rhs - lhs;
#endif
	}

	case GE_BLENDMODE_MIN:
		return Vec3<int>(std::min(source.r(), dst.r()),
			std::min(source.g(), dst.g()),
			std::min(source.b(), dst.b()));

	case GE_BLENDMODE_MAX:
		return Vec3<int>(std::max(source.r(), dst.r()),
			std::max(source.g(), dst.g()),
			std::max(source.b(), dst.b()));

	case GE_BLENDMODE_ABSDIFF:
		return Vec3<int>(::abs(source.r() - dst.r()),
			::abs(source.g() - dst.g()),
			::abs(source.b() - dst.b()));

	default:
		return source.rgb();
	}
}
template <bool clearMode, GEBufferFormat fbFormat>
void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg color_in, const PixelFuncID &pixelID) {
Vec4<int> prim_color = Vec4<int>(color_in).Clamp(0, 255);

View file

@ -270,206 +270,6 @@ static inline bool IsRightSideOrFlatBottomLine(const Vec2<int>& vertex, const Ve
}
}
// Resolves the blend factor that the source color gets multiplied by.
//
// factor selects the mode, source is the incoming color, dst is the color already in the
// framebuffer, and fix is the fixed color (decoded with Vec3<int>::FromRGB) used by FIX.
// The doubled alpha modes are not clamped, so the returned components may exceed 255.
static inline Vec3<int> GetSourceFactor(PixelBlendFactor factor, const Vec4<int> &source, const Vec4<int> &dst, uint32_t fix) {
	switch (factor) {
	case PixelBlendFactor::ZERO:
		return Vec3<int>::AssignToAll(0);

	case PixelBlendFactor::ONE:
		return Vec3<int>::AssignToAll(255);

	case PixelBlendFactor::OTHERCOLOR:
		// For the source factor, the "other" color is the destination.
		return dst.rgb();

	case PixelBlendFactor::INVOTHERCOLOR:
		return Vec3<int>::AssignToAll(255) - dst.rgb();

	case PixelBlendFactor::SRCALPHA:
	{
#if defined(_M_SSE)
		// Broadcast the alpha lane into every lane.
		const __m128i alpha = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(alpha);
#else
		const int alpha = source.a();
		return Vec3<int>::AssignToAll(alpha);
#endif
	}

	case PixelBlendFactor::INVSRCALPHA:
	{
#if defined(_M_SSE)
		const __m128i alpha = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		const __m128i inverted = _mm_sub_epi32(_mm_set1_epi32(255), alpha);
		return Vec3<int>(inverted);
#else
		const int inverted = 255 - source.a();
		return Vec3<int>::AssignToAll(inverted);
#endif
	}

	case PixelBlendFactor::DSTALPHA:
	{
		const int alpha = dst.a();
		return Vec3<int>::AssignToAll(alpha);
	}

	case PixelBlendFactor::INVDSTALPHA:
	{
		const int inverted = 255 - dst.a();
		return Vec3<int>::AssignToAll(inverted);
	}

	case PixelBlendFactor::DOUBLESRCALPHA:
	{
		// Deliberately left unclamped.
		const int doubled = 2 * source.a();
		return Vec3<int>::AssignToAll(doubled);
	}

	case PixelBlendFactor::DOUBLEINVSRCALPHA:
	{
		// The doubled alpha is clamped before inverting, so this never goes negative.
		const int doubled = std::min(2 * source.a(), 255);
		return Vec3<int>::AssignToAll(255 - doubled);
	}

	case PixelBlendFactor::DOUBLEDSTALPHA:
	{
		const int doubled = 2 * dst.a();
		return Vec3<int>::AssignToAll(doubled);
	}

	case PixelBlendFactor::DOUBLEINVDSTALPHA:
	{
		const int doubled = std::min(2 * dst.a(), 255);
		return Vec3<int>::AssignToAll(255 - doubled);
	}

	case PixelBlendFactor::FIX:
	default:
		// Any factor without an explicit case above also resolves to the fixed color.
		return Vec3<int>::FromRGB(fix);
	}
}
// Resolves the blend factor that the destination color gets multiplied by.
//
// factor selects the mode, source is the incoming color, dst is the color already in the
// framebuffer, and fix is the fixed color (decoded with Vec3<int>::FromRGB) used by FIX.
// The doubled alpha modes are not clamped, so the returned components may exceed 255.
static inline Vec3<int> GetDestFactor(PixelBlendFactor factor, const Vec4<int> &source, const Vec4<int> &dst, uint32_t fix) {
	switch (factor) {
	case PixelBlendFactor::ZERO:
		return Vec3<int>::AssignToAll(0);

	case PixelBlendFactor::ONE:
		return Vec3<int>::AssignToAll(255);

	case PixelBlendFactor::OTHERCOLOR:
		// For the destination factor, the "other" color is the source.
		return source.rgb();

	case PixelBlendFactor::INVOTHERCOLOR:
		return Vec3<int>::AssignToAll(255) - source.rgb();

	case PixelBlendFactor::SRCALPHA:
	{
#if defined(_M_SSE)
		// Broadcast the alpha lane into every lane.
		const __m128i alpha = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(alpha);
#else
		const int alpha = source.a();
		return Vec3<int>::AssignToAll(alpha);
#endif
	}

	case PixelBlendFactor::INVSRCALPHA:
	{
#if defined(_M_SSE)
		const __m128i alpha = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		const __m128i inverted = _mm_sub_epi32(_mm_set1_epi32(255), alpha);
		return Vec3<int>(inverted);
#else
		const int inverted = 255 - source.a();
		return Vec3<int>::AssignToAll(inverted);
#endif
	}

	case PixelBlendFactor::DSTALPHA:
	{
		const int alpha = dst.a();
		return Vec3<int>::AssignToAll(alpha);
	}

	case PixelBlendFactor::INVDSTALPHA:
	{
		const int inverted = 255 - dst.a();
		return Vec3<int>::AssignToAll(inverted);
	}

	case PixelBlendFactor::DOUBLESRCALPHA:
	{
		// Deliberately left unclamped.
		const int doubled = 2 * source.a();
		return Vec3<int>::AssignToAll(doubled);
	}

	case PixelBlendFactor::DOUBLEINVSRCALPHA:
	{
		// The doubled alpha is clamped before inverting, so this never goes negative.
		const int doubled = std::min(2 * source.a(), 255);
		return Vec3<int>::AssignToAll(255 - doubled);
	}

	case PixelBlendFactor::DOUBLEDSTALPHA:
	{
		const int doubled = 2 * dst.a();
		return Vec3<int>::AssignToAll(doubled);
	}

	case PixelBlendFactor::DOUBLEINVDSTALPHA:
	{
		const int doubled = std::min(2 * dst.a(), 255);
		return Vec3<int>::AssignToAll(255 - doubled);
	}

	case PixelBlendFactor::FIX:
	default:
		// Any factor without an explicit case above also resolves to the fixed color.
		return Vec3<int>::FromRGB(fix);
	}
}
#if defined(_M_SSE)
// Computes one "color * factor" blend term for the MUL_AND_* equations (SSE path).
// Both arguments hold 32-bit lanes.  We switch to 16 bit to use mulhi, and we use 4 bits of
// decimal to make the 16 bit shift free.  The result holds 16-bit lanes.
static inline __m128i BlendFactorProductSSE(__m128i color, __m128i factor) {
	const __m128i half = _mm_set1_epi16(1 << 3);
	const __m128i c = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(color, color), 4), half);
	const __m128i f = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(factor, factor), 4), half);
	return _mm_mulhi_epi16(c, f);
}
#else
// Computes one "color * factor" blend term for the MUL_AND_* equations (scalar path).
// Each operand gets one extra bit of precision with a half added, then the product is scaled back.
static inline Vec3<int> BlendFactorProduct(Vec3<int> color, Vec3<int> factor) {
	static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);
	return ((color * 2 + half) * (factor * 2 + half)) / 1024;
}
#endif

// Blends source onto dst using the equation and factors selected by pixelID, returning RGB only.
//
// The MUL_AND_* equations combine source * srcfactor and dst * dstfactor; MIN, MAX and ABSDIFF
// work on the raw colors and ignore the factors.  Any other equation returns the source color.
// In the scalar path the subtract equations may return negative components (the SSE path clamps
// them at zero), and ADD may exceed 255, so the result should be clamped before it is stored.
//
// Removed inline here - it was never chosen to be inlined by the compiler anyway, too complex.
Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &source, const Vec4<int> &dst) {
	// Note: These factors cannot go below 0, but they can go above 255 when doubling.
	Vec3<int> srcfactor = GetSourceFactor(pixelID.AlphaBlendSrc(), source, dst, pixelID.cached.alphaBlendSrc);
	Vec3<int> dstfactor = GetDestFactor(pixelID.AlphaBlendDst(), source, dst, pixelID.cached.alphaBlendDst);

	switch (pixelID.AlphaBlendEq()) {
	case GE_BLENDMODE_MUL_AND_ADD:
	{
#if defined(_M_SSE)
		const __m128i s = BlendFactorProductSSE(source.ivec, srcfactor.ivec);
		const __m128i d = BlendFactorProductSSE(dst.ivec, dstfactor.ivec);
		// Saturating add, then widen the low four 16-bit lanes back to 32 bit.
		return Vec3<int>(_mm_unpacklo_epi16(_mm_adds_epi16(s, d), _mm_setzero_si128()));
#else
		Vec3<int> lhs = BlendFactorProduct(source.rgb(), srcfactor);
		Vec3<int> rhs = BlendFactorProduct(dst.rgb(), dstfactor);
		return lhs + rhs;
#endif
	}

	case GE_BLENDMODE_MUL_AND_SUBTRACT:
	{
#if defined(_M_SSE)
		const __m128i s = BlendFactorProductSSE(source.ivec, srcfactor.ivec);
		const __m128i d = BlendFactorProductSSE(dst.ivec, dstfactor.ivec);
		// Clamp at zero before widening, since the unpack zero-extends.
		return Vec3<int>(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(s, d), _mm_setzero_si128()), _mm_setzero_si128()));
#else
		Vec3<int> lhs = BlendFactorProduct(source.rgb(), srcfactor);
		Vec3<int> rhs = BlendFactorProduct(dst.rgb(), dstfactor);
		return lhs - rhs;
#endif
	}

	case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
	{
#if defined(_M_SSE)
		const __m128i s = BlendFactorProductSSE(source.ivec, srcfactor.ivec);
		const __m128i d = BlendFactorProductSSE(dst.ivec, dstfactor.ivec);
		// Clamp at zero before widening, since the unpack zero-extends.
		return Vec3<int>(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(d, s), _mm_setzero_si128()), _mm_setzero_si128()));
#else
		Vec3<int> lhs = BlendFactorProduct(source.rgb(), srcfactor);
		Vec3<int> rhs = BlendFactorProduct(dst.rgb(), dstfactor);
		return rhs - lhs;
#endif
	}

	case GE_BLENDMODE_MIN:
		return Vec3<int>(std::min(source.r(), dst.r()),
			std::min(source.g(), dst.g()),
			std::min(source.b(), dst.b()));

	case GE_BLENDMODE_MAX:
		return Vec3<int>(std::max(source.r(), dst.r()),
			std::max(source.g(), dst.g()),
			std::max(source.b(), dst.b()));

	case GE_BLENDMODE_ABSDIFF:
		return Vec3<int>(::abs(source.r() - dst.r()),
			::abs(source.g() - dst.g()),
			::abs(source.b() - dst.b()));

	default:
		return source.rgb();
	}
}
static inline Vec4IntResult SOFTRAST_CALL ApplyTexturing(float s, float t, int x, int y, Vec4IntArg prim_color, int texlevel, int frac_texlevel, bool bilinear, const RasterizerState &state) {
const u8 **tptr0 = const_cast<const u8 **>(&state.texptr[texlevel]);
const uint16_t *bufw0 = &state.texbufw[texlevel];

View file

@ -76,7 +76,4 @@ void ClearRectangle(const VertexData &v0, const VertexData &v1, const BinCoords
bool GetCurrentTexture(GPUDebugBuffer &buffer, int level);
// Shared functions with RasterizerRectangle.cpp
Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &source, const Vec4<int> &dst);
} // namespace Rasterizer

View file

@ -32,18 +32,54 @@ extern bool currentDialogActive;
namespace Rasterizer {
// This is essentially AlphaBlendingResult() with fixed src.a / 1 - src.a factors and the ADD equation.
// It allows us to skip round trips between 32-bit and 16-bit color values.
//
// source and dst are packed with red in the low byte and alpha in the high byte.  The return
// value holds the blended RGB in the low 24 bits and carries the alpha byte of dst through
// unchanged, in both the SSE and the scalar path.
static uint32_t StandardAlphaBlend(uint32_t source, uint32_t dst) {
#if defined(_M_SSE)
	const __m128i alpha = _mm_cvtsi32_si128(source >> 24);
	// Keep the alpha lane of the srcfactor zero, so we keep dest alpha.
	const __m128i srcfactor = _mm_shufflelo_epi16(alpha, _MM_SHUFFLE(1, 0, 0, 0));
	const __m128i dstfactor = _mm_sub_epi16(_mm_set1_epi16(255), srcfactor);

	// Widen each 8-bit channel to a 16-bit lane.
	const __m128i z = _mm_setzero_si128();
	const __m128i sourcevec = _mm_unpacklo_epi8(_mm_cvtsi32_si128(source), z);
	const __m128i dstvec = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dst), z);

	// We switch to 16 bit to use mulhi, and we use 4 bits of decimal to make the 16 bit shift free.
	const __m128i half = _mm_set1_epi16(1 << 3);

	const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(sourcevec, 4), half);
	const __m128i sf = _mm_add_epi16(_mm_slli_epi16(srcfactor, 4), half);
	const __m128i s = _mm_mulhi_epi16(srgb, sf);

	const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(dstvec, 4), half);
	const __m128i df = _mm_add_epi16(_mm_slli_epi16(dstfactor, 4), half);
	const __m128i d = _mm_mulhi_epi16(drgb, df);

	const __m128i blended16 = _mm_adds_epi16(s, d);
	return _mm_cvtsi128_si32(_mm_packus_epi16(blended16, blended16));
#else
	// Same fixed-point math as the SSE path, one channel at a time on plain integers:
	// each operand gets one extra bit of precision with a half added, then the product is scaled back.
	const uint32_t srcAlpha = source >> 24;
	const uint32_t srcWeight = srcAlpha * 2 + 1;
	const uint32_t dstWeight = (255 - srcAlpha) * 2 + 1;

	// Start from the dest alpha so the result matches what the SSE path returns.
	uint32_t blended = dst & 0xFF000000;
	for (int shift = 0; shift < 24; shift += 8) {
		const uint32_t s = (source >> shift) & 0xFF;
		const uint32_t d = (dst >> shift) & 0xFF;
		uint32_t c = ((s * 2 + 1) * srcWeight) / 1024 + ((d * 2 + 1) * dstWeight) / 1024;
		// Keep each channel within a byte.
		if (c > 255)
			c = 255;
		blended |= c << shift;
	}
	return blended;
#endif
}
// Through mode, with the specific Darkstalker settings.
inline void DrawSinglePixel5551(u16 *pixel, const u32 color_in, const PixelFuncID &pixelID) {
inline void DrawSinglePixel5551(u16 *pixel, const u32 color_in) {
u32 new_color;
// Because of this check, we only support src.a / 1-src.a blending.
if ((color_in >> 24) == 255) {
new_color = color_in & 0xFFFFFF;
} else {
const u32 old_color = RGBA5551ToRGBA8888(*pixel);
const Vec4<int> dst = Vec4<int>::FromRGBA(old_color);
Vec3<int> blended = AlphaBlendingResult(pixelID, Vec4<int>::FromRGBA(color_in), dst);
// ToRGB() always automatically clamps.
new_color = blended.ToRGB();
new_color = StandardAlphaBlend(color_in, old_color);
}
new_color |= (*pixel & 0x8000) ? 0xff000000 : 0x00000000;
*pixel = RGBA8888ToRGBA5551(new_color);
@ -182,7 +218,7 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
for (int x = pos0.x; x < pos1.x; x++) {
u32 tex_color = Vec4<int>(fetchFunc(s, t, texptr, texbufw, 0, state.samplerID)).ToRGBA();
if (tex_color & 0xFF000000) {
DrawSinglePixel5551(pixel, tex_color, pixelID);
DrawSinglePixel5551(pixel, tex_color);
}
s += ds;
pixel++;
@ -200,7 +236,7 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
Vec4<int> tex_color = fetchFunc(s, t, texptr, texbufw, 0, state.samplerID);
prim_color = Vec4<int>(ModulateRGBA(ToVec4IntArg(prim_color), ToVec4IntArg(tex_color), state.samplerID));
if (prim_color.a() > 0) {
DrawSinglePixel5551(pixel, prim_color.ToRGBA(), pixelID);
DrawSinglePixel5551(pixel, prim_color.ToRGBA());
}
s += ds;
pixel++;
@ -258,7 +294,7 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
for (int y = pos0.y; y < pos1.y; y++) {
u16 *pixel = fb.Get16Ptr(pos0.x, y, pixelID.cached.framebufStride);
for (int x = pos0.x; x < pos1.x; x++) {
DrawSinglePixel5551(pixel, v1.color0, pixelID);
DrawSinglePixel5551(pixel, v1.color0);
pixel++;
}
}