mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
softgpu: Simplify 5551 blending fast path.
Since it only supports multiply and add, let's just stick with that.
This commit is contained in:
parent
1eeb4f0bcf
commit
c47d7eab38
4 changed files with 244 additions and 211 deletions
|
@ -400,6 +400,206 @@ static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) {
|
|||
return new_color;
|
||||
}
|
||||
|
||||
// Selects the per-channel weight that multiplies the source color when blending.
// `source` is the incoming color, `dst` is the color it is blended against, and
// `fix` is a packed RGB constant (decoded via Vec3<int>::FromRGB) used by FIX.
// Weights treat 255 as "one"; the doubled-alpha variants may exceed 255.
static inline Vec3<int> GetSourceFactor(PixelBlendFactor factor, const Vec4<int> &source, const Vec4<int> &dst, uint32_t fix) {
	switch (factor) {
	case PixelBlendFactor::ZERO:
		return Vec3<int>::AssignToAll(0);

	case PixelBlendFactor::ONE:
		return Vec3<int>::AssignToAll(255);

	case PixelBlendFactor::OTHERCOLOR:
		// For the source weight, the "other" color is the destination.
		return dst.rgb();

	case PixelBlendFactor::INVOTHERCOLOR:
		return Vec3<int>::AssignToAll(255) - dst.rgb();

	case PixelBlendFactor::SRCALPHA:
#if defined(_M_SSE)
	{
		// Broadcast the fourth 32-bit lane (alpha) to every lane.
		const __m128i alphaLanes = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(alphaLanes);
	}
#else
		return Vec3<int>::AssignToAll(source.a());
#endif

	case PixelBlendFactor::INVSRCALPHA:
#if defined(_M_SSE)
	{
		const __m128i alphaLanes = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(_mm_sub_epi32(_mm_set1_epi32(255), alphaLanes));
	}
#else
		return Vec3<int>::AssignToAll(255 - source.a());
#endif

	case PixelBlendFactor::DSTALPHA:
		return Vec3<int>::AssignToAll(dst.a());

	case PixelBlendFactor::INVDSTALPHA:
		return Vec3<int>::AssignToAll(255 - dst.a());

	case PixelBlendFactor::DOUBLESRCALPHA:
		// Intentionally not clamped here, so the result may be above 255.
		return Vec3<int>::AssignToAll(2 * source.a());

	case PixelBlendFactor::DOUBLEINVSRCALPHA:
		// The doubled alpha is capped at 255 before inverting.
		return Vec3<int>::AssignToAll(255 - std::min(2 * source.a(), 255));

	case PixelBlendFactor::DOUBLEDSTALPHA:
		return Vec3<int>::AssignToAll(2 * dst.a());

	case PixelBlendFactor::DOUBLEINVDSTALPHA:
		return Vec3<int>::AssignToAll(255 - std::min(2 * dst.a(), 255));

	case PixelBlendFactor::FIX:
	default:
		// All other source factors (> 10) are treated as FIXA.
		return Vec3<int>::FromRGB(fix);
	}
}
|
||||
|
||||
// Selects the per-channel weight that multiplies the destination color when blending.
// `source` is the incoming color, `dst` is the color it is blended against, and
// `fix` is a packed RGB constant (decoded via Vec3<int>::FromRGB) used by FIX.
// Weights treat 255 as "one"; the doubled-alpha variants may exceed 255.
static inline Vec3<int> GetDestFactor(PixelBlendFactor factor, const Vec4<int> &source, const Vec4<int> &dst, uint32_t fix) {
	switch (factor) {
	case PixelBlendFactor::ZERO:
		return Vec3<int>::AssignToAll(0);

	case PixelBlendFactor::ONE:
		return Vec3<int>::AssignToAll(255);

	case PixelBlendFactor::OTHERCOLOR:
		// For the destination weight, the "other" color is the source.
		return source.rgb();

	case PixelBlendFactor::INVOTHERCOLOR:
		return Vec3<int>::AssignToAll(255) - source.rgb();

	case PixelBlendFactor::SRCALPHA:
#if defined(_M_SSE)
	{
		// Broadcast the fourth 32-bit lane (alpha) to every lane.
		const __m128i alphaLanes = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(alphaLanes);
	}
#else
		return Vec3<int>::AssignToAll(source.a());
#endif

	case PixelBlendFactor::INVSRCALPHA:
#if defined(_M_SSE)
	{
		const __m128i alphaLanes = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(_mm_sub_epi32(_mm_set1_epi32(255), alphaLanes));
	}
#else
		return Vec3<int>::AssignToAll(255 - source.a());
#endif

	case PixelBlendFactor::DSTALPHA:
		return Vec3<int>::AssignToAll(dst.a());

	case PixelBlendFactor::INVDSTALPHA:
		return Vec3<int>::AssignToAll(255 - dst.a());

	case PixelBlendFactor::DOUBLESRCALPHA:
		// Intentionally not clamped here, so the result may be above 255.
		return Vec3<int>::AssignToAll(2 * source.a());

	case PixelBlendFactor::DOUBLEINVSRCALPHA:
		// The doubled alpha is capped at 255 before inverting.
		return Vec3<int>::AssignToAll(255 - std::min(2 * source.a(), 255));

	case PixelBlendFactor::DOUBLEDSTALPHA:
		return Vec3<int>::AssignToAll(2 * dst.a());

	case PixelBlendFactor::DOUBLEINVDSTALPHA:
		return Vec3<int>::AssignToAll(255 - std::min(2 * dst.a(), 255));

	case PixelBlendFactor::FIX:
	default:
		// All other dest factors (> 10) are treated as FIXB.
		return Vec3<int>::FromRGB(fix);
	}
}
|
||||
|
||||
// Removed inline here - it was never chosen to be inlined by the compiler anyway, too complex.
|
||||
static Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &source, const Vec4<int> &dst) {
|
||||
// Note: These factors cannot go below 0, but they can go above 255 when doubling.
|
||||
Vec3<int> srcfactor = GetSourceFactor(pixelID.AlphaBlendSrc(), source, dst, pixelID.cached.alphaBlendSrc);
|
||||
Vec3<int> dstfactor = GetDestFactor(pixelID.AlphaBlendDst(), source, dst, pixelID.cached.alphaBlendDst);
|
||||
|
||||
switch (pixelID.AlphaBlendEq()) {
|
||||
case GE_BLENDMODE_MUL_AND_ADD:
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
// We switch to 16 bit to use mulhi, and we use 4 bits of decimal to make the 16 bit shift free.
|
||||
const __m128i half = _mm_set1_epi16(1 << 3);
|
||||
|
||||
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half);
|
||||
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half);
|
||||
const __m128i s = _mm_mulhi_epi16(srgb, sf);
|
||||
|
||||
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half);
|
||||
const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half);
|
||||
const __m128i d = _mm_mulhi_epi16(drgb, df);
|
||||
|
||||
return Vec3<int>(_mm_unpacklo_epi16(_mm_adds_epi16(s, d), _mm_setzero_si128()));
|
||||
#else
|
||||
static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);
|
||||
Vec3<int> lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024;
|
||||
Vec3<int> rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024;
|
||||
return lhs + rhs;
|
||||
#endif
|
||||
}
|
||||
|
||||
case GE_BLENDMODE_MUL_AND_SUBTRACT:
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
const __m128i half = _mm_set1_epi16(1 << 3);
|
||||
|
||||
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half);
|
||||
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half);
|
||||
const __m128i s = _mm_mulhi_epi16(srgb, sf);
|
||||
|
||||
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half);
|
||||
const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half);
|
||||
const __m128i d = _mm_mulhi_epi16(drgb, df);
|
||||
|
||||
return Vec3<int>(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(s, d), _mm_setzero_si128()), _mm_setzero_si128()));
|
||||
#else
|
||||
static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);
|
||||
Vec3<int> lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024;
|
||||
Vec3<int> rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024;
|
||||
return lhs - rhs;
|
||||
#endif
|
||||
}
|
||||
|
||||
case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
const __m128i half = _mm_set1_epi16(1 << 3);
|
||||
|
||||
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half);
|
||||
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half);
|
||||
const __m128i s = _mm_mulhi_epi16(srgb, sf);
|
||||
|
||||
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half);
|
||||
const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half);
|
||||
const __m128i d = _mm_mulhi_epi16(drgb, df);
|
||||
|
||||
return Vec3<int>(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(d, s), _mm_setzero_si128()), _mm_setzero_si128()));
|
||||
#else
|
||||
static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);
|
||||
Vec3<int> lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024;
|
||||
Vec3<int> rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024;
|
||||
return rhs - lhs;
|
||||
#endif
|
||||
}
|
||||
|
||||
case GE_BLENDMODE_MIN:
|
||||
return Vec3<int>(std::min(source.r(), dst.r()),
|
||||
std::min(source.g(), dst.g()),
|
||||
std::min(source.b(), dst.b()));
|
||||
|
||||
case GE_BLENDMODE_MAX:
|
||||
return Vec3<int>(std::max(source.r(), dst.r()),
|
||||
std::max(source.g(), dst.g()),
|
||||
std::max(source.b(), dst.b()));
|
||||
|
||||
case GE_BLENDMODE_ABSDIFF:
|
||||
return Vec3<int>(::abs(source.r() - dst.r()),
|
||||
::abs(source.g() - dst.g()),
|
||||
::abs(source.b() - dst.b()));
|
||||
|
||||
default:
|
||||
return source.rgb();
|
||||
}
|
||||
}
|
||||
|
||||
template <bool clearMode, GEBufferFormat fbFormat>
|
||||
void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg color_in, const PixelFuncID &pixelID) {
|
||||
Vec4<int> prim_color = Vec4<int>(color_in).Clamp(0, 255);
|
||||
|
|
|
@ -270,206 +270,6 @@ static inline bool IsRightSideOrFlatBottomLine(const Vec2<int>& vertex, const Ve
|
|||
}
|
||||
}
|
||||
|
||||
// Selects the per-channel weight that multiplies the source color when blending.
// `source` is the incoming color, `dst` is the color it is blended against, and
// `fix` is a packed RGB constant (decoded via Vec3<int>::FromRGB) used by FIX.
// Weights treat 255 as "one"; the doubled-alpha variants may exceed 255.
static inline Vec3<int> GetSourceFactor(PixelBlendFactor factor, const Vec4<int> &source, const Vec4<int> &dst, uint32_t fix) {
	switch (factor) {
	case PixelBlendFactor::ZERO:
		return Vec3<int>::AssignToAll(0);

	case PixelBlendFactor::ONE:
		return Vec3<int>::AssignToAll(255);

	case PixelBlendFactor::OTHERCOLOR:
		// For the source weight, the "other" color is the destination.
		return dst.rgb();

	case PixelBlendFactor::INVOTHERCOLOR:
		return Vec3<int>::AssignToAll(255) - dst.rgb();

	case PixelBlendFactor::SRCALPHA:
#if defined(_M_SSE)
	{
		// Broadcast the fourth 32-bit lane (alpha) to every lane.
		const __m128i alphaLanes = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(alphaLanes);
	}
#else
		return Vec3<int>::AssignToAll(source.a());
#endif

	case PixelBlendFactor::INVSRCALPHA:
#if defined(_M_SSE)
	{
		const __m128i alphaLanes = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(_mm_sub_epi32(_mm_set1_epi32(255), alphaLanes));
	}
#else
		return Vec3<int>::AssignToAll(255 - source.a());
#endif

	case PixelBlendFactor::DSTALPHA:
		return Vec3<int>::AssignToAll(dst.a());

	case PixelBlendFactor::INVDSTALPHA:
		return Vec3<int>::AssignToAll(255 - dst.a());

	case PixelBlendFactor::DOUBLESRCALPHA:
		// Intentionally not clamped here, so the result may be above 255.
		return Vec3<int>::AssignToAll(2 * source.a());

	case PixelBlendFactor::DOUBLEINVSRCALPHA:
		// The doubled alpha is capped at 255 before inverting.
		return Vec3<int>::AssignToAll(255 - std::min(2 * source.a(), 255));

	case PixelBlendFactor::DOUBLEDSTALPHA:
		return Vec3<int>::AssignToAll(2 * dst.a());

	case PixelBlendFactor::DOUBLEINVDSTALPHA:
		return Vec3<int>::AssignToAll(255 - std::min(2 * dst.a(), 255));

	case PixelBlendFactor::FIX:
	default:
		// All other source factors (> 10) are treated as FIXA.
		return Vec3<int>::FromRGB(fix);
	}
}
|
||||
|
||||
// Selects the per-channel weight that multiplies the destination color when blending.
// `source` is the incoming color, `dst` is the color it is blended against, and
// `fix` is a packed RGB constant (decoded via Vec3<int>::FromRGB) used by FIX.
// Weights treat 255 as "one"; the doubled-alpha variants may exceed 255.
static inline Vec3<int> GetDestFactor(PixelBlendFactor factor, const Vec4<int> &source, const Vec4<int> &dst, uint32_t fix) {
	switch (factor) {
	case PixelBlendFactor::ZERO:
		return Vec3<int>::AssignToAll(0);

	case PixelBlendFactor::ONE:
		return Vec3<int>::AssignToAll(255);

	case PixelBlendFactor::OTHERCOLOR:
		// For the destination weight, the "other" color is the source.
		return source.rgb();

	case PixelBlendFactor::INVOTHERCOLOR:
		return Vec3<int>::AssignToAll(255) - source.rgb();

	case PixelBlendFactor::SRCALPHA:
#if defined(_M_SSE)
	{
		// Broadcast the fourth 32-bit lane (alpha) to every lane.
		const __m128i alphaLanes = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(alphaLanes);
	}
#else
		return Vec3<int>::AssignToAll(source.a());
#endif

	case PixelBlendFactor::INVSRCALPHA:
#if defined(_M_SSE)
	{
		const __m128i alphaLanes = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(_mm_sub_epi32(_mm_set1_epi32(255), alphaLanes));
	}
#else
		return Vec3<int>::AssignToAll(255 - source.a());
#endif

	case PixelBlendFactor::DSTALPHA:
		return Vec3<int>::AssignToAll(dst.a());

	case PixelBlendFactor::INVDSTALPHA:
		return Vec3<int>::AssignToAll(255 - dst.a());

	case PixelBlendFactor::DOUBLESRCALPHA:
		// Intentionally not clamped here, so the result may be above 255.
		return Vec3<int>::AssignToAll(2 * source.a());

	case PixelBlendFactor::DOUBLEINVSRCALPHA:
		// The doubled alpha is capped at 255 before inverting.
		return Vec3<int>::AssignToAll(255 - std::min(2 * source.a(), 255));

	case PixelBlendFactor::DOUBLEDSTALPHA:
		return Vec3<int>::AssignToAll(2 * dst.a());

	case PixelBlendFactor::DOUBLEINVDSTALPHA:
		return Vec3<int>::AssignToAll(255 - std::min(2 * dst.a(), 255));

	case PixelBlendFactor::FIX:
	default:
		// All other dest factors (> 10) are treated as FIXB.
		return Vec3<int>::FromRGB(fix);
	}
}
|
||||
|
||||
// Removed inline here - it was never chosen to be inlined by the compiler anyway, too complex.
|
||||
Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &source, const Vec4<int> &dst) {
|
||||
// Note: These factors cannot go below 0, but they can go above 255 when doubling.
|
||||
Vec3<int> srcfactor = GetSourceFactor(pixelID.AlphaBlendSrc(), source, dst, pixelID.cached.alphaBlendSrc);
|
||||
Vec3<int> dstfactor = GetDestFactor(pixelID.AlphaBlendDst(), source, dst, pixelID.cached.alphaBlendDst);
|
||||
|
||||
switch (pixelID.AlphaBlendEq()) {
|
||||
case GE_BLENDMODE_MUL_AND_ADD:
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
// We switch to 16 bit to use mulhi, and we use 4 bits of decimal to make the 16 bit shift free.
|
||||
const __m128i half = _mm_set1_epi16(1 << 3);
|
||||
|
||||
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half);
|
||||
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half);
|
||||
const __m128i s = _mm_mulhi_epi16(srgb, sf);
|
||||
|
||||
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half);
|
||||
const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half);
|
||||
const __m128i d = _mm_mulhi_epi16(drgb, df);
|
||||
|
||||
return Vec3<int>(_mm_unpacklo_epi16(_mm_adds_epi16(s, d), _mm_setzero_si128()));
|
||||
#else
|
||||
static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);
|
||||
Vec3<int> lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024;
|
||||
Vec3<int> rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024;
|
||||
return lhs + rhs;
|
||||
#endif
|
||||
}
|
||||
|
||||
case GE_BLENDMODE_MUL_AND_SUBTRACT:
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
const __m128i half = _mm_set1_epi16(1 << 3);
|
||||
|
||||
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half);
|
||||
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half);
|
||||
const __m128i s = _mm_mulhi_epi16(srgb, sf);
|
||||
|
||||
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half);
|
||||
const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half);
|
||||
const __m128i d = _mm_mulhi_epi16(drgb, df);
|
||||
|
||||
return Vec3<int>(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(s, d), _mm_setzero_si128()), _mm_setzero_si128()));
|
||||
#else
|
||||
static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);
|
||||
Vec3<int> lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024;
|
||||
Vec3<int> rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024;
|
||||
return lhs - rhs;
|
||||
#endif
|
||||
}
|
||||
|
||||
case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
const __m128i half = _mm_set1_epi16(1 << 3);
|
||||
|
||||
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half);
|
||||
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half);
|
||||
const __m128i s = _mm_mulhi_epi16(srgb, sf);
|
||||
|
||||
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half);
|
||||
const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half);
|
||||
const __m128i d = _mm_mulhi_epi16(drgb, df);
|
||||
|
||||
return Vec3<int>(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(d, s), _mm_setzero_si128()), _mm_setzero_si128()));
|
||||
#else
|
||||
static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);
|
||||
Vec3<int> lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024;
|
||||
Vec3<int> rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024;
|
||||
return rhs - lhs;
|
||||
#endif
|
||||
}
|
||||
|
||||
case GE_BLENDMODE_MIN:
|
||||
return Vec3<int>(std::min(source.r(), dst.r()),
|
||||
std::min(source.g(), dst.g()),
|
||||
std::min(source.b(), dst.b()));
|
||||
|
||||
case GE_BLENDMODE_MAX:
|
||||
return Vec3<int>(std::max(source.r(), dst.r()),
|
||||
std::max(source.g(), dst.g()),
|
||||
std::max(source.b(), dst.b()));
|
||||
|
||||
case GE_BLENDMODE_ABSDIFF:
|
||||
return Vec3<int>(::abs(source.r() - dst.r()),
|
||||
::abs(source.g() - dst.g()),
|
||||
::abs(source.b() - dst.b()));
|
||||
|
||||
default:
|
||||
return source.rgb();
|
||||
}
|
||||
}
|
||||
|
||||
static inline Vec4IntResult SOFTRAST_CALL ApplyTexturing(float s, float t, int x, int y, Vec4IntArg prim_color, int texlevel, int frac_texlevel, bool bilinear, const RasterizerState &state) {
|
||||
const u8 **tptr0 = const_cast<const u8 **>(&state.texptr[texlevel]);
|
||||
const uint16_t *bufw0 = &state.texbufw[texlevel];
|
||||
|
|
|
@ -76,7 +76,4 @@ void ClearRectangle(const VertexData &v0, const VertexData &v1, const BinCoords
|
|||
|
||||
bool GetCurrentTexture(GPUDebugBuffer &buffer, int level);
|
||||
|
||||
// Shared functions with RasterizerRectangle.cpp
|
||||
Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &source, const Vec4<int> &dst);
|
||||
|
||||
} // namespace Rasterizer
|
||||
|
|
|
@ -32,18 +32,54 @@ extern bool currentDialogActive;
|
|||
|
||||
namespace Rasterizer {
|
||||
|
||||
// This essentially AlphaBlendingResult() with fixed src.a / 1 - src.a factors and ADD equation.
|
||||
// It allows us to skip round trips between 32-bit and 16-bit color values.
|
||||
static uint32_t StandardAlphaBlend(uint32_t source, uint32_t dst) {
|
||||
#if defined(_M_SSE)
|
||||
const __m128i alpha = _mm_cvtsi32_si128(source >> 24);
|
||||
// Keep the alpha lane of the srcfactor zero, so we keep dest alpha.
|
||||
const __m128i srcfactor = _mm_shufflelo_epi16(alpha, _MM_SHUFFLE(1, 0, 0, 0));
|
||||
const __m128i dstfactor = _mm_sub_epi16(_mm_set1_epi16(255), srcfactor);
|
||||
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
const __m128i sourcevec = _mm_unpacklo_epi8(_mm_cvtsi32_si128(source), z);
|
||||
const __m128i dstvec = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dst), z);
|
||||
|
||||
// We switch to 16 bit to use mulhi, and we use 4 bits of decimal to make the 16 bit shift free.
|
||||
const __m128i half = _mm_set1_epi16(1 << 3);
|
||||
|
||||
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(sourcevec, 4), half);
|
||||
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(srcfactor, 4), half);
|
||||
const __m128i s = _mm_mulhi_epi16(srgb, sf);
|
||||
|
||||
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(dstvec, 4), half);
|
||||
const __m128i df = _mm_add_epi16(_mm_slli_epi16(dstfactor, 4), half);
|
||||
const __m128i d = _mm_mulhi_epi16(drgb, df);
|
||||
|
||||
const __m128i blended16 = _mm_adds_epi16(s, d);
|
||||
return _mm_cvtsi128_si32(_mm_packus_epi16(blended16, blended16));
|
||||
#else
|
||||
Vec3<int> srcfactor = Vec3<int>::AssignToAll(source >> 24);
|
||||
Vec3<int> dstfactor = Vec3<int>::AssignToAll(255 - (source >> 24));
|
||||
|
||||
static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);
|
||||
Vec3<int> lhs = ((Vec3<int>::FromRGB(source) * 2 + half) * (srcfactor * 2 + half)) / 1024;
|
||||
Vec3<int> rhs = ((Vec3<int>::FromRGB(dst) * 2 + half) * (dstfactor * 2 + half)) / 1024;
|
||||
Vec3<int> blended = lhs + rhs;
|
||||
|
||||
return clamp_u8(blended.r()) | (clamp_u8(blended.g()) << 8) | (clamp_u8(blended.b()) << 16);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Through mode, with the specific Darkstalker settings.
|
||||
inline void DrawSinglePixel5551(u16 *pixel, const u32 color_in, const PixelFuncID &pixelID) {
|
||||
inline void DrawSinglePixel5551(u16 *pixel, const u32 color_in) {
|
||||
u32 new_color;
|
||||
// Because of this check, we only support src.a / 1-src.a blending.
|
||||
if ((color_in >> 24) == 255) {
|
||||
new_color = color_in & 0xFFFFFF;
|
||||
} else {
|
||||
const u32 old_color = RGBA5551ToRGBA8888(*pixel);
|
||||
const Vec4<int> dst = Vec4<int>::FromRGBA(old_color);
|
||||
Vec3<int> blended = AlphaBlendingResult(pixelID, Vec4<int>::FromRGBA(color_in), dst);
|
||||
// ToRGB() always automatically clamps.
|
||||
new_color = blended.ToRGB();
|
||||
new_color = StandardAlphaBlend(color_in, old_color);
|
||||
}
|
||||
new_color |= (*pixel & 0x8000) ? 0xff000000 : 0x00000000;
|
||||
*pixel = RGBA8888ToRGBA5551(new_color);
|
||||
|
@ -182,7 +218,7 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
|
|||
for (int x = pos0.x; x < pos1.x; x++) {
|
||||
u32 tex_color = Vec4<int>(fetchFunc(s, t, texptr, texbufw, 0, state.samplerID)).ToRGBA();
|
||||
if (tex_color & 0xFF000000) {
|
||||
DrawSinglePixel5551(pixel, tex_color, pixelID);
|
||||
DrawSinglePixel5551(pixel, tex_color);
|
||||
}
|
||||
s += ds;
|
||||
pixel++;
|
||||
|
@ -200,7 +236,7 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
|
|||
Vec4<int> tex_color = fetchFunc(s, t, texptr, texbufw, 0, state.samplerID);
|
||||
prim_color = Vec4<int>(ModulateRGBA(ToVec4IntArg(prim_color), ToVec4IntArg(tex_color), state.samplerID));
|
||||
if (prim_color.a() > 0) {
|
||||
DrawSinglePixel5551(pixel, prim_color.ToRGBA(), pixelID);
|
||||
DrawSinglePixel5551(pixel, prim_color.ToRGBA());
|
||||
}
|
||||
s += ds;
|
||||
pixel++;
|
||||
|
@ -258,7 +294,7 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
|
|||
for (int y = pos0.y; y < pos1.y; y++) {
|
||||
u16 *pixel = fb.Get16Ptr(pos0.x, y, pixelID.cached.framebufStride);
|
||||
for (int x = pos0.x; x < pos1.x; x++) {
|
||||
DrawSinglePixel5551(pixel, v1.color0, pixelID);
|
||||
DrawSinglePixel5551(pixel, v1.color0);
|
||||
pixel++;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue