Delete a lot of specialized alpha checking code.

This was now only used to check alpha in CLUTs, and the generic functions will not actually be any slower.
This commit is contained in:
Henrik Rydgård 2022-04-15 12:34:50 +02:00
parent 062423597d
commit c4dfbf4f1a
12 changed files with 153 additions and 576 deletions

View file

@ -839,6 +839,7 @@ void ReplacedTexture::PrepareData(int level) {
int w, h, f;
uint8_t *image;
if (LoadZIMPtr(&zim[0], zimSize, &w, &h, &f, &image)) {
if (w > info.w || h > info.h) {
ERROR_LOG(G3D, "Texture replacement changed since header read: %s", info.file.c_str());
@ -857,7 +858,7 @@ void ReplacedTexture::PrepareData(int level) {
free(image);
}
CheckAlphaResult res = CheckAlphaRGBA8888Basic((u32 *)&out[0], info.w, w, h);
CheckAlphaResult res = CheckAlpha32Rect((u32 *)&out[0], info.w, w, h, 0xFF000000);
if (res == CHECKALPHA_ANY || level == 0) {
alphaStatus_ = ReplacedTextureAlpha(res);
}
@ -897,7 +898,7 @@ void ReplacedTexture::PrepareData(int level) {
if (!checkedAlpha) {
// This will only check the hashed bits.
CheckAlphaResult res = CheckAlphaRGBA8888Basic((u32 *)&out[0], info.w, png.width, png.height);
CheckAlphaResult res = CheckAlpha32Rect((u32 *)&out[0], info.w, png.width, png.height, 0xFF000000);
if (res == CHECKALPHA_ANY || level == 0) {
alphaStatus_ = ReplacedTextureAlpha(res);
}

View file

@ -1430,118 +1430,6 @@ inline u32 TfmtRawToFullAlpha(GETextureFormat fmt) {
return 0;
}
#ifdef _M_SSE
inline u32 SSEReduce32And(__m128i value) {
// TODO: Should use a shuffle instead of slri, probably.
value = _mm_and_si128(value, _mm_srli_si128(value, 64));
value = _mm_and_si128(value, _mm_srli_si128(value, 32));
return _mm_cvtsi128_si32(value);
}
inline u32 SSEReduce16And(__m128i value) {
// TODO: Should use a shuffle instead of slri, probably.
value = _mm_and_si128(value, _mm_srli_si128(value, 64));
value = _mm_and_si128(value, _mm_srli_si128(value, 32));
value = _mm_and_si128(value, _mm_srli_si128(value, 16));
return _mm_cvtsi128_si32(value);
}
#endif
#if PPSSPP_ARCH(ARM_NEON)
inline u32 NEONReduce32And(uint32x4_t value) {
// TODO: Maybe a shuffle and a vector and, or something?
return vgetq_lane_u32(value, 0) & vgetq_lane_u32(value, 1) & vgetq_lane_u32(value, 2) & vgetq_lane_u32(value, 3);
}
#endif
// TODO: SSE/SIMD
// At least on x86, compiler actually SIMDs these pretty well.
void CopyAndSumMask16(u16 *dst, const u16 *src, int width, u32 *outMask) {
u16 mask = 0xFFFF;
for (int i = 0; i < width; i++) {
u16 color = src[i];
mask &= color;
dst[i] = color;
}
*outMask &= (u32)mask;
}
// Used in video playback so nice to have being fast.
void CopyAndSumMask32(u32 *dst, const u32 *src, int width, u32 *outMask) {
u32 mask = 0xFFFFFFFF;
#ifdef _M_SSE
if (width >= 4) {
__m128i wideMask = _mm_set1_epi32(0xFFFFFFFF);
while (width >= 4) {
__m128i color = _mm_loadu_si128((__m128i *)src);
wideMask = _mm_and_si128(wideMask, color);
_mm_storeu_si128((__m128i *)dst, color);
src += 4;
dst += 4;
width -= 4;
}
mask = SSEReduce32And(wideMask);
}
#elif PPSSPP_ARCH(ARM_NEON)
if (width >= 4) {
uint32x4_t wideMask = vdupq_n_u32(0xFFFFFFFF);
while (width >= 4) {
uint32x4_t colors = vld1q_u32(src);
wideMask = vandq_u32(wideMask, colors);
vst1q_u32(dst, colors);
src += 4;
dst += 4;
width -= 4;
}
mask = NEONReduce32And(wideMask);
}
#endif
for (int i = 0; i < width; i++) {
u32 color = src[i];
mask &= color;
dst[i] = color;
}
*outMask &= (u32)mask;
}
void CheckMask16(const u16 *src, int width, u32 *outMask) {
u16 mask = 0xFFFF;
for (int i = 0; i < width; i++) {
mask &= src[i];
}
*outMask &= (u32)mask;
}
void CheckMask32(const u32 *src, int width, u32 *outMask) {
u32 mask = 0xFFFFFFFF;
#ifdef _M_SSE
if (width >= 4) {
__m128i wideMask = _mm_set1_epi32(0xFFFFFFFF);
while (width >= 4) {
wideMask = _mm_and_si128(wideMask, _mm_loadu_si128((__m128i *)src));
src += 4;
width -= 4;
}
mask = SSEReduce32And(wideMask);
}
#elif PPSSPP_ARCH(ARM_NEON)
if (width >= 4) {
uint32x4_t wideMask = vdupq_n_u32(0xFFFFFFFF);
while (width >= 4) {
wideMask = vandq_u32(wideMask, vld1q_u32(src));
src += 4;
width -= 4;
}
mask = NEONReduce32And(wideMask);
}
#endif
for (int i = 0; i < width; i++) {
mask &= src[i];
}
*outMask &= (u32)mask;
}
CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool useBGRA, bool expandTo32bit) {
u32 alphaSum = 0xFFFFFFFF;
u32 fullAlphaMask = 0x0;

View file

@ -647,417 +647,113 @@ void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int height) {
}
#ifdef _M_SSE
static inline u32 CombineSSEBitsToDWORD(const __m128i &v) {
__m128i temp;
temp = _mm_or_si128(v, _mm_srli_si128(v, 8));
temp = _mm_or_si128(temp, _mm_srli_si128(temp, 4));
return _mm_cvtsi128_si32(temp);
inline u32 SSEReduce32And(__m128i value) {
// TODO: Should use a shuffle instead of slri, probably.
value = _mm_and_si128(value, _mm_srli_si128(value, 64));
value = _mm_and_si128(value, _mm_srli_si128(value, 32));
return _mm_cvtsi128_si32(value);
}
CheckAlphaResult CheckAlphaRGBA8888SSE2(const u32 *pixelData, int stride, int w, int h) {
const __m128i mask = _mm_set1_epi32(0xFF000000);
const __m128i *p = (const __m128i *)pixelData;
const int w4 = w / 4;
const int stride4 = stride / 4;
__m128i bits = mask;
for (int y = 0; y < h; ++y) {
for (int i = 0; i < w4; ++i) {
const __m128i a = _mm_load_si128(&p[i]);
bits = _mm_and_si128(bits, a);
}
__m128i result = _mm_xor_si128(bits, mask);
if (CombineSSEBitsToDWORD(result) != 0) {
return CHECKALPHA_ANY;
}
p += stride4;
}
return CHECKALPHA_FULL;
inline u32 SSEReduce16And(__m128i value) {
// TODO: Should use a shuffle instead of slri, probably.
value = _mm_and_si128(value, _mm_srli_si128(value, 64));
value = _mm_and_si128(value, _mm_srli_si128(value, 32));
value = _mm_and_si128(value, _mm_srli_si128(value, 16));
return _mm_cvtsi128_si32(value);
}
CheckAlphaResult CheckAlphaABGR4444SSE2(const u32 *pixelData, int stride, int w, int h) {
const __m128i mask = _mm_set1_epi16((short)0x000F);
const __m128i *p = (const __m128i *)pixelData;
const int w8 = w / 8;
const int stride8 = stride / 8;
__m128i bits = mask;
for (int y = 0; y < h; ++y) {
for (int i = 0; i < w8; ++i) {
const __m128i a = _mm_load_si128(&p[i]);
bits = _mm_and_si128(bits, a);
}
__m128i result = _mm_xor_si128(bits, mask);
if (CombineSSEBitsToDWORD(result) != 0) {
return CHECKALPHA_ANY;
}
p += stride8;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaABGR1555SSE2(const u32 *pixelData, int stride, int w, int h) {
const __m128i mask = _mm_set1_epi16((short)0x0001);
const __m128i *p = (const __m128i *)pixelData;
const int w8 = w / 8;
const int stride8 = stride / 8;
__m128i bits = mask;
for (int y = 0; y < h; ++y) {
for (int i = 0; i < w8; ++i) {
const __m128i a = _mm_load_si128(&p[i]);
bits = _mm_and_si128(bits, a);
}
__m128i result = _mm_xor_si128(bits, mask);
if (CombineSSEBitsToDWORD(result) != 0) {
return CHECKALPHA_ANY;
}
p += stride8;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaRGBA4444SSE2(const u32 *pixelData, int stride, int w, int h) {
const __m128i mask = _mm_set1_epi16((short)0xF000);
const __m128i *p = (const __m128i *)pixelData;
const int w8 = w / 8;
const int stride8 = stride / 8;
__m128i bits = mask;
for (int y = 0; y < h; ++y) {
for (int i = 0; i < w8; ++i) {
const __m128i a = _mm_load_si128(&p[i]);
bits = _mm_and_si128(bits, a);
}
__m128i result = _mm_xor_si128(bits, mask);
if (CombineSSEBitsToDWORD(result) != 0) {
return CHECKALPHA_ANY;
}
p += stride8;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaRGBA5551SSE2(const u32 *pixelData, int stride, int w, int h) {
const __m128i mask = _mm_set1_epi16((short)0x8000);
const __m128i *p = (const __m128i *)pixelData;
const int w8 = w / 8;
const int stride8 = stride / 8;
__m128i bits = mask;
for (int y = 0; y < h; ++y) {
for (int i = 0; i < w8; ++i) {
const __m128i a = _mm_load_si128(&p[i]);
bits = _mm_and_si128(bits, a);
}
__m128i result = _mm_xor_si128(bits, mask);
if (CombineSSEBitsToDWORD(result) != 0) {
return CHECKALPHA_ANY;
}
p += stride8;
}
return CHECKALPHA_FULL;
}
#endif // _M_SSE
#endif
#if PPSSPP_ARCH(ARM_NEON)
static inline bool VectorIsNonZeroNEON(const uint32x4_t &v) {
u64 low = vgetq_lane_u64(vreinterpretq_u64_u32(v), 0);
u64 high = vgetq_lane_u64(vreinterpretq_u64_u32(v), 1);
return (low | high) != 0;
}
#ifndef _MSC_VER
// MSVC consider this function the same as the one above! uint16x8_t is typedef'd to the same type as uint32x4_t.
static inline bool VectorIsNonZeroNEON(const uint16x8_t &v) {
u64 low = vgetq_lane_u64(vreinterpretq_u64_u16(v), 0);
u64 high = vgetq_lane_u64(vreinterpretq_u64_u16(v), 1);
return (low | high) != 0;
inline u32 NEONReduce32And(uint32x4_t value) {
// TODO: Maybe a shuffle and a vector and, or something?
return vgetq_lane_u32(value, 0) & vgetq_lane_u32(value, 1) & vgetq_lane_u32(value, 2) & vgetq_lane_u32(value, 3);
}
#endif
CheckAlphaResult CheckAlphaRGBA8888NEON(const u32 *pixelData, int stride, int w, int h) {
const u32 *p = (const u32 *)pixelData;
const uint32x4_t mask = vdupq_n_u32(0xFF000000);
uint32x4_t bits = mask;
for (int y = 0; y < h; ++y) {
for (int i = 0; i < w; i += 4) {
const uint32x4_t a = vld1q_u32(&p[i]);
bits = vandq_u32(bits, a);
}
uint32x4_t result = veorq_u32(bits, mask);
if (VectorIsNonZeroNEON(result)) {
return CHECKALPHA_ANY;
}
p += stride;
// TODO: SSE/SIMD
// At least on x86, compiler actually SIMDs these pretty well.
void CopyAndSumMask16(u16 *dst, const u16 *src, int width, u32 *outMask) {
u16 mask = 0xFFFF;
for (int i = 0; i < width; i++) {
u16 color = src[i];
mask &= color;
dst[i] = color;
}
return CHECKALPHA_FULL;
*outMask &= (u32)mask;
}
CheckAlphaResult CheckAlphaABGR4444NEON(const u32 *pixelData, int stride, int w, int h) {
const u16 *p = (const u16 *)pixelData;
const uint16x8_t mask = vdupq_n_u16((u16)0x000F);
uint16x8_t bits = mask;
for (int y = 0; y < h; ++y) {
for (int i = 0; i < w; i += 8) {
const uint16x8_t a = vld1q_u16(&p[i]);
bits = vandq_u16(bits, a);
}
uint16x8_t result = veorq_u16(bits, mask);
if (VectorIsNonZeroNEON(result)) {
return CHECKALPHA_ANY;
}
p += stride;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaABGR1555NEON(const u32 *pixelData, int stride, int w, int h) {
const u16 *p = (const u16 *)pixelData;
const uint16x8_t mask = vdupq_n_u16((u16)0x0001);
uint16x8_t bits = mask;
for (int y = 0; y < h; ++y) {
for (int i = 0; i < w; i += 8) {
const uint16x8_t a = vld1q_u16(&p[i]);
bits = vandq_u16(bits, a);
}
uint16x8_t result = veorq_u16(bits, mask);
if (VectorIsNonZeroNEON(result)) {
return CHECKALPHA_ANY;
}
p += stride;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaRGBA4444NEON(const u32 *pixelData, int stride, int w, int h) {
const u16 *p = (const u16 *)pixelData;
const uint16x8_t mask = vdupq_n_u16((u16)0xF000);
uint16x8_t bits = mask;
for (int y = 0; y < h; ++y) {
for (int i = 0; i < w; i += 8) {
const uint16x8_t a = vld1q_u16(&p[i]);
bits = vandq_u16(bits, a);
}
uint16x8_t result = veorq_u16(bits, mask);
if (VectorIsNonZeroNEON(result)) {
return CHECKALPHA_ANY;
}
p += stride;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaRGBA5551NEON(const u32 *pixelData, int stride, int w, int h) {
const u16 *p = (const u16 *)pixelData;
const uint16x8_t mask = vdupq_n_u16((u16)0x8000);
uint16x8_t bits = mask;
for (int y = 0; y < h; ++y) {
for (int i = 0; i < w; i += 8) {
const uint16x8_t a = vld1q_u16(&p[i]);
bits = vandq_u16(bits, a);
}
uint16x8_t result = veorq_u16(bits, mask);
if (VectorIsNonZeroNEON(result)) {
return CHECKALPHA_ANY;
}
p += stride;
}
return CHECKALPHA_FULL;
}
#endif
CheckAlphaResult CheckAlphaRGBA8888Basic(const u32 *pixelData, int stride, int w, int h) {
// Use SIMD if aligned to 16 bytes / 4 pixels (almost always the case.)
if ((w & 3) == 0 && (stride & 3) == 0) {
// Used in video playback so nice to have being fast.
void CopyAndSumMask32(u32 *dst, const u32 *src, int width, u32 *outMask) {
u32 mask = 0xFFFFFFFF;
#ifdef _M_SSE
return CheckAlphaRGBA8888SSE2(pixelData, stride, w, h);
if (width >= 4) {
__m128i wideMask = _mm_set1_epi32(0xFFFFFFFF);
while (width >= 4) {
__m128i color = _mm_loadu_si128((__m128i *)src);
wideMask = _mm_and_si128(wideMask, color);
_mm_storeu_si128((__m128i *)dst, color);
src += 4;
dst += 4;
width -= 4;
}
mask = SSEReduce32And(wideMask);
}
#elif PPSSPP_ARCH(ARM_NEON)
return CheckAlphaRGBA8888NEON(pixelData, stride, w, h);
if (width >= 4) {
uint32x4_t wideMask = vdupq_n_u32(0xFFFFFFFF);
while (width >= 4) {
uint32x4_t colors = vld1q_u32(src);
wideMask = vandq_u32(wideMask, colors);
vst1q_u32(dst, colors);
src += 4;
dst += 4;
width -= 4;
}
mask = NEONReduce32And(wideMask);
}
#endif
for (int i = 0; i < width; i++) {
u32 color = src[i];
mask &= color;
dst[i] = color;
}
const u32 *p = pixelData;
for (int y = 0; y < h; ++y) {
u32 bits = 0xFF000000;
for (int i = 0; i < w; ++i) {
bits &= p[i];
}
if (bits != 0xFF000000) {
// We're done, we hit non-full alpha.
return CHECKALPHA_ANY;
}
p += stride;
}
return CHECKALPHA_FULL;
*outMask &= (u32)mask;
}
CheckAlphaResult CheckAlphaABGR4444Basic(const u32 *pixelData, int stride, int w, int h) {
// Use SIMD if aligned to 16 bytes / 8 pixels (usually the case.)
if ((w & 7) == 0 && (stride & 7) == 0) {
void CheckMask16(const u16 *src, int width, u32 *outMask) {
u16 mask = 0xFFFF;
for (int i = 0; i < width; i++) {
mask &= src[i];
}
*outMask &= (u32)mask;
}
void CheckMask32(const u32 *src, int width, u32 *outMask) {
u32 mask = 0xFFFFFFFF;
#ifdef _M_SSE
return CheckAlphaABGR4444SSE2(pixelData, stride, w, h);
if (width >= 4) {
__m128i wideMask = _mm_set1_epi32(0xFFFFFFFF);
while (width >= 4) {
wideMask = _mm_and_si128(wideMask, _mm_loadu_si128((__m128i *)src));
src += 4;
width -= 4;
}
mask = SSEReduce32And(wideMask);
}
#elif PPSSPP_ARCH(ARM_NEON)
return CheckAlphaABGR4444NEON(pixelData, stride, w, h);
if (width >= 4) {
uint32x4_t wideMask = vdupq_n_u32(0xFFFFFFFF);
while (width >= 4) {
wideMask = vandq_u32(wideMask, vld1q_u32(src));
src += 4;
width -= 4;
}
mask = NEONReduce32And(wideMask);
}
#endif
for (int i = 0; i < width; i++) {
mask &= src[i];
}
const u32 *p = pixelData;
const int w2 = (w + 1) / 2;
const int stride2 = (stride + 1) / 2;
for (int y = 0; y < h; ++y) {
u32 bits = 0x000F000F;
for (int i = 0; i < w2; ++i) {
bits &= p[i];
}
if (bits != 0x000F000F) {
// We're done, we hit non-full alpha.
return CHECKALPHA_ANY;
}
p += stride2;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaABGR1555Basic(const u32 *pixelData, int stride, int w, int h) {
// Use SIMD if aligned to 16 bytes / 8 pixels (usually the case.)
if ((w & 7) == 0 && (stride & 7) == 0) {
#ifdef _M_SSE
return CheckAlphaABGR1555SSE2(pixelData, stride, w, h);
#elif PPSSPP_ARCH(ARM_NEON)
return CheckAlphaABGR1555NEON(pixelData, stride, w, h);
#endif
}
const u32 *p = pixelData;
const int w2 = (w + 1) / 2;
const int stride2 = (stride + 1) / 2;
for (int y = 0; y < h; ++y) {
u32 bits = 0x00010001;
for (int i = 0; i < w2; ++i) {
bits &= p[i];
}
if (bits != 0x00010001) {
return CHECKALPHA_ANY;
}
p += stride2;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaRGBA4444Basic(const u32 *pixelData, int stride, int w, int h) {
// Use SSE if aligned to 16 bytes / 8 pixels (usually the case.)
if ((w & 7) == 0 && (stride & 7) == 0) {
#ifdef _M_SSE
return CheckAlphaRGBA4444SSE2(pixelData, stride, w, h);
#elif PPSSPP_ARCH(ARM_NEON)
return CheckAlphaRGBA4444NEON(pixelData, stride, w, h);
#endif
}
const u32 *p = pixelData;
const int w2 = (w + 1) / 2;
const int stride2 = (stride + 1) / 2;
for (int y = 0; y < h; ++y) {
u32 bits = 0xF000F000;
for (int i = 0; i < w2; ++i) {
bits &= p[i];
}
if (bits != 0xF000F000) {
// We're done, we hit non-full alpha.
return CHECKALPHA_ANY;
}
p += stride2;
}
return CHECKALPHA_FULL;
}
CheckAlphaResult CheckAlphaRGBA5551Basic(const u32 *pixelData, int stride, int w, int h) {
// Use SSE if aligned to 16 bytes / 8 pixels (usually the case.)
if ((w & 7) == 0 && (stride & 7) == 0) {
#ifdef _M_SSE
return CheckAlphaRGBA5551SSE2(pixelData, stride, w, h);
#elif PPSSPP_ARCH(ARM_NEON)
return CheckAlphaRGBA5551NEON(pixelData, stride, w, h);
#endif
}
const u32 *p = pixelData;
const int w2 = (w + 1) / 2;
const int stride2 = (stride + 1) / 2;
for (int y = 0; y < h; ++y) {
u32 bits = 0x80008000;
for (int i = 0; i < w2; ++i) {
bits &= p[i];
}
if (bits != 0x80008000) {
return CHECKALPHA_ANY;
}
p += stride2;
}
return CHECKALPHA_FULL;
*outMask &= (u32)mask;
}

View file

@ -38,11 +38,11 @@ void DoUnswizzleTex16(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch
u32 StableQuickTexHash(const void *checkp, u32 size);
CheckAlphaResult CheckAlphaRGBA8888Basic(const u32 *pixelData, int stride, int w, int h);
CheckAlphaResult CheckAlphaABGR4444Basic(const u32 *pixelData, int stride, int w, int h);
CheckAlphaResult CheckAlphaRGBA4444Basic(const u32 *pixelData, int stride, int w, int h);
CheckAlphaResult CheckAlphaABGR1555Basic(const u32 *pixelData, int stride, int w, int h);
CheckAlphaResult CheckAlphaRGBA5551Basic(const u32 *pixelData, int stride, int w, int h);
// outMask is an in/out parameter.
void CopyAndSumMask16(u16 *dst, const u16 *src, int width, u32 *outMask);
void CopyAndSumMask32(u32 *dst, const u32 *src, int width, u32 *outMask);
void CheckMask16(const u16 *src, int width, u32 *outMask);
void CheckMask32(const u32 *src, int width, u32 *outMask);
// All these DXT structs are in the reverse order, as compared to PC.
// On PC, alpha comes before color, and interpolants are before the tile data.
@ -98,6 +98,26 @@ inline bool AlphaSumIsFull(u32 alphaSum, u32 fullAlphaMask) {
return fullAlphaMask != 0 && (alphaSum & fullAlphaMask) == fullAlphaMask;
}
inline CheckAlphaResult CheckAlpha16(const u16 *pixelData, int width, u32 fullAlphaMask) {
u32 alphaSum = 0xFFFFFFFF;
CheckMask16(pixelData, width, &alphaSum);
return AlphaSumIsFull(alphaSum, fullAlphaMask) ? CHECKALPHA_FULL : CHECKALPHA_ANY;
}
inline CheckAlphaResult CheckAlpha32(const u32 *pixelData, int width, u32 fullAlphaMask) {
u32 alphaSum = 0xFFFFFFFF;
CheckMask32(pixelData, width, &alphaSum);
return AlphaSumIsFull(alphaSum, fullAlphaMask) ? CHECKALPHA_FULL : CHECKALPHA_ANY;
}
inline CheckAlphaResult CheckAlpha32Rect(const u32 *pixelData, int stride, int width, int height, u32 fullAlphaMask) {
u32 alphaSum = 0xFFFFFFFF;
for (int y = 0; y < height; y++) {
CheckMask32(pixelData + stride * y, width, &alphaSum);
}
return AlphaSumIsFull(alphaSum, fullAlphaMask) ? CHECKALPHA_FULL : CHECKALPHA_ANY;
}
template <typename IndexT, typename ClutT>
inline void DeIndexTexture(/*WRITEONLY*/ ClutT *dest, const IndexT *indexed, int length, const ClutT *clut, u32 *outAlphaSum) {
// Usually, there is no special offset, mask, or shift.

View file

@ -418,8 +418,8 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer,
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, GetClutDestFormatD3D11(clutFormat), clutTotalColors, clutTotalColors, 1);
gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
CheckAlphaResult alphaStatus = CheckAlpha(clutBuf_, GetClutDestFormatD3D11(clutFormat), clutTotalColors);
gstate_c.SetTextureFullAlpha(alphaStatus == CHECKALPHA_FULL);
} else {
gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyTextureFramebuffer");
@ -591,25 +591,18 @@ DXGI_FORMAT TextureCacheD3D11::GetDestFormat(GETextureFormat format, GEPaletteFo
}
}
TexCacheEntry::TexStatus TextureCacheD3D11::CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h) {
CheckAlphaResult res;
CheckAlphaResult TextureCacheD3D11::CheckAlpha(const u32 *pixelData, u32 dstFmt, int w) {
switch (dstFmt) {
case DXGI_FORMAT_B4G4R4A4_UNORM:
res = CheckAlphaRGBA4444Basic(pixelData, stride, w, h);
break;
return CheckAlpha16((const u16 *)pixelData, w, 0xF000);
case DXGI_FORMAT_B5G5R5A1_UNORM:
res = CheckAlphaRGBA5551Basic(pixelData, stride, w, h);
break;
return CheckAlpha16((const u16 *)pixelData, w, 0x8000);
case DXGI_FORMAT_B5G6R5_UNORM:
// Never has any alpha.
res = CHECKALPHA_FULL;
break;
return CHECKALPHA_FULL;
default:
res = CheckAlphaRGBA8888Basic(pixelData, stride, w, h);
break;
return CheckAlpha32((const u32 *)pixelData, w, 0xFF000000);
}
return (TexCacheEntry::TexStatus)res;
}
ReplacedTextureFormat FromD3D11Format(u32 fmt) {

View file

@ -70,7 +70,7 @@ protected:
private:
void LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int maxLevel, int scaleFactor, DXGI_FORMAT dstFmt);
DXGI_FORMAT GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;
static TexCacheEntry::TexStatus CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h);
static CheckAlphaResult CheckAlpha(const u32 *pixelData, u32 dstFmt, int w);
void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override;
void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) override;

View file

@ -375,8 +375,8 @@ void TextureCacheDX9::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, G
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1);
gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
CheckAlphaResult alphaStatus = CheckAlpha(clutBuf_, getClutDestFormat(clutFormat), clutTotalColors);
gstate_c.SetTextureFullAlpha(alphaStatus == CHECKALPHA_FULL);
} else {
framebufferManagerDX9_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
@ -525,25 +525,18 @@ D3DFORMAT TextureCacheDX9::GetDestFormat(GETextureFormat format, GEPaletteFormat
}
}
TexCacheEntry::TexStatus TextureCacheDX9::CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h) {
CheckAlphaResult res;
CheckAlphaResult TextureCacheDX9::CheckAlpha(const u32 *pixelData, u32 dstFmt, int w) {
switch (dstFmt) {
case D3DFMT_A4R4G4B4:
res = CheckAlphaRGBA4444Basic(pixelData, stride, w, h);
break;
return CheckAlpha16((const u16 *)pixelData, w, 0xF000);
case D3DFMT_A1R5G5B5:
res = CheckAlphaRGBA5551Basic(pixelData, stride, w, h);
break;
return CheckAlpha16((const u16 *)pixelData, w, 0x8000);
case D3DFMT_R5G6B5:
// Never has any alpha.
res = CHECKALPHA_FULL;
break;
return CHECKALPHA_FULL;
default:
res = CheckAlphaRGBA8888Basic(pixelData, stride, w, h);
break;
return CheckAlpha32(pixelData, w, 0xFF000000);
}
return (TexCacheEntry::TexStatus)res;
}
ReplacedTextureFormat FromD3D9Format(u32 fmt) {

View file

@ -64,7 +64,7 @@ private:
void LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int maxLevel, int scaleFactor, u32 dstFmt);
D3DFORMAT GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;
static TexCacheEntry::TexStatus CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h);
static CheckAlphaResult CheckAlpha(const u32 *pixelData, u32 dstFmt, int w);
void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override;
void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) override;

View file

@ -373,8 +373,8 @@ void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer,
gstate_c.depalFramebufferFormat = framebuffer->drawnFormat;
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
TexCacheEntry::TexStatus alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1);
gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
CheckAlphaResult alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors);
gstate_c.SetTextureFullAlpha(alphaStatus == CHECKALPHA_FULL);
return;
}
@ -407,8 +407,8 @@ void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer,
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
TexCacheEntry::TexStatus alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1);
gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
CheckAlphaResult alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors);
gstate_c.SetTextureFullAlpha(alphaStatus == CHECKALPHA_FULL);
} else {
framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
@ -614,25 +614,18 @@ Draw::DataFormat TextureCacheGLES::GetDestFormat(GETextureFormat format, GEPalet
}
}
TexCacheEntry::TexStatus TextureCacheGLES::CheckAlpha(const uint8_t *pixelData, Draw::DataFormat dstFmt, int stride, int w, int h) {
CheckAlphaResult res;
CheckAlphaResult TextureCacheGLES::CheckAlpha(const uint8_t *pixelData, Draw::DataFormat dstFmt, int w) {
switch (dstFmt) {
case Draw::DataFormat::R4G4B4A4_UNORM_PACK16:
res = CheckAlphaABGR4444Basic((const uint32_t *)pixelData, stride, w, h);
break;
return CheckAlpha16((const u16 *)pixelData, w, 0x000F);
case Draw::DataFormat::R5G5B5A1_UNORM_PACK16:
res = CheckAlphaABGR1555Basic((const uint32_t *)pixelData, stride, w, h);
break;
return CheckAlpha16((const u16 *)pixelData, w, 0x0001);
case Draw::DataFormat::R5G6B5_UNORM_PACK16:
// Never has any alpha.
res = CHECKALPHA_FULL;
break;
return CHECKALPHA_FULL;
default:
res = CheckAlphaRGBA8888Basic((const uint32_t *)pixelData, stride, w, h);
break;
return CheckAlpha32((const u32 *)pixelData, w, 0xFF000000); // note, the normal order here, unlike the 16-bit formats
}
return (TexCacheEntry::TexStatus)res;
}
void TextureCacheGLES::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int scaleFactor, Draw::DataFormat dstFmt) {

View file

@ -73,7 +73,7 @@ private:
void LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int scaleFactor, Draw::DataFormat dstFmt);
Draw::DataFormat GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;
static TexCacheEntry::TexStatus CheckAlpha(const uint8_t *pixelData, Draw::DataFormat dstFmt, int stride, int w, int h);
static CheckAlphaResult CheckAlpha(const uint8_t *pixelData, Draw::DataFormat dstFmt, int w);
void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override;
void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) override;

View file

@ -428,8 +428,8 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
gstate_c.depalFramebufferFormat = framebuffer->drawnFormat;
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, getClutDestFormatVulkan(clutFormat), clutTotalColors, clutTotalColors, 1);
gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
CheckAlphaResult alphaStatus = CheckAlpha(clutBuf_, getClutDestFormatVulkan(clutFormat), clutTotalColors);
gstate_c.SetTextureFullAlpha(alphaStatus == CHECKALPHA_FULL);
curSampler_ = samplerCache_.GetOrCreateSampler(samplerKey);
if (framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET)) {
imageView_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE0_IMAGEVIEW);
@ -532,8 +532,8 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, getClutDestFormatVulkan(clutFormat), clutTotalColors, clutTotalColors, 1);
gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
CheckAlphaResult alphaStatus = CheckAlpha(clutBuf_, getClutDestFormatVulkan(clutFormat), clutTotalColors);
gstate_c.SetTextureFullAlpha(alphaStatus == CHECKALPHA_FULL);
framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyTextureFramebuffer");
draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT, 0);
@ -936,25 +936,18 @@ VkFormat TextureCacheVulkan::GetDestFormat(GETextureFormat format, GEPaletteForm
}
}
TexCacheEntry::TexStatus TextureCacheVulkan::CheckAlpha(const u32 *pixelData, VkFormat dstFmt, int stride, int w, int h) {
CheckAlphaResult res;
CheckAlphaResult TextureCacheVulkan::CheckAlpha(const u32 *pixelData, VkFormat dstFmt, int w) {
switch (dstFmt) {
case VULKAN_4444_FORMAT:
res = CheckAlphaRGBA4444Basic(pixelData, stride, w, h);
break;
return CheckAlpha16((const u16 *)pixelData, w, 0xF000);
case VULKAN_1555_FORMAT:
res = CheckAlphaRGBA5551Basic(pixelData, stride, w, h);
break;
return CheckAlpha16((const u16 *)pixelData, w, 0x8000);
case VULKAN_565_FORMAT:
// Never has any alpha.
res = CHECKALPHA_FULL;
break;
return CHECKALPHA_FULL;
default:
res = CheckAlphaRGBA8888Basic(pixelData, stride, w, h);
break;
return CheckAlpha32(pixelData, w, 0xFF000000);
}
return (TexCacheEntry::TexStatus)res;
}
void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePtr, int rowPitch, int level, int scaleFactor, VkFormat dstFmt) {

View file

@ -107,7 +107,7 @@ protected:
private:
void LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePtr, int rowPitch, int level, int scaleFactor, VkFormat dstFmt);
VkFormat GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const;
static TexCacheEntry::TexStatus CheckAlpha(const u32 *pixelData, VkFormat dstFmt, int stride, int w, int h);
static CheckAlphaResult CheckAlpha(const u32 *pixelData, VkFormat dstFmt, int w);
void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override;
void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) override;