Protect Unswizzle from bad alignment of the destination. Might help #9134

This commit is contained in:
Henrik Rydgard 2017-02-23 23:03:01 +01:00
parent 98763655e7
commit b1971d266b

View file

@ -251,51 +251,55 @@ void DoSwizzleTex16(const u32 *ysrcp, u8 *texptr, int bxc, int byc, u32 pitch) {
// Unswizzles a texture stored as consecutive 16-byte-wide, 8-row blocks into a
// linear destination image.
//
// texptr: swizzled source. Each block is 8 rows of 16 bytes (128 bytes), and
//         blocks are read back to back in the order they are written out.
// ydestp: linear destination, addressed as 32-bit words.
// bxc:    number of blocks per block row (horizontal block count).
// byc:    number of block rows (vertical block count).
// pitch:  destination row stride, in bytes.
void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch) {
	// ydestp is in 32-bits, so this is convenient.
	const u32 pitchBy32 = pitch >> 2;
#ifdef _M_SSE
	// _mm_load_si128 / _mm_store_si128 require 16-byte aligned addresses.
	// Checking only the destination base is not enough: every row store lands
	// at ydestp + k * pitch, so pitch must be a multiple of 16 as well (which
	// also keeps pitch >> 4 exact), and the loads need an aligned source.
	// Anything else takes the memcpy path below, which has no such requirement.
	const uintptr_t alignBits = (uintptr_t)ydestp | (uintptr_t)texptr | (uintptr_t)pitch;
	if ((alignBits & 0xF) == 0) {
		const __m128i *src = (const __m128i *)texptr;
		// The pitch parameter is in bytes, so shift down for 128-bit.
		// Exact here, because pitch was verified to be a multiple of 16 above.
		const u32 pitchBy128 = pitch >> 4;
		for (int by = 0; by < byc; by++) {
			__m128i *xdest = (__m128i *)ydestp;
			for (int bx = 0; bx < bxc; bx++) {
				__m128i *dest = xdest;
				// Two passes of four rows each cover the 8 rows of one block.
				for (int n = 0; n < 2; n++) {
					__m128i temp1 = _mm_load_si128(src);
					__m128i temp2 = _mm_load_si128(src + 1);
					__m128i temp3 = _mm_load_si128(src + 2);
					__m128i temp4 = _mm_load_si128(src + 3);
					_mm_store_si128(dest, temp1);
					dest += pitchBy128;
					_mm_store_si128(dest, temp2);
					dest += pitchBy128;
					_mm_store_si128(dest, temp3);
					dest += pitchBy128;
					_mm_store_si128(dest, temp4);
					dest += pitchBy128;
					src += 4;
				}
				// Next block starts 16 bytes to the right.
				xdest++;
			}
			// Next block row starts 8 destination rows down.
			ydestp += pitchBy32 * 8;
		}
	} else
#endif
	{
		// Alignment-agnostic path: copy each 16-byte block row with memcpy.
		const u32 *src = (const u32 *)texptr;
		for (int by = 0; by < byc; by++) {
			u32 *xdest = ydestp;
			for (int bx = 0; bx < bxc; bx++) {
				u32 *dest = xdest;
				for (int n = 0; n < 8; n++) {
					memcpy(dest, src, 16);
					dest += pitchBy32;
					src += 4;
				}
				// Next block starts 16 bytes (four 32-bit words) to the right.
				xdest += 4;
			}
			// Next block row starts 8 destination rows down.
			ydestp += pitchBy32 * 8;
		}
	}
}
#ifndef _M_SSE