Mirror of https://github.com/hrydgard/ppsspp.git
Protect Unswizzle from bad alignment of the destination. Might help #9134
commit b1971d266b
parent 98763655e7
1 changed file with 45 additions and 41 deletions
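The change guards the SSE fast path: _mm_store_si128 requires a 16-byte aligned destination and faults otherwise, so when ydestp is not 16-byte aligned the function now takes the plain memcpy loop instead. As a standalone illustration of that guard-then-fallback shape (not PPSSPP code; the copy16_rows helper and its parameters are invented for illustration, and SSE2 is simply assumed available), a minimal sketch might look like this:

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <emmintrin.h>  // SSE2 assumed available for this sketch

// Hypothetical helper: copy `rows` 16-byte rows from src into dst, stepping
// dst forward by pitchBytes per row, mirroring the guard added in this commit.
static void copy16_rows(const uint8_t *src, uint8_t *dst, int rows, uint32_t pitchBytes) {
	// Aligned stores are only legal when the destination (and the stride added
	// to it) keep 16-byte alignment; otherwise fall back to memcpy.
	if (((uintptr_t)dst & 0xF) == 0 && (pitchBytes & 0xF) == 0) {
		const __m128i *s = (const __m128i *)src;
		__m128i *d = (__m128i *)dst;
		for (int n = 0; n < rows; n++) {
			_mm_store_si128(d, _mm_loadu_si128(s + n));
			d += pitchBytes >> 4;
		}
	} else {
		for (int n = 0; n < rows; n++) {
			memcpy(dst + n * pitchBytes, src + n * 16, 16);
		}
	}
}

int main() {
	alignas(16) uint8_t src[16 * 4];
	alignas(16) uint8_t dstbuf[16 * 4 + 4];
	for (int i = 0; i < 16 * 4; i++)
		src[i] = (uint8_t)i;
	// Offset the destination by 4 bytes so it is deliberately NOT 16-byte
	// aligned, which is the situation the commit protects against.
	uint8_t *dst = dstbuf + 4;
	copy16_rows(src, dst, 4, 16);
	printf("dst[0]=%d dst[63]=%d\n", dst[0], dst[63]);
	return 0;
}

In the actual diff below, the same kind of condition gates the whole SSE loop, and the memcpy loop now serves both as the non-SSE build path and as the unaligned-destination fallback.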
@@ -251,51 +251,55 @@ void DoSwizzleTex16(const u32 *ysrcp, u8 *texptr, int bxc, int byc, u32 pitch) {
 void DoUnswizzleTex16Basic(const u8 *texptr, u32 *ydestp, int bxc, int byc, u32 pitch) {
 	// ydestp is in 32-bits, so this is convenient.
 	const u32 pitchBy32 = pitch >> 2;
 
 #ifdef _M_SSE
-	const __m128i *src = (const __m128i *)texptr;
-	// The pitch parameter is in bytes, so shift down for 128-bit.
-	// Note: it's always aligned to 16 bytes, so this is safe.
-	const u32 pitchBy128 = pitch >> 4;
-	for (int by = 0; by < byc; by++) {
-		__m128i *xdest = (__m128i *)ydestp;
-		for (int bx = 0; bx < bxc; bx++) {
-			__m128i *dest = xdest;
-			for (int n = 0; n < 2; n++) {
-				// Textures are always 16-byte aligned so this is fine.
-				__m128i temp1 = _mm_load_si128(src);
-				__m128i temp2 = _mm_load_si128(src + 1);
-				__m128i temp3 = _mm_load_si128(src + 2);
-				__m128i temp4 = _mm_load_si128(src + 3);
-				_mm_store_si128(dest, temp1);
-				dest += pitchBy128;
-				_mm_store_si128(dest, temp2);
-				dest += pitchBy128;
-				_mm_store_si128(dest, temp3);
-				dest += pitchBy128;
-				_mm_store_si128(dest, temp4);
-				dest += pitchBy128;
-				src += 4;
-			}
-			xdest++;
-		}
-		ydestp += pitchBy32 * 8;
-	}
-#else
-	const u32 *src = (const u32 *)texptr;
-	for (int by = 0; by < byc; by++) {
-		u32 *xdest = ydestp;
-		for (int bx = 0; bx < bxc; bx++) {
-			u32 *dest = xdest;
-			for (int n = 0; n < 8; n++) {
-				memcpy(dest, src, 16);
-				dest += pitchBy32;
-				src += 4;
-			}
-			xdest += 4;
-		}
-		ydestp += pitchBy32 * 8;
-	}
-#endif
+	if (((uintptr_t)ydestp & 0xF) == 0) {
+		const __m128i *src = (const __m128i *)texptr;
+		// The pitch parameter is in bytes, so shift down for 128-bit.
+		// Note: it's always aligned to 16 bytes, so this is safe.
+		const u32 pitchBy128 = pitch >> 4;
+		for (int by = 0; by < byc; by++) {
+			__m128i *xdest = (__m128i *)ydestp;
+			for (int bx = 0; bx < bxc; bx++) {
+				__m128i *dest = xdest;
+				for (int n = 0; n < 2; n++) {
+					// Textures are always 16-byte aligned so this is fine.
+					__m128i temp1 = _mm_load_si128(src);
+					__m128i temp2 = _mm_load_si128(src + 1);
+					__m128i temp3 = _mm_load_si128(src + 2);
+					__m128i temp4 = _mm_load_si128(src + 3);
+					_mm_store_si128(dest, temp1);
+					dest += pitchBy128;
+					_mm_store_si128(dest, temp2);
+					dest += pitchBy128;
+					_mm_store_si128(dest, temp3);
+					dest += pitchBy128;
+					_mm_store_si128(dest, temp4);
+					dest += pitchBy128;
+					src += 4;
+				}
+				xdest++;
+			}
+			ydestp += pitchBy32 * 8;
+		}
+	} else
+#endif
+	{
+		const u32 *src = (const u32 *)texptr;
+		for (int by = 0; by < byc; by++) {
+			u32 *xdest = ydestp;
+			for (int bx = 0; bx < bxc; bx++) {
+				u32 *dest = xdest;
+				for (int n = 0; n < 8; n++) {
+					memcpy(dest, src, 16);
+					dest += pitchBy32;
+					src += 4;
+				}
+				xdest += 4;
+			}
+			ydestp += pitchBy32 * 8;
+		}
+	}
 }
 
 #ifndef _M_SSE
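A different way to handle the same problem would be to keep the vector loop and switch to unaligned stores rather than dropping to the memcpy path. The commit does not do that, but for comparison, a rough sketch of that variant (hypothetical helper name, SSE2 assumed) might look like:

#include <cstdint>
#include <emmintrin.h>

// Hypothetical alternative (not what this commit does): tolerate any destination
// alignment by using _mm_storeu_si128 instead of falling back to memcpy.
// Unaligned stores are slower on some older CPUs, which is one reason a plain
// fallback path can be preferable.
static void store_block_unaligned(const __m128i *src, uint8_t *dst, uint32_t pitchBytes) {
	__m128i t0 = _mm_loadu_si128(src + 0);
	__m128i t1 = _mm_loadu_si128(src + 1);
	__m128i t2 = _mm_loadu_si128(src + 2);
	__m128i t3 = _mm_loadu_si128(src + 3);
	_mm_storeu_si128((__m128i *)(dst + 0 * pitchBytes), t0);
	_mm_storeu_si128((__m128i *)(dst + 1 * pitchBytes), t1);
	_mm_storeu_si128((__m128i *)(dst + 2 * pitchBytes), t2);
	_mm_storeu_si128((__m128i *)(dst + 3 * pitchBytes), t3);
}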