Merge pull request #7683 from unknownbrackets/clut-align

Fix clut load alignment and access outside valid memory
This commit is contained in:
Henrik Rydgård 2015-04-13 09:34:54 +02:00
commit 1b0a9a48ae
5 changed files with 84 additions and 36 deletions

View file

@ -275,18 +275,42 @@ inline void MemcpyUnchecked(const u32 to_address, const u32 from_address, const
inline bool IsValidAddress(const u32 address) {
if ((address & 0x3E000000) == 0x08000000) {
return true;
} else if ((address & 0x3F800000) == 0x04000000) {
return true;
} else if ((address & 0xBFFF0000) == 0x00010000) {
return true;
} else if ((address & 0x3F000000) >= 0x08000000 && (address & 0x3F000000) < 0x08000000 + g_MemorySize) {
return true;
} else {
return false;
}
}
// Clamps a requested access length to the bytes remaining in the address window
// that contains `address`.
//
// address:        start of the access. The window tests are the same ones used by
//                 IsValidAddress(), so the upper (mirror) bits are tolerated.
// requested_size: number of bytes the caller would like to access.
//
// Returns requested_size when the whole span fits inside the window, otherwise the
// number of bytes between `address` and the end of its window. Returns 0 when
// `address` is in no window, or lies past the end used for its window.
inline u32 ValidSize(const u32 address, const u32 requested_size) {
	// The window tests ignore the top two address bits, so they must be stripped
	// before measuring the distance to the end of the window. Using the raw
	// address (e.g. 0x44000000 or 0x88000000) makes the unsigned subtraction wrap
	// around to a huge value, and the clamp would never take effect.
	const u32 offset = address & 0x3FFFFFFF;

	// Exclusive end of the window `address` belongs to; 0 means "no window".
	u32 window_end;
	if ((address & 0x3E000000) == 0x08000000) {
		window_end = 0x08000000 + g_MemorySize;
	} else if ((address & 0x3F800000) == 0x04000000) {
		window_end = 0x04800000;
	} else if ((address & 0xBFFF0000) == 0x00010000) {
		// The test above admits offsets up to 0x0001FFFF, but only the bytes below
		// 0x00014000 are counted as usable; the guard below covers the remainder.
		window_end = 0x00014000;
	} else if ((address & 0x3F000000) >= 0x08000000 && (address & 0x3F000000) < 0x08000000 + g_MemorySize) {
		window_end = 0x08000000 + g_MemorySize;
	} else {
		window_end = 0;
	}

	// Guard the subtraction: an offset at or beyond the end has no valid bytes left.
	const u32 max_size = window_end > offset ? window_end - offset : 0;
	return requested_size > max_size ? max_size : requested_size;
}
inline bool IsValidRange(const u32 address, const u32 size) {
return IsValidAddress(address) && ValidSize(address, size) == size;
}
};

View file

@ -824,25 +824,35 @@ inline bool TextureCacheDX9::TexCacheEntry::Matches(u16 dim2, u8 format2, int ma
void TextureCacheDX9::LoadClut() {
u32 clutAddr = gstate.getClutAddress();
clutTotalBytes_ = gstate.getClutLoadBytes();
if (Memory::IsValidAddress(clutAddr)) {
// It's possible for a game to (successfully) access outside valid memory.
u32 bytes = Memory::ValidSize(clutAddr, clutTotalBytes_);
#ifdef _M_SSE
int numBlocks = gstate.getClutLoadBlocks();
clutTotalBytes_ = numBlocks * 32;
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
__m128i *dest = (__m128i *)clutBufRaw_;
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
__m128i data1 = _mm_loadu_si128(source);
__m128i data2 = _mm_loadu_si128(source + 1);
_mm_store_si128(dest, data1);
_mm_store_si128(dest + 1, data2);
int numBlocks = bytes / 16;
if (bytes == clutTotalBytes_) {
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
__m128i *dest = (__m128i *)clutBufRaw_;
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
__m128i data1 = _mm_loadu_si128(source);
__m128i data2 = _mm_loadu_si128(source + 1);
_mm_store_si128(dest, data1);
_mm_store_si128(dest + 1, data2);
}
} else {
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < clutTotalBytes_) {
memset(clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes);
}
}
#else
clutTotalBytes_ = gstate.getClutLoadBytes();
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, clutTotalBytes_);
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < clutTotalBytes_) {
memset(clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes);
}
#endif
} else {
clutTotalBytes_ = gstate.getClutLoadBytes();
memset(clutBufRaw_, 0xFF, clutTotalBytes_);
memset(clutBufRaw_, 0x00, clutTotalBytes_);
}
// Reload the clut next time.
clutLastFormat_ = 0xFFFFFFFF;

View file

@ -951,25 +951,35 @@ inline bool TextureCache::TexCacheEntry::Matches(u16 dim2, u8 format2, int maxLe
void TextureCache::LoadClut() {
u32 clutAddr = gstate.getClutAddress();
clutTotalBytes_ = gstate.getClutLoadBytes();
if (Memory::IsValidAddress(clutAddr)) {
// It's possible for a game to (successfully) access outside valid memory.
u32 bytes = Memory::ValidSize(clutAddr, clutTotalBytes_);
#ifdef _M_SSE
int numBlocks = gstate.getClutLoadBlocks();
clutTotalBytes_ = numBlocks * 32;
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
__m128i *dest = (__m128i *)clutBufRaw_;
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
__m128i data1 = _mm_loadu_si128(source);
__m128i data2 = _mm_loadu_si128(source + 1);
_mm_store_si128(dest, data1);
_mm_store_si128(dest + 1, data2);
int numBlocks = bytes / 16;
if (bytes == clutTotalBytes_) {
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
__m128i *dest = (__m128i *)clutBufRaw_;
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
__m128i data1 = _mm_loadu_si128(source);
__m128i data2 = _mm_loadu_si128(source + 1);
_mm_store_si128(dest, data1);
_mm_store_si128(dest + 1, data2);
}
} else {
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < clutTotalBytes_) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes);
}
}
#else
clutTotalBytes_ = gstate.getClutLoadBytes();
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, clutTotalBytes_);
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < clutTotalBytes_) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes);
}
#endif
} else {
clutTotalBytes_ = gstate.getClutLoadBytes();
memset(clutBufRaw_, 0xFF, clutTotalBytes_);
memset(clutBufRaw_, 0x00, clutTotalBytes_);
}
// Reload the clut next time.
clutLastFormat_ = 0xFFFFFFFF;

View file

@ -302,7 +302,7 @@ struct GPUgstate
// Returns bits 0-7 of texenvcolor (the "R" component, going by the accessor name).
int getTextureEnvColR() const {
	const int component = texenvcolor & 0xFF;
	return component;
}
// Returns bits 8-15 of texenvcolor (the "G" component, going by the accessor name).
int getTextureEnvColG() const {
	const int component = (texenvcolor >> 8) & 0xFF;
	return component;
}
// Returns bits 16-23 of texenvcolor (the "B" component, going by the accessor name).
int getTextureEnvColB() const {
	const int component = (texenvcolor >> 16) & 0xFF;
	return component;
}
// Assembles the CLUT source address from two registers:
//   bits  4-23 come from clutaddr (its low four bits are dropped, so the result
//              is always a multiple of 16),
//   bits 24-27 come from bits 16-19 of clutaddrupper.
// Only this single, aligned definition is kept; a second definition of the same
// member using the unaligned mask 0x00FFFFFF would be a redefinition.
u32 getClutAddress() const {
	const u32 lowPart = clutaddr & 0x00FFFFF0;
	const u32 highPart = (clutaddrupper << 8) & 0x0F000000;
	return lowPart | highPart;
}
// Returns the low six bits of loadclut multiplied by 32.
// Presumably a byte count at 32 bytes per block (cf. getClutLoadBlocks) - confirm.
int getClutLoadBytes() const {
	const int blockField = loadclut & 0x3F;
	return blockField * 32;
}
// Returns the low six bits of loadclut (a value in 0..63).
int getClutLoadBlocks() const {
	const int blockField = loadclut & 0x3F;
	return blockField;
}
// Interprets the low two bits of clutformat as a GEPaletteFormat value.
GEPaletteFormat getClutPaletteFormat() const {
	const auto formatBits = clutformat & 3;
	return static_cast<GEPaletteFormat>(formatBits);
}

View file

@ -577,12 +577,16 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff)
u32 clutTotalBytes = gstate.getClutLoadBytes();
if (Memory::IsValidAddress(clutAddr)) {
Memory::MemcpyUnchecked(clut, clutAddr, clutTotalBytes);
// TODO: Do something to the CLUT with 0?
u32 validSize = Memory::ValidSize(clutAddr, clutTotalBytes);
Memory::MemcpyUnchecked(clut, clutAddr, validSize);
if (validSize < clutTotalBytes) {
// Zero out the parts that were outside valid memory.
memset((u8 *)clut + validSize, 0x00, clutTotalBytes - validSize);
}
} else if (clutAddr != 0) {
// TODO: Does this make any sense?
// Some invalid addresses trigger a crash, others fill with zero. We always fill zero.
ERROR_LOG_REPORT_ONCE(badClut, G3D, "Software: Invalid CLUT address, filling with garbage instead of crashing");
memset(clut, 0xFF, clutTotalBytes);
memset(clut, 0x00, clutTotalBytes);
}
}
break;