From 391ca8bed3082a109dd750bda23d37ecf0c86314 Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets"
Date: Sun, 12 Apr 2015 22:37:55 -0700
Subject: [PATCH 1/3] Add functions to validate memory ranges.

ValidSize() masks the address with 0x3FFFFFFF before subtracting it from
the end of its region.  The range checks ignore the upper address bits, so
without the mask an address with those bits set wraps the u32 subtraction
and every requested size is reported as valid.
---
 Core/MemMap.h | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/Core/MemMap.h b/Core/MemMap.h
index 7bdc25083e..ab61e5e533 100644
--- a/Core/MemMap.h
+++ b/Core/MemMap.h
@@ -275,18 +275,42 @@ inline void MemcpyUnchecked(const u32 to_address, const u32 from_address, const
 inline bool IsValidAddress(const u32 address) {
 	if ((address & 0x3E000000) == 0x08000000) {
 		return true;
+	} else if ((address & 0x3F800000) == 0x04000000) {
+		return true;
+	} else if ((address & 0xBFFF0000) == 0x00010000) {
+		return true;
+	} else if ((address & 0x3F000000) >= 0x08000000 && (address & 0x3F000000) < 0x08000000 + g_MemorySize) {
+		return true;
+	} else {
+		return false;
+	}
+}
+
+inline u32 ValidSize(const u32 address, const u32 requested_size) {
+	u32 max_size;  // Bytes from address to the end of its region; 0 when address is invalid.
+	if ((address & 0x3E000000) == 0x08000000) {
+		max_size = 0x08000000 + g_MemorySize - (address & 0x3FFFFFFF);  // Mask the bits the check above ignores.
 	} else if ((address & 0x3F800000) == 0x04000000) {
-		return true;
+		max_size = 0x04800000 - (address & 0x3FFFFFFF);
 	} else if ((address & 0xBFFF0000) == 0x00010000) {
-		return true;
+		max_size = 0x00014000 - (address & 0x3FFFFFFF);
 	} else if ((address & 0x3F000000) >= 0x08000000 && (address & 0x3F000000) < 0x08000000 + g_MemorySize) {
-		return true;
+		max_size = 0x08000000 + g_MemorySize - (address & 0x3FFFFFFF);
+	} else {
+		max_size = 0;
 	}
-	else
-		return false;
+
+	if (requested_size > max_size) {
+		return max_size;
+	}
+	return requested_size;
+}
+
+inline bool IsValidRange(const u32 address, const u32 size) {
+	return IsValidAddress(address) && ValidSize(address, size) == size;
 }
 
 };

From 1e8f2c2630178606f6a23c111e1bb7905a70a6d7 Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets"
Date: Sun, 12 Apr 2015 22:38:17 -0700
Subject: [PATCH 2/3] Allow clut load outside valid mem, fill with zero.
And only for the parts outside memory.

The SSE loop copies 32 bytes per iteration, so its block count is
bytes / 32.  The zero fill offset is a byte offset, so the DX9 cache casts
clutBufRaw_ to u8 * the same way the GLES cache does.
---
 GPU/Directx9/TextureCacheDX9.cpp | 36 ++++++++++++++++++++------------
 GPU/GLES/TextureCache.cpp        | 36 ++++++++++++++++++++------------
 GPU/Software/SoftGpu.cpp         | 12 +++++++----
 3 files changed, 54 insertions(+), 30 deletions(-)

diff --git a/GPU/Directx9/TextureCacheDX9.cpp b/GPU/Directx9/TextureCacheDX9.cpp
index 357b160148..cc8639c6be 100644
--- a/GPU/Directx9/TextureCacheDX9.cpp
+++ b/GPU/Directx9/TextureCacheDX9.cpp
@@ -824,25 +824,35 @@ inline bool TextureCacheDX9::TexCacheEntry::Matches(u16 dim2, u8 format2, int ma
 
 void TextureCacheDX9::LoadClut() {
 	u32 clutAddr = gstate.getClutAddress();
+	clutTotalBytes_ = gstate.getClutLoadBytes();
 	if (Memory::IsValidAddress(clutAddr)) {
+		// It's possible for a game to (successfully) access outside valid memory.
+		u32 bytes = Memory::ValidSize(clutAddr, clutTotalBytes_);
 #ifdef _M_SSE
-		int numBlocks = gstate.getClutLoadBlocks();
-		clutTotalBytes_ = numBlocks * 32;
-		const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
-		__m128i *dest = (__m128i *)clutBufRaw_;
-		for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
-			__m128i data1 = _mm_loadu_si128(source);
-			__m128i data2 = _mm_loadu_si128(source + 1);
-			_mm_store_si128(dest, data1);
-			_mm_store_si128(dest + 1, data2);
+		int numBlocks = bytes / 32;  // Each iteration below copies two 16-byte vectors (32 bytes).
+		if (bytes == clutTotalBytes_) {
+			const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
+			__m128i *dest = (__m128i *)clutBufRaw_;
+			for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
+				__m128i data1 = _mm_loadu_si128(source);
+				__m128i data2 = _mm_loadu_si128(source + 1);
+				_mm_store_si128(dest, data1);
+				_mm_store_si128(dest + 1, data2);
+			}
+		} else {
+			Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
+			if (bytes < clutTotalBytes_) {
+				memset((u8 *)clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes);  // bytes is a byte offset.
+			}
 		}
 #else
-		clutTotalBytes_ = gstate.getClutLoadBytes();
-		Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, clutTotalBytes_);
+		Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
+		if (bytes < clutTotalBytes_) {
+			memset((u8 *)clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes);  // bytes is a byte offset.
+		}
 #endif
 	} else {
-		clutTotalBytes_ = gstate.getClutLoadBytes();
-		memset(clutBufRaw_, 0xFF, clutTotalBytes_);
+		memset(clutBufRaw_, 0x00, clutTotalBytes_);
 	}
 	// Reload the clut next time.
 	clutLastFormat_ = 0xFFFFFFFF;
diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp
index 01fafdf2d6..28ad071d47 100644
--- a/GPU/GLES/TextureCache.cpp
+++ b/GPU/GLES/TextureCache.cpp
@@ -951,25 +951,35 @@ inline bool TextureCache::TexCacheEntry::Matches(u16 dim2, u8 format2, int maxLe
 
 void TextureCache::LoadClut() {
 	u32 clutAddr = gstate.getClutAddress();
+	clutTotalBytes_ = gstate.getClutLoadBytes();
 	if (Memory::IsValidAddress(clutAddr)) {
+		// It's possible for a game to (successfully) access outside valid memory.
+		u32 bytes = Memory::ValidSize(clutAddr, clutTotalBytes_);
 #ifdef _M_SSE
-		int numBlocks = gstate.getClutLoadBlocks();
-		clutTotalBytes_ = numBlocks * 32;
-		const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
-		__m128i *dest = (__m128i *)clutBufRaw_;
-		for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
-			__m128i data1 = _mm_loadu_si128(source);
-			__m128i data2 = _mm_loadu_si128(source + 1);
-			_mm_store_si128(dest, data1);
-			_mm_store_si128(dest + 1, data2);
+		int numBlocks = bytes / 32;  // Each iteration below copies two 16-byte vectors (32 bytes).
+		if (bytes == clutTotalBytes_) {
+			const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
+			__m128i *dest = (__m128i *)clutBufRaw_;
+			for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
+				__m128i data1 = _mm_loadu_si128(source);
+				__m128i data2 = _mm_loadu_si128(source + 1);
+				_mm_store_si128(dest, data1);
+				_mm_store_si128(dest + 1, data2);
+			}
+		} else {
+			Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
+			if (bytes < clutTotalBytes_) {
+				memset((u8 *)clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes);
+			}
 		}
 #else
-		clutTotalBytes_ = gstate.getClutLoadBytes();
-		Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, clutTotalBytes_);
+		Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
+		if (bytes < clutTotalBytes_) {
+			memset((u8 *)clutBufRaw_ + bytes, 0x00, clutTotalBytes_ - bytes);
+		}
 #endif
 	} else {
-		clutTotalBytes_ = gstate.getClutLoadBytes();
-		memset(clutBufRaw_, 0xFF, clutTotalBytes_);
+		memset(clutBufRaw_, 0x00, clutTotalBytes_);
 	}
 	// Reload the clut next time.
 	clutLastFormat_ = 0xFFFFFFFF;
diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp
index 332de87010..58fa06dcae 100644
--- a/GPU/Software/SoftGpu.cpp
+++ b/GPU/Software/SoftGpu.cpp
@@ -577,12 +577,16 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff)
 			u32 clutTotalBytes = gstate.getClutLoadBytes();
 
 			if (Memory::IsValidAddress(clutAddr)) {
-				Memory::MemcpyUnchecked(clut, clutAddr, clutTotalBytes);
-				// TODO: Do something to the CLUT with 0?
+				u32 validSize = Memory::ValidSize(clutAddr, clutTotalBytes);
+				Memory::MemcpyUnchecked(clut, clutAddr, validSize);
+				if (validSize < clutTotalBytes) {
+					// Zero out the parts that were outside valid memory.
+					memset((u8 *)clut + validSize, 0x00, clutTotalBytes - validSize);
+				}
 			} else if (clutAddr != 0) {
-				// TODO: Does this make any sense?
+				// Some invalid addresses trigger a crash, others fill with zero. We always fill zero.
 				ERROR_LOG_REPORT_ONCE(badClut, G3D, "Software: Invalid CLUT address, filling with garbage instead of crashing");
-				memset(clut, 0xFF, clutTotalBytes);
+				memset(clut, 0x00, clutTotalBytes);
 			}
 		}
 		break;

From 67662ee23fa14467f815bfa018e222ed5b614fc4 Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets"
Date: Sun, 12 Apr 2015 22:39:49 -0700
Subject: [PATCH 3/3] Force clut address to align to 16.

Hardware seems to ignore the lower bits when loading.
---
 GPU/GPUState.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPU/GPUState.h b/GPU/GPUState.h
index b14a94f9b0..36f71cc6db 100644
--- a/GPU/GPUState.h
+++ b/GPU/GPUState.h
@@ -302,7 +302,7 @@ struct GPUgstate
 	int getTextureEnvColR() const { return texenvcolor&0xFF; }
 	int getTextureEnvColG() const { return (texenvcolor>>8)&0xFF; }
 	int getTextureEnvColB() const { return (texenvcolor>>16)&0xFF; }
-	u32 getClutAddress() const { return (clutaddr & 0x00FFFFFF) | ((clutaddrupper << 8) & 0x0F000000); }
+	u32 getClutAddress() const { return (clutaddr & 0x00FFFFF0) | ((clutaddrupper << 8) & 0x0F000000); }  // Low 4 bits dropped: address is aligned to 16.
 	int getClutLoadBytes() const { return (loadclut & 0x3F) * 32; }
 	int getClutLoadBlocks() const { return (loadclut & 0x3F); }
 	GEPaletteFormat getClutPaletteFormat() const { return static_cast<GEPaletteFormat>(clutformat & 3); }