diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index 675aaac0a0..7ff6b3961a 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -420,6 +420,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame vfb->last_frame_used = 0; vfb->last_frame_attached = 0; vfb->last_frame_displayed = 0; + vfb->last_frame_clut = 0; frameLastFramebufUsed_ = gpuStats.numFlips; vfbs_.push_back(vfb); currentRenderVfb_ = vfb; @@ -571,6 +572,13 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, srcBuffer = vfb; srcY = yOffset; srcH = 1; + } else if (yOffset == 0 && yOffset < srcY) { + // Okay, last try - it might be a clut. + if (vfb->usageFlags & FB_USAGE_CLUT) { + srcBuffer = vfb; + srcY = yOffset; + srcH = 1; + } } } } @@ -664,7 +672,13 @@ void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dst // Grand Knights History copies with a mismatching stride but a full line at a time. // Makes it hard to detect the wrong transfers in e.g. God of War. if (width != dstStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) { - match = false; + // However, some other games write cluts to framebuffers. + // Let's catch this and upload. Otherwise reject the match. + match = (vfb->usageFlags & FB_USAGE_CLUT) != 0; + if (match) { + dstWidth = byteStride * height / vfb_bpp; + dstHeight = 1; + } } else { dstWidth = byteStride * height / vfb_bpp; dstHeight = 1; @@ -866,6 +880,7 @@ void FramebufferManagerCommon::UpdateFramebufUsage(VirtualFramebuffer *vfb) { checkFlag(FB_USAGE_DISPLAYED_FRAMEBUFFER, vfb->last_frame_displayed); checkFlag(FB_USAGE_TEXTURE, vfb->last_frame_used); checkFlag(FB_USAGE_RENDERTARGET, vfb->last_frame_render); + checkFlag(FB_USAGE_CLUT, vfb->last_frame_clut); } void FramebufferManagerCommon::ShowScreenResolution() { diff --git a/GPU/Common/FramebufferCommon.h b/GPU/Common/FramebufferCommon.h index 8549e88001..1fc2767c88 100644 --- a/GPU/Common/FramebufferCommon.h +++ b/GPU/Common/FramebufferCommon.h @@ -29,6 +29,7 @@ enum { FB_USAGE_DISPLAYED_FRAMEBUFFER = 1, FB_USAGE_RENDERTARGET = 2, FB_USAGE_TEXTURE = 4, + FB_USAGE_CLUT = 8, }; enum { @@ -55,6 +56,7 @@ struct VirtualFramebuffer { int last_frame_attached; int last_frame_render; int last_frame_displayed; + int last_frame_clut; bool memoryUpdated; bool depthUpdated; diff --git a/GPU/Directx9/TextureCacheDX9.cpp b/GPU/Directx9/TextureCacheDX9.cpp index bbbc9cb1d8..4a0268280b 100644 --- a/GPU/Directx9/TextureCacheDX9.cpp +++ b/GPU/Directx9/TextureCacheDX9.cpp @@ -61,7 +61,7 @@ namespace DX9 { #define TEXCACHE_MIN_PRESSURE 16 * 1024 * 1024 // Total in VRAM #define TEXCACHE_SECOND_MIN_PRESSURE 4 * 1024 * 1024 -TextureCacheDX9::TextureCacheDX9() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), clutMaxBytes_(0), texelsScaledThisFrame_(0) { +TextureCacheDX9::TextureCacheDX9() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), clutMaxBytes_(0), clutRenderAddress_(0), texelsScaledThisFrame_(0) { timesInvalidatedAllThisFrame_ = 0; lastBoundTexture = INVALID_TEX; decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL; @@ -779,10 +779,30 @@ static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, GETextureFormat } void TextureCacheDX9::LoadClut(u32 clutAddr, u32 loadBytes) { + // Clear the uncached bit, etc. to match framebuffers. + clutAddr = clutAddr & 0x3FFFFFFF; + bool foundFramebuffer = false; + + clutRenderAddress_ = 0; + for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { + auto framebuffer = fbCache_[i]; + if ((framebuffer->fb_address | 0x04000000) == clutAddr) { + framebuffer->last_frame_clut = gpuStats.numFlips; + framebuffer->usageFlags |= FB_USAGE_CLUT; + foundFramebuffer = true; + WARN_LOG_REPORT_ONCE(clutrenderdx9, G3D, "Using rendered CLUT for texture decode at %08x (%dx%dx%d)", clutAddr, framebuffer->width, framebuffer->height, framebuffer->colorDepth); + clutRenderAddress_ = framebuffer->fb_address; + } + } + clutTotalBytes_ = loadBytes; if (Memory::IsValidAddress(clutAddr)) { // It's possible for a game to (successfully) access outside valid memory. u32 bytes = Memory::ValidSize(clutAddr, loadBytes); + if (foundFramebuffer && !g_Config.bDisableSlowFramebufEffects) { + gpu->PerformMemoryDownload(clutAddr, bytes); + } + #ifdef _M_SSE int numBlocks = bytes / 16; if (bytes == loadBytes) { @@ -1167,6 +1187,10 @@ void TextureCacheDX9::SetTexture(bool force) { // Check for FBO - slow! if (entry->framebuffer) { if (match) { + if (hasClut && clutRenderAddress_ != 0) { + WARN_LOG_REPORT_ONCE(clutAndTexRender, G3D, "Using rendered texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat()); + } + SetTextureFramebuffer(entry, entry->framebuffer); entry->lastFrame = gpuStats.numFlips; return; @@ -1331,6 +1355,10 @@ void TextureCacheDX9::SetTexture(bool force) { TexCacheEntry entryNew = {0}; cache[cachekey] = entryNew; + if (hasClut && clutRenderAddress_ != 0) { + WARN_LOG_REPORT_ONCE(clutUseRender, G3D, "Using texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat()); + } + entry = &cache[cachekey]; if (g_Config.bTextureBackoffCache) { entry->status = TexCacheEntry::STATUS_HASHING; diff --git a/GPU/Directx9/TextureCacheDX9.h b/GPU/Directx9/TextureCacheDX9.h index a6e7e26802..df99d26930 100644 --- a/GPU/Directx9/TextureCacheDX9.h +++ b/GPU/Directx9/TextureCacheDX9.h @@ -150,6 +150,7 @@ private: // True if the clut is just alpha values in the same order (RGBA4444-bit only.) bool clutAlphaLinear_; u16 clutAlphaLinearColor_; + u32 clutRenderAddress_; LPDIRECT3DTEXTURE9 lastBoundTexture; float maxAnisotropyLevel; diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index d8c6555c0c..eb8a1c5667 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -68,7 +68,7 @@ // Hack! extern int g_iNumVideos; -TextureCache::TextureCache() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), clutMaxBytes_(0), texelsScaledThisFrame_(0) { +TextureCache::TextureCache() : cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), clearCacheNextFrame_(false), lowMemoryMode_(false), clutBuf_(NULL), clutMaxBytes_(0), clutRenderAddress_(0), texelsScaledThisFrame_(0) { timesInvalidatedAllThisFrame_ = 0; lastBoundTexture = -1; decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL; @@ -801,10 +801,30 @@ static inline u32 QuickTexHash(u32 addr, int bufw, int w, int h, GETextureFormat } void TextureCache::LoadClut(u32 clutAddr, u32 loadBytes) { + // Clear the uncached bit, etc. to match framebuffers. + clutAddr = clutAddr & 0x3FFFFFFF; + bool foundFramebuffer = false; + + clutRenderAddress_ = 0; + for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { + auto framebuffer = fbCache_[i]; + if ((framebuffer->fb_address | 0x04000000) == clutAddr) { + framebuffer->last_frame_clut = gpuStats.numFlips; + framebuffer->usageFlags |= FB_USAGE_CLUT; + foundFramebuffer = true; + WARN_LOG_REPORT_ONCE(clutrender, G3D, "Using rendered CLUT for texture decode at %08x (%dx%dx%d)", clutAddr, framebuffer->width, framebuffer->height, framebuffer->colorDepth); + clutRenderAddress_ = framebuffer->fb_address; + } + } + clutTotalBytes_ = loadBytes; if (Memory::IsValidAddress(clutAddr)) { // It's possible for a game to (successfully) access outside valid memory. u32 bytes = Memory::ValidSize(clutAddr, loadBytes); + if (foundFramebuffer && !g_Config.bDisableSlowFramebufEffects) { + gpu->PerformMemoryDownload(clutAddr, bytes); + } + #ifdef _M_SSE int numBlocks = bytes / 16; if (bytes == loadBytes) { @@ -1237,6 +1257,10 @@ void TextureCache::SetTexture(bool force) { // Check for FBO - slow! if (entry->framebuffer) { if (match) { + if (hasClut && clutRenderAddress_ != 0) { + WARN_LOG_REPORT_ONCE(clutAndTexRender, G3D, "Using rendered texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat()); + } + SetTextureFramebuffer(entry, entry->framebuffer); entry->lastFrame = gpuStats.numFlips; return; @@ -1399,6 +1423,10 @@ void TextureCache::SetTexture(bool force) { TexCacheEntry entryNew = {0}; cache[cachekey] = entryNew; + if (hasClut && clutRenderAddress_ != 0) { + WARN_LOG_REPORT_ONCE(clutUseRender, G3D, "Using texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat()); + } + entry = &cache[cachekey]; if (g_Config.bTextureBackoffCache) { entry->status = TexCacheEntry::STATUS_HASHING; diff --git a/GPU/GLES/TextureCache.h b/GPU/GLES/TextureCache.h index 8936c0bc7f..124d6e585c 100644 --- a/GPU/GLES/TextureCache.h +++ b/GPU/GLES/TextureCache.h @@ -152,6 +152,7 @@ private: // True if the clut is just alpha values in the same order (RGBA4444-bit only.) bool clutAlphaLinear_; u16 clutAlphaLinearColor_; + u32 clutRenderAddress_; u32 lastBoundTexture; float maxAnisotropyLevel;