diff --git a/GPU/Common/ReplacedTexture.cpp b/GPU/Common/ReplacedTexture.cpp index 7083b85048..293d7c1a76 100644 --- a/GPU/Common/ReplacedTexture.cpp +++ b/GPU/Common/ReplacedTexture.cpp @@ -192,6 +192,10 @@ bool ReplacedTexture::IsReady(double budget) { return false; } +inline uint32_t RoundUpTo4(uint32_t value) { + return (value + 3) & ~3; +} + void ReplacedTexture::Prepare(VFSBackend *vfs) { this->vfs_ = vfs; @@ -260,6 +264,14 @@ void ReplacedTexture::Prepare(VFSBackend *vfs) { for (auto &level : levels_) { level.fullW = (level.w * desc_.w) / desc_.newW; level.fullH = (level.h * desc_.h) / desc_.newH; + + int blockSize; + bool bc = Draw::DataFormatIsBlockCompressed(fmt, &blockSize); + if (!bc) { + level.fullDataSize = level.fullW * level.fullH * 4; + } else { + level.fullDataSize = RoundUpTo4(level.fullW) * RoundUpTo4(level.fullH) * blockSize / 16; + } } SetState(ReplacementState::ACTIVE); @@ -268,10 +280,6 @@ void ReplacedTexture::Prepare(VFSBackend *vfs) { threadWaitable_->Notify(); } -inline uint32_t RoundUpTo4(uint32_t value) { - return (value + 3) & ~3; -} - // Returns true if Prepare should keep calling this to load more levels. 
ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference *fileRef, const std::string &filename, int mipLevel, Draw::DataFormat *pixelFormat) { bool good = false; @@ -661,7 +669,7 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference return LoadLevelResult::LOAD_ERROR; } -bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) { +bool ReplacedTexture::CopyLevelTo(int level, uint8_t *out, size_t outDataSize, int rowPitch) { _assert_msg_((size_t)level < levels_.size(), "Invalid miplevel"); _assert_msg_(out != nullptr && rowPitch > 0, "Invalid out/pitch"); @@ -690,7 +698,13 @@ bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) { #define PARALLEL_COPY - if (fmt == Draw::DataFormat::R8G8B8A8_UNORM) { + int blockSize = 0; + if (!Draw::DataFormatIsBlockCompressed(fmt, &blockSize)) { + if (fmt != Draw::DataFormat::R8G8B8A8_UNORM) { + ERROR_LOG(G3D, "Unexpected linear data format"); + return false; + } + if (rowPitch < info.w * 4) { ERROR_LOG(G3D, "Replacement rowPitch=%d, but w=%d (level=%d) (too small)", rowPitch, info.w * 4, level); return false; @@ -715,24 +729,53 @@ bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) { memset((uint8_t *)out + rowPitch * y + info.w * 4, 0, extraPixels * 4); } }, 0, info.h, MIN_LINES_PER_THREAD); - // Memset the rest of the padding. - for (int y = info.h; y < outH; y++) { - uint8_t *dest = (uint8_t *)out + rowPitch * y; - memset(dest, 0, outW * 4); - } #else for (int y = 0; y < info.h; ++y) { memcpy((uint8_t *)out + rowPitch * y, data.data() + info.w * 4 * y, info.w * 4); } #endif + // Memset the rest of the padding to avoid leaky edge pixels. Guess we could parallelize this too, but meh. + for (int y = info.h; y < outH; y++) { + uint8_t *dest = (uint8_t *)out + rowPitch * y; + memset(dest, 0, outW * 4); + } } } else { + // Validate the destination size before any block copy. outDataSize == 0 means the caller disabled checking. + const size_t paddedSize = (size_t)((info.fullW + 3) / 4) * ((info.fullH + 3) / 4) * blockSize; + if (outDataSize != 0 && (outDataSize < paddedSize || outDataSize < data.size())) { + ERROR_LOG(G3D, "Replacement output buffer too small: %d bytes, need %d (level=%d)", (int)outDataSize, (int)paddedSize, level); + return false; + } #ifdef PARALLEL_COPY - // TODO: Add sanity checks here for other formats? 
- ParallelMemcpy(&g_threadManager, out, data.data(), data.size()); -#else - memcpy(out, data.data(), data.size()); + // Only parallel copy in the simple case for now. + if (info.w == outW && info.h == outH) { + // TODO: Add sanity checks here for other formats? + ParallelMemcpy(&g_threadManager, out, data.data(), data.size()); + return true; + } #endif + // Alright, so careful copying of blocks it is, padding with zero-blocks as needed. + int inBlocksW = (info.w + 3) / 4; + int inBlocksH = (info.h + 3) / 4; + int outBlocksW = (info.fullW + 3) / 4; + int outBlocksH = (info.fullH + 3) / 4; + + int paddingBlocksX = outBlocksW - inBlocksW; + + // Copy all the known blocks, and zero-fill out the lines. + for (int y = 0; y < inBlocksH; y++) { + const uint8_t *input = data.data() + y * inBlocksW * blockSize; + uint8_t *output = (uint8_t *)out + y * outBlocksW * blockSize; + memcpy(output, input, inBlocksW * blockSize); + memset(output + inBlocksW * blockSize, 0, paddingBlocksX * blockSize); + } + + // Vertical zero-padding. + for (int y = inBlocksH; y < outBlocksH; y++) { + uint8_t *output = (uint8_t *)out + y * outBlocksW * blockSize; + memset(output, 0, outBlocksW * blockSize); + } } return true; diff --git a/GPU/Common/ReplacedTexture.h b/GPU/Common/ReplacedTexture.h index 507fca4eed..eaeb5ed230 100644 --- a/GPU/Common/ReplacedTexture.h +++ b/GPU/Common/ReplacedTexture.h @@ -100,6 +100,8 @@ struct ReplacedTextureLevel { int fullW = 0; int fullH = 0; + int fullDataSize = 0; + // To be able to reload, we need to be able to reopen, unfortunate we can't use zip_file_t. // TODO: This really belongs on the level in the cache, not in the individual ReplacedTextureLevel objects. VFSFileReference *fileRef = nullptr; @@ -129,9 +131,9 @@ public: *h = levels_[level].fullH; } - int GetLevelDataSize(int level) const { - _dbg_assert_(State() == ReplacementState::ACTIVE); - return (int)data_[level].size(); + int GetLevelDataSizeAfterCopy(int level) const { + // Includes padding etc. 
+ return levels_[level].fullDataSize; } size_t GetTotalDataSize() const { @@ -160,7 +162,7 @@ public: } bool IsReady(double budget); - bool CopyLevelTo(int level, void *out, int rowPitch); + bool CopyLevelTo(int level, uint8_t *out, size_t outDataSize, int rowPitch); std::string logId_; diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index ddb9ee71f1..7c3b0d3b92 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -2882,7 +2882,8 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt return true; } -void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, int stride, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags) { +// dataSize is meant to be the number of bytes available at data; it is forwarded to ReplacedTexture::CopyLevelTo when plan.replaceValid. Passing 0 into dataSize will disable checking. +void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, size_t dataSize, int stride, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags) { int w = gstate.getTextureWidth(srcLevel); int h = gstate.getTextureHeight(srcLevel); @@ -2891,7 +2892,7 @@ void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, i if (plan.replaceValid) { plan.replaced->GetSize(srcLevel, &w, &h); double replaceStart = time_now_d(); - plan.replaced->CopyLevelTo(srcLevel, data, stride); + plan.replaced->CopyLevelTo(srcLevel, data, dataSize, stride); replacementTimeThisFrame_ += time_now_d() - replaceStart; } else { GETextureFormat tfmt = (GETextureFormat)entry.format; diff --git a/GPU/Common/TextureCacheCommon.h b/GPU/Common/TextureCacheCommon.h index f1ee7be387..b70e1ea1a4 100644 --- a/GPU/Common/TextureCacheCommon.h +++ b/GPU/Common/TextureCacheCommon.h @@ -386,7 +386,7 @@ protected: ReplacedTexture *FindReplacement(TexCacheEntry *entry, int &w, int &h, int &d); - // Return value is mapData normally, but could be another buffer allocated with AllocateAlignedMemory. + // Loads one texture level into mapData. dataSize is meant to be the number of bytes available at mapData (0 disables size checking). 
- void LoadTextureLevel(TexCacheEntry &entry, uint8_t *mapData, int mapRowPitch, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags); + void LoadTextureLevel(TexCacheEntry &entry, uint8_t *mapData, size_t dataSize, int mapRowPitch, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags); template inline const T *GetCurrentClut() { diff --git a/GPU/D3D11/TextureCacheD3D11.cpp b/GPU/D3D11/TextureCacheD3D11.cpp index 70f0a4e941..d26d48a5fa 100644 --- a/GPU/D3D11/TextureCacheD3D11.cpp +++ b/GPU/D3D11/TextureCacheD3D11.cpp @@ -301,8 +301,8 @@ void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) { if (plan.replaceValid) { int blockSize = 0; if (Draw::DataFormatIsBlockCompressed(plan.replaced->Format(), &blockSize)) { - stride = ((mipWidth + 3) & ~3) * blockSize / 4; // This stride value doesn't quite make sense to me, but it works? - dataSize = plan.replaced->GetLevelDataSize(i); + stride = ((mipWidth + 3) & ~3) * blockSize / 4; // (blocks per row * 4) * bytes per block / 4 == bytes per row of blocks + dataSize = plan.replaced->GetLevelDataSizeAfterCopy(i); } else { int bpp = (int)Draw::DataFormatSizeInBytes(plan.replaced->Format()); stride = std::max(mipWidth * bpp, 16); @@ -338,7 +338,7 @@ void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) { return; } - LoadTextureLevel(*entry, data, stride, plan, srcLevel, texFmt, TexDecodeFlags{}); + LoadTextureLevel(*entry, data, 0, stride, plan, srcLevel, texFmt, TexDecodeFlags{}); } int tw; diff --git a/GPU/Directx9/TextureCacheDX9.cpp b/GPU/Directx9/TextureCacheDX9.cpp index 55433f5c1b..3de9c870c1 100644 --- a/GPU/Directx9/TextureCacheDX9.cpp +++ b/GPU/Directx9/TextureCacheDX9.cpp @@ -290,7 +290,7 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) { } uint8_t *data = (uint8_t *)rect.pBits; int stride = rect.Pitch; - LoadTextureLevel(*entry, data, stride, plan, (i == 0) ? 
plan.baseLevelSrc : i, FromD3D9Format(dstFmt), TexDecodeFlags{}); + LoadTextureLevel(*entry, data, 0, stride, plan, (i == 0) ? plan.baseLevelSrc : i, FromD3D9Format(dstFmt), TexDecodeFlags{}); ((LPDIRECT3DTEXTURE9)texture)->UnlockRect(dstLevel); } } else { @@ -305,7 +305,7 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) { uint8_t *data = (uint8_t *)box.pBits; int stride = box.RowPitch; for (int i = 0; i < plan.depth; i++) { - LoadTextureLevel(*entry, data, stride, plan, (i == 0) ? plan.baseLevelSrc : i, FromD3D9Format(dstFmt), TexDecodeFlags{}); + LoadTextureLevel(*entry, data, 0, stride, plan, (i == 0) ? plan.baseLevelSrc : i, FromD3D9Format(dstFmt), TexDecodeFlags{}); data += box.SlicePitch; } ((LPDIRECT3DVOLUMETEXTURE9)texture)->UnlockBox(0); diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp index b5d8fdd63e..9445d67421 100644 --- a/GPU/GLES/TextureCacheGLES.cpp +++ b/GPU/GLES/TextureCacheGLES.cpp @@ -300,7 +300,7 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) { int blockSize = 0; if (Draw::DataFormatIsBlockCompressed(plan.replaced->Format(), &blockSize)) { stride = mipWidth * 4; - dataSize = plan.replaced->GetLevelDataSize(i); + dataSize = plan.replaced->GetLevelDataSizeAfterCopy(i); bc = true; } else { int bpp = (int)Draw::DataFormatSizeInBytes(plan.replaced->Format()); @@ -325,7 +325,7 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) { return; } - LoadTextureLevel(*entry, data, stride, plan, srcLevel, dstFmt, TexDecodeFlags::REVERSE_COLORS); + LoadTextureLevel(*entry, data, dataSize, stride, plan, srcLevel, dstFmt, TexDecodeFlags::REVERSE_COLORS); // NOTE: TextureImage takes ownership of data, so we don't free it afterwards. 
render_->TextureImage(entry->textureName, i, mipWidth, mipHeight, 1, dstFmt, data, GLRAllocType::ALIGNED); @@ -339,12 +339,14 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) { int stride = bpp * (plan.w * plan.scaleFactor); int levelStride = stride * (plan.h * plan.scaleFactor); - u8 *data = (u8 *)AllocateAlignedMemory(levelStride * plan.depth, 16); + size_t dataSize = (size_t)levelStride * plan.depth; + u8 *data = (u8 *)AllocateAlignedMemory(dataSize, 16); - memset(data, 0, levelStride * plan.depth); + memset(data, 0, dataSize); u8 *p = data; for (int i = 0; i < plan.depth; i++) { - LoadTextureLevel(*entry, p, stride, plan, i, dstFmt, TexDecodeFlags::REVERSE_COLORS); + // Each slice only owns levelStride bytes starting at p, not the whole allocation. + LoadTextureLevel(*entry, p, levelStride, stride, plan, i, dstFmt, TexDecodeFlags::REVERSE_COLORS); p += levelStride; } diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 61b8033daa..dcbb764a38 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -595,13 +595,13 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { int rowLength = pixelStride; if (bcFormat) { // For block compressed formats, we just set the upload size to the data size.. - uploadSize = plan.replaced->GetLevelDataSize(plan.baseLevelSrc + i); + uploadSize = plan.replaced->GetLevelDataSizeAfterCopy(plan.baseLevelSrc + i); rowLength = (mipWidth + 3) & ~3; } // Directly load the replaced image. data = pushBuffer->Allocate(uploadSize, pushAlignment, &texBuf, &bufferOffset); double replaceStart = time_now_d(); - if (!plan.replaced->CopyLevelTo(plan.baseLevelSrc + i, data, byteStride)) { // If plan.replaceValid, this shouldn't fail. + if (!plan.replaced->CopyLevelTo(plan.baseLevelSrc + i, (uint8_t *)data, uploadSize, byteStride)) { // If plan.replaceValid, this shouldn't fail. WARN_LOG(G3D, "Failed to copy replaced texture level"); // TODO: Fill with some pattern? }