Merge pull request #17139 from hrydgard/replacement-padding-support

Texture replacement: Improve padding support
This commit is contained in:
Henrik Rydgård 2023-03-17 14:50:23 +01:00 committed by GitHub
commit cd06b9c98f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 119 additions and 56 deletions

View file

@ -250,7 +250,7 @@ void *AllocateAlignedMemory(size_t size, size_t alignment) {
#endif
#endif
_assert_msg_(ptr != nullptr, "Failed to allocate aligned memory");
_assert_msg_(ptr != nullptr, "Failed to allocate aligned memory of size %llu", size);
return ptr;
}

View file

@ -79,7 +79,6 @@ static bool RealPath(const std::string &currentDirectory, const std::string &inP
size_t inLen = inPath.length();
if (inLen == 0)
{
WARN_LOG(FILESYS, "RealPath: inPath is empty");
outPath = currentDirectory;
return true;
}

View file

@ -141,7 +141,7 @@ void ReplacedTexture::PurgeIfNotUsedSinceTime(double t) {
alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
// This means we have to reload. If we never purge any, there's no need.
SetState(ReplacementState::POPULATED);
SetState(ReplacementState::UNLOADED);
}
// This can only return true if ACTIVE or NOT_FOUND.
@ -165,13 +165,10 @@ bool ReplacedTexture::IsReady(double budget) {
}
lastUsed_ = now;
return true;
case ReplacementState::UNINITIALIZED:
// _dbg_assert_(false);
return false;
case ReplacementState::CANCEL_INIT:
case ReplacementState::PENDING:
return false;
case ReplacementState::POPULATED:
case ReplacementState::UNLOADED:
// We're gonna need to spawn a task.
break;
}
@ -195,6 +192,10 @@ bool ReplacedTexture::IsReady(double budget) {
return false;
}
// Rounds value up to the nearest multiple of 4. Values that are already a
// multiple of 4 are returned unchanged. Arithmetic is modulo 2^32, so inputs
// above 0xFFFFFFFC wrap around to 0.
inline uint32_t RoundUpTo4(uint32_t value) {
	const uint32_t mask = 3;
	// Adding the mask bumps any non-multiple past the next boundary; clearing
	// the low two bits then snaps the result back onto that boundary.
	return (value + mask) & ~mask;
}
void ReplacedTexture::Prepare(VFSBackend *vfs) {
this->vfs_ = vfs;
@ -259,16 +260,26 @@ void ReplacedTexture::Prepare(VFSBackend *vfs) {
return;
}
// Update the level dimensions.
for (auto &level : levels_) {
level.fullW = (level.w * desc_.w) / desc_.newW;
level.fullH = (level.h * desc_.h) / desc_.newH;
int blockSize;
bool bc = Draw::DataFormatIsBlockCompressed(fmt, &blockSize);
if (!bc) {
level.fullDataSize = level.fullW * level.fullH * 4;
} else {
level.fullDataSize = RoundUpTo4(level.fullW) * RoundUpTo4(level.fullH) * blockSize / 16;
}
}
SetState(ReplacementState::ACTIVE);
if (threadWaitable_)
threadWaitable_->Notify();
}
// Rounds value up to the nearest multiple of 4 by adding 3 and clearing the
// two lowest bits. Multiples of 4 are returned unchanged.
inline uint32_t RoundUpTo4(uint32_t value) {
return (value + 3) & ~3;
}
// Returns true if Prepare should keep calling this to load more levels.
ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference *fileRef, const std::string &filename, int mipLevel, Draw::DataFormat *pixelFormat) {
bool good = false;
@ -393,7 +404,6 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
ERROR_LOG(G3D, "Could not load texture replacement info: %s - unsupported format %s", filename.c_str(), magic.c_str());
}
// Already populated from cache. TODO: Move this above the first read, and take level.w/h from the cache.
if (!data_[mipLevel].empty()) {
vfs_->CloseFile(openFile);
@ -401,13 +411,10 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
return LoadLevelResult::DONE;
}
// Is this really the right place to do it?
level.w = (level.w * desc_.w) / desc_.newW;
level.h = (level.h * desc_.h) / desc_.newH;
if (good && mipLevel != 0) {
// Check that the mipmap size is correct. Can't load mips of the wrong size.
if (level.w != (levels_[0].w >> mipLevel) || level.h != (levels_[0].h >> mipLevel)) {
// If loading a low mip directly (through png most likely), check that the mipmap size is correct.
// Can't load mips of the wrong size.
if (level.w != std::max(1, (levels_[0].w >> mipLevel)) || level.h != std::max(1, (levels_[0].h >> mipLevel))) {
WARN_LOG(G3D, "Replacement mipmap invalid: size=%dx%d, expected=%dx%d (level %d)",
level.w, level.h, levels_[0].w >> mipLevel, levels_[0].h >> mipLevel, mipLevel);
good = false;
@ -662,7 +669,7 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
return LoadLevelResult::LOAD_ERROR;
}
bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) {
bool ReplacedTexture::CopyLevelTo(int level, uint8_t *out, size_t outDataSize, int rowPitch) {
_assert_msg_((size_t)level < levels_.size(), "Invalid miplevel");
_assert_msg_(out != nullptr && rowPitch > 0, "Invalid out/pitch");
@ -671,6 +678,13 @@ bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) {
return false;
}
// We pad the images right here during the copy.
// TODO: Add support for the texture cache to scale texture coordinates instead.
// It already supports this for render target textures that aren't powers of 2.
int outW = levels_[level].fullW;
int outH = levels_[level].fullH;
// We probably could avoid this lock, but better to play it safe.
std::lock_guard<std::mutex> guard(lock_);
@ -684,9 +698,15 @@ bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) {
#define PARALLEL_COPY
if (fmt == Draw::DataFormat::R8G8B8A8_UNORM) {
int blockSize;
if (!Draw::DataFormatIsBlockCompressed(fmt, &blockSize)) {
if (fmt != Draw::DataFormat::R8G8B8A8_UNORM) {
ERROR_LOG(G3D, "Unexpected linear data format");
return false;
}
if (rowPitch < info.w * 4) {
ERROR_LOG(G3D, "Replacement rowPitch=%d, but w=%d (level=%d)", rowPitch, info.w * 4, level);
ERROR_LOG(G3D, "Replacement rowPitch=%d, but w=%d (level=%d) (too small)", rowPitch, info.w * 4, level);
return false;
}
@ -702,8 +722,11 @@ bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) {
#ifdef PARALLEL_COPY
const int MIN_LINES_PER_THREAD = 4;
ParallelRangeLoop(&g_threadManager, [&](int l, int h) {
int extraPixels = outW - info.w;
for (int y = l; y < h; ++y) {
memcpy((uint8_t *)out + rowPitch * y, data.data() + info.w * 4 * y, info.w * 4);
// Fill the rest of the line with black.
memset((uint8_t *)out + rowPitch * y + info.w * 4, 0, extraPixels * 4);
}
}, 0, info.h, MIN_LINES_PER_THREAD);
#else
@ -711,14 +734,42 @@ bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) {
memcpy((uint8_t *)out + rowPitch * y, data.data() + info.w * 4 * y, info.w * 4);
}
#endif
// Memset the rest of the padding to avoid leaky edge pixels. Guess we could parallelize this too, but meh.
for (int y = info.h; y < outH; y++) {
uint8_t *dest = (uint8_t *)out + rowPitch * y;
memset(dest, 0, outW * 4);
}
}
} else {
#ifdef PARALLEL_COPY
// TODO: Add sanity checks here for other formats?
ParallelMemcpy(&g_threadManager, out, data.data(), data.size());
#else
memcpy(out, data.data(), data.size());
// Only parallel copy in the simple case for now.
if (info.w == outW && info.h == outH) {
// TODO: Add sanity checks here for other formats?
ParallelMemcpy(&g_threadManager, out, data.data(), data.size());
return true;
}
#endif
// Alright, so careful copying of blocks it is, padding with zero-blocks as needed.
int inBlocksW = (info.w + 3) / 4;
int inBlocksH = (info.h + 3) / 4;
int outBlocksW = (info.fullW + 3) / 4;
int outBlocksH = (info.fullH + 3) / 4;
int paddingBlocksX = outBlocksW - inBlocksW;
// Copy all the known blocks, and zero-fill out the lines.
for (int y = 0; y < inBlocksH; y++) {
const uint8_t *input = data.data() + y * inBlocksW * blockSize;
uint8_t *output = (uint8_t *)out + y * outBlocksW * blockSize;
memcpy(output, input, inBlocksW * blockSize);
memset(output + inBlocksW * blockSize, 0, paddingBlocksX * blockSize);
}
// Vertical zero-padding.
for (int y = inBlocksH; y < outBlocksH; y++) {
uint8_t *output = (uint8_t *)out + y * outBlocksW * blockSize;
memset(output, 0, outBlocksW * blockSize);
}
}
return true;
@ -726,8 +777,7 @@ bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) {
const char *StateString(ReplacementState state) {
switch (state) {
case ReplacementState::UNINITIALIZED: return "UNINITIALIZED";
case ReplacementState::POPULATED: return "PREPARED";
case ReplacementState::UNLOADED: return "PREPARED";
case ReplacementState::PENDING: return "PENDING";
case ReplacementState::NOT_FOUND: return "NOT_FOUND";
case ReplacementState::ACTIVE: return "ACTIVE";

View file

@ -52,8 +52,7 @@ enum class ReplacedImageType {
static const int MAX_REPLACEMENT_MIP_LEVELS = 12; // 12 should be plenty, 8 is the max mip levels supported by the PSP.
enum class ReplacementState : uint32_t {
UNINITIALIZED,
POPULATED, // We located the texture files but have not started the thread.
UNLOADED,
PENDING,
NOT_FOUND, // Also used on error loading the images.
ACTIVE,
@ -94,8 +93,14 @@ struct ReplacedTextureRef {
// Metadata about a given texture level.
struct ReplacedTextureLevel {
// Data dimensions
int w = 0;
int h = 0;
// PSP texture dimensions
int fullW = 0;
int fullH = 0;
int fullDataSize = 0;
// To be able to reload, we need to be able to reopen; unfortunately we can't use zip_file_t.
// TODO: This really belongs on the level in the cache, not in the individual ReplacedTextureLevel objects.
@ -122,13 +127,13 @@ public:
void GetSize(int level, int *w, int *h) const {
_dbg_assert_(State() == ReplacementState::ACTIVE);
_dbg_assert_(level < levels_.size());
*w = levels_[level].w;
*h = levels_[level].h;
*w = levels_[level].fullW;
*h = levels_[level].fullH;
}
int GetLevelDataSize(int level) const {
_dbg_assert_(State() == ReplacementState::ACTIVE);
return (int)data_[level].size();
int GetLevelDataSizeAfterCopy(int level) const {
// Includes padding etc.
return levels_[level].fullDataSize;
}
size_t GetTotalDataSize() const {
@ -157,7 +162,7 @@ public:
}
bool IsReady(double budget);
bool CopyLevelTo(int level, void *out, int rowPitch);
bool CopyLevelTo(int level, uint8_t *out, size_t outDataSize, int rowPitch);
std::string logId_;
@ -182,7 +187,7 @@ private:
ReplacedTextureAlpha alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
double lastUsed = 0.0;
std::atomic<ReplacementState> state_ = ReplacementState::POPULATED;
std::atomic<ReplacementState> state_ = ReplacementState::UNLOADED;
VFSBackend *vfs_ = nullptr;
ReplacementDesc desc_;

View file

@ -1545,9 +1545,8 @@ ReplacedTexture *TextureCacheCommon::FindReplacement(TexCacheEntry *entry, int &
}
switch (replaced->State()) {
case ReplacementState::POPULATED:
case ReplacementState::UNLOADED:
case ReplacementState::PENDING:
case ReplacementState::UNINITIALIZED:
// Make sure we keep polling.
entry->status |= TexCacheEntry::STATUS_TO_REPLACE;
break;
@ -2883,7 +2882,8 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt
return true;
}
void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, int stride, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags) {
// Passing 0 into dataSize will disable checking.
void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, size_t dataSize, int stride, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags) {
int w = gstate.getTextureWidth(srcLevel);
int h = gstate.getTextureHeight(srcLevel);
@ -2892,7 +2892,7 @@ void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, i
if (plan.replaceValid) {
plan.replaced->GetSize(srcLevel, &w, &h);
double replaceStart = time_now_d();
plan.replaced->CopyLevelTo(srcLevel, data, stride);
plan.replaced->CopyLevelTo(srcLevel, data, dataSize, stride);
replacementTimeThisFrame_ += time_now_d() - replaceStart;
} else {
GETextureFormat tfmt = (GETextureFormat)entry.format;

View file

@ -180,6 +180,7 @@ struct TexCacheEntry {
u32 fullhash;
u32 cluthash;
u16 maxSeenV;
ReplacedTexture *replacedTexture;
TexStatus GetHashStatus() {
return TexStatus(status & STATUS_MASK);
@ -385,7 +386,7 @@ protected:
ReplacedTexture *FindReplacement(TexCacheEntry *entry, int &w, int &h, int &d);
// Return value is mapData normally, but could be another buffer allocated with AllocateAlignedMemory.
void LoadTextureLevel(TexCacheEntry &entry, uint8_t *mapData, int mapRowPitch, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags);
void LoadTextureLevel(TexCacheEntry &entry, uint8_t *mapData, size_t dataSize, int mapRowPitch, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags);
template <typename T>
inline const T *GetCurrentClut() {

View file

@ -904,12 +904,14 @@ bool TextureReplacer::GenerateIni(const std::string &gameID, Path &generatedFile
// Let's also write some defaults.
fprintf(f, R"(# This file is optional and describes your textures.
# Some information on syntax available here:
# https://github.com/hrydgard/ppsspp/wiki/Texture-replacement-ini-syntax
# Documentation about the options and syntax is available here:
# https://www.ppsspp.org/docs/reference/texture-replacement
[options]
version = 1
hash = quick
ignoreMipmap = false
ignoreMipmap = false # Set to true to avoid dumping mipmaps. Instead use basisu to generate them, see docs.
reduceHash = false # Usually a good idea to use.
allowVideo = false
[games]
# Used to make it easier to install, and override settings for other regions.
@ -921,8 +923,11 @@ ignoreMipmap = false
# See wiki for more info.
[hashranges]
# See the documentation.
# Example: 08b31020,512,512 = 480,272
[filtering]
# You can enforce specific filtering modes with this. See the docs.
[reducehashranges]
)", gameID.c_str(), INI_FILENAME.c_str());

View file

@ -138,11 +138,12 @@ protected:
bool allowVideo_ = false;
bool ignoreAddress_ = false;
bool reduceHash_ = false;
bool ignoreMipmap_ = false;
float reduceHashSize = 1.0f; // default value with reduceHash to false
float reduceHashGlobalValue = 0.5f; // Global value for textures dump pngs of all sizes, 0.5 by default but can be set in textures.ini
double lastTextureCacheSizeGB_ = 0.0;
bool ignoreMipmap_ = false;
std::string gameID_;
Path basePath_;
Path newTextureDir_;
@ -150,6 +151,7 @@ protected:
VFSBackend *vfs_ = nullptr;
bool vfsIsZip_ = false;
GPUFormatSupport formatSupport_{};
typedef std::pair<int, int> WidthHeightPair;

View file

@ -301,8 +301,8 @@ void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) {
if (plan.replaceValid) {
int blockSize = 0;
if (Draw::DataFormatIsBlockCompressed(plan.replaced->Format(), &blockSize)) {
stride = ((mipWidth + 3) & ~3) * blockSize / 4; // This stride value doesn't quite make sense to me, but it works?
dataSize = plan.replaced->GetLevelDataSize(i);
stride = ((mipWidth + 3) & ~3) * blockSize / 4; // Number of blocks * 4 * Size of a block / 4
dataSize = plan.replaced->GetLevelDataSizeAfterCopy(i);
} else {
int bpp = (int)Draw::DataFormatSizeInBytes(plan.replaced->Format());
stride = std::max(mipWidth * bpp, 16);
@ -338,7 +338,7 @@ void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) {
return;
}
LoadTextureLevel(*entry, data, stride, plan, srcLevel, texFmt, TexDecodeFlags{});
LoadTextureLevel(*entry, data, 0, stride, plan, srcLevel, texFmt, TexDecodeFlags{});
}
int tw;

View file

@ -290,7 +290,7 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) {
}
uint8_t *data = (uint8_t *)rect.pBits;
int stride = rect.Pitch;
LoadTextureLevel(*entry, data, stride, plan, (i == 0) ? plan.baseLevelSrc : i, FromD3D9Format(dstFmt), TexDecodeFlags{});
LoadTextureLevel(*entry, data, 0, stride, plan, (i == 0) ? plan.baseLevelSrc : i, FromD3D9Format(dstFmt), TexDecodeFlags{});
((LPDIRECT3DTEXTURE9)texture)->UnlockRect(dstLevel);
}
} else {
@ -305,7 +305,7 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) {
uint8_t *data = (uint8_t *)box.pBits;
int stride = box.RowPitch;
for (int i = 0; i < plan.depth; i++) {
LoadTextureLevel(*entry, data, stride, plan, (i == 0) ? plan.baseLevelSrc : i, FromD3D9Format(dstFmt), TexDecodeFlags{});
LoadTextureLevel(*entry, data, 0, stride, plan, (i == 0) ? plan.baseLevelSrc : i, FromD3D9Format(dstFmt), TexDecodeFlags{});
data += box.SlicePitch;
}
((LPDIRECT3DVOLUMETEXTURE9)texture)->UnlockBox(0);

View file

@ -300,7 +300,7 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) {
int blockSize = 0;
if (Draw::DataFormatIsBlockCompressed(plan.replaced->Format(), &blockSize)) {
stride = mipWidth * 4;
dataSize = plan.replaced->GetLevelDataSize(i);
dataSize = plan.replaced->GetLevelDataSizeAfterCopy(i);
bc = true;
} else {
int bpp = (int)Draw::DataFormatSizeInBytes(plan.replaced->Format());
@ -325,7 +325,7 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) {
return;
}
LoadTextureLevel(*entry, data, stride, plan, srcLevel, dstFmt, TexDecodeFlags::REVERSE_COLORS);
LoadTextureLevel(*entry, data, dataSize, stride, plan, srcLevel, dstFmt, TexDecodeFlags::REVERSE_COLORS);
// NOTE: TextureImage takes ownership of data, so we don't free it afterwards.
render_->TextureImage(entry->textureName, i, mipWidth, mipHeight, 1, dstFmt, data, GLRAllocType::ALIGNED);
@ -339,12 +339,13 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) {
int stride = bpp * (plan.w * plan.scaleFactor);
int levelStride = stride * (plan.h * plan.scaleFactor);
u8 *data = (u8 *)AllocateAlignedMemory(levelStride * plan.depth, 16);
size_t dataSize = levelStride * plan.depth;
u8 *data = (u8 *)AllocateAlignedMemory(dataSize, 16);
memset(data, 0, levelStride * plan.depth);
u8 *p = data;
for (int i = 0; i < plan.depth; i++) {
LoadTextureLevel(*entry, p, stride, plan, i, dstFmt, TexDecodeFlags::REVERSE_COLORS);
LoadTextureLevel(*entry, p, dataSize, stride, plan, i, dstFmt, TexDecodeFlags::REVERSE_COLORS);
p += levelStride;
}

View file

@ -595,13 +595,13 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
int rowLength = pixelStride;
if (bcFormat) {
// For block compressed formats, we just set the upload size to the data size..
uploadSize = plan.replaced->GetLevelDataSize(plan.baseLevelSrc + i);
uploadSize = plan.replaced->GetLevelDataSizeAfterCopy(plan.baseLevelSrc + i);
rowLength = (mipWidth + 3) & ~3;
}
// Directly load the replaced image.
data = pushBuffer->Allocate(uploadSize, pushAlignment, &texBuf, &bufferOffset);
double replaceStart = time_now_d();
if (!plan.replaced->CopyLevelTo(plan.baseLevelSrc + i, data, byteStride)) { // If plan.replaceValid, this shouldn't fail.
if (!plan.replaced->CopyLevelTo(plan.baseLevelSrc + i, (uint8_t *)data, uploadSize, byteStride)) { // If plan.replaceValid, this shouldn't fail.
WARN_LOG(G3D, "Failed to copy replaced texture level");
// TODO: Fill with some pattern?
}