Merge pull request #17134 from hrydgard/more-replacement-work

Refactor the replacement cache
This commit is contained in:
Henrik Rydgård 2023-03-16 12:37:48 +01:00 committed by GitHub
commit ae8c804328
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 220 additions and 214 deletions

View file

@ -159,7 +159,7 @@ void VulkanPushBuffer::GetDebugString(char *buffer, size_t bufSize) const {
sum += size_ * (buffers_.size() - 1);
sum += offset_;
size_t capacity = size_ * buffers_.size();
snprintf(buffer, bufSize, "Push %s: %s/%s", name_, NiceSizeFormat(capacity).c_str(), NiceSizeFormat(sum).c_str());
snprintf(buffer, bufSize, "Push %s: %s / %s", name_, NiceSizeFormat(sum).c_str(), NiceSizeFormat(capacity).c_str());
}
void VulkanPushBuffer::Map() {

View file

@ -93,11 +93,15 @@ private:
LimitedWaitable *waitable_;
};
// Creates a replacement texture bound to the given VFS backend. The descriptor
// is copied into the object, and its logId is mirrored into logId_.
ReplacedTexture::ReplacedTexture(VFSBackend *vfs, const ReplacementDesc &desc)
	: logId_(desc.logId), vfs_(vfs), desc_(desc) {
}
ReplacedTexture::~ReplacedTexture() {
if (threadWaitable_) {
SetState(ReplacementState::CANCEL_INIT);
std::unique_lock<std::mutex> lock(mutex_);
std::unique_lock<std::mutex> lock(lock_);
threadWaitable_->WaitAndRelease();
threadWaitable_ = nullptr;
}
@ -108,19 +112,36 @@ ReplacedTexture::~ReplacedTexture() {
}
}
// Releases the loaded data of a replacement that has not been used since time `t`
// and moves it back to the POPULATED state so that it has to be reloaded.
// NOTE(review): this span interleaves two revisions of the function
// (PurgeIfOlder and PurgeIfNotUsedSinceTime); read the statements accordingly.
void ReplacedTexture::PurgeIfOlder(double t) {
if (threadWaitable_ && !threadWaitable_->WaitFor(0.0))
void ReplacedTexture::PurgeIfNotUsedSinceTime(double t) {
// Nothing to do unless the texture is currently ACTIVE.
if (State() != ReplacementState::ACTIVE) {
return;
if (lastUsed_ >= t)
return;
if (levelData_ && levelData_->lastUsed < t) {
// We have to lock since multiple textures might reference this same data.
std::lock_guard<std::mutex> guard(levelData_->lock);
levelData_->data.clear();
// This means we have to reload. If we never purge any, there's no need.
SetState(ReplacementState::POPULATED);
}
// If there's some leftover threadWaitable, get rid of it.
if (threadWaitable_) {
// WaitFor(0.0) is a non-blocking poll of the waitable.
if (threadWaitable_->WaitFor(0.0)) {
// NOTE(review): other call sites in this file dispose of the waitable with
// WaitAndRelease(); confirm that a plain delete is equivalent here.
delete threadWaitable_;
threadWaitable_ = nullptr;
// Continue with purging.
} else {
// Try next time.
return;
}
}
// This is the only place except shutdown where a texture can transition
// from ACTIVE to anything else, so we don't actually need to lock here.
// Used at or after the threshold: keep the data.
if (lastUsed_ >= t) {
return;
}
// Drop the loaded levels and reset the format and alpha information.
data_.clear();
levels_.clear();
fmt = Draw::DataFormat::UNDEFINED;
alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
// This means we have to reload. If we never purge any, there's no need.
SetState(ReplacementState::POPULATED);
}
// This can only return true if ACTIVE or NOT_FOUND.
@ -140,9 +161,7 @@ bool ReplacedTexture::IsReady(double budget) {
// Successfully waited! Can get rid of it.
threadWaitable_->WaitAndRelease();
threadWaitable_ = nullptr;
if (levelData_) {
levelData_->lastUsed = now;
}
lastUsed = now;
}
lastUsed_ = now;
return true;
@ -176,43 +195,29 @@ bool ReplacedTexture::IsReady(double budget) {
return false;
}
// Attaches a heap-allocated descriptor to this texture and marks it POPULATED.
void ReplacedTexture::FinishPopulate(ReplacementDesc *desc) {
	// Take over the descriptor, then pull the fields we mirror out of it.
	desc_ = desc;
	levelData_ = desc->cache;
	logId_ = desc->logId;

	// Publish the new state only once every field above is in place.
	SetState(ReplacementState::POPULATED);

	// TODO: What used to be here is now done on the thread task.
}
void ReplacedTexture::Prepare(VFSBackend *vfs) {
this->vfs_ = vfs;
std::unique_lock<std::mutex> lock(mutex_);
_assert_msg_(levelData_ != nullptr, "Level cache not set");
// We must lock around access to levelData_ in case two textures try to load it at once.
std::lock_guard<std::mutex> guard(levelData_->lock);
std::unique_lock<std::mutex> lock(lock_);
fmt = Draw::DataFormat::UNDEFINED;
Draw::DataFormat pixelFormat;
LoadLevelResult result = LoadLevelResult::LOAD_ERROR;
if (desc_->filenames.empty()) {
if (desc_.filenames.empty()) {
result = LoadLevelResult::DONE;
}
for (int i = 0; i < std::min(MAX_REPLACEMENT_MIP_LEVELS, (int)desc_->filenames.size()); ++i) {
for (int i = 0; i < std::min(MAX_REPLACEMENT_MIP_LEVELS, (int)desc_.filenames.size()); ++i) {
if (State() == ReplacementState::CANCEL_INIT) {
break;
}
if (desc_->filenames[i].empty()) {
if (desc_.filenames[i].empty()) {
// Out of valid mip levels. Bail out.
break;
}
VFSFileReference *fileRef = vfs_->GetFile(desc_->filenames[i].c_str());
VFSFileReference *fileRef = vfs_->GetFile(desc_.filenames[i].c_str());
if (!fileRef) {
// If the file doesn't exist, let's just bail immediately here.
// Mark as DONE, not error.
@ -224,7 +229,7 @@ void ReplacedTexture::Prepare(VFSBackend *vfs) {
fmt = Draw::DataFormat::R8G8B8A8_UNORM;
}
result = LoadLevelData(fileRef, desc_->filenames[i], i, &pixelFormat);
result = LoadLevelData(fileRef, desc_.filenames[i], i, &pixelFormat);
if (result == LoadLevelResult::DONE) {
// Loaded all the levels we're gonna get.
fmt = pixelFormat;
@ -246,23 +251,17 @@ void ReplacedTexture::Prepare(VFSBackend *vfs) {
if (levels_.empty()) {
// No replacement found.
std::string name = TextureReplacer::HashName(desc_->cachekey, desc_->hash, 0);
std::string name = TextureReplacer::HashName(desc_.cachekey, desc_.hash, 0);
if (result == LoadLevelResult::LOAD_ERROR) {
WARN_LOG(G3D, "Failed to load replacement texture '%s'", name.c_str());
}
SetState(ReplacementState::NOT_FOUND);
levelData_ = nullptr;
delete desc_;
desc_ = nullptr;
return;
}
levelData_->fmt = fmt;
fmt = fmt;
SetState(ReplacementState::ACTIVE);
delete desc_;
desc_ = nullptr;
if (threadWaitable_)
threadWaitable_->Notify();
}
@ -275,8 +274,8 @@ inline uint32_t RoundUpTo4(uint32_t value) {
ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference *fileRef, const std::string &filename, int mipLevel, Draw::DataFormat *pixelFormat) {
bool good = false;
if (levelData_->data.size() <= mipLevel) {
levelData_->data.resize(mipLevel + 1);
if (data_.size() <= mipLevel) {
data_.resize(mipLevel + 1);
}
ReplacedTextureLevel level;
@ -325,7 +324,7 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
switch (format) {
case 98: // DXGI_FORMAT_BC7_UNORM:
case 99: // DXGI_FORMAT_BC7_UNORM_SRGB:
if (!desc_->formatSupport.bc7) {
if (!desc_.formatSupport.bc7) {
WARN_LOG(G3D, "BC1-3 formats not supported, skipping texture");
good = false;
}
@ -336,7 +335,7 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
good = false;
}
} else {
if (!desc_->formatSupport.bc123) {
if (!desc_.formatSupport.bc123) {
WARN_LOG(G3D, "BC1-3 formats not supported");
good = false;
}
@ -397,15 +396,15 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
// Already populated from cache. TODO: Move this above the first read, and take level.w/h from the cache.
if (!levelData_->data[mipLevel].empty()) {
if (!data_[mipLevel].empty()) {
vfs_->CloseFile(openFile);
*pixelFormat = levelData_->fmt;
*pixelFormat = fmt;
return LoadLevelResult::DONE;
}
// Is this really the right place to do it?
level.w = (level.w * desc_->w) / desc_->newW;
level.h = (level.h * desc_->h) / desc_->newH;
level.w = (level.w * desc_.w) / desc_.newW;
level.h = (level.h * desc_.h) / desc_.newH;
if (good && mipLevel != 0) {
// Check that the mipmap size is correct. Can't load mips of the wrong size.
@ -448,10 +447,10 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
// We only support opaque colors with this compression method.
alphaStatus_ = ReplacedTextureAlpha::FULL;
// Let's pick a suitable compatible format.
if (desc_->formatSupport.bc123) {
if (desc_.formatSupport.bc123) {
transcoderFormat = basist::transcoder_texture_format::cTFBC1;
*pixelFormat = Draw::DataFormat::BC1_RGBA_UNORM_BLOCK;
} else if (desc_->formatSupport.etc2) {
} else if (desc_.formatSupport.etc2) {
transcoderFormat = basist::transcoder_texture_format::cTFETC1_RGB;
*pixelFormat = Draw::DataFormat::ETC2_R8G8B8_UNORM_BLOCK;
} else {
@ -464,10 +463,10 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
// TODO: Try to recover some indication of alpha from the actual data blocks.
alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
// Let's pick a suitable compatible format.
if (desc_->formatSupport.bc7) {
if (desc_.formatSupport.bc7) {
transcoderFormat = basist::transcoder_texture_format::cTFBC7_RGBA;
*pixelFormat = Draw::DataFormat::BC7_UNORM_BLOCK;
} else if (desc_->formatSupport.astc) {
} else if (desc_.formatSupport.astc) {
transcoderFormat = basist::transcoder_texture_format::cTFASTC_4x4_RGBA;
*pixelFormat = Draw::DataFormat::ASTC_4x4_UNORM_BLOCK;
} else {
@ -486,13 +485,13 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
bool bc = Draw::DataFormatIsBlockCompressed(*pixelFormat, &blockSize);
_dbg_assert_(bc || *pixelFormat == Draw::DataFormat::R8G8B8A8_UNORM);
levelData_->data.resize(numMips);
data_.resize(numMips);
basist::ktx2_transcoder_state transcodeState; // Each thread needs one of these.
transcoder.start_transcoding();
for (int i = 0; i < numMips; i++) {
std::vector<uint8_t> &out = levelData_->data[mipLevel + i];
std::vector<uint8_t> &out = data_[mipLevel + i];
basist::ktx2_image_level_info levelInfo;
bool result = transcoder.get_image_level_info(levelInfo, i, 0, 0);
@ -507,7 +506,7 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
outputSize = levelInfo.m_orig_width * levelInfo.m_orig_height;
outputPitch = levelInfo.m_orig_width;
}
levelData_->data[i].resize(dataSizeBytes);
data_[i].resize(dataSizeBytes);
transcoder.transcode_image_level(i, 0, 0, &out[0], (uint32_t)outputSize, transcoderFormat, 0, (uint32_t)outputPitch, level.h, -1, -1, &transcodeState);
level.w = levelInfo.m_orig_width;
@ -535,11 +534,11 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
bool bc = Draw::DataFormatIsBlockCompressed(*pixelFormat, &blockSize);
_dbg_assert_(bc);
levelData_->data.resize(numMips);
data_.resize(numMips);
// A DDS File can contain multiple mipmaps.
for (int i = 0; i < numMips; i++) {
std::vector<uint8_t> &out = levelData_->data[mipLevel + i];
std::vector<uint8_t> &out = data_[mipLevel + i];
int bytesToRead = RoundUpTo4(level.w) * RoundUpTo4(level.h) * blockSize / 16;
out.resize(bytesToRead);
@ -574,7 +573,7 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
int w, h, f;
uint8_t *image;
std::vector<uint8_t> &out = levelData_->data[mipLevel];
std::vector<uint8_t> &out = data_[mipLevel];
// TODO: Zim files can actually hold mipmaps (although no tool has ever been made to create them :P)
if (LoadZIMPtr(&zim[0], fileSize, &w, &h, &f, &image)) {
if (w > level.w || h > level.h) {
@ -633,7 +632,7 @@ ReplacedTexture::LoadLevelResult ReplacedTexture::LoadLevelData(VFSFileReference
}
png.format = PNG_FORMAT_RGBA;
std::vector<uint8_t> &out = levelData_->data[mipLevel];
std::vector<uint8_t> &out = data_[mipLevel];
out.resize(level.w * level.h * 4);
if (!png_image_finish_read(&png, nullptr, &out[0], level.w * 4, nullptr)) {
ERROR_LOG(G3D, "Could not load texture replacement: %s - %s", filename.c_str(), png.message);
@ -672,10 +671,10 @@ bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) {
}
// We probably could avoid this lock, but better to play it safe.
std::lock_guard<std::mutex> guard(levelData_->lock);
std::lock_guard<std::mutex> guard(lock_);
const ReplacedTextureLevel &info = levels_[level];
const std::vector<uint8_t> &data = levelData_->data[level];
const std::vector<uint8_t> &data = data_[level];
if (data.empty()) {
WARN_LOG(G3D, "Level %d is empty", level);
@ -694,7 +693,7 @@ bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) {
if (rowPitch == info.w * 4) {
#ifdef PARALLEL_COPY
ParallelMemcpy(&g_threadManager, out, &data[0], info.w * 4 * info.h);
ParallelMemcpy(&g_threadManager, out, data.data(), info.w * 4 * info.h);
#else
memcpy(out, data.data(), info.w * 4 * info.h);
#endif
@ -703,12 +702,12 @@ bool ReplacedTexture::CopyLevelTo(int level, void *out, int rowPitch) {
const int MIN_LINES_PER_THREAD = 4;
ParallelRangeLoop(&g_threadManager, [&](int l, int h) {
for (int y = l; y < h; ++y) {
memcpy((uint8_t *)out + rowPitch * y, &data[0] + info.w * 4 * y, info.w * 4);
memcpy((uint8_t *)out + rowPitch * y, data.data() + info.w * 4 * y, info.w * 4);
}
}, 0, info.h, MIN_LINES_PER_THREAD);
#else
for (int y = 0; y < info.h; ++y) {
memcpy((uint8_t *)out + rowPitch * y, &data[0] + info.w * 4 * y, info.w * 4);
memcpy((uint8_t *)out + rowPitch * y, data.data() + info.w * 4 * y, info.w * 4);
}
#endif
}

View file

@ -24,7 +24,6 @@
#include "Common/GPU/thin3d.h"
#include "Common/Log.h"
struct ReplacedLevelsCache;
class TextureReplacer;
class LimitedWaitable;
@ -52,16 +51,6 @@ enum class ReplacedImageType {
static const int MAX_REPLACEMENT_MIP_LEVELS = 12; // 12 should be plenty, 8 is the max mip levels supported by the PSP.
// Per-level metadata for a replacement texture.
struct ReplacedTextureLevel {
	// Dimensions of this level; both start out as zero.
	int w{0};
	int h{0};

	// Reference needed to reopen the source file when the level has to be reloaded
	// (unfortunately zip_file_t can't be used for this).
	// TODO: This really belongs on the level in the cache, not in the individual ReplacedTextureLevel objects.
	VFSFileReference *fileRef{nullptr};
};
enum class ReplacementState : uint32_t {
UNINITIALIZED,
POPULATED, // We located the texture files but have not started the thread.
@ -89,23 +78,33 @@ struct ReplacementDesc {
int h;
std::string hashfiles;
Path basePath;
bool foundAlias;
std::vector<std::string> filenames;
std::string logId;
ReplacedLevelsCache *cache;
GPUFormatSupport formatSupport;
};
struct ReplacedLevelsCache {
Draw::DataFormat fmt = Draw::DataFormat::UNDEFINED;
std::mutex lock;
std::vector<std::vector<uint8_t>> data;
double lastUsed = 0.0;
};
class ReplacedTexture;
// These aren't actually all replaced, they can also represent a placeholder for a not-found
// replacement (state_ == NOT_FOUND).
struct ReplacedTexture {
// replacement (texture == nullptr).
// Value stored in the replacement lookup cache. A null `texture` marks a
// placeholder entry for which there is no replacement to use.
struct ReplacedTextureRef {
	// Shortcut to the texture. Defaults to nullptr so that a default-constructed
	// ref is a valid "no replacement" placeholder rather than an indeterminate pointer.
	ReplacedTexture *texture = nullptr;
	std::string hashfiles;  // key into the level cache
};
// Per-level metadata for a replacement texture.
struct ReplacedTextureLevel {
	// Dimensions of this level; both start out as zero.
	int w{0};
	int h{0};

	// Reference needed to reopen the source file when the level has to be reloaded
	// (unfortunately zip_file_t can't be used for this).
	// TODO: This really belongs on the level in the cache, not in the individual ReplacedTextureLevel objects.
	VFSFileReference *fileRef{nullptr};
};
class ReplacedTexture {
public:
ReplacedTexture(VFSBackend *vfs, const ReplacementDesc &desc);
~ReplacedTexture();
inline ReplacementState State() const {
@ -129,7 +128,18 @@ struct ReplacedTexture {
// Returns the size in bytes of the data stored for mip `level`, narrowed to int.
// Debug-asserts that the texture is ACTIVE; `level` itself is not range-checked here.
int GetLevelDataSize(int level) const {
_dbg_assert_(State() == ReplacementState::ACTIVE);
return (int)levelData_->data[level].size();
return (int)data_[level].size();
}
size_t GetTotalDataSize() const {
if (State() != ReplacementState::ACTIVE) {
return 0;
}
size_t sz = 0;
for (auto &data : data_) {
sz += data.size();
}
return sz;
}
int NumLevels() const {
@ -149,7 +159,6 @@ struct ReplacedTexture {
bool IsReady(double budget);
bool CopyLevelTo(int level, void *out, int rowPitch);
void FinishPopulate(ReplacementDesc *desc);
std::string logId_;
private:
@ -161,21 +170,22 @@ private:
void Prepare(VFSBackend *vfs);
LoadLevelResult LoadLevelData(VFSFileReference *fileRef, const std::string &filename, int level, Draw::DataFormat *pixelFormat);
void PurgeIfOlder(double t);
void PurgeIfNotUsedSinceTime(double t);
std::vector<std::vector<uint8_t>> data_;
std::vector<ReplacedTextureLevel> levels_;
ReplacedLevelsCache *levelData_ = nullptr;
ReplacedTextureAlpha alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
double lastUsed_ = 0.0;
LimitedWaitable *threadWaitable_ = nullptr;
std::mutex mutex_;
std::mutex lock_;
Draw::DataFormat fmt = Draw::DataFormat::UNDEFINED; // NOTE: Right now, the only supported format is Draw::DataFormat::R8G8B8A8_UNORM.
ReplacedTextureAlpha alphaStatus_ = ReplacedTextureAlpha::UNKNOWN;
double lastUsed = 0.0;
std::atomic<ReplacementState> state_ = ReplacementState::UNINITIALIZED;
std::atomic<ReplacementState> state_ = ReplacementState::POPULATED;
VFSBackend *vfs_ = nullptr;
ReplacementDesc *desc_ = nullptr;
ReplacementDesc desc_;
friend class TextureReplacer;
friend class ReplacedTextureTask;

View file

@ -1523,9 +1523,8 @@ ReplacedTexture *TextureCacheCommon::FindReplacement(TexCacheEntry *entry, int &
constexpr double MAX_BUDGET_PER_TEX = 0.25 / 60.0;
double replaceStart = time_now_d();
double budget = std::min(MAX_BUDGET_PER_TEX, replacementFrameBudget_ - replacementTimeThisFrame_);
u64 cachekey = replacer_.Enabled() ? entry->CacheKey() : 0;
ReplacedTexture *replaced = replacer_.FindReplacement(cachekey, entry->fullhash, w, h, budget);
ReplacedTexture *replaced = replacer_.FindReplacement(cachekey, entry->fullhash, w, h);
if (!replaced) {
// TODO: Remove the flag here?
// entry->status &= ~TexCacheEntry::STATUS_TO_REPLACE;
@ -1533,6 +1532,7 @@ ReplacedTexture *TextureCacheCommon::FindReplacement(TexCacheEntry *entry, int &
return nullptr;
}
double budget = std::min(MAX_BUDGET_PER_TEX, replacementFrameBudget_ - replacementTimeThisFrame_);
if (replaced->IsReady(budget)) {
if (replaced->State() == ReplacementState::ACTIVE) {
replaced->GetSize(0, &w, &h);
@ -2847,7 +2847,6 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt
replacedInfo.hash = entry->fullhash;
replacedInfo.addr = entry->addr;
replacedInfo.isFinal = (entry->status & TexCacheEntry::STATUS_TO_SCALE) == 0;
replacedInfo.scaleFactor = plan.scaleFactor;
replacedInfo.isVideo = plan.isVideo;
replacedInfo.fmt = Draw::DataFormat::R8G8B8A8_UNORM;
plan.saveTexture = replacer_.WillSave(replacedInfo);
@ -2899,7 +2898,7 @@ void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, i
GETextureFormat tfmt = (GETextureFormat)entry.format;
GEPaletteFormat clutformat = gstate.getClutPaletteFormat();
u32 texaddr = gstate.getTextureAddress(srcLevel);
int bufw = GetTextureBufw(srcLevel, texaddr, tfmt);
const int bufw = GetTextureBufw(srcLevel, texaddr, tfmt);
u32 *pixelData;
int decPitch;
if (plan.scaleFactor > 1) {
@ -2922,19 +2921,20 @@ void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, i
CheckAlphaResult alphaResult = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, srcLevel, bufw, texDecFlags);
entry.SetAlphaStatus(alphaResult, srcLevel);
int scaledW = w, scaledH = h;
if (plan.scaleFactor > 1) {
// Note that this updates w and h!
scaler_.ScaleAlways((u32 *)data, pixelData, w, h, plan.scaleFactor);
scaler_.ScaleAlways((u32 *)data, pixelData, w, h, &scaledW, &scaledH, plan.scaleFactor);
pixelData = (u32 *)data;
decPitch = w * 4;
decPitch = scaledW * sizeof(u32);
if (decPitch != stride) {
// Rearrange in place to match the requested pitch.
// (it can only be larger than w * bpp, and a match is likely.)
// Note! This is bad because it reads the mapped memory! TODO: Look into if DX9 does this right.
for (int y = h - 1; y >= 0; --y) {
memcpy((u8 *)data + stride * y, (u8 *)data + decPitch * y, w * 4);
for (int y = scaledH - 1; y >= 0; --y) {
memcpy((u8 *)data + stride * y, (u8 *)data + decPitch * y, scaledW *4);
}
decPitch = stride;
}
@ -2947,11 +2947,10 @@ void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, i
replacedInfo.addr = entry.addr;
replacedInfo.isVideo = IsVideo(entry.addr);
replacedInfo.isFinal = (entry.status & TexCacheEntry::STATUS_TO_SCALE) == 0;
replacedInfo.scaleFactor = plan.scaleFactor;
replacedInfo.fmt = dstFmt;
// NOTE: Reading the decoded texture here may be very slow, if we just wrote it to write-combined memory.
replacer_.NotifyTextureDecoded(replacedInfo, pixelData, decPitch, srcLevel, w, h);
replacer_.NotifyTextureDecoded(replacedInfo, pixelData, decPitch, srcLevel, w, h, scaledW, scaledH);
}
}
}

View file

@ -69,10 +69,9 @@ TextureReplacer::TextureReplacer(Draw::DrawContext *draw) {
}
// Tears down the replacer: deletes the object stored as the value of every entry
// in the iterated cache map, then deletes the VFS backend.
TextureReplacer::~TextureReplacer() {
for (auto &iter : cache_) {
// NOTE(review): `auto iter` copies each map entry (including its std::string key);
// binding by reference (`auto &iter`) would avoid the copy.
for (auto iter : levelCache_) {
delete iter.second;
}
delete vfs_;
}
@ -179,7 +178,7 @@ bool TextureReplacer::LoadIni() {
// If we have stuff loaded from before, need to update the vfs pointers to avoid
// crash on exit. The actual problem is that we tend to call LoadIni a little too much...
for (auto &repl : cache_) {
for (auto &repl : levelCache_) {
repl.second->vfs_ = vfs_;
}
@ -395,7 +394,7 @@ void TextureReplacer::ParseReduceHashRange(const std::string& key, const std::st
u32 TextureReplacer::ComputeHash(u32 addr, int bufw, int w, int h, GETextureFormat fmt, u16 maxSeenV) {
_dbg_assert_msg_(enabled_, "Replacement not enabled");
if (!LookupHashRange(addr, w, h)) {
if (!LookupHashRange(addr, w, h, &w, &h)) {
// There wasn't any hash range, let's fall back to maxSeenV logic.
if (h == 512 && maxSeenV < 512 && maxSeenV != 0) {
h = (int)maxSeenV;
@ -461,7 +460,7 @@ u32 TextureReplacer::ComputeHash(u32 addr, int bufw, int w, int h, GETextureForm
}
}
ReplacedTexture *TextureReplacer::FindReplacement(u64 cachekey, u32 hash, int w, int h, double budget) {
ReplacedTexture *TextureReplacer::FindReplacement(u64 cachekey, u32 hash, int w, int h) {
// Only actually replace if we're replacing. We might just be saving.
if (!Enabled() || !g_Config.bReplaceTextures) {
return nullptr;
@ -470,74 +469,76 @@ ReplacedTexture *TextureReplacer::FindReplacement(u64 cachekey, u32 hash, int w,
ReplacementCacheKey replacementKey(cachekey, hash);
auto it = cache_.find(replacementKey);
if (it != cache_.end()) {
if (it->second->State() == ReplacementState::UNINITIALIZED && budget > 0.0) {
// We don't do this on a thread, but we only do it while within budget.
PopulateReplacement(it->second, cachekey, hash, w, h);
}
return it->second;
return it->second.texture;
}
// Okay, let's construct the result.
ReplacedTexture *result = new ReplacedTexture();
result->vfs_ = this->vfs_;
if (budget > 0.0) {
_dbg_assert_(result->State() == ReplacementState::UNINITIALIZED);
PopulateReplacement(result, cachekey, hash, w, h);
} else {
// WARN_LOG(G3D, "Postponing preparing texture (%dx%d)", w, h);
}
cache_[replacementKey] = result;
return result;
}
void TextureReplacer::PopulateReplacement(ReplacedTexture *texture, u64 cachekey, u32 hash, int w, int h) {
// We pass this to a thread, so can't keep it on the stack.
ReplacementDesc *desc = new ReplacementDesc();
desc->newW = w;
desc->newH = h;
desc->w = w;
desc->h = h;
desc->cachekey = cachekey;
desc->hash = hash;
desc->basePath = basePath_;
desc->formatSupport = formatSupport_;
LookupHashRange(cachekey >> 32, desc->newW, desc->newH);
ReplacementDesc desc;
desc.newW = w;
desc.newH = h;
desc.w = w;
desc.h = h;
desc.cachekey = cachekey;
desc.hash = hash;
LookupHashRange(cachekey >> 32, w, h, &desc.newW, &desc.newH);
if (ignoreAddress_) {
cachekey = cachekey & 0xFFFFFFFFULL;
}
desc->foundAlias = false;
bool foundAlias = false;
bool ignored = false;
desc->hashfiles = LookupHashFile(cachekey, hash, &desc->foundAlias, &ignored);
std::string hashfiles = LookupHashFile(cachekey, hash, &foundAlias, &ignored);
// Early-out for ignored textures, let's not bother even starting a thread task.
if (ignored) {
// WARN_LOG(G3D, "Not found/ignored: %s (%d, %d)", hashfiles.c_str(), (int)foundReplacement, (int)ignored);
// nothing to do?
texture->SetState(ReplacementState::NOT_FOUND);
return;
// Insert an entry into the cache for faster lookup next time.
ReplacedTextureRef ref{};
cache_.emplace(std::make_pair(replacementKey, ref));
return nullptr;
}
if (!desc->foundAlias) {
if (!foundAlias) {
// We'll just need to generate the names for each level.
// By default, we look for png since that's also what's dumped.
// For other file formats, use the ini to create aliases.
desc->filenames.resize(MAX_REPLACEMENT_MIP_LEVELS);
for (int level = 0; level < desc->filenames.size(); level++) {
desc->filenames[level] = TextureReplacer::HashName(desc->cachekey, desc->hash, level) + ".png";
desc.filenames.resize(MAX_REPLACEMENT_MIP_LEVELS);
for (int level = 0; level < desc.filenames.size(); level++) {
desc.filenames[level] = TextureReplacer::HashName(cachekey, hash, level) + ".png";
}
desc->logId = desc->filenames[0];
desc->hashfiles = desc->filenames[0]; // This is used as the key in the data cache.
desc.logId = desc.filenames[0];
desc.hashfiles = desc.filenames[0]; // The generated filename of the top level is used as the key in the data cache.
} else {
desc->logId = desc->hashfiles;
SplitString(desc->hashfiles, '|', desc->filenames);
desc.logId = hashfiles;
SplitString(hashfiles, '|', desc.filenames);
desc.hashfiles = hashfiles;
}
desc->cache = &levelCache_[desc->hashfiles];
// OK, we might already have a matching texture, we use hashfiles as a key. Look it up in the level cache.
auto iter = levelCache_.find(hashfiles);
if (iter != levelCache_.end()) {
// Insert an entry into the cache for faster lookup next time.
ReplacedTextureRef ref;
ref.hashfiles = hashfiles;
ref.texture = iter->second;
cache_.emplace(std::make_pair(replacementKey, ref));
return iter->second;
}
texture->FinishPopulate(desc);
// Final path - we actually need a new replacement texture, because we haven't seen "hashfiles" before.
desc.basePath = basePath_;
desc.formatSupport = formatSupport_;
ReplacedTexture *texture = new ReplacedTexture(vfs_, desc);
ReplacedTextureRef ref;
ref.hashfiles = hashfiles;
ref.texture = texture;
cache_.emplace(std::make_pair(replacementKey, ref));
// Also, insert the level in the level cache so we can look up by desc_->hashfiles again.
levelCache_.emplace(std::make_pair(hashfiles, texture));
return texture;
}
static bool WriteTextureToPNG(png_imagep image, const Path &filename, int convert_to_8bit, const void *buffer, png_int_32 row_stride, const void *colormap) {
@ -631,7 +632,7 @@ bool TextureReplacer::WillSave(const ReplacedTextureDecodeInfo &replacedInfo) {
return true;
}
void TextureReplacer::NotifyTextureDecoded(const ReplacedTextureDecodeInfo &replacedInfo, const void *data, int pitch, int level, int w, int h) {
void TextureReplacer::NotifyTextureDecoded(const ReplacedTextureDecodeInfo &replacedInfo, const void *data, int pitch, int level, int origW, int origH, int scaledW, int scaledH) {
_assert_msg_(enabled_, "Replacement not enabled");
if (!WillSave(replacedInfo)) {
// Ignore.
@ -663,23 +664,21 @@ void TextureReplacer::NotifyTextureDecoded(const ReplacedTextureDecodeInfo &repl
bool skipIfExists = false;
double now = time_now_d();
if (it != savedCache_.end()) {
// We've already saved this texture. Let's only save if it's bigger (e.g. scaled now.)
// This check isn't backwards, it's just to check if we should *skip* saving, a bit confusing.
if (it->second.levelW[level] >= w && it->second.levelH[level] >= h) {
// If it's been more than 5 seconds, we'll check again. Maybe they deleted.
double age = now - it->second.lastTimeSaved;
if (age < 5.0)
return;
skipIfExists = true;
}
// We've already saved this texture. Ignore it.
// We don't bother re-saving when the scale factor changes at runtime; that would only be confusing.
return;
}
// Width/height of the image to save.
int w = scaledW;
int h = scaledH;
// Only save the hashed portion of the PNG.
int lookupW = w / replacedInfo.scaleFactor;
int lookupH = h / replacedInfo.scaleFactor;
if (LookupHashRange(replacedInfo.addr, lookupW, lookupH)) {
w = lookupW * replacedInfo.scaleFactor;
h = lookupH * replacedInfo.scaleFactor;
int lookupW;
int lookupH;
if (LookupHashRange(replacedInfo.addr, origW, origH, &lookupW, &lookupH)) {
w = lookupW * (scaledW / origW);
h = lookupH * (scaledH / origH);
}
std::vector<u8> saveBuf;
@ -744,15 +743,12 @@ void TextureReplacer::Decimate(ReplacerDecimateMode mode) {
}
const double threshold = time_now_d() - age;
for (auto &item : cache_) {
item.second->PurgeIfOlder(threshold);
// don't actually delete the items here, just clean out the data.
}
size_t totalSize = 0;
for (auto &item : levelCache_) {
std::lock_guard<std::mutex> guard(item.second.lock);
totalSize += item.second.data.size();
std::lock_guard<std::mutex> guard(item.second->lock_);
item.second->PurgeIfNotUsedSinceTime(threshold);
totalSize += item.second->GetTotalDataSize(); // TODO: Make something better.
// don't actually delete the items here, just clean out the data.
}
double totalSizeGB = totalSize / (1024.0 * 1024.0 * 1024.0);
@ -852,17 +848,19 @@ std::string TextureReplacer::HashName(u64 cachekey, u32 hash, int level) {
return hashname;
}
// Looks for a hash-range override registered in hashranges_ for this address and size.
// The lookup key packs addr into the upper 32 bits, w shifted left by 16, and h in the low bits
// (assumes w and h each fit in 16 bits — TODO confirm).
// On a hit, reports the stored width/height and returns true. On a miss, returns false;
// the pointer-output form then passes w/h through to *newW/*newH unchanged.
bool TextureReplacer::LookupHashRange(u32 addr, int &w, int &h) {
bool TextureReplacer::LookupHashRange(u32 addr, int w, int h, int *newW, int *newH) {
const u64 rangeKey = ((u64)addr << 32) | ((u64)w << 16) | h;
auto range = hashranges_.find(rangeKey);
if (range != hashranges_.end()) {
const WidthHeightPair &wh = range->second;
w = wh.first;
h = wh.second;
*newW = wh.first;
*newH = wh.second;
return true;
} else {
// No override: hand the input size back so the outputs are always written.
*newW = w;
*newH = h;
return false;
}
return false;
}
float TextureReplacer::LookupReduceHashRange(int& w, int& h) {

View file

@ -81,7 +81,6 @@ struct ReplacedTextureDecodeInfo {
u32 addr;
bool isVideo;
bool isFinal;
int scaleFactor;
Draw::DataFormat fmt;
};
@ -106,14 +105,14 @@ public:
u32 ComputeHash(u32 addr, int bufw, int w, int h, GETextureFormat fmt, u16 maxSeenV);
// Returns nullptr if not found.
ReplacedTexture *FindReplacement(u64 cachekey, u32 hash, int w, int h, double budget);
ReplacedTexture *FindReplacement(u64 cachekey, u32 hash, int w, int h);
bool FindFiltering(u64 cachekey, u32 hash, TextureFiltering *forceFiltering);
// Check if a NotifyTextureDecoded for this texture is desired (used to avoid reads from write-combined memory.)
bool WillSave(const ReplacedTextureDecodeInfo &replacedInfo);
// Notify that a new texture was decoded. May already be upscaled, saves the data passed.
void NotifyTextureDecoded(const ReplacedTextureDecodeInfo &replacedInfo, const void *data, int pitch, int level, int w, int h);
// Notify that a new texture was decoded. May already be upscaled, saves the data passed.
void NotifyTextureDecoded(const ReplacedTextureDecodeInfo &replacedInfo, const void *data, int pitch, int level, int origW, int origH, int scaledW, int scaledH);
void Decimate(ReplacerDecimateMode mode);
@ -131,10 +130,9 @@ protected:
void ParseHashRange(const std::string &key, const std::string &value);
void ParseFiltering(const std::string &key, const std::string &value);
void ParseReduceHashRange(const std::string& key, const std::string& value);
bool LookupHashRange(u32 addr, int &w, int &h);
bool LookupHashRange(u32 addr, int w, int h, int *newW, int *newH);
float LookupReduceHashRange(int& w, int& h);
std::string LookupHashFile(u64 cachekey, u32 hash, bool *foundAlias, bool *ignored);
void PopulateReplacement(ReplacedTexture *result, u64 cachekey, u32 hash, int w, int h);
bool enabled_ = false;
bool allowVideo_ = false;
@ -157,12 +155,14 @@ protected:
typedef std::pair<int, int> WidthHeightPair;
std::unordered_map<u64, WidthHeightPair> hashranges_;
std::unordered_map<u64, float> reducehashranges_;
std::unordered_map<ReplacementCacheKey, std::string> aliases_;
std::unordered_map<ReplacementCacheKey, TextureFiltering> filtering_;
std::unordered_map<ReplacementCacheKey, ReplacedTexture *> cache_;
std::unordered_map<ReplacementCacheKey, ReplacedTextureRef> cache_;
std::unordered_map<ReplacementCacheKey, SavedTextureCacheData> savedCache_;
// the key is from aliases_. It's a |-separated sequence of texture filenames of the levels of a texture.
std::unordered_map<std::string, ReplacedLevelsCache> levelCache_;
// The key is either taken from aliases_, in which case it's a |-separated sequence of the filenames of a texture's levels,
// or it is the generated filename of the texture's top level.
std::unordered_map<std::string, ReplacedTexture *> levelCache_;
};

View file

@ -605,29 +605,31 @@ bool TextureScalerCommon::IsEmptyOrFlat(const u32 *data, int pixels) const {
return true;
}
// Upscales `src` (width x height) by `factor` into `out` and reports the resulting size.
// Textures that IsEmptyOrFlat() reports as flat are filled with their first pixel directly;
// everything else is forwarded to ScaleInto().
void TextureScalerCommon::ScaleAlways(u32 *out, u32 *src, int &width, int &height, int factor) {
void TextureScalerCommon::ScaleAlways(u32 *out, u32 *src, int width, int height, int *scaledWidth, int *scaledHeight, int factor) {
if (IsEmptyOrFlat(src, width * height)) {
// This means it was a flat texture. Vulkan wants the size up front, so we need to make it happen.
u32 pixel = *src;
width *= factor;
height *= factor;
*scaledWidth = width * factor;
*scaledHeight = height * factor;
// NOTE(review): the product is computed in int before being widened to size_t.
size_t pixelCount = *scaledWidth * *scaledHeight;
// ABCD. If A = D, and AB = CD, then they must all be equal (B = C, etc.)
// All four bytes equal means a byte-wise memset can produce the fill value.
if ((pixel & 0x000000FF) == (pixel >> 24) && (pixel & 0x0000FFFF) == (pixel >> 16)) {
memset(out, pixel & 0xFF, width * height * sizeof(u32));
memset(out, pixel & 0xFF, pixelCount * sizeof(u32));
} else {
// Let's hope this is vectorized.
for (int i = 0; i < width * height; ++i) {
// NOTE(review): `int i` is compared against a size_t here (signed/unsigned mix).
for (int i = 0; i < pixelCount; ++i) {
out[i] = pixel;
}
}
} else {
ScaleInto(out, src, width, height, factor);
ScaleInto(out, src, width, height, scaledWidth, scaledHeight, factor);
}
}
bool TextureScalerCommon::ScaleInto(u32 *outputBuf, u32 *src, int &width, int &height, int factor) {
bool TextureScalerCommon::ScaleInto(u32 *outputBuf, u32 *src, int width, int height, int *scaledWidth, int *scaledHeight, int factor) {
#ifdef SCALING_MEASURE_TIME
double t_start = time_now_d();
#endif
@ -660,21 +662,21 @@ bool TextureScalerCommon::ScaleInto(u32 *outputBuf, u32 *src, int &width, int &h
}
// update values accordingly
width *= factor;
height *= factor;
*scaledWidth = width * factor;
*scaledHeight = height * factor;
#ifdef SCALING_MEASURE_TIME
if (width*height > 64 * 64 * factor*factor) {
if (*scaledWidth* *scaledHeight > 64 * 64 * factor*factor) {
double t = time_now_d() - t_start;
NOTICE_LOG(G3D, "TextureScaler: processed %9d pixels in %6.5lf seconds. (%9.2lf Mpixels/second)",
width*height, t, (width*height) / (t * 1000 * 1000));
*scaledWidth * *scaledHeight, t, (*scaledWidth * *scaledHeight) / (t * 1000 * 1000));
}
#endif
return true;
}
bool TextureScalerCommon::Scale(u32* &data, int &width, int &height, int factor) {
bool TextureScalerCommon::Scale(u32* &data, int width, int height, int *scaledWidth, int *scaledHeight, int factor) {
// prevent processing empty or flat textures (this happens a lot in some games)
// doesn't hurt the standard case, will be very quick for textures with actual texture
if (IsEmptyOrFlat(data, width*height)) {
@ -685,7 +687,7 @@ bool TextureScalerCommon::Scale(u32* &data, int &width, int &height, int factor)
bufOutput.resize(width * height * (factor * factor)); // used to store the upscaled image
u32 *outputBuf = bufOutput.data();
if (ScaleInto(outputBuf, data, width, height, factor)) {
if (ScaleInto(outputBuf, data, width, height, scaledWidth, scaledHeight, factor)) {
data = outputBuf;
return true;
}

View file

@ -30,9 +30,9 @@ public:
TextureScalerCommon();
~TextureScalerCommon();
void ScaleAlways(u32 *out, u32 *src, int &width, int &height, int factor);
bool Scale(u32 *&data, int &width, int &height, int factor);
bool ScaleInto(u32 *out, u32 *src, int &width, int &height, int factor);
void ScaleAlways(u32 *out, u32 *src, int width, int height, int *scaledWidth, int *scaledHeight, int factor);
bool Scale(u32 *&data, int width, int height, int *scaledWidth, int *scaledHeight, int factor);
bool ScaleInto(u32 *out, u32 *src, int width, int height, int *scaledWidth, int *scaledHeight, int factor);
enum { XBRZ = 0, HYBRID = 1, BICUBIC = 2, HYBRID_BICUBIC = 3 };

View file

@ -647,10 +647,8 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
replacedInfo.addr = entry->addr;
replacedInfo.isVideo = IsVideo(entry->addr);
replacedInfo.isFinal = (entry->status & TexCacheEntry::STATUS_TO_SCALE) == 0;
replacedInfo.scaleFactor = plan.scaleFactor;
replacedInfo.fmt = FromVulkanFormat(actualFmt);
replacer_.NotifyTextureDecoded(replacedInfo, data, byteStride, plan.baseLevelSrc + i, w, h);
replacer_.NotifyTextureDecoded(replacedInfo, data, byteStride, plan.baseLevelSrc + i, mipUnscaledWidth, mipUnscaledHeight, w, h);
}
}
}
@ -754,7 +752,7 @@ void TextureCacheVulkan::LoadVulkanTextureLevel(TexCacheEntry &entry, uint8_t *w
u32 fmt = dstFmt;
// CPU scaling reads from the destination buffer so we want cached RAM.
uint8_t *rearrange = (uint8_t *)AllocateAlignedMemory(w * scaleFactor * h * scaleFactor * 4, 16);
scaler_.ScaleAlways((u32 *)rearrange, pixelData, w, h, scaleFactor);
scaler_.ScaleAlways((u32 *)rearrange, pixelData, w, h, &w, &h, scaleFactor);
pixelData = (u32 *)writePtr;
// We always end up at 8888. Other parts assume this.