diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index 5738f930e8..ff43f6d504 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -49,24 +49,12 @@ namespace Draw { class VulkanFBO; struct VirtualFramebuffer { - int last_frame_used; - int last_frame_attached; - int last_frame_render; - int last_frame_displayed; - int last_frame_clut; - int last_frame_failed; - int last_frame_depth_updated; - int last_frame_depth_render; - u32 clutUpdatedBytes; - bool memoryUpdated; - bool firstFrameSaved; - u32 fb_address; u32 z_address; // If 0, it's a "RAM" framebuffer. int fb_stride; int z_stride; - // There's also a top left of the drawing region, but meh... + GEBufferFormat format; // virtual, right now they are all RGBA8888 // width/height: The detected size of the current framebuffer, in original PSP pixels. u16 width; @@ -89,8 +77,6 @@ struct VirtualFramebuffer { u16 newHeight; int lastFrameNewSize; - GEBufferFormat format; // virtual, right now they are all RGBA8888 - // TODO: Handle fbo and colorDepth better. u8 colorDepth; Draw::Framebuffer *fbo; @@ -103,6 +89,18 @@ struct VirtualFramebuffer { bool dirtyAfterDisplay; bool reallyDirtyAfterDisplay; // takes frame skipping into account + + int last_frame_used; + int last_frame_attached; + int last_frame_render; + int last_frame_displayed; + int last_frame_clut; + int last_frame_failed; + int last_frame_depth_updated; + int last_frame_depth_render; + u32 clutUpdatedBytes; + bool memoryUpdated; + bool firstFrameSaved; }; struct FramebufferHeuristicParams { @@ -313,6 +311,10 @@ public: virtual bool GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer); virtual bool GetOutputFramebuffer(GPUDebugBuffer &buffer); + const std::vector &Framebuffers() { + return vfbs_; + } + protected: virtual void PackFramebufferSync_(VirtualFramebuffer *vfb, int x, int y, int w, int h); void SetViewport2D(int x, int y, int w, int h); @@ -354,7 +356,7 @@ protected: void UpdateFramebufUsage(VirtualFramebuffer *vfb); - void SetColorUpdated(VirtualFramebuffer *dstBuffer, int skipDrawReason) { + static void SetColorUpdated(VirtualFramebuffer *dstBuffer, int skipDrawReason) { dstBuffer->memoryUpdated = false; dstBuffer->clutUpdatedBytes = 0; dstBuffer->dirtyAfterDisplay = true; diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 4df49bac7d..69b33818ce 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -99,12 +99,9 @@ inline int dimHeight(u16 dim) { // TODO TextureCacheCommon::TextureCacheCommon(Draw::DrawContext *draw) : draw_(draw), - clearCacheNextFrame_(false), - lowMemoryMode_(false), texelsScaledThisFrame_(0), cacheSizeEstimate_(0), secondCacheSizeEstimate_(0), - nextTexture_(nullptr), clutLastFormat_(0xFFFFFFFF), clutTotalBytes_(0), clutMaxBytes_(0), @@ -134,18 +131,6 @@ TextureCacheCommon::~TextureCacheCommon() { FreeAlignedMemory(clutBufRaw_); } -int TextureCacheCommon::AttachedDrawingHeight() { - if (nextTexture_) { - if (nextTexture_->framebuffer) { - return nextTexture_->framebuffer->height; - } - u16 dim = nextTexture_->dim; - const u8 dimY = dim >> 8; - return 1 << dimY; - } - return 0; -} - // Produces a signed 1.23.8 value. static int TexLog2(float delta) { union FloatBits { @@ -262,10 +247,6 @@ void TextureCacheCommon::UpdateSamplingParams(TexCacheEntry &entry, SamplerCache break; } } - - if (entry.framebuffer) { - WARN_LOG_REPORT_ONCE(wrongFramebufAttach, G3D, "Framebuffer still attached in UpdateSamplingParams()?"); - } } void TextureCacheCommon::UpdateMaxSeenV(TexCacheEntry *entry, bool throughMode) { @@ -314,16 +295,7 @@ void TextureCacheCommon::UpdateMaxSeenV(TexCacheEntry *entry, bool throughMode) } } - -void TextureCacheCommon::SetTexture(bool force) { -#ifdef DEBUG_TEXTURES - if (SetDebugTexture()) { - // A different texture was bound, let's rebind next time. - InvalidateLastTexture(); - return; - } -#endif - +TexCacheEntry *TextureCacheCommon::SetTexture(bool force) { if (force) { InvalidateLastTexture(); } @@ -335,7 +307,7 @@ void TextureCacheCommon::SetTexture(bool force) { if (!Memory::IsValidAddress(texaddr)) { // Bind a null texture and return. Unbind(); - return; + return nullptr; } const u16 dim = gstate.getTextureDimension(level); @@ -386,21 +358,10 @@ void TextureCacheCommon::SetTexture(bool force) { bool match = entry->Matches(dim, format, maxLevel); const char *reason = "different params"; - // Check for FBO - slow! - if (entry->framebuffer) { - if (match) { - if (hasClut && clutRenderAddress_ != 0xFFFFFFFF) { - WARN_LOG_REPORT_ONCE(clutAndTexRender, G3D, "Using rendered texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat()); - } - - SetTextureFramebuffer(entry, entry->framebuffer); - return; - } else { - // Make sure we re-evaluate framebuffers. - DetachFramebuffer(entry, texaddr, entry->framebuffer, (entry->status & TexCacheEntry::STATUS_DEPTH) ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR); - reason = "detached framebuf"; - match = false; - } + // Check for FBO changes. + if (entry->status & TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP) { + // Fall through to the end where we'll delete the entry if there's a framebuffer. + match = false; } bool rehash = entry->GetHashStatus() == TexCacheEntry::STATUS_UNRELIABLE; @@ -463,7 +424,7 @@ void TextureCacheCommon::SetTexture(bool force) { if (match) { // TODO: Mark the entry reliable if it's been safe for long enough? - //got one! + // got one! gstate_c.curTextureWidth = w; gstate_c.curTextureHeight = h; if (rehash) { @@ -479,14 +440,39 @@ void TextureCacheCommon::SetTexture(bool force) { // Might need a rebuild if the hash fails, but that will be set later. nextNeedsRebuild_ = false; VERBOSE_LOG(G3D, "Texture at %08x Found in Cache, applying", texaddr); - return; //Done! + return entry; //Done! } else { // Wasn't a match, we will rebuild. nextChangeReason_ = reason; nextNeedsChange_ = true; + // Fall through to the rebuild case. } - } else { - VERBOSE_LOG(G3D, "No texture in cache, decoding..."); + } + + // No texture found, or changed (depending on entry). + // Check for framebuffers. + + TextureDefinition def{}; + def.addr = texaddr; + def.dim = dim; + def.format = format; + def.bufw = bufw; + + std::vector candidates = GetFramebufferCandidates(def, 0); + if (candidates.size() > 0) { + int index = GetBestCandidateIndex(candidates); + if (index != -1) { + nextTexture_ = nullptr; + nextNeedsRebuild_ = false; + SetTextureFramebuffer(candidates[index]); + return nullptr; + } + } + + // Didn't match a framebuffer, keep going. + + if (!entry) { + VERBOSE_LOG(G3D, "No texture in cache for %08x, decoding...", texaddr); TexCacheEntry *entryNew = new TexCacheEntry{}; cache_[cachekey].reset(entryNew); @@ -539,102 +525,80 @@ void TextureCacheCommon::SetTexture(bool force) { gstate_c.curTextureWidth = w; gstate_c.curTextureHeight = h; - // Before we go reading the texture from memory, let's check for render-to-texture. - // We must do this early so we have the right w/h. - entry->framebuffer = nullptr; - if (Memory::IsDepthTexVRAMAddress(texaddr)) { - entry->status |= TexCacheEntry::STATUS_DEPTH; - } - - AttachFramebufferToEntry(entry, 0); - - // If we ended up with a framebuffer, attach it - no texture decoding needed. - if (entry->framebuffer) { - SetTextureFramebuffer(entry, entry->framebuffer); - } - nextTexture_ = entry; - nextNeedsRehash_ = entry->framebuffer == nullptr; + nextNeedsRehash_ = false; // We still need to rebuild, to allocate a texture. But we'll bail early. nextNeedsRebuild_ = true; + return entry; } -bool TextureCacheCommon::AttachFramebufferToEntry(TexCacheEntry *entry, u32 texAddrOffset) { +std::vector TextureCacheCommon::GetFramebufferCandidates(const TextureDefinition &entry, u32 texAddrOffset) { + gpuStats.numFramebufferEvaluations++; bool success = false; - bool anyIgnores = false; std::vector candidates; - std::vector detaches; - FramebufferNotificationChannel channel = (entry->status & TexCacheEntry::STATUS_DEPTH) ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR; - for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { - auto framebuffer = fbCache_[i]; - uint32_t fb_addr = channel == NOTIFY_FB_DEPTH ? framebuffer->z_address : framebuffer->fb_address; - FramebufferMatchInfo match = MatchFramebuffer(entry, fb_addr, framebuffer, texAddrOffset, channel); - if (match.match == FramebufferMatch::IGNORE) { - anyIgnores = true; - } else if (match.match == FramebufferMatch::NO_MATCH) { - detaches.push_back(AttachCandidate{ match, entry, framebuffer, channel }); - } else { + FramebufferNotificationChannel channel = Memory::IsDepthTexVRAMAddress(entry.addr) ? FramebufferNotificationChannel::NOTIFY_FB_DEPTH : FramebufferNotificationChannel::NOTIFY_FB_COLOR; + + auto framebuffers = framebufferManager_->Framebuffers(); + + for (size_t i = 0, n = framebuffers.size(); i < n; ++i) { + auto framebuffer = framebuffers[i]; + FramebufferMatchInfo match = MatchFramebuffer(entry, framebuffer, texAddrOffset, channel); + switch (match.match) { + case FramebufferMatch::VALID: candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel }); + break; + default: + break; } } - // If this is set, we want to defer the decision, apparently. - if (!anyIgnores) { - // If not set, always detach. They may affect inexact matches. - for (AttachCandidate &candidate : detaches) { - DetachFramebuffer(entry, entry->addr, candidate.fb, channel); - } - } - - if (!candidates.size()) { - // No candidates at all. - return false; - } - if (candidates.size() > 1) { bool depth = channel == FramebufferNotificationChannel::NOTIFY_FB_DEPTH; - WARN_LOG_REPORT_ONCE(multifbcandidate, G3D, "AttachFramebufferToEntry(%s): Multiple (%d) candidate framebuffers. texaddr: %08x offset: %d (%dx%d stride %d, %s)", - depth ? "DEPTH" : "COLOR", (int)candidates.size(), entry->addr, texAddrOffset, dimWidth(entry->dim), dimHeight(entry->dim), entry->bufw, GeTextureFormatToString((GETextureFormat)entry->format)); + WARN_LOG_REPORT_ONCE(multifbcandidate, G3D, "GetFramebufferCandidates(%s): Multiple (%d) candidate framebuffers. texaddr: %08x offset: %d (%dx%d stride %d, %s)", + depth ? "DEPTH" : "COLOR", (int)candidates.size(), entry.addr, texAddrOffset, dimWidth(entry.dim), dimHeight(entry.dim), entry.bufw, GeTextureFormatToString(entry.format)); } - return AttachBestCandidate(candidates); + return candidates; } -bool TextureCacheCommon::AttachBestCandidate(const std::vector &candidates) { +int TextureCacheCommon::GetBestCandidateIndex(const std::vector &candidates) { _dbg_assert_(!candidates.empty()); if (candidates.size() == 1) { - VirtualFramebuffer *framebuffer = candidates[0].fb; - return ApplyFramebufferMatch(candidates[0].match, candidates[0].entry, framebuffer->fb_address, framebuffer, candidates[0].channel); + return 0; } // OK, multiple possible candidates. Will need to figure out which one is the most relevant. int bestRelevancy = -1; int bestIndex = -1; + // TODO: Instead of scores, we probably want to use std::min_element to pick the top element, using + // a comparison function. for (int i = 0; i < (int)candidates.size(); i++) { const AttachCandidate &candidate = candidates[i]; int relevancy = 0; switch (candidate.match.match) { case FramebufferMatch::VALID: - case FramebufferMatch::VALID_DEPAL: relevancy += 1000; break; - case FramebufferMatch::INEXACT: - relevancy += 100; - break; } // Bonus point for matching stride. - if (candidate.channel == NOTIFY_FB_COLOR && candidate.fb->fb_stride == candidate.entry->bufw) { - relevancy += 10; + if (candidate.channel == NOTIFY_FB_COLOR && candidate.fb->fb_stride == candidate.entry.bufw) { + relevancy += 100; } // Bonus points for no offset. if (candidate.match.xOffset == 0 && candidate.match.yOffset == 0) { - relevancy += 9; + relevancy += 10; + } + + if (candidate.channel == NOTIFY_FB_COLOR && candidate.fb->last_frame_render == gpuStats.numFlips) { + relevancy += 5; + } else if (candidate.channel == NOTIFY_FB_DEPTH && candidate.fb->last_frame_depth_render == gpuStats.numFlips) { + relevancy += 5; } if (relevancy > bestRelevancy) { @@ -643,8 +607,7 @@ bool TextureCacheCommon::AttachBestCandidate(const std::vector } } - VirtualFramebuffer *framebuffer = candidates[bestIndex].fb; - return ApplyFramebufferMatch(candidates[bestIndex].match, candidates[bestIndex].entry, framebuffer->fb_address, framebuffer, candidates[bestIndex].channel); + return bestIndex; } // Removes old textures. @@ -765,25 +728,14 @@ void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *fram // Try to match the new framebuffer to existing textures. // Backwards from the "usual" texturing case so can't share a utility function. - // Ensure it's in the framebuffer cache. - if (std::find(fbCache_.begin(), fbCache_.end(), framebuffer) == fbCache_.end()) { - // TODO: This is kind of silly. We should probably simply share this list of framebuffers - // with the framebuffer manager. - WARN_LOG(G3D, "TextureCache got info about new framebuffer, at %08x", address); - fbCache_.push_back(framebuffer); - } - std::vector candidates; // TODO: Rework this to not try to "apply" all matches, only the best one. if (channel == FramebufferNotificationChannel::NOTIFY_FB_COLOR) { // Color - no need to look in the mirrors. for (auto it = cache_.lower_bound(cacheKey), end = cache_.upper_bound(cacheKeyEnd); it != end; ++it) { - TexCacheEntry *entry = it->second.get(); - FramebufferMatchInfo match = MatchFramebuffer(entry, addr, framebuffer, 0, channel); - if (match.match != FramebufferMatch::IGNORE && match.match != FramebufferMatch::NO_MATCH) { - candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel }); - } + // Just mark them all dirty somehow. + it->second->status |= TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP; } } else { // Depth. Just look in the mirrors. @@ -791,137 +743,25 @@ void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *fram const u64 mirrorlessKey = it->first & ~0x0060000000000000ULL; // Let's still make sure it's in the cache range. if (mirrorlessKey >= cacheKey && mirrorlessKey <= cacheKeyEnd) { - TexCacheEntry *entry = it->second.get(); - FramebufferMatchInfo match = MatchFramebuffer(entry, addr, framebuffer, 0, channel); - if (match.match != FramebufferMatch::IGNORE && match.match != FramebufferMatch::NO_MATCH) { - candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel }); - } + // Just mark them all dirty somehow. + it->second->status |= TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP; } } } - - if (!candidates.empty()) { - // There can actually be multiple ones to update here! This can be the case where two textures point to different framebuffers that share depth buffers. - // So we have no choice but to run all the matches. - for (int i = 0; i < (int)candidates.size(); i++) { - ApplyFramebufferMatch(candidates[i].match, candidates[i].entry, framebuffer->fb_address, framebuffer, candidates[i].channel); - } - } break; } - case NOTIFY_FB_DESTROYED: - fbCache_.erase(std::remove(fbCache_.begin(), fbCache_.end(), framebuffer), fbCache_.end()); - - // We may have an offset texture attached. So we use fbTexInfo as a guide. - // We're not likely to have many attached framebuffers. - for (auto it = fbTexInfo_.begin(); it != fbTexInfo_.end(); ) { - u64 cachekey = it->first; - // We might erase, so move to the next one already (which won't become invalid.) - ++it; - - DetachFramebuffer(cache_[cachekey].get(), addr, framebuffer, channel); - } - break; } } -void TextureCacheCommon::AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const FramebufferMatchInfo &fbInfo, FramebufferNotificationChannel channel) { - _dbg_assert_((fbInfo.match == FramebufferMatch::VALID) || (fbInfo.match == FramebufferMatch::VALID_DEPAL)); - const u64 cachekey = entry->CacheKey(); - const bool hasInvalidFramebuffer = entry->framebuffer == nullptr || entry->invalidHint == -1; - const bool hasOlderFramebuffer = entry->framebuffer != nullptr && entry->framebuffer->last_frame_render < framebuffer->last_frame_render; - bool hasFartherFramebuffer = false; - - if (!hasInvalidFramebuffer && !hasOlderFramebuffer) { - // If it's valid, but the offset is greater, then we still win. - // TODO: This check should probably be moved to MatchFramebuffer somehow. - if (fbTexInfo_[cachekey].yOffset == fbInfo.yOffset) - hasFartherFramebuffer = fbTexInfo_[cachekey].xOffset > fbInfo.xOffset; - else - hasFartherFramebuffer = fbTexInfo_[cachekey].yOffset > fbInfo.yOffset; - } - - if (hasInvalidFramebuffer || hasOlderFramebuffer || hasFartherFramebuffer) { - if (entry->framebuffer == nullptr) { - cacheSizeEstimate_ -= EstimateTexMemoryUsage(entry); - } - ReleaseTexture(entry, true); - - entry->framebuffer = framebuffer; - entry->invalidHint = 0; - entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; - entry->maxLevel = 0; - fbTexInfo_[cachekey] = fbInfo; - framebuffer->last_frame_attached = gpuStats.numFlips; - GPUDebug::NotifyTextureAttachment(entry->addr); - } else if (entry->framebuffer == framebuffer) { - framebuffer->last_frame_attached = gpuStats.numFlips; - } -} - -void TextureCacheCommon::AttachFramebufferInexact(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const FramebufferMatchInfo &fbInfo, FramebufferNotificationChannel channel) { - _dbg_assert_(fbInfo.match == FramebufferMatch::INEXACT); - const u64 cachekey = entry->CacheKey(); - - if (entry->framebuffer == nullptr || entry->framebuffer == framebuffer) { - if (entry->framebuffer == nullptr) { - cacheSizeEstimate_ -= EstimateTexMemoryUsage(entry); - } - ReleaseTexture(entry, true); - entry->framebuffer = framebuffer; - entry->invalidHint = -1; - entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; - entry->maxLevel = 0; - fbTexInfo_[cachekey] = fbInfo; - GPUDebug::NotifyTextureAttachment(entry->addr); - } -} - -void TextureCacheCommon::DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, FramebufferNotificationChannel channel) { - if (entry->framebuffer == framebuffer) { - const u64 cachekey = entry->CacheKey(); - cacheSizeEstimate_ += EstimateTexMemoryUsage(entry); - entry->framebuffer = nullptr; - // Force recreate the texture in case we had one before and the hash matches. - // Otherwise we never recreate the texture. - entry->status |= TexCacheEntry::STATUS_FORCE_REBUILD; - fbTexInfo_.erase(cachekey); - GPUDebug::NotifyTextureAttachment(entry->addr); - InvalidateLastTexture(entry); - } -} - -bool TextureCacheCommon::ApplyFramebufferMatch(FramebufferMatchInfo match, TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, FramebufferNotificationChannel channel) { - // There were five possible outcomes of the old ApplyFramebuffer, these have been - // mapped to the FramebufferMatch enum, and we handle them the same old ways here. - switch (match.match) { - case FramebufferMatch::VALID: - AttachFramebufferValid(entry, framebuffer, match, channel); - return true; - case FramebufferMatch::VALID_DEPAL: - AttachFramebufferValid(entry, framebuffer, match, channel); - entry->status |= TexCacheEntry::STATUS_DEPALETTIZE; - return true; - case FramebufferMatch::INEXACT: - AttachFramebufferInexact(entry, framebuffer, match, channel); - return true; - case FramebufferMatch::NO_MATCH: - DetachFramebuffer(entry, address, framebuffer, channel); - return false; - case FramebufferMatch::IGNORE: - // The purpose of this seems to be to delay a decision to the next frame. - default: - return false; - } -} - -FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, u32 fb_address, VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel) const { +FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer( + const TextureDefinition &entry, + VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel) const { static const u32 MAX_SUBAREA_Y_OFFSET_SAFE = 32; - const u32 mirrorMask = 0x00600000; + uint32_t fb_address = channel == NOTIFY_FB_DEPTH ? framebuffer->z_address : framebuffer->fb_address; u32 addr = fb_address & 0x3FFFFFFF; - u32 texaddr = entry->addr + texaddrOffset; + u32 texaddr = entry.addr + texaddrOffset; bool texInVRAM = Memory::IsVRAMAddress(texaddr); bool fbInVRAM = Memory::IsVRAMAddress(fb_address); @@ -932,25 +772,26 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, } if (texInVRAM) { + const u32 mirrorMask = 0x00600000; + // This bit controls swizzle. The swizzles at 0x00200000 and 0x00600000 are designed // to perfectly match reading depth as color (which one to use I think might be related // to the bpp of the color format used when rendering to it). // It's fairly unlikely that games would screw this up since the result will be garbage so // we use it to filter out unlikely matches. - switch (entry->addr & mirrorMask) { + switch (entry.addr & mirrorMask) { case 0x00000000: case 0x00400000: // Don't match the depth channel with these addresses when texturing. if (channel == FramebufferNotificationChannel::NOTIFY_FB_DEPTH) { - // God of War: If we actively detach here, the shadows disappear. - return FramebufferMatchInfo{ FramebufferMatch::IGNORE }; + return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; } break; case 0x00200000: case 0x00600000: // Don't match the color channel with these addresses when texturing. if (channel == FramebufferNotificationChannel::NOTIFY_FB_COLOR) { - return FramebufferMatchInfo{ FramebufferMatch::IGNORE }; + return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; } break; } @@ -960,30 +801,21 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, } const bool noOffset = texaddr == addr; - const bool exactMatch = noOffset && entry->format < 4 && channel == NOTIFY_FB_COLOR; - const u32 w = 1 << ((entry->dim >> 0) & 0xf); - const u32 h = 1 << ((entry->dim >> 8) & 0xf); + const bool exactMatch = noOffset && entry.format < 4 && channel == NOTIFY_FB_COLOR; + const u32 w = 1 << ((entry.dim >> 0) & 0xf); + const u32 h = 1 << ((entry.dim >> 8) & 0xf); // 512 on a 272 framebuffer is sane, so let's be lenient. const u32 minSubareaHeight = h / 4; // If they match "exactly", it's non-CLUT and from the top left. if (exactMatch) { - if (framebuffer->fb_stride != entry->bufw) { - WARN_LOG_ONCE(diffStrides1, G3D, "Texturing from framebuffer with different strides %d != %d", entry->bufw, framebuffer->fb_stride); + if (framebuffer->fb_stride != entry.bufw) { + WARN_LOG_ONCE(diffStrides1, G3D, "Texturing from framebuffer with different strides %d != %d", entry.bufw, framebuffer->fb_stride); } // NOTE: This check is okay because the first texture formats are the same as the buffer formats. - if (entry->format != (GETextureFormat)framebuffer->format) { - WARN_LOG_ONCE(diffFormat1, G3D, "Texturing from framebuffer with different formats %s != %s", GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format)); - // Let's avoid using it when we know the format is wrong. May be a video/etc. updating memory. - // However, some games use a different format to clear the buffer. - if (framebuffer->last_frame_attached + 1 < gpuStats.numFlips) { - return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; - } else { - // TODO: This is a weird outcome. The purpose seems to be to delay - // a decision to the next frame. - // Should really try to map it to something else. - return FramebufferMatchInfo{ FramebufferMatch::IGNORE }; - } + if (entry.format != (GETextureFormat)framebuffer->format) { + WARN_LOG_ONCE(diffFormat1, G3D, "Texturing from framebuffer with different formats %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format)); + return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; } else { return FramebufferMatchInfo{ FramebufferMatch::VALID }; } @@ -995,21 +827,21 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, // Check works for D16 too (???) const bool matchingClutFormat = - (channel != NOTIFY_FB_COLOR && entry->format == GE_TFMT_CLUT16) || - (channel == NOTIFY_FB_COLOR && framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) || - (channel == NOTIFY_FB_COLOR && framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16); + (channel != NOTIFY_FB_COLOR && entry.format == GE_TFMT_CLUT16) || + (channel == NOTIFY_FB_COLOR && framebuffer->format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) || + (channel == NOTIFY_FB_COLOR && framebuffer->format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16); - const bool clutFormat = IsClutFormat((GETextureFormat)(entry->format)); + const bool clutFormat = IsClutFormat((GETextureFormat)(entry.format)); // To avoid ruining git blame, kept the same name as the old struct. FramebufferMatchInfo fbInfo{ FramebufferMatch::VALID }; const u32 bitOffset = (texaddr - addr) * 8; if (bitOffset != 0) { - const u32 pixelOffset = bitOffset / std::max(1U, (u32)textureBitsPerPixel[entry->format]); + const u32 pixelOffset = bitOffset / std::max(1U, (u32)textureBitsPerPixel[entry.format]); - fbInfo.yOffset = entry->bufw == 0 ? 0 : pixelOffset / entry->bufw; - fbInfo.xOffset = entry->bufw == 0 ? 0 : pixelOffset % entry->bufw; + fbInfo.yOffset = entry.bufw == 0 ? 0 : pixelOffset / entry.bufw; + fbInfo.xOffset = entry.bufw == 0 ? 0 : pixelOffset % entry.bufw; } if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) { @@ -1017,9 +849,9 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; } - if (framebuffer->fb_stride != entry->bufw) { + if (framebuffer->fb_stride != entry.bufw) { if (noOffset) { - WARN_LOG_ONCE(diffStrides2, G3D, "Texturing from framebuffer (matching_clut=%s) different strides %d != %d", matchingClutFormat ? "yes" : "no", entry->bufw, framebuffer->fb_stride); + WARN_LOG_ONCE(diffStrides2, G3D, "Texturing from framebuffer (matching_clut=%s) different strides %d != %d", matchingClutFormat ? "yes" : "no", entry.bufw, framebuffer->fb_stride); // Continue on with other checks. // Not actually sure why we even try here. There's no way it'll go well if the strides are different. } else { @@ -1047,41 +879,39 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, if (!noOffset) { WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset); } - fbInfo.match = FramebufferMatch::VALID_DEPAL; + fbInfo.match = FramebufferMatch::VALID; // We check the format again later, no need to return a special value here. return fbInfo; - } else if (IsClutFormat((GETextureFormat)(entry->format)) || IsDXTFormat((GETextureFormat)(entry->format))) { - WARN_LOG_ONCE(fourEightBit, G3D, "%s format not supported when texturing from framebuffer of format %s", GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format)); + } else if (IsClutFormat((GETextureFormat)(entry.format)) || IsDXTFormat((GETextureFormat)(entry.format))) { + WARN_LOG_ONCE(fourEightBit, G3D, "%s format not supported when texturing from framebuffer of format %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format)); return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; } // This is either normal or we failed to generate a shader to depalettize - if (framebuffer->format == entry->format || matchingClutFormat) { - if (framebuffer->format != entry->format) { + if (framebuffer->format == entry.format || matchingClutFormat) { + if (framebuffer->format != entry.format) { WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with different formats %s != %s at %08x", - GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), fb_address); - return fbInfo; // Valid! + GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format), fb_address); + return fbInfo; } else { - WARN_LOG_ONCE(subarea, G3D, "Render to area containing texture at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset); - // If we return VALID here, God of War Ghost of Sparta/Chains of Olympus will be missing some special effect according to an old comment. - fbInfo.match = FramebufferMatch::INEXACT; + WARN_LOG_ONCE(subarea, G3D, "Texturing from framebuffer at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset); return fbInfo; } } else { WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible format %s != %s at %08x", - GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), fb_address); + GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format), fb_address); return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; } } } -void TextureCacheCommon::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) { +void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate) { + VirtualFramebuffer *framebuffer = candidate.fb; + FramebufferMatchInfo fbInfo = candidate.match; + _dbg_assert_msg_(framebuffer != nullptr, "Framebuffer must not be null."); framebuffer->usageFlags |= FB_USAGE_TEXTURE; if (framebufferManager_->UseBufferedRendering()) { - const u64 cachekey = entry->CacheKey(); - const auto &fbInfo = fbTexInfo_[cachekey]; - // Keep the framebuffer alive. framebuffer->last_frame_used = gpuStats.numFlips; @@ -1103,7 +933,8 @@ void TextureCacheCommon::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFram gstate_c.SetNeedShaderTexclamp(true); } - nextTexture_ = entry; + nextTexture_ = nullptr; + nextFramebufferTexture_ = framebuffer; } else { if (framebuffer->fbo) { framebuffer->fbo->Release(); @@ -1118,6 +949,7 @@ void TextureCacheCommon::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFram nextNeedsRebuild_ = false; } +// Only looks for framebuffers. bool TextureCacheCommon::SetOffsetTexture(u32 yOffset) { if (!framebufferManager_->UseBufferedRendering()) { return false; @@ -1132,22 +964,20 @@ bool TextureCacheCommon::SetOffsetTexture(u32 yOffset) { return false; } - const u16 dim = gstate.getTextureDimension(0); - u64 cachekey = TexCacheEntry::CacheKey(texaddr, fmt, dim, 0); - TexCache::iterator iter = cache_.find(cachekey); - if (iter == cache_.end()) { - return false; + TextureDefinition def; + def.addr = texaddr; + def.format = fmt; + def.bufw = GetTextureBufw(0, texaddr, fmt); + def.dim = gstate.getTextureDimension(0); + + std::vector candidates = GetFramebufferCandidates(def, texaddrOffset); + if (candidates.size() > 0) { + int index = GetBestCandidateIndex(candidates); + if (index != -1) { + SetTextureFramebuffer(candidates[index]); + return true; + } } - TexCacheEntry *entry = iter->second.get(); - - bool success = AttachFramebufferToEntry(entry, texaddrOffset); - - if (success && entry->framebuffer) { - // This will not apply the texture immediately. - SetTextureFramebuffer(entry, entry->framebuffer); - return true; - } - return false; } @@ -1205,8 +1035,9 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) { static const u32 MAX_CLUT_OFFSET = 4096; clutRenderOffset_ = MAX_CLUT_OFFSET; - for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { - auto framebuffer = fbCache_[i]; + auto framebuffers = framebufferManager_->Framebuffers(); + for (size_t i = 0, n = framebuffers.size(); i < n; ++i) { + auto framebuffer = framebuffers[i]; const u32 fb_address = framebuffer->fb_address & 0x3FFFFFFF; const u32 bpp = framebuffer->drawnFormat == GE_FORMAT_8888 ? 4 : 2; u32 offset = clutFramebufAddr - fb_address; @@ -1716,8 +1547,15 @@ void TextureCacheCommon::ReadIndexedTex(u8 *out, int outPitch, int level, const void TextureCacheCommon::ApplyTexture() { TexCacheEntry *entry = nextTexture_; if (entry == nullptr) { + // Maybe we bound a framebuffer? + if (nextFramebufferTexture_) { + bool depth = Memory::IsDepthTexVRAMAddress(gstate.getTextureAddress(0)); + ApplyTextureFramebuffer(nextFramebufferTexture_, gstate.getTextureFormat(), depth ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR); + nextFramebufferTexture_ = nullptr; + } return; } + nextTexture_ = nullptr; UpdateMaxSeenV(entry, gstate.isModeThrough()); @@ -1767,12 +1605,8 @@ void TextureCacheCommon::ApplyTexture() { } entry->lastFrame = gpuStats.numFlips; - if (entry->framebuffer) { - ApplyTextureFramebuffer(entry, entry->framebuffer); - } else { - BindTexture(entry); - gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL); - } + BindTexture(entry); + gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL); } void TextureCacheCommon::Clear(bool delete_them) { @@ -1791,16 +1625,11 @@ void TextureCacheCommon::Clear(bool delete_them) { cacheSizeEstimate_ = 0; secondCacheSizeEstimate_ = 0; } - fbTexInfo_.clear(); videos_.clear(); } void TextureCacheCommon::DeleteTexture(TexCache::iterator it) { ReleaseTexture(it->second.get(), true); - auto fbInfo = fbTexInfo_.find(it->first); - if (fbInfo != fbTexInfo_.end()) { - fbTexInfo_.erase(fbInfo); - } cacheSizeEstimate_ -= EstimateTexMemoryUsage(it->second.get()); cache_.erase(it); } @@ -1929,7 +1758,7 @@ void TextureCacheCommon::Invalidate(u32 addr, int size, GPUInvalidationType type } } iter->second->framesUntilNextFullHash = 0; - } else if (!iter->second->framebuffer) { + } else { iter->second->invalidHint++; } } @@ -1951,9 +1780,7 @@ void TextureCacheCommon::InvalidateAll(GPUInvalidationType /*unused*/) { if (iter->second->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) { iter->second->SetHashStatus(TexCacheEntry::STATUS_HASHING); } - if (!iter->second->framebuffer) { - iter->second->invalidHint++; - } + iter->second->invalidHint++; } } diff --git a/GPU/Common/TextureCacheCommon.h b/GPU/Common/TextureCacheCommon.h index 4a90489bd0..609693521c 100644 --- a/GPU/Common/TextureCacheCommon.h +++ b/GPU/Common/TextureCacheCommon.h @@ -93,7 +93,19 @@ struct SamplerCacheKey { class GLRTexture; class VulkanTexture; +// Enough information about a texture to match it to framebuffers. +struct TextureDefinition { + u32 addr; + GETextureFormat format; + u32 dim; + u32 bufw; +}; + + // TODO: Shrink this struct. There is some fluff. + +// NOTE: These only handle textures loaded directly from PSP memory contents. +// Framebuffer textures do not have entries, we bind the framebuffers directly. struct TexCacheEntry { ~TexCacheEntry() { if (texturePtr || textureName || vkTex) @@ -115,7 +127,6 @@ struct TexCacheEntry { STATUS_CLUT_VARIANTS = 0x08, // Has multiple CLUT variants. STATUS_CHANGE_FREQUENT = 0x10, // Changes often (less than 6 frames in between.) STATUS_CLUT_RECHECK = 0x20, // Another texture with same addr had a hashfail. - STATUS_DEPALETTIZE = 0x40, // Needs to go through a depalettize pass. STATUS_TO_SCALE = 0x80, // Pending texture scaling in a later frame. STATUS_IS_SCALED = 0x100, // Has been scaled (can't be replaceImages'd.) // When hashing large textures, we optimize 512x512 down to 512x272 by default, since this @@ -125,7 +136,8 @@ struct TexCacheEntry { STATUS_BAD_MIPS = 0x400, // Has bad or unusable mipmap levels. - STATUS_DEPTH = 0x800, + STATUS_FRAMEBUFFER_OVERLAP = 0x800, + STATUS_FORCE_REBUILD = 0x1000, }; @@ -134,7 +146,6 @@ struct TexCacheEntry { u32 addr; u32 hash; - VirtualFramebuffer *framebuffer; // if null, not sourced from an FBO. TODO: Collapse into texturePtr u32 sizeInRAM; // Could be computed u8 format; // GeTextureFormat u8 maxLevel; @@ -181,8 +192,7 @@ struct TexCacheEntry { static u64 CacheKey(u32 addr, u8 format, u16 dim, u32 cluthash); }; -class FramebufferManagerCommon; -// Can't be unordered_map, we use lower_bound ... although for some reason that compiles on MSVC. +// Can't be unordered_map, we use lower_bound ... although for some reason that (used to?) compiles on MSVC. // Would really like to replace this with DenseHashMap but can't as long as we need lower_bound. typedef std::map> TexCache; @@ -195,17 +205,10 @@ typedef std::map> TexCache; enum class FramebufferMatch { // Valid, exact match. VALID = 0, - // Valid match that is exact after depal. - VALID_DEPAL, - // Inexact match (such as wrong fmt or at a questionable offset.) - INEXACT, // Not a match, remove if currently attached. NO_MATCH, - // Not a match, but don't remove yet. Used to avoid deatching depth mismatch. - IGNORE, }; -// Separate to keep main texture cache size down. struct FramebufferMatchInfo { FramebufferMatch match; u32 xOffset; @@ -214,11 +217,13 @@ struct FramebufferMatchInfo { struct AttachCandidate { FramebufferMatchInfo match; - TexCacheEntry *entry; + TextureDefinition entry; VirtualFramebuffer *fb; FramebufferNotificationChannel channel; }; +class FramebufferManagerCommon; + class TextureCacheCommon { public: TextureCacheCommon(Draw::DrawContext *draw); @@ -227,7 +232,7 @@ public: void LoadClut(u32 clutAddr, u32 loadBytes); bool GetCurrentClutBuffer(GPUDebugBuffer &buffer); - void SetTexture(bool force = false); + TexCacheEntry *SetTexture(bool force = false); void ApplyTexture(); bool SetOffsetTexture(u32 yOffset); void Invalidate(u32 addr, int size, GPUInvalidationType type); @@ -237,13 +242,12 @@ public: virtual void ForgetLastTexture() = 0; virtual void InvalidateLastTexture(TexCacheEntry *entry = nullptr) = 0; virtual void Clear(bool delete_them); - - // FramebufferManager keeps TextureCache updated about what regions of memory are being rendered to. - void NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg, FramebufferNotificationChannel channel); virtual void NotifyConfigChanged(); - void NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt); - int AttachedDrawingHeight(); + // FramebufferManager keeps TextureCache updated about what regions of memory are being rendered to, + // so that it can invalidate TexCacheEntries pointed at those addresses. + void NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg, FramebufferNotificationChannel channel); + void NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt); size_t NumLoadedTextures() const { return cache_.size(); @@ -264,7 +268,8 @@ protected: void DeleteTexture(TexCache::iterator it); void Decimate(bool forcePressure = false); - virtual void ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) = 0; + virtual void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) = 0; + void HandleTextureChange(TexCacheEntry *const entry, const char *reason, bool initialMatch, bool doDelete); virtual void BuildTexture(TexCacheEntry *const entry) = 0; virtual void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) = 0; @@ -284,19 +289,12 @@ protected: void UpdateSamplingParams(TexCacheEntry &entry, SamplerCacheKey &key); // Used by D3D11 and Vulkan. void UpdateMaxSeenV(TexCacheEntry *entry, bool throughMode); - FramebufferMatchInfo MatchFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel) const; + FramebufferMatchInfo MatchFramebuffer(const TextureDefinition &entry, VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel) const; - bool AttachFramebufferToEntry(TexCacheEntry *entry, u32 texAddrOffset); + std::vector GetFramebufferCandidates(const TextureDefinition &entry, u32 texAddrOffset); + int GetBestCandidateIndex(const std::vector &candidates); - // Temporary utility during conversion - bool ApplyFramebufferMatch(FramebufferMatchInfo match, TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, FramebufferNotificationChannel channel); - bool AttachBestCandidate(const std::vector &candidates); - - void AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const FramebufferMatchInfo &fbInfo, FramebufferNotificationChannel channel); - void AttachFramebufferInexact(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const FramebufferMatchInfo &fbInfo, FramebufferNotificationChannel channel); - void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, FramebufferNotificationChannel channel); - - void SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer); + void SetTextureFramebuffer(const AttachCandidate &candidate); void DecimateVideos(); @@ -327,8 +325,8 @@ protected: TextureReplacer replacer_; FramebufferManagerCommon *framebufferManager_; - bool clearCacheNextFrame_; - bool lowMemoryMode_; + bool clearCacheNextFrame_ = false; + bool lowMemoryMode_ = false; int decimationCounter_; int texelsScaledThisFrame_; @@ -340,15 +338,13 @@ protected: TexCache secondCache_; u32 secondCacheSizeEstimate_; - std::vector fbCache_; - std::map fbTexInfo_; - std::map videos_; SimpleBuf tmpTexBuf32_; SimpleBuf tmpTexBufRearrange_; - TexCacheEntry *nextTexture_; + TexCacheEntry *nextTexture_ = nullptr; + VirtualFramebuffer *nextFramebufferTexture_ = nullptr; u32 clutHash_ = 0; diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp index 3d8a463838..552d303555 100644 --- a/GPU/D3D11/GPU_D3D11.cpp +++ b/GPU/D3D11/GPU_D3D11.cpp @@ -308,14 +308,13 @@ void GPU_D3D11::GetStats(char *buffer, size_t bufsize) { float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; snprintf(buffer, bufsize - 1, "DL processing time: %0.2f ms\n" - "Draw calls: %i, flushes %i, clears %i\n" - "Cached Draw calls: %i\n" + "Draw calls: %i, flushes %i, clears %i (cached: %d)\n" "Num Tracked Vertex Arrays: %i\n" "GPU cycles executed: %d (%f per vertex)\n" "Commands per call level: %i %i %i %i\n" "Vertices submitted: %i\n" "Cached, Uncached Vertices Drawn: %i, %i\n" - "FBOs active: %i\n" + "FBOs active: %i (evaluations: %d)\n" "Textures active: %i, decoded: %i invalidated: %i\n" "Readbacks: %d, uploads: %d\n" "Vertex, Fragment shaders loaded: %i, %i\n", @@ -332,6 +331,7 @@ void GPU_D3D11::GetStats(char *buffer, size_t bufsize) { gpuStats.numCachedVertsDrawn, gpuStats.numUncachedVertsDrawn, (int)framebufferManagerD3D11_->NumVFBs(), + gpuStats.numFramebufferEvaluations, (int)textureCacheD3D11_->NumLoadedTextures(), gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, diff --git a/GPU/D3D11/TextureCacheD3D11.cpp b/GPU/D3D11/TextureCacheD3D11.cpp index 9b6ad9b70c..7953eff1d9 100644 --- a/GPU/D3D11/TextureCacheD3D11.cpp +++ b/GPU/D3D11/TextureCacheD3D11.cpp @@ -378,10 +378,11 @@ protected: int renderH_; }; -void TextureCacheD3D11::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) { +void TextureCacheD3D11::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) { ID3D11PixelShader *pshader = nullptr; uint32_t clutMode = gstate.clutformat & 0xFFFFFF; - if ((entry->status & TexCacheEntry::STATUS_DEPALETTIZE) && !g_Config.bDisableSlowFramebufEffects) { + bool need_depalettize = IsClutFormat(texFormat); + if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) { pshader = depalShaderCache_->GetDepalettizePixelShader(clutMode, framebuffer->drawnFormat); } @@ -421,8 +422,6 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFra TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, GetClutDestFormatD3D11(clutFormat), clutTotalColors, clutTotalColors, 1); gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL); } else { - entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; - framebufferManagerD3D11_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650); @@ -432,7 +431,6 @@ void TextureCacheD3D11::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFra SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, samplerKey); ID3D11SamplerState *state = samplerCache_.GetOrCreateSampler(device_, samplerKey); context_->PSSetSamplers(0, 1, &state); - InvalidateLastTexture(); gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE); } @@ -444,14 +442,6 @@ void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) { // For the estimate, we assume cluts always point to 8888 for simplicity. cacheSizeEstimate_ += EstimateTexMemoryUsage(entry); - // TODO: If a framebuffer is attached here, might end up with a bad entry.texture. - // Should just always create one here or something (like GLES.) - - if (entry->framebuffer) { - // Nothing else to do here. - return; - } - if ((entry->bufw == 0 || (gstate.texbufwidth[0] & 0xf800) != 0) && entry->addr >= PSP_GetKernelMemoryEnd()) { ERROR_LOG_REPORT(G3D, "Texture with unexpected bufw (full=%d)", gstate.texbufwidth[0] & 0xffff); // Proceeding here can cause a crash. @@ -772,26 +762,26 @@ void TextureCacheD3D11::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture & bool TextureCacheD3D11::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) { SetTexture(false); - if (!nextTexture_) - return false; + if (!nextTexture_) { + if (nextFramebufferTexture_) { + VirtualFramebuffer *vfb = nextFramebufferTexture_; + buffer.Allocate(vfb->bufferWidth, vfb->bufferHeight, GPU_DBG_FORMAT_8888, false); + bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, 0, vfb->bufferWidth, vfb->bufferHeight, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), vfb->bufferWidth, "GetCurrentTextureDebug"); + // Vulkan requires us to re-apply all dynamic state for each command buffer, and the above will cause us to start a new cmdbuf. + // So let's dirty the things that are involved in Vulkan dynamic state. Readbacks are not frequent so this won't hurt other backends. + gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE); + // We may have blitted to a temp FBO. + framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug"); + return retval; + } else { + return false; + } + } // Apply texture may need to rebuild the texture if we're about to render, or bind a framebuffer. TexCacheEntry *entry = nextTexture_; ApplyTexture(); - // TODO: Centralize. - if (entry->framebuffer) { - VirtualFramebuffer *vfb = entry->framebuffer; - buffer.Allocate(vfb->bufferWidth, vfb->bufferHeight, GPU_DBG_FORMAT_8888, false); - bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, 0, vfb->bufferWidth, vfb->bufferHeight, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), vfb->bufferWidth, "GetCurrentTextureDebug"); - // Vulkan requires us to re-apply all dynamic state for each command buffer, and the above will cause us to start a new cmdbuf. - // So let's dirty the things that are involved in Vulkan dynamic state. Readbacks are not frequent so this won't hurt other backends. - gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE); - // We may have blitted to a temp FBO. - framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug"); - return retval; - } - ID3D11Texture2D *texture = (ID3D11Texture2D *)entry->texturePtr; if (!texture) return false; diff --git a/GPU/D3D11/TextureCacheD3D11.h b/GPU/D3D11/TextureCacheD3D11.h index 8cfa249269..db154c5a77 100644 --- a/GPU/D3D11/TextureCacheD3D11.h +++ b/GPU/D3D11/TextureCacheD3D11.h @@ -74,7 +74,7 @@ private: TexCacheEntry::TexStatus CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h); void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override; - void ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) override; + void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) override; void BuildTexture(TexCacheEntry *const entry) override; ID3D11Device *device_; diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 1ba0958f66..936dcee7a8 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -347,14 +347,13 @@ void GPU_DX9::GetStats(char *buffer, size_t bufsize) { float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; snprintf(buffer, bufsize - 1, "DL processing time: %0.2f ms\n" - "Draw calls: %i, flushes %i, clears %i\n" - "Cached Draw calls: %i\n" + "Draw calls: %i, flushes %i, clears %i (cached: %d)\n" "Num Tracked Vertex Arrays: %i\n" "GPU cycles executed: %d (%f per vertex)\n" "Commands per call level: %i %i %i %i\n" "Vertices submitted: %i\n" "Cached, Uncached Vertices Drawn: %i, %i\n" - "FBOs active: %i\n" + "FBOs active: %i (evaluations: %d)\n" "Textures active: %i, decoded: %i invalidated: %i\n" "Readbacks: %d, uploads: %d\n" "Vertex, Fragment shaders loaded: %i, %i\n", @@ -371,6 +370,7 @@ void GPU_DX9::GetStats(char *buffer, size_t bufsize) { gpuStats.numCachedVertsDrawn, gpuStats.numUncachedVertsDrawn, (int)framebufferManagerDX9_->NumVFBs(), + gpuStats.numFramebufferEvaluations, (int)textureCacheDX9_->NumLoadedTextures(), gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, diff --git a/GPU/Directx9/TextureCacheDX9.cpp b/GPU/Directx9/TextureCacheDX9.cpp index c5e70491ef..5f118f76db 100644 --- a/GPU/Directx9/TextureCacheDX9.cpp +++ b/GPU/Directx9/TextureCacheDX9.cpp @@ -416,10 +416,11 @@ protected: int renderH_; }; -void TextureCacheDX9::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) { +void TextureCacheDX9::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) { LPDIRECT3DPIXELSHADER9 pshader = nullptr; uint32_t clutMode = gstate.clutformat & 0xFFFFFF; - if ((entry->status & TexCacheEntry::STATUS_DEPALETTIZE) && !g_Config.bDisableSlowFramebufEffects) { + bool need_depalettize = IsClutFormat(texFormat); + if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) { pshader = depalShaderCache_->GetDepalettizePixelShader(clutMode, framebuffer->drawnFormat); } @@ -460,8 +461,6 @@ void TextureCacheDX9::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFrame TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1); gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL); } else { - entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; - framebufferManagerDX9_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650); @@ -469,8 +468,6 @@ void TextureCacheDX9::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFrame framebufferManagerDX9_->RebindFramebuffer("RebindFramebuffer - ApplyTextureFromFramebuffer"); SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight); - - InvalidateLastTexture(); } void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) { @@ -479,14 +476,6 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) { // For the estimate, we assume cluts always point to 8888 for simplicity. cacheSizeEstimate_ += EstimateTexMemoryUsage(entry); - // TODO: If a framebuffer is attached here, might end up with a bad entry.texture. - // Should just always create one here or something (like GLES.) - - if (entry->framebuffer) { - // Nothing else to do here. - return; - } - if ((entry->bufw == 0 || (gstate.texbufwidth[0] & 0xf800) != 0) && entry->addr >= PSP_GetKernelMemoryEnd()) { ERROR_LOG_REPORT(G3D, "Texture with unexpected bufw (full=%d)", gstate.texbufwidth[0] & 0xffff); // Proceeding here can cause a crash. diff --git a/GPU/Directx9/TextureCacheDX9.h b/GPU/Directx9/TextureCacheDX9.h index 4d9b308793..a0d488c77b 100644 --- a/GPU/Directx9/TextureCacheDX9.h +++ b/GPU/Directx9/TextureCacheDX9.h @@ -68,7 +68,7 @@ private: TexCacheEntry::TexStatus CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h); void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override; - void ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) override; + void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) override; void BuildTexture(TexCacheEntry *const entry) override; LPDIRECT3DTEXTURE9 &DxTex(TexCacheEntry *entry) { diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index 8c9df27705..e1de25d4e5 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -454,14 +454,13 @@ void GPU_GLES::GetStats(char *buffer, size_t bufsize) { float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; snprintf(buffer, bufsize - 1, "DL processing time: %0.2f ms\n" - "Draw calls: %i, flushes %i, clears %i\n" - "Cached Draw calls: %i\n" + "Draw calls: %i, flushes %i, clears %i (cached: %d)\n" "Num Tracked Vertex Arrays: %i\n" "GPU cycles executed: %d (%f per vertex)\n" "Commands per call level: %i %i %i %i\n" "Vertices submitted: %i\n" "Cached, Uncached Vertices Drawn: %i, %i\n" - "FBOs active: %i\n" + "FBOs active: %i (evaluations: %d)\n" "Textures active: %i, decoded: %i invalidated: %i\n" "Readbacks: %d, uploads: %d\n" "Vertex, Fragment, Programs loaded: %i, %i, %i\n", @@ -478,6 +477,7 @@ void GPU_GLES::GetStats(char *buffer, size_t bufsize) { gpuStats.numCachedVertsDrawn, gpuStats.numUncachedVertsDrawn, (int)framebufferManagerGL_->NumVFBs(), + gpuStats.numFramebufferEvaluations, (int)textureCacheGL_->NumLoadedTextures(), gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp index 6ecfd51443..b0de874f69 100644 --- a/GPU/GLES/TextureCacheGLES.cpp +++ b/GPU/GLES/TextureCacheGLES.cpp @@ -282,48 +282,6 @@ void TextureCacheGLES::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBas clutLastFormat_ = gstate.clutformat; } -// #define DEBUG_TEXTURES - -#ifdef DEBUG_TEXTURES -bool SetDebugTexture() { - static const int highlightFrames = 30; - - static int numTextures = 0; - static int lastFrames = 0; - static int mostTextures = 1; - - if (lastFrames != gpuStats.numFlips) { - mostTextures = std::max(mostTextures, numTextures); - numTextures = 0; - lastFrames = gpuStats.numFlips; - } - - static GLuint solidTexture = 0; - - bool changed = false; - if (((gpuStats.numFlips / highlightFrames) % mostTextures) == numTextures) { - if (gpuStats.numFlips % highlightFrames == 0) { - NOTICE_LOG(G3D, "Highlighting texture # %d / %d", numTextures, mostTextures); - } - static const u32 solidTextureData[] = {0x99AA99FF}; - - if (solidTexture == 0) { - glGenTextures(1, &solidTexture); - glBindTexture(GL_TEXTURE_2D, solidTexture); - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - glPixelStorei(GL_PACK_ALIGNMENT, 1); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, solidTextureData); - } else { - glBindTexture(GL_TEXTURE_2D, solidTexture); - } - changed = true; - } - - ++numTextures; - return changed; -} -#endif - void TextureCacheGLES::BindTexture(TexCacheEntry *entry) { if (entry->textureName != lastBoundTexture) { render_->BindTexture(0, entry->textureName); @@ -441,15 +399,16 @@ protected: int renderH_; }; -void TextureCacheGLES::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) { +void TextureCacheGLES::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) { DepalShader *depal = nullptr; uint32_t clutMode = gstate.clutformat & 0xFFFFFF; + bool need_depalettize = IsClutFormat(texFormat); bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && gstate_c.Supports(GPU_SUPPORTS_GLSL_ES_300); if (!gstate_c.Supports(GPU_SUPPORTS_32BIT_INT_FSHADER)) { useShaderDepal = false; } - if ((entry->status & TexCacheEntry::STATUS_DEPALETTIZE) && !g_Config.bDisableSlowFramebufEffects) { + if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) { if (useShaderDepal) { const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); GLRTexture *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_); @@ -500,8 +459,6 @@ void TextureCacheGLES::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFram TexCacheEntry::TexStatus alphaStatus = CheckAlpha((const uint8_t *)clutBuf_, getClutDestFormat(clutFormat), clutTotalColors, clutTotalColors, 1); gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL); } else { - entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; - framebufferManagerGL_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650); @@ -510,8 +467,6 @@ void TextureCacheGLES::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFram framebufferManagerGL_->RebindFramebuffer("ApplyTextureFramebuffer"); SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, false); - InvalidateLastTexture(); - // Since we started/ended render passes, might need these. gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE); } @@ -541,11 +496,6 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) { // For the estimate, we assume cluts always point to 8888 for simplicity. cacheSizeEstimate_ += EstimateTexMemoryUsage(entry); - if (entry->framebuffer) { - // Nothing else to do here. - return; - } - // Always generate a texture name unless it's a framebuffer, we might need it if the texture is replaced later. if (!entry->textureName) { entry->textureName = render_->CreateTexture(GL_TEXTURE_2D); @@ -829,8 +779,22 @@ bool TextureCacheGLES::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) SetTexture(true); if (!nextTexture_) { - ERROR_LOG(G3D, "Failed to get debug texture: no texture set"); - return false; + if (nextFramebufferTexture_) { + VirtualFramebuffer *vfb = nextFramebufferTexture_; + buffer.Allocate(vfb->bufferWidth, vfb->bufferHeight, GPU_DBG_FORMAT_8888, false); + bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, 0, vfb->bufferWidth, vfb->bufferHeight, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), vfb->bufferWidth, "GetCurrentTextureDebug"); + // Vulkan requires us to re-apply all dynamic state for each command buffer, and the above will cause us to start a new cmdbuf. + // So let's dirty the things that are involved in Vulkan dynamic state. Readbacks are not frequent so this won't hurt other backends. + gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE); + // We may have blitted to a temp FBO. + framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug"); + if (!retval) + ERROR_LOG(G3D, "Failed to get debug texture: copy to memory failed"); + return retval; + } else { + ERROR_LOG(G3D, "Failed to get debug texture: no texture set"); + return false; + } } // Apply texture may need to rebuild the texture if we're about to render, or bind a framebuffer. @@ -839,21 +803,6 @@ bool TextureCacheGLES::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) framebufferManagerGL_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug"); ApplyTexture(); - // TODO: Centralize? - if (entry->framebuffer) { - VirtualFramebuffer *vfb = entry->framebuffer; - buffer.Allocate(vfb->bufferWidth, vfb->bufferHeight, GPU_DBG_FORMAT_8888, false); - bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, 0, vfb->bufferWidth, vfb->bufferHeight, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), vfb->bufferWidth, "GetCurrentTextureDebug"); - // Vulkan requires us to re-apply all dynamic state for each command buffer, and the above will cause us to start a new cmdbuf. - // So let's dirty the things that are involved in Vulkan dynamic state. Readbacks are not frequent so this won't hurt other backends. - gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE); - // We may have blitted to a temp FBO. - framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug"); - if (!retval) - ERROR_LOG(G3D, "Failed to get debug texture: copy to memory failed"); - return retval; - } - GLRenderManager *renderManager = (GLRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER); // Not a framebuffer, so let's assume these are right. diff --git a/GPU/GLES/TextureCacheGLES.h b/GPU/GLES/TextureCacheGLES.h index abb4d2d2fb..36e203579f 100644 --- a/GPU/GLES/TextureCacheGLES.h +++ b/GPU/GLES/TextureCacheGLES.h @@ -81,7 +81,7 @@ private: TexCacheEntry::TexStatus CheckAlpha(const uint8_t *pixelData, Draw::DataFormat dstFmt, int stride, int w, int h); void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override; - void ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) override; + void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) override; void BuildTexture(TexCacheEntry *const entry) override; diff --git a/GPU/GPU.h b/GPU/GPU.h index 2c2fa9cc70..2c47c53ac5 100644 --- a/GPU/GPU.h +++ b/GPU/GPU.h @@ -67,6 +67,7 @@ struct GPUStatistics { numShaderSwitches = 0; numFlushes = 0; numTexturesDecoded = 0; + numFramebufferEvaluations = 0; numReadbacks = 0; numUploads = 0; numClears = 0; @@ -88,6 +89,7 @@ struct GPUStatistics { int numTextureSwitches; int numShaderSwitches; int numTexturesDecoded; + int numFramebufferEvaluations; int numReadbacks; int numUploads; int numClears; diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 6a5d2415f9..66d9ebfe04 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -558,14 +558,13 @@ void GPU_Vulkan::GetStats(char *buffer, size_t bufsize) { float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f; snprintf(buffer, bufsize - 1, "DL processing time: %0.2f ms\n" - "Draw calls: %i, flushes %i, clears %i\n" - "Cached Draw calls: %i\n" + "Draw calls: %i, flushes %i, clears %i (cached: %d)\n" "Num Tracked Vertex Arrays: %i\n" "GPU cycles executed: %d (%f per vertex)\n" "Commands per call level: %i %i %i %i\n" "Vertices submitted: %i\n" "Cached, Uncached Vertices Drawn: %i, %i\n" - "FBOs active: %i\n" + "FBOs active: %i (evaluations: %d)\n" "Textures active: %i, decoded: %i invalidated: %i\n" "Readbacks: %d, uploads: %d\n" "Vertex, Fragment, Pipelines loaded: %i, %i, %i\n" @@ -584,6 +583,7 @@ void GPU_Vulkan::GetStats(char *buffer, size_t bufsize) { gpuStats.numCachedVertsDrawn, gpuStats.numUncachedVertsDrawn, (int)framebufferManager_->NumVFBs(), + gpuStats.numFramebufferEvaluations, (int)textureCacheVulkan_->NumLoadedTextures(), gpuStats.numTexturesDecoded, gpuStats.numTextureInvalidations, diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 93e39d87fd..3a2da88759 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -554,7 +554,7 @@ void TextureCacheVulkan::Unbind() { InvalidateLastTexture(); } -void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) { +void TextureCacheVulkan::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) { SamplerCacheKey samplerKey{}; SetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight, samplerKey); @@ -562,10 +562,12 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFr uint32_t clutMode = gstate.clutformat & 0xFFFFFF; bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS); - bool depth = (entry->status & TexCacheEntry::STATUS_DEPTH) != 0; + bool depth = channel == NOTIFY_FB_DEPTH; bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth; - if ((entry->status & TexCacheEntry::STATUS_DEPALETTIZE) && !g_Config.bDisableSlowFramebufEffects) { + bool need_depalettize = IsClutFormat(texFormat); + + if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) { if (useShaderDepal) { depalShaderCache_->SetPushBuffer(drawEngine_->GetPushBufferForTextureData()); const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat(); @@ -584,7 +586,6 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFr TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, getClutDestFormatVulkan(clutFormat), clutTotalColors, clutTotalColors, 1); gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL); curSampler_ = samplerCache_.GetOrCreateSampler(samplerKey); - InvalidateLastTexture(entry); imageView_ = framebufferManagerVulkan_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); return; } else { @@ -694,16 +695,14 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFr // Since we may have switched render targets, we need to re-set depth/stencil etc states. gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE | DIRTY_RASTER_STATE); } else { - entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; - imageView_ = framebufferManagerVulkan_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); drawEngine_->SetDepalTexture(VK_NULL_HANDLE); gstate_c.SetUseShaderDepal(false); gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650); } + curSampler_ = samplerCache_.GetOrCreateSampler(samplerKey); - InvalidateLastTexture(entry); } ReplacedTextureFormat FromVulkanFormat(VkFormat fmt) { @@ -730,11 +729,6 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { // For the estimate, we assume cluts always point to 8888 for simplicity. cacheSizeEstimate_ += EstimateTexMemoryUsage(entry); - if (entry->framebuffer) { - // Nothing else to do here. - return; - } - if ((entry->bufw == 0 || (gstate.texbufwidth[0] & 0xf800) != 0) && entry->addr >= PSP_GetKernelMemoryEnd()) { ERROR_LOG_REPORT(G3D, "Texture with unexpected bufw (full=%d)", gstate.texbufwidth[0] & 0xffff); // Proceeding here can cause a crash. @@ -1007,16 +1001,9 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, localBuf, localOffset, stride / bpp); } else { - // Don't even try to read depth data. - if (entry->status & TexCacheEntry::STATUS_DEPTH) { - // Clear with a warning value (hot pink). This should not be seen - means we missed matching a framebuffer - // that a game rendered depth to. - entry->vkTex->ClearMip(cmdInit, i, 0xFFFF00FF); - } else { - data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment); - LoadTextureLevel(*entry, (uint8_t *)data, stride, i, scaleFactor, dstFmt); - entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp); - } + data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment); + LoadTextureLevel(*entry, (uint8_t *)data, stride, i, scaleFactor, dstFmt); + entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp); } } if (replacer_.Enabled()) { @@ -1153,26 +1140,26 @@ void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePt bool TextureCacheVulkan::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) { SetTexture(false); - if (!nextTexture_) - return false; + if (!nextTexture_) { + if (nextFramebufferTexture_) { + VirtualFramebuffer *vfb = nextFramebufferTexture_; + buffer.Allocate(vfb->bufferWidth, vfb->bufferHeight, GPU_DBG_FORMAT_8888, false); + bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, 0, vfb->bufferWidth, vfb->bufferHeight, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), vfb->bufferWidth, "GetCurrentTextureDebug"); + // Vulkan requires us to re-apply all dynamic state for each command buffer, and the above will cause us to start a new cmdbuf. + // So let's dirty the things that are involved in Vulkan dynamic state. Readbacks are not frequent so this won't hurt other backends. + gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE); + // We may have blitted to a temp FBO. + framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug"); + return retval; + } else { + return false; + } + } // Apply texture may need to rebuild the texture if we're about to render, or bind a framebuffer. TexCacheEntry *entry = nextTexture_; ApplyTexture(); - // TODO: Centralize? - if (entry->framebuffer) { - VirtualFramebuffer *vfb = entry->framebuffer; - buffer.Allocate(vfb->bufferWidth, vfb->bufferHeight, GPU_DBG_FORMAT_8888, false); - bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, 0, vfb->bufferWidth, vfb->bufferHeight, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), vfb->bufferWidth, "GetCurrentTextureDebug"); - // Vulkan requires us to re-apply all dynamic state for each command buffer, and the above will cause us to start a new cmdbuf. - // So let's dirty the things that are involved in Vulkan dynamic state. Readbacks are not frequent so this won't hurt other backends. - gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE); - // We may have blitted to a temp FBO. - framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug"); - return retval; - } - if (!entry->vkTex) return false; VulkanTexture *texture = entry->vkTex; diff --git a/GPU/Vulkan/TextureCacheVulkan.h b/GPU/Vulkan/TextureCacheVulkan.h index a9163f2e18..059b4be0ec 100644 --- a/GPU/Vulkan/TextureCacheVulkan.h +++ b/GPU/Vulkan/TextureCacheVulkan.h @@ -122,7 +122,7 @@ private: TexCacheEntry::TexStatus CheckAlpha(const u32 *pixelData, VkFormat dstFmt, int stride, int w, int h); void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) override; - void ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer) override; + void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) override; void BuildTexture(TexCacheEntry *const entry) override; void CompileScalingShader(); diff --git a/Windows/CaptureDevice.cpp b/Windows/CaptureDevice.cpp index 6ae0733e51..a475901137 100644 --- a/Windows/CaptureDevice.cpp +++ b/Windows/CaptureDevice.cpp @@ -111,13 +111,15 @@ MediaParam defaultAudioParam = { 44100, 2, 16, MFAudioFormat_PCM }; HRESULT GetDefaultStride(IMFMediaType *pType, LONG *plStride); -ReaderCallback::ReaderCallback(WindowsCaptureDevice *device): img_convert_ctx(nullptr), resample_ctx(nullptr){ - this->device = device; -} +ReaderCallback::ReaderCallback(WindowsCaptureDevice *_device) : device(_device) {} ReaderCallback::~ReaderCallback() { - sws_freeContext(img_convert_ctx); - swr_free(&resample_ctx); + if (img_convert_ctx) { + sws_freeContext(img_convert_ctx); + } + if (resample_ctx) { + swr_free(&resample_ctx); + } } HRESULT ReaderCallback::QueryInterface(REFIID riid, void** ppv) @@ -439,20 +441,10 @@ u32 ReaderCallback::doResample(u8 **dst, u32 &dstSampleRate, u32 &dstChannels, u return av_samples_get_buffer_size(nullptr, dstChannels, outSamplesCount, AV_SAMPLE_FMT_S16, 0); } -WindowsCaptureDevice::WindowsCaptureDevice(CAPTUREDEVIDE_TYPE type) : - type(type), - m_pCallback(nullptr), - m_pSource(nullptr), - m_pReader(nullptr), - imageRGB(nullptr), - imageJpeg(nullptr), - imgJpegSize(0), - resampleBuf(nullptr), - resampleBufSize(0), - rawAudioBuf(nullptr), +WindowsCaptureDevice::WindowsCaptureDevice(CAPTUREDEVIDE_TYPE _type) : + type(_type), error(CAPTUREDEVIDE_ERROR_NO_ERROR), errorMessage(""), - isDeviceChanged(false), state(CAPTUREDEVIDE_STATE::UNINITIALIZED) { param = { 0 }; deviceParam = { 0 }; @@ -482,6 +474,7 @@ WindowsCaptureDevice::~WindowsCaptureDevice() { break; } } + void WindowsCaptureDevice::CheckDevices() { isDeviceChanged = true; } diff --git a/Windows/CaptureDevice.h b/Windows/CaptureDevice.h index 60f9b61b05..0829bdb2bc 100644 --- a/Windows/CaptureDevice.h +++ b/Windows/CaptureDevice.h @@ -171,8 +171,8 @@ public: protected: WindowsCaptureDevice *device; - SwsContext *img_convert_ctx; - SwrContext *resample_ctx; + SwsContext *img_convert_ctx = nullptr; + SwrContext *resample_ctx = nullptr; }; class WindowsCaptureDevice { @@ -222,12 +222,12 @@ protected: CAPTUREDEVIDE_ERROR error; std::string errorMessage; - bool isDeviceChanged; + bool isDeviceChanged = false; // MF interface. - ReaderCallback *m_pCallback; - IMFSourceReader *m_pReader; - IMFMediaSource *m_pSource; + ReaderCallback *m_pCallback = nullptr; + IMFSourceReader *m_pReader = nullptr; + IMFMediaSource *m_pSource = nullptr; // Message loop. std::mutex mutex; @@ -241,15 +241,15 @@ protected: std::mutex paramMutex; // Camera only - unsigned char *imageRGB; - int imgRGBLineSizes[4]; - unsigned char *imageJpeg; - int imgJpegSize; + unsigned char *imageRGB = nullptr; + int imgRGBLineSizes[4]{}; + unsigned char *imageJpeg = nullptr; + int imgJpegSize = 0; //Microphone only - u8 *resampleBuf; - u32 resampleBufSize; - QueueBuf *rawAudioBuf; + u8 *resampleBuf = nullptr; + u32 resampleBufSize = 0; + QueueBuf *rawAudioBuf = nullptr; }; extern WindowsCaptureDevice *winCamera;