diff --git a/Common/Vulkan/VulkanImage.cpp b/Common/Vulkan/VulkanImage.cpp index 1b856dcff2..2165221668 100644 --- a/Common/Vulkan/VulkanImage.cpp +++ b/Common/Vulkan/VulkanImage.cpp @@ -181,6 +181,20 @@ void VulkanTexture::UploadMip(VkCommandBuffer cmd, int mip, int mipWidth, int mi vkCmdCopyBufferToImage(cmd, buffer, image_, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_region); } +void VulkanTexture::ClearMip(VkCommandBuffer cmd, int mip, uint32_t value) { + // Must be in TRANSFER_DST mode. + VkClearColorValue clearVal; + for (int i = 0; i < 4; i++) { + clearVal.float32[i] = ((value >> (i * 8)) & 0xFF) / 255.0f; + } + VkImageSubresourceRange range{}; + range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + range.layerCount = 1; + range.baseMipLevel = mip; + range.levelCount = 1; + vkCmdClearColorImage(cmd, image_, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearVal, 1, &range); +} + void VulkanTexture::GenerateMip(VkCommandBuffer cmd, int mip) { _assert_msg_(mip != 0, "Cannot generate the first level"); _assert_msg_(mip < numMips_, "Cannot generate mipmaps past the maximum created (%d vs %d)", mip, numMips_); diff --git a/Common/Vulkan/VulkanImage.h b/Common/Vulkan/VulkanImage.h index 2c809b2bdb..91e36dc977 100644 --- a/Common/Vulkan/VulkanImage.h +++ b/Common/Vulkan/VulkanImage.h @@ -19,6 +19,7 @@ public: // Usage must at least include VK_IMAGE_USAGE_TRANSFER_DST_BIT in order to use UploadMip. // When using UploadMip, initialLayout should be VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL. 
bool CreateDirect(VkCommandBuffer cmd, VulkanDeviceAllocator *allocator, int w, int h, int numMips, VkFormat format, VkImageLayout initialLayout, VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, const VkComponentMapping *mapping = nullptr); + void ClearMip(VkCommandBuffer cmd, int mip, uint32_t value); void UploadMip(VkCommandBuffer cmd, int mip, int mipWidth, int mipHeight, VkBuffer buffer, uint32_t offset, size_t rowLength); // rowLength is in pixels void GenerateMip(VkCommandBuffer cmd, int mip); void EndCreate(VkCommandBuffer cmd, bool vertexTexture = false, VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); diff --git a/Core/MemMap.h b/Core/MemMap.h index 40a6820698..d0cac9f082 100644 --- a/Core/MemMap.h +++ b/Core/MemMap.h @@ -255,7 +255,12 @@ inline void Write_Float(float f, u32 address) u8* GetPointer(const u32 address); bool IsRAMAddress(const u32 address); -bool IsVRAMAddress(const u32 address); +inline bool IsVRAMAddress(const u32 address) { + return ((address & 0x3F800000) == 0x04000000); +} +inline bool IsDepthTexVRAMAddress(const u32 address) { + return ((address & 0x3FE00000) == 0x04200000) || ((address & 0x3FE00000) == 0x04600000); +} bool IsScratchpadAddress(const u32 address); // Used for auto-converted char * parameters, which can sometimes legitimately be null - diff --git a/Core/MemMapFunctions.cpp b/Core/MemMapFunctions.cpp index 07f912b198..36903cdabb 100644 --- a/Core/MemMapFunctions.cpp +++ b/Core/MemMapFunctions.cpp @@ -112,10 +112,6 @@ bool IsRAMAddress(const u32 address) { } } -bool IsVRAMAddress(const u32 address) { - return ((address & 0x3F800000) == 0x04000000); -} - bool IsScratchpadAddress(const u32 address) { return (address & 0xBFFF0000) == 0x00010000 && (address & 0x0000FFFF) < SCRATCHPAD_SIZE; } diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index ab95f7de01..240b75f28e 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ 
b/GPU/Common/FramebufferManagerCommon.cpp @@ -489,7 +489,7 @@ void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) { if (vfbFormatChanged) { textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_COLOR); - textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_DEPTH); + textureCache_->NotifyFramebuffer(vfb->z_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_DEPTH); if (vfb->drawnFormat != vfb->format) { ReformatFramebufferFrom(vfb, vfb->drawnFormat); } diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index efa80c93e0..73c8a5b918 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -542,6 +542,9 @@ void TextureCacheCommon::SetTexture(bool force) { // Before we go reading the texture from memory, let's check for render-to-texture. // We must do this early so we have the right w/h. entry->framebuffer = nullptr; + if (Memory::IsDepthTexVRAMAddress(texaddr)) { + entry->status |= TexCacheEntry::STATUS_DEPTH; + } AttachFramebufferToEntry(entry, 0); @@ -566,7 +569,8 @@ bool TextureCacheCommon::AttachFramebufferToEntry(TexCacheEntry *entry, u32 texA FramebufferNotificationChannel channel = (entry->status & TexCacheEntry::STATUS_DEPTH) ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR; for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { auto framebuffer = fbCache_[i]; - FramebufferMatchInfo match = MatchFramebuffer(entry, framebuffer->fb_address, framebuffer, texAddrOffset, channel); + uint32_t fb_addr = channel == NOTIFY_FB_DEPTH ? 
framebuffer->z_address : framebuffer->fb_address; + FramebufferMatchInfo match = MatchFramebuffer(entry, fb_addr, framebuffer, texAddrOffset, channel); if (match.match != FramebufferMatch::IGNORE && match.match != FramebufferMatch::NO_MATCH) { candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel }); } else if (match.match == FramebufferMatch::IGNORE) { @@ -597,7 +601,6 @@ bool TextureCacheCommon::AttachFramebufferToEntry(TexCacheEntry *entry, u32 texA return AttachBestCandidate(candidates); } -// reason is just used for reporting/logging. bool TextureCacheCommon::AttachBestCandidate(const std::vector<AttachCandidate> &candidates) { _dbg_assert_(!candidates.empty()); @@ -628,7 +631,12 @@ bool TextureCacheCommon::AttachBestCandidate(const std::vector<AttachCandidate> relevancy += 10; } - if (relevancy >= bestRelevancy) { + // Bonus points for no offset. + if (candidate.match.xOffset == 0 && candidate.match.yOffset == 0) { + relevancy += 9; + } + + if (relevancy > bestRelevancy) { bestRelevancy = relevancy; bestIndex = i; } @@ -767,36 +775,36 @@ void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *fram std::vector<AttachCandidate> candidates; // TODO: Rework this to not try to "apply" all matches, only the best one. - for (auto it = cache_.lower_bound(cacheKey), end = cache_.upper_bound(cacheKeyEnd); it != end; ++it) { - TexCacheEntry *entry = it->second.get(); - FramebufferMatchInfo match = MatchFramebuffer(entry, addr, framebuffer, 0, channel); - if (match.match != FramebufferMatch::IGNORE && match.match != FramebufferMatch::NO_MATCH) { - candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel }); - } - } - - // Let's assume anything in mirrors is fair game to check. - // TODO: Only do this for depth? - for (auto it = cache_.lower_bound(mirrorCacheKey), end = cache_.upper_bound(mirrorCacheKeyEnd); it != end; ++it) { - const u64 mirrorlessKey = it->first & ~0x0060000000000000ULL; - // Let's still make sure it's in the cache range. 
- if (mirrorlessKey >= cacheKey && mirrorlessKey <= cacheKeyEnd) { + if (channel == FramebufferNotificationChannel::NOTIFY_FB_COLOR) { + // Color - no need to look in the mirrors. + for (auto it = cache_.lower_bound(cacheKey), end = cache_.upper_bound(cacheKeyEnd); it != end; ++it) { TexCacheEntry *entry = it->second.get(); FramebufferMatchInfo match = MatchFramebuffer(entry, addr, framebuffer, 0, channel); if (match.match != FramebufferMatch::IGNORE && match.match != FramebufferMatch::NO_MATCH) { candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel }); } } + } else { + // Depth. Just look in the mirrors. + for (auto it = cache_.lower_bound(mirrorCacheKey), end = cache_.upper_bound(mirrorCacheKeyEnd); it != end; ++it) { + const u64 mirrorlessKey = it->first & ~0x0060000000000000ULL; + // Let's still make sure it's in the cache range. + if (mirrorlessKey >= cacheKey && mirrorlessKey <= cacheKeyEnd) { + TexCacheEntry *entry = it->second.get(); + FramebufferMatchInfo match = MatchFramebuffer(entry, addr, framebuffer, 0, channel); + if (match.match != FramebufferMatch::IGNORE && match.match != FramebufferMatch::NO_MATCH) { + candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel }); + } + } + } } if (!candidates.empty()) { - if (candidates.size() > 1) { - bool depth = channel == FramebufferNotificationChannel::NOTIFY_FB_DEPTH; - WARN_LOG_REPORT_ONCE(multitexcandidate, G3D, "NotifyFramebuffer(%s): Multiple (%d) candidate textures. fb addr: %08x (%dx%d stride %d, %s)", - depth ? "DEPTH" : "COLOR", (int)candidates.size(), addr, framebuffer->width, framebuffer->height, depth ? framebuffer->z_stride : framebuffer->fb_stride, GeBufferFormatToString(framebuffer->format)); + // There can actually be multiple ones to update here! This can be the case where two textures point to different framebuffers that share depth buffers. + // So we have no choice but to run all the matches. 
+ for (int i = 0; i < (int)candidates.size(); i++) { + ApplyFramebufferMatch(candidates[i].match, candidates[i].entry, framebuffer->fb_address, framebuffer, candidates[i].channel); } - - AttachBestCandidate(candidates); } break; } @@ -825,6 +833,7 @@ void TextureCacheCommon::AttachFramebufferValid(TexCacheEntry *entry, VirtualFra if (!hasInvalidFramebuffer && !hasOlderFramebuffer) { // If it's valid, but the offset is greater, then we still win. + // TODO: This check should probably be moved to MatchFramebuffer somehow. if (fbTexInfo_[cachekey].yOffset == fbInfo.yOffset) hasFartherFramebuffer = fbTexInfo_[cachekey].xOffset > fbInfo.xOffset; else @@ -836,13 +845,11 @@ void TextureCacheCommon::AttachFramebufferValid(TexCacheEntry *entry, VirtualFra cacheSizeEstimate_ -= EstimateTexMemoryUsage(entry); } ReleaseTexture(entry, true); + entry->framebuffer = framebuffer; entry->invalidHint = 0; entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; entry->maxLevel = 0; - if (channel == NOTIFY_FB_DEPTH) { - entry->status |= TexCacheEntry::STATUS_DEPTH; - } fbTexInfo_[cachekey] = fbInfo; framebuffer->last_frame_attached = gpuStats.numFlips; GPUDebug::NotifyTextureAttachment(entry->addr); @@ -864,9 +871,6 @@ void TextureCacheCommon::AttachFramebufferInvalid(TexCacheEntry *entry, VirtualF entry->invalidHint = -1; entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; entry->maxLevel = 0; - if (channel == NOTIFY_FB_DEPTH) { - entry->status |= TexCacheEntry::STATUS_DEPTH; - } fbTexInfo_[cachekey] = fbInfo; GPUDebug::NotifyTextureAttachment(entry->addr); } @@ -910,16 +914,16 @@ bool TextureCacheCommon::ApplyFramebufferMatch(FramebufferMatchInfo match, TexCa } } -FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel) const { +FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, u32 fb_address, VirtualFramebuffer *framebuffer, u32 
texaddrOffset, FramebufferNotificationChannel channel) const { static const u32 MAX_SUBAREA_Y_OFFSET_SAFE = 32; const u32 mirrorMask = 0x00600000; - u32 addr = address & 0x3FFFFFFF; + u32 addr = fb_address & 0x3FFFFFFF; u32 texaddr = entry->addr + texaddrOffset; bool texInVRAM = Memory::IsVRAMAddress(texaddr); - bool fbInVRAM = Memory::IsVRAMAddress(framebuffer->fb_address); + bool fbInVRAM = Memory::IsVRAMAddress(fb_address); if (texInVRAM != fbInVRAM) { // Shortcut. Cannot possibly be a match. @@ -953,6 +957,7 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, addr &= ~mirrorMask; texaddr &= ~mirrorMask; } + const bool noOffset = texaddr == addr; const bool exactMatch = noOffset && entry->format < 4 && channel == NOTIFY_FB_COLOR; const u32 w = 1 << ((entry->dim >> 0) & 0xf); @@ -995,14 +1000,21 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, const bool clutFormat = IsClutFormat((GETextureFormat)(entry->format)); - const u32 bitOffset = (texaddr - addr) * 8; - const u32 pixelOffset = bitOffset / std::max(1U, (u32)textureBitsPerPixel[entry->format]); - // To avoid ruining git blame, kept the same name as the old struct. FramebufferMatchInfo fbInfo{ FramebufferMatch::VALID }; - fbInfo.yOffset = entry->bufw == 0 ? 0 : pixelOffset / entry->bufw; - fbInfo.xOffset = entry->bufw == 0 ? 0 : pixelOffset % entry->bufw; + const u32 bitOffset = (texaddr - addr) * 8; + if (bitOffset != 0) { + const u32 pixelOffset = bitOffset / std::max(1U, (u32)textureBitsPerPixel[entry->format]); + + fbInfo.yOffset = entry->bufw == 0 ? 0 : pixelOffset / entry->bufw; + fbInfo.xOffset = entry->bufw == 0 ? 0 : pixelOffset % entry->bufw; + } + + if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) { + // Can't be inside the framebuffer. 
+ return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; + } if (framebuffer->fb_stride != entry->bufw) { if (noOffset) { @@ -1021,15 +1033,10 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; } - if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) { - // Can't be inside the framebuffer then, ram. Detach to be safe. - return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; - } - // Trying to play it safe. Below 0x04110000 is almost always framebuffers. // TODO: Maybe we can reduce this check and find a better way above 0x04110000? if (fbInfo.yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000) { - WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", address, fbInfo.xOffset, fbInfo.yOffset, framebuffer->width, framebuffer->height); + WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset, framebuffer->width, framebuffer->height); return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; } @@ -1037,7 +1044,7 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, // 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture. 
if (matchingClutFormat) { if (!noOffset) { - WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset); + WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset); } fbInfo.match = FramebufferMatch::VALID_DEPAL; return fbInfo; @@ -1050,17 +1057,17 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, if (framebuffer->format == entry->format || matchingClutFormat) { if (framebuffer->format != entry->format) { WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with different formats %s != %s at %08x", - GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), address); + GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), fb_address); return fbInfo; // Valid! } else { - WARN_LOG_ONCE(subarea, G3D, "Render to area containing texture at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset); + WARN_LOG_ONCE(subarea, G3D, "Render to area containing texture at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset); // If we return VALID here, God of War Ghost of Sparta/Chains of Olympus will be missing some special effect according to an old comment. 
fbInfo.match = FramebufferMatch::INVALID; return fbInfo; } } else { WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible format %s != %s at %08x", - GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), address); + GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), fb_address); return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH }; } } diff --git a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp index 87920c52a7..05fdf3a4a9 100644 --- a/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp +++ b/GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp @@ -195,7 +195,8 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer, uint32_ } } else { if (doTextureProjection) { - // We don't use textureProj because we need better control and it's probably not much of a savings anyway. + // We don't use textureProj because we need to manually offset from the divided coordinate to do filtering here. + // On older hardware it has the advantage of higher resolution math, but such old hardware can't run Vulkan. 
WRITE(p, " vec2 uv = %s.xy/%s.z;\n vec2 uv_round;\n", texcoord, texcoord); } else { WRITE(p, " vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord); diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index bf51e1f74e..93e39d87fd 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -1007,9 +1007,16 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, localBuf, localOffset, stride / bpp); } else { - data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment); - LoadTextureLevel(*entry, (uint8_t *)data, stride, i, scaleFactor, dstFmt); - entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp); + // Don't even try to read depth data. + if (entry->status & TexCacheEntry::STATUS_DEPTH) { + // Clear with a warning value (hot pink). This should not be seen - means we missed matching a framebuffer + // that a game rendered depth to. + entry->vkTex->ClearMip(cmdInit, i, 0xFFFF00FF); + } else { + data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment); + LoadTextureLevel(*entry, (uint8_t *)data, stride, i, scaleFactor, dstFmt); + entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp); + } } } if (replacer_.Enabled()) {