Merge pull request #13367 from hrydgard/katamari-fix

More depth texturing fixes, re-fixing Me and My Katamari in Vulkan
This commit is contained in:
Henrik Rydgård 2020-09-02 00:32:34 +02:00 committed by GitHub
commit d8a5c710f6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 88 additions and 57 deletions

View file

@ -181,6 +181,20 @@ void VulkanTexture::UploadMip(VkCommandBuffer cmd, int mip, int mipWidth, int mi
vkCmdCopyBufferToImage(cmd, buffer, image_, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy_region);
}
// Records a solid-color clear of one mip level of this texture into `cmd`.
// `value` packs four 8-bit channels, lowest byte first; each byte is normalized
// to [0, 1] and written to the clear color component with the same index.
// The image must already be in VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL.
void VulkanTexture::ClearMip(VkCommandBuffer cmd, int mip, uint32_t value) {
	// Only the selected mip of the single color layer is touched.
	VkImageSubresourceRange subresource{};
	subresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	subresource.baseMipLevel = mip;
	subresource.levelCount = 1;
	subresource.layerCount = 1;

	// Peel one byte per component off the packed value, starting from the low end.
	VkClearColorValue color;
	uint32_t packed = value;
	for (float &component : color.float32) {
		component = (packed & 0xFF) / 255.0f;
		packed >>= 8;
	}

	vkCmdClearColorImage(cmd, image_, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &color, 1, &subresource);
}
void VulkanTexture::GenerateMip(VkCommandBuffer cmd, int mip) {
_assert_msg_(mip != 0, "Cannot generate the first level");
_assert_msg_(mip < numMips_, "Cannot generate mipmaps past the maximum created (%d vs %d)", mip, numMips_);

View file

@ -19,6 +19,7 @@ public:
// Usage must at least include VK_IMAGE_USAGE_TRANSFER_DST_BIT in order to use UploadMip.
// When using UploadMip, initialLayout should be VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL.
bool CreateDirect(VkCommandBuffer cmd, VulkanDeviceAllocator *allocator, int w, int h, int numMips, VkFormat format, VkImageLayout initialLayout, VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, const VkComponentMapping *mapping = nullptr);
// Clears mip level `mip` to a single color. `value` packs four 8-bit channels, lowest byte first.
// As with UploadMip, the image must be in VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL when this is called.
void ClearMip(VkCommandBuffer cmd, int mip, uint32_t value);
void UploadMip(VkCommandBuffer cmd, int mip, int mipWidth, int mipHeight, VkBuffer buffer, uint32_t offset, size_t rowLength); // rowLength is in pixels
void GenerateMip(VkCommandBuffer cmd, int mip);
void EndCreate(VkCommandBuffer cmd, bool vertexTexture = false, VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

View file

@ -255,7 +255,12 @@ inline void Write_Float(float f, u32 address)
u8* GetPointer(const u32 address);
bool IsRAMAddress(const u32 address);
bool IsVRAMAddress(const u32 address);
// Returns true when `address`, masked with 0x3F800000, equals 0x04000000,
// i.e. the address falls in the 8 MB window starting at 0x04000000
// (the two top bits of the address are ignored).
inline bool IsVRAMAddress(const u32 address) {
	const u32 regionMask = 0x3F800000;
	const u32 vramBase = 0x04000000;
	return (address & regionMask) == vramBase;
}
// Returns true when `address` falls in one of the two 2 MB windows starting at
// 0x04200000 or 0x04600000 (the two top bits of the address are ignored).
// Callers in this change use it to flag texture addresses as depth textures.
inline bool IsDepthTexVRAMAddress(const u32 address) {
	// Reduce the address to its 2 MB window once, then compare against both windows.
	const u32 window = address & 0x3FE00000;
	if (window == 0x04200000) {
		return true;
	}
	return window == 0x04600000;
}
bool IsScratchpadAddress(const u32 address);
// Used for auto-converted char * parameters, which can sometimes legitimately be null -

View file

@ -112,10 +112,6 @@ bool IsRAMAddress(const u32 address) {
}
}
// True when the address, with everything outside 0x3F800000 masked away,
// is exactly 0x04000000.
bool IsVRAMAddress(const u32 address) {
	const u32 masked = address & 0x3F800000;
	return masked == 0x04000000;
}
// True when the address selects the 64 KB page at 0x00010000 (bit 30 is ignored)
// and its offset within that page is below SCRATCHPAD_SIZE.
bool IsScratchpadAddress(const u32 address) {
	const bool inScratchpadPage = (address & 0xBFFF0000) == 0x00010000;
	if (!inScratchpadPage) {
		return false;
	}
	const u32 offsetInPage = address & 0x0000FFFF;
	return offsetInPage < SCRATCHPAD_SIZE;
}

View file

@ -489,7 +489,7 @@ void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer
void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) {
if (vfbFormatChanged) {
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_COLOR);
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_DEPTH);
textureCache_->NotifyFramebuffer(vfb->z_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_DEPTH);
if (vfb->drawnFormat != vfb->format) {
ReformatFramebufferFrom(vfb, vfb->drawnFormat);
}

View file

@ -542,6 +542,9 @@ void TextureCacheCommon::SetTexture(bool force) {
// Before we go reading the texture from memory, let's check for render-to-texture.
// We must do this early so we have the right w/h.
entry->framebuffer = nullptr;
if (Memory::IsDepthTexVRAMAddress(texaddr)) {
entry->status |= TexCacheEntry::STATUS_DEPTH;
}
AttachFramebufferToEntry(entry, 0);
@ -566,7 +569,8 @@ bool TextureCacheCommon::AttachFramebufferToEntry(TexCacheEntry *entry, u32 texA
FramebufferNotificationChannel channel = (entry->status & TexCacheEntry::STATUS_DEPTH) ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR;
for (size_t i = 0, n = fbCache_.size(); i < n; ++i) {
auto framebuffer = fbCache_[i];
FramebufferMatchInfo match = MatchFramebuffer(entry, framebuffer->fb_address, framebuffer, texAddrOffset, channel);
uint32_t fb_addr = channel == NOTIFY_FB_DEPTH ? framebuffer->z_address : framebuffer->fb_address;
FramebufferMatchInfo match = MatchFramebuffer(entry, fb_addr, framebuffer, texAddrOffset, channel);
if (match.match != FramebufferMatch::IGNORE && match.match != FramebufferMatch::NO_MATCH) {
candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel });
} else if (match.match == FramebufferMatch::IGNORE) {
@ -597,7 +601,6 @@ bool TextureCacheCommon::AttachFramebufferToEntry(TexCacheEntry *entry, u32 texA
return AttachBestCandidate(candidates);
}
// reason is just used for reporting/logging.
bool TextureCacheCommon::AttachBestCandidate(const std::vector<AttachCandidate> &candidates) {
_dbg_assert_(!candidates.empty());
@ -628,7 +631,12 @@ bool TextureCacheCommon::AttachBestCandidate(const std::vector<AttachCandidate>
relevancy += 10;
}
if (relevancy >= bestRelevancy) {
// Bonus points for no offset.
if (candidate.match.xOffset == 0 && candidate.match.yOffset == 0) {
relevancy += 9;
}
if (relevancy > bestRelevancy) {
bestRelevancy = relevancy;
bestIndex = i;
}
@ -767,36 +775,36 @@ void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *fram
std::vector<AttachCandidate> candidates;
// TODO: Rework this to not try to "apply" all matches, only the best one.
for (auto it = cache_.lower_bound(cacheKey), end = cache_.upper_bound(cacheKeyEnd); it != end; ++it) {
TexCacheEntry *entry = it->second.get();
FramebufferMatchInfo match = MatchFramebuffer(entry, addr, framebuffer, 0, channel);
if (match.match != FramebufferMatch::IGNORE && match.match != FramebufferMatch::NO_MATCH) {
candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel });
}
}
// Let's assume anything in mirrors is fair game to check.
// TODO: Only do this for depth?
for (auto it = cache_.lower_bound(mirrorCacheKey), end = cache_.upper_bound(mirrorCacheKeyEnd); it != end; ++it) {
const u64 mirrorlessKey = it->first & ~0x0060000000000000ULL;
// Let's still make sure it's in the cache range.
if (mirrorlessKey >= cacheKey && mirrorlessKey <= cacheKeyEnd) {
if (channel == FramebufferNotificationChannel::NOTIFY_FB_COLOR) {
// Color - no need to look in the mirrors.
for (auto it = cache_.lower_bound(cacheKey), end = cache_.upper_bound(cacheKeyEnd); it != end; ++it) {
TexCacheEntry *entry = it->second.get();
FramebufferMatchInfo match = MatchFramebuffer(entry, addr, framebuffer, 0, channel);
if (match.match != FramebufferMatch::IGNORE && match.match != FramebufferMatch::NO_MATCH) {
candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel });
}
}
} else {
// Depth. Just look in the mirrors.
for (auto it = cache_.lower_bound(mirrorCacheKey), end = cache_.upper_bound(mirrorCacheKeyEnd); it != end; ++it) {
const u64 mirrorlessKey = it->first & ~0x0060000000000000ULL;
// Let's still make sure it's in the cache range.
if (mirrorlessKey >= cacheKey && mirrorlessKey <= cacheKeyEnd) {
TexCacheEntry *entry = it->second.get();
FramebufferMatchInfo match = MatchFramebuffer(entry, addr, framebuffer, 0, channel);
if (match.match != FramebufferMatch::IGNORE && match.match != FramebufferMatch::NO_MATCH) {
candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel });
}
}
}
}
if (!candidates.empty()) {
if (candidates.size() > 1) {
bool depth = channel == FramebufferNotificationChannel::NOTIFY_FB_DEPTH;
WARN_LOG_REPORT_ONCE(multitexcandidate, G3D, "NotifyFramebuffer(%s): Multiple (%d) candidate textures. fb addr: %08x (%dx%d stride %d, %s)",
depth ? "DEPTH" : "COLOR", (int)candidates.size(), addr, framebuffer->width, framebuffer->height, depth ? framebuffer->z_stride : framebuffer->fb_stride, GeBufferFormatToString(framebuffer->format));
// There can actually be multiple ones to update here! This can be the case where two textures point to different framebuffers that share depth buffers.
// So we have no choice but to run all the matches.
for (int i = 0; i < (int)candidates.size(); i++) {
ApplyFramebufferMatch(candidates[i].match, candidates[i].entry, framebuffer->fb_address, framebuffer, candidates[i].channel);
}
AttachBestCandidate(candidates);
}
break;
}
@ -825,6 +833,7 @@ void TextureCacheCommon::AttachFramebufferValid(TexCacheEntry *entry, VirtualFra
if (!hasInvalidFramebuffer && !hasOlderFramebuffer) {
// If it's valid, but the offset is greater, then we still win.
// TODO: This check should probably be moved to MatchFramebuffer somehow.
if (fbTexInfo_[cachekey].yOffset == fbInfo.yOffset)
hasFartherFramebuffer = fbTexInfo_[cachekey].xOffset > fbInfo.xOffset;
else
@ -836,13 +845,11 @@ void TextureCacheCommon::AttachFramebufferValid(TexCacheEntry *entry, VirtualFra
cacheSizeEstimate_ -= EstimateTexMemoryUsage(entry);
}
ReleaseTexture(entry, true);
entry->framebuffer = framebuffer;
entry->invalidHint = 0;
entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE;
entry->maxLevel = 0;
if (channel == NOTIFY_FB_DEPTH) {
entry->status |= TexCacheEntry::STATUS_DEPTH;
}
fbTexInfo_[cachekey] = fbInfo;
framebuffer->last_frame_attached = gpuStats.numFlips;
GPUDebug::NotifyTextureAttachment(entry->addr);
@ -864,9 +871,6 @@ void TextureCacheCommon::AttachFramebufferInvalid(TexCacheEntry *entry, VirtualF
entry->invalidHint = -1;
entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE;
entry->maxLevel = 0;
if (channel == NOTIFY_FB_DEPTH) {
entry->status |= TexCacheEntry::STATUS_DEPTH;
}
fbTexInfo_[cachekey] = fbInfo;
GPUDebug::NotifyTextureAttachment(entry->addr);
}
@ -910,16 +914,16 @@ bool TextureCacheCommon::ApplyFramebufferMatch(FramebufferMatchInfo match, TexCa
}
}
FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel) const {
FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry, u32 fb_address, VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel) const {
static const u32 MAX_SUBAREA_Y_OFFSET_SAFE = 32;
const u32 mirrorMask = 0x00600000;
u32 addr = address & 0x3FFFFFFF;
u32 addr = fb_address & 0x3FFFFFFF;
u32 texaddr = entry->addr + texaddrOffset;
bool texInVRAM = Memory::IsVRAMAddress(texaddr);
bool fbInVRAM = Memory::IsVRAMAddress(framebuffer->fb_address);
bool fbInVRAM = Memory::IsVRAMAddress(fb_address);
if (texInVRAM != fbInVRAM) {
// Shortcut. Cannot possibly be a match.
@ -953,6 +957,7 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry,
addr &= ~mirrorMask;
texaddr &= ~mirrorMask;
}
const bool noOffset = texaddr == addr;
const bool exactMatch = noOffset && entry->format < 4 && channel == NOTIFY_FB_COLOR;
const u32 w = 1 << ((entry->dim >> 0) & 0xf);
@ -995,14 +1000,21 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry,
const bool clutFormat = IsClutFormat((GETextureFormat)(entry->format));
const u32 bitOffset = (texaddr - addr) * 8;
const u32 pixelOffset = bitOffset / std::max(1U, (u32)textureBitsPerPixel[entry->format]);
// To avoid ruining git blame, kept the same name as the old struct.
FramebufferMatchInfo fbInfo{ FramebufferMatch::VALID };
fbInfo.yOffset = entry->bufw == 0 ? 0 : pixelOffset / entry->bufw;
fbInfo.xOffset = entry->bufw == 0 ? 0 : pixelOffset % entry->bufw;
const u32 bitOffset = (texaddr - addr) * 8;
if (bitOffset != 0) {
const u32 pixelOffset = bitOffset / std::max(1U, (u32)textureBitsPerPixel[entry->format]);
fbInfo.yOffset = entry->bufw == 0 ? 0 : pixelOffset / entry->bufw;
fbInfo.xOffset = entry->bufw == 0 ? 0 : pixelOffset % entry->bufw;
}
if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) {
// Can't be inside the framebuffer.
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
}
if (framebuffer->fb_stride != entry->bufw) {
if (noOffset) {
@ -1021,15 +1033,10 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry,
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
}
if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) {
// Can't be inside the framebuffer then, ram. Detach to be safe.
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
}
// Trying to play it safe. Below 0x04110000 is almost always framebuffers.
// TODO: Maybe we can reduce this check and find a better way above 0x04110000?
if (fbInfo.yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000) {
WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", address, fbInfo.xOffset, fbInfo.yOffset, framebuffer->width, framebuffer->height);
WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset, framebuffer->width, framebuffer->height);
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
}
@ -1037,7 +1044,7 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry,
// 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture.
if (matchingClutFormat) {
if (!noOffset) {
WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset);
WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset);
}
fbInfo.match = FramebufferMatch::VALID_DEPAL;
return fbInfo;
@ -1050,17 +1057,17 @@ FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(TexCacheEntry *entry,
if (framebuffer->format == entry->format || matchingClutFormat) {
if (framebuffer->format != entry->format) {
WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with different formats %s != %s at %08x",
GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), address);
GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), fb_address);
return fbInfo; // Valid!
} else {
WARN_LOG_ONCE(subarea, G3D, "Render to area containing texture at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset);
WARN_LOG_ONCE(subarea, G3D, "Render to area containing texture at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset);
// If we return VALID here, God of War Ghost of Sparta/Chains of Olympus will be missing some special effect according to an old comment.
fbInfo.match = FramebufferMatch::INVALID;
return fbInfo;
}
} else {
WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible format %s != %s at %08x",
GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), address);
GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), fb_address);
return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
}
}

View file

@ -195,7 +195,8 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer, uint32_
}
} else {
if (doTextureProjection) {
// We don't use textureProj because we need better control and it's probably not much of a savings anyway.
// We don't use textureProj because we need to manually offset from the divided coordinate to do filtering here.
// On older hardware it has the advantage of higher resolution math, but such old hardware can't run Vulkan.
WRITE(p, " vec2 uv = %s.xy/%s.z;\n vec2 uv_round;\n", texcoord, texcoord);
} else {
WRITE(p, " vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord);

View file

@ -1007,9 +1007,16 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, localBuf, localOffset, stride / bpp);
} else {
data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment);
LoadTextureLevel(*entry, (uint8_t *)data, stride, i, scaleFactor, dstFmt);
entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp);
// Don't even try to read depth data.
if (entry->status & TexCacheEntry::STATUS_DEPTH) {
// Clear with a warning value (hot pink). This should not be seen - means we missed matching a framebuffer
// that a game rendered depth to.
entry->vkTex->ClearMip(cmdInit, i, 0xFFFF00FF);
} else {
data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment);
LoadTextureLevel(*entry, (uint8_t *)data, stride, i, scaleFactor, dstFmt);
entry->vkTex->UploadMip(cmdInit, i, mipWidth, mipHeight, texBuf, bufferOffset, stride / bpp);
}
}
}
if (replacer_.Enabled()) {