Merge pull request #13262 from hrydgard/depth-texturing

Implement texturing from depth buffers (Vulkan only so far)
This commit is contained in:
Unknown W. Brackets 2020-08-10 03:42:17 -04:00 committed by GitHub
commit d90630c263
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 175 additions and 80 deletions

View file

@ -21,6 +21,7 @@
#include "GPU/Common/ShaderId.h" #include "GPU/Common/ShaderId.h"
#include "GPU/Common/ShaderCommon.h" #include "GPU/Common/ShaderCommon.h"
#include "Common/StringUtils.h"
#include "Common/Log.h" #include "Common/Log.h"
#include "Core/Reporting.h" #include "Core/Reporting.h"
#include "GPU/GPUState.h" #include "GPU/GPUState.h"
@ -45,6 +46,14 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
WRITE(p, "layout(set = 0, binding = 1) uniform sampler2D pal;\n"); WRITE(p, "layout(set = 0, binding = 1) uniform sampler2D pal;\n");
WRITE(p, "layout(location = 0) in vec2 v_texcoord0;\n"); WRITE(p, "layout(location = 0) in vec2 v_texcoord0;\n");
WRITE(p, "layout(location = 0) out vec4 fragColor0;\n"); WRITE(p, "layout(location = 0) out vec4 fragColor0;\n");
// Support for depth.
if (pixelFormat == GE_FORMAT_DEPTH16) {
WRITE(p, "layout (push_constant) uniform params {\n");
WRITE(p, " float z_scale; float z_offset;\n");
WRITE(p, "};\n");
}
} else { } else {
if (gl_extensions.IsGLES) { if (gl_extensions.IsGLES) {
WRITE(p, "#version 300 es\n"); WRITE(p, "#version 300 es\n");
@ -63,10 +72,13 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
WRITE(p, "float4 main(in float2 v_texcoord0 : TEXCOORD0) : SV_Target {\n"); WRITE(p, "float4 main(in float2 v_texcoord0 : TEXCOORD0) : SV_Target {\n");
WRITE(p, " float4 color = tex.Sample(texSamp, v_texcoord0);\n"); WRITE(p, " float4 color = tex.Sample(texSamp, v_texcoord0);\n");
} else { } else {
// TODO: Add support for integer textures. Though it hardly matters.
WRITE(p, "void main() {\n"); WRITE(p, "void main() {\n");
if (pixelFormat == GE_FORMAT_DEPTH16) {
WRITE(p, " float color = texture(tex, v_texcoord0).r;\n");
} else {
WRITE(p, " vec4 color = texture(tex, v_texcoord0);\n"); WRITE(p, " vec4 color = texture(tex, v_texcoord0);\n");
} }
}
int mask = gstate.getClutIndexMask(); int mask = gstate.getClutIndexMask();
int shift = gstate.getClutIndexShift(); int shift = gstate.getClutIndexShift();
@ -105,6 +117,11 @@ void GenerateDepalShader300(char *buffer, GEBufferFormat pixelFormat, ShaderLang
if (shiftedMask & 0x8000) WRITE(p, " int a = int(color.a);\n"); else WRITE(p, " int a = 0;\n"); if (shiftedMask & 0x8000) WRITE(p, " int a = int(color.a);\n"); else WRITE(p, " int a = 0;\n");
WRITE(p, " int index = (a << 15) | (b << 10) | (g << 5) | (r);\n"); WRITE(p, " int index = (a << 15) | (b << 10) | (g << 5) | (r);\n");
break; break;
case GE_FORMAT_DEPTH16:
// Remap depth buffer.
WRITE(p, " float depth = (color - z_offset) * z_scale;\n");
WRITE(p, " int index = int(clamp(depth, 0.0, 65535.0));\n");
break;
default: default:
break; break;
} }
@ -225,6 +242,17 @@ void GenerateDepalShaderFloat(char *buffer, GEBufferFormat pixelFormat, ShaderLa
formatOK = false; formatOK = false;
} }
break; break;
case GE_FORMAT_DEPTH16:
{
// TODO: I think we can handle most scenarios here, but texturing from depth buffers requires an extension on ES 2.0 anyway.
if ((mask & (mask + 1)) == 0 && shift < 16) {
index_multiplier = 1.0f / (float)(1 << shift);
truncate_cpy(lookupMethod, "index.r");
} else {
formatOK = false;
}
break;
}
default: default:
break; break;
} }

View file

@ -374,7 +374,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
ResizeFramebufFBO(vfb, drawing_width, drawing_height, true); ResizeFramebufFBO(vfb, drawing_width, drawing_height, true);
NotifyRenderFramebufferCreated(vfb); NotifyRenderFramebufferCreated(vfb);
INFO_LOG(FRAMEBUF, "Creating FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format); INFO_LOG(FRAMEBUF, "Creating FBO for %08x (z: %08x) : %i x %i x %i", vfb->fb_address, vfb->z_address, vfb->width, vfb->height, vfb->format);
vfb->last_frame_render = gpuStats.numFlips; vfb->last_frame_render = gpuStats.numFlips;
frameLastFramebufUsed_ = gpuStats.numFlips; frameLastFramebufUsed_ = gpuStats.numFlips;
@ -445,7 +445,9 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
} }
void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) { void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) {
textureCache_->NotifyFramebuffer(v->fb_address, v, NOTIFY_FB_DESTROYED); // Notify the texture cache of both the color and depth buffers.
textureCache_->NotifyFramebuffer(v->fb_address, v, NOTIFY_FB_DESTROYED, NOTIFY_FB_COLOR);
textureCache_->NotifyFramebuffer(v->z_address, v, NOTIFY_FB_DESTROYED, NOTIFY_FB_DEPTH);
if (v->fbo) { if (v->fbo) {
v->fbo->Release(); v->fbo->Release();
v->fbo = nullptr; v->fbo = nullptr;
@ -472,7 +474,8 @@ void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer
DownloadFramebufferOnSwitch(currentRenderVfb_); DownloadFramebufferOnSwitch(currentRenderVfb_);
} }
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED); textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED, NOTIFY_FB_COLOR);
textureCache_->NotifyFramebuffer(vfb->z_address, vfb, NOTIFY_FB_CREATED, NOTIFY_FB_DEPTH);
// Ugly... // Ugly...
if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
@ -486,7 +489,8 @@ void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer
void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) { void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb, bool vfbFormatChanged) {
if (vfbFormatChanged) { if (vfbFormatChanged) {
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED); textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_COLOR);
// The depth channel is keyed by the depth buffer's address (z_address), exactly as in the
// CREATED/DESTROYED depth notifications; passing fb_address here would scan the color range instead.
textureCache_->NotifyFramebuffer(vfb->z_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_DEPTH);
if (vfb->drawnFormat != vfb->format) { if (vfb->drawnFormat != vfb->format) {
ReformatFramebufferFrom(vfb, vfb->drawnFormat); ReformatFramebufferFrom(vfb, vfb->drawnFormat);
} }
@ -552,7 +556,8 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe
} else { } else {
if (vfb->fbo) { if (vfb->fbo) {
// This should only happen very briefly when toggling useBufferedRendering_. // This should only happen very briefly when toggling useBufferedRendering_.
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_DESTROYED); textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_DESTROYED, NOTIFY_FB_COLOR);
textureCache_->NotifyFramebuffer(vfb->z_address, vfb, NOTIFY_FB_DESTROYED, NOTIFY_FB_DEPTH);
vfb->fbo->Release(); vfb->fbo->Release();
vfb->fbo = nullptr; vfb->fbo = nullptr;
} }
@ -564,7 +569,8 @@ void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffe
gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB; gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
} }
} }
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED); textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_COLOR);
textureCache_->NotifyFramebuffer(vfb->z_address, vfb, NOTIFY_FB_UPDATED, NOTIFY_FB_DEPTH);
// ugly... is all this needed? // ugly... is all this needed?
if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) { if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
@ -1164,9 +1170,9 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
if (dstBuffer && srcBuffer && !isMemset) { if (dstBuffer && srcBuffer && !isMemset) {
if (srcBuffer == dstBuffer) { if (srcBuffer == dstBuffer) {
WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst); WARN_LOG_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x (size: %x)", src, dst, size);
} else { } else {
WARN_LOG_REPORT_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x", src, dst); WARN_LOG_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x (size: %x)", src, dst, size);
// Just do the blit! // Just do the blit!
BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0); BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0);
SetColorUpdated(dstBuffer, skipDrawReason); SetColorUpdated(dstBuffer, skipDrawReason);
@ -1177,7 +1183,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
if (isMemset) { if (isMemset) {
gpuStats.numClears++; gpuStats.numClears++;
} }
WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x", src, dst); WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x (size: %x)", src, dst, size);
FlushBeforeCopy(); FlushBeforeCopy();
const u8 *srcBase = Memory::GetPointerUnchecked(src); const u8 *srcBase = Memory::GetPointerUnchecked(src);
DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH); DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->format, dstBuffer->fb_stride, dstBuffer->width, dstH);
@ -1189,7 +1195,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst); WARN_LOG_ONCE(btdcpy, G3D, "Memcpy fbo download %08x -> %08x", src, dst);
FlushBeforeCopy(); FlushBeforeCopy();
if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) { if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) {
WARN_LOG_REPORT_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight); WARN_LOG_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight);
} else if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated && !PSP_CoreParameter().compat.flags().DisableReadbacks) { } else if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated && !PSP_CoreParameter().compat.flags().DisableReadbacks) {
ReadFramebufferToMemory(srcBuffer, 0, srcY, srcBuffer->width, srcH); ReadFramebufferToMemory(srcBuffer, 0, srcY, srcBuffer->width, srcH);
srcBuffer->usageFlags = (srcBuffer->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR; srcBuffer->usageFlags = (srcBuffer->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
@ -1330,9 +1336,9 @@ VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAd
vfb->drawnFormat = GE_FORMAT_8888; vfb->drawnFormat = GE_FORMAT_8888;
vfb->usageFlags = FB_USAGE_RENDERTARGET; vfb->usageFlags = FB_USAGE_RENDERTARGET;
SetColorUpdated(vfb, 0); SetColorUpdated(vfb, 0);
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED);
char name[64]; char name[64];
snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address); snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address);
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb, NOTIFY_FB_CREATED, NOTIFY_FB_COLOR);
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, 1, true, (Draw::FBColorDepth)vfb->colorDepth, name }); vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, 1, true, (Draw::FBColorDepth)vfb->colorDepth, name });
vfbs_.push_back(vfb); vfbs_.push_back(vfb);

View file

@ -633,9 +633,6 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
float vpWidth = fabsf(gstate_c.vpWidth); float vpWidth = fabsf(gstate_c.vpWidth);
float vpHeight = fabsf(gstate_c.vpHeight); float vpHeight = fabsf(gstate_c.vpHeight);
// We used to apply the viewport here via glstate, but there are limits which vary by driver.
// This may mean some games won't work, or at least won't work at higher render resolutions.
// So we apply it in the shader instead.
float left = renderX + vpX0; float left = renderX + vpX0;
float top = renderY + vpY0; float top = renderY + vpY0;
float right = left + vpWidth; float right = left + vpWidth;

View file

@ -389,7 +389,7 @@ void TextureCacheCommon::SetTexture(bool force) {
return; return;
} else { } else {
// Make sure we re-evaluate framebuffers. // Make sure we re-evaluate framebuffers.
DetachFramebuffer(entry, texaddr, entry->framebuffer); DetachFramebuffer(entry, texaddr, entry->framebuffer, (entry->status & TexCacheEntry::STATUS_DEPTH) ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR);
reason = "detached framebuf"; reason = "detached framebuf";
match = false; match = false;
} }
@ -530,7 +530,7 @@ void TextureCacheCommon::SetTexture(bool force) {
entry->framebuffer = nullptr; entry->framebuffer = nullptr;
for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { for (size_t i = 0, n = fbCache_.size(); i < n; ++i) {
auto framebuffer = fbCache_[i]; auto framebuffer = fbCache_[i];
AttachFramebuffer(entry, framebuffer->fb_address, framebuffer); AttachFramebuffer(entry, framebuffer->fb_address, framebuffer, 0, (entry->status & TexCacheEntry::STATUS_DEPTH) ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR);
} }
// If we ended up with a framebuffer, attach it - no texture decoding needed. // If we ended up with a framebuffer, attach it - no texture decoding needed.
@ -640,12 +640,12 @@ void TextureCacheCommon::HandleTextureChange(TexCacheEntry *const entry, const c
entry->numFrames = 0; entry->numFrames = 0;
} }
void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg) { void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg, FramebufferNotificationChannel channel) {
// Mask to ignore the Z memory mirrors if the address is in VRAM. // Mask to ignore the Z memory mirrors if the address is in VRAM.
// These checks are mainly to reduce scanning all textures. // These checks are mainly to reduce scanning all textures.
const u32 mirrorMask = 0x00600000; const u32 mirrorMask = 0x00600000;
const u32 addr = Memory::IsVRAMAddress(address) ? (address & ~mirrorMask) : address; const u32 addr = Memory::IsVRAMAddress(address) ? (address & ~mirrorMask) : address;
const u32 bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2; const u32 bpp = (framebuffer->format == GE_FORMAT_8888 && channel == NOTIFY_FB_COLOR) ? 4 : 2;
const u64 cacheKey = (u64)addr << 32; const u64 cacheKey = (u64)addr << 32;
// If it has a clut, those are the low 32 bits, so it'll be inside this range. // If it has a clut, those are the low 32 bits, so it'll be inside this range.
// Also, if it's a subsample of the buffer, it'll also be within the FBO. // Also, if it's a subsample of the buffer, it'll also be within the FBO.
@ -663,14 +663,14 @@ void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *fram
fbCache_.push_back(framebuffer); fbCache_.push_back(framebuffer);
} }
for (auto it = cache_.lower_bound(cacheKey), end = cache_.upper_bound(cacheKeyEnd); it != end; ++it) { for (auto it = cache_.lower_bound(cacheKey), end = cache_.upper_bound(cacheKeyEnd); it != end; ++it) {
AttachFramebuffer(it->second.get(), addr, framebuffer); AttachFramebuffer(it->second.get(), addr, framebuffer, 0, channel);
} }
// Let's assume anything in mirrors is fair game to check. // Let's assume anything in mirrors is fair game to check.
for (auto it = cache_.lower_bound(mirrorCacheKey), end = cache_.upper_bound(mirrorCacheKeyEnd); it != end; ++it) { for (auto it = cache_.lower_bound(mirrorCacheKey), end = cache_.upper_bound(mirrorCacheKeyEnd); it != end; ++it) {
const u64 mirrorlessKey = it->first & ~0x0060000000000000ULL; const u64 mirrorlessKey = it->first & ~0x0060000000000000ULL;
// Let's still make sure it's in the cache range. // Let's still make sure it's in the cache range.
if (mirrorlessKey >= cacheKey && mirrorlessKey <= cacheKeyEnd) { if (mirrorlessKey >= cacheKey && mirrorlessKey <= cacheKeyEnd) {
AttachFramebuffer(it->second.get(), addr, framebuffer); AttachFramebuffer(it->second.get(), addr, framebuffer, 0, channel);
} }
} }
break; break;
@ -685,12 +685,13 @@ void TextureCacheCommon::NotifyFramebuffer(u32 address, VirtualFramebuffer *fram
// We might erase, so move to the next one already (which won't become invalid.) // We might erase, so move to the next one already (which won't become invalid.)
++it; ++it;
DetachFramebuffer(cache_[cachekey].get(), addr, framebuffer); DetachFramebuffer(cache_[cachekey].get(), addr, framebuffer, channel);
} }
break; break;
} }
} }
void TextureCacheCommon::AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo) {
void TextureCacheCommon::AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo, FramebufferNotificationChannel channel) {
const u64 cachekey = entry->CacheKey(); const u64 cachekey = entry->CacheKey();
const bool hasInvalidFramebuffer = entry->framebuffer == nullptr || entry->invalidHint == -1; const bool hasInvalidFramebuffer = entry->framebuffer == nullptr || entry->invalidHint == -1;
const bool hasOlderFramebuffer = entry->framebuffer != nullptr && entry->framebuffer->last_frame_render < framebuffer->last_frame_render; const bool hasOlderFramebuffer = entry->framebuffer != nullptr && entry->framebuffer->last_frame_render < framebuffer->last_frame_render;
@ -713,6 +714,9 @@ void TextureCacheCommon::AttachFramebufferValid(TexCacheEntry *entry, VirtualFra
entry->invalidHint = 0; entry->invalidHint = 0;
entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE;
entry->maxLevel = 0; entry->maxLevel = 0;
if (channel == NOTIFY_FB_DEPTH) {
entry->status |= TexCacheEntry::STATUS_DEPTH;
}
fbTexInfo_[cachekey] = fbInfo; fbTexInfo_[cachekey] = fbInfo;
framebuffer->last_frame_attached = gpuStats.numFlips; framebuffer->last_frame_attached = gpuStats.numFlips;
GPUDebug::NotifyTextureAttachment(entry->addr); GPUDebug::NotifyTextureAttachment(entry->addr);
@ -721,7 +725,7 @@ void TextureCacheCommon::AttachFramebufferValid(TexCacheEntry *entry, VirtualFra
} }
} }
void TextureCacheCommon::AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo) { void TextureCacheCommon::AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo, FramebufferNotificationChannel channel) {
const u64 cachekey = entry->CacheKey(); const u64 cachekey = entry->CacheKey();
if (entry->framebuffer == nullptr || entry->framebuffer == framebuffer) { if (entry->framebuffer == nullptr || entry->framebuffer == framebuffer) {
@ -733,12 +737,14 @@ void TextureCacheCommon::AttachFramebufferInvalid(TexCacheEntry *entry, VirtualF
entry->invalidHint = -1; entry->invalidHint = -1;
entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE;
entry->maxLevel = 0; entry->maxLevel = 0;
if (channel == NOTIFY_FB_DEPTH)
entry->status |= TexCacheEntry::STATUS_DEPTH;
fbTexInfo_[cachekey] = fbInfo; fbTexInfo_[cachekey] = fbInfo;
GPUDebug::NotifyTextureAttachment(entry->addr); GPUDebug::NotifyTextureAttachment(entry->addr);
} }
} }
void TextureCacheCommon::DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer) { void TextureCacheCommon::DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, FramebufferNotificationChannel channel) {
if (entry->framebuffer == framebuffer) { if (entry->framebuffer == framebuffer) {
const u64 cachekey = entry->CacheKey(); const u64 cachekey = entry->CacheKey();
cacheSizeEstimate_ += EstimateTexMemoryUsage(entry); cacheSizeEstimate_ += EstimateTexMemoryUsage(entry);
@ -751,7 +757,7 @@ void TextureCacheCommon::DetachFramebuffer(TexCacheEntry *entry, u32 address, Vi
} }
} }
bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset) { bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel) {
static const u32 MAX_SUBAREA_Y_OFFSET_SAFE = 32; static const u32 MAX_SUBAREA_Y_OFFSET_SAFE = 32;
AttachedFramebufferInfo fbInfo = { 0 }; AttachedFramebufferInfo fbInfo = { 0 };
@ -764,7 +770,7 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi
texaddr &= ~mirrorMask; texaddr &= ~mirrorMask;
} }
const bool noOffset = texaddr == addr; const bool noOffset = texaddr == addr;
const bool exactMatch = noOffset && entry->format < 4; const bool exactMatch = noOffset && entry->format < 4 && channel == NOTIFY_FB_COLOR;
const u32 w = 1 << ((entry->dim >> 0) & 0xf); const u32 w = 1 << ((entry->dim >> 0) & 0xf);
const u32 h = 1 << ((entry->dim >> 8) & 0xf); const u32 h = 1 << ((entry->dim >> 8) & 0xf);
// 512 on a 272 framebuffer is sane, so let's be lenient. // 512 on a 272 framebuffer is sane, so let's be lenient.
@ -781,10 +787,10 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi
// Let's avoid using it when we know the format is wrong. May be a video/etc. updating memory. // Let's avoid using it when we know the format is wrong. May be a video/etc. updating memory.
// However, some games use a different format to clear the buffer. // However, some games use a different format to clear the buffer.
if (framebuffer->last_frame_attached + 1 < gpuStats.numFlips) { if (framebuffer->last_frame_attached + 1 < gpuStats.numFlips) {
DetachFramebuffer(entry, address, framebuffer); DetachFramebuffer(entry, address, framebuffer, channel);
} }
} else { } else {
AttachFramebufferValid(entry, framebuffer, fbInfo); AttachFramebufferValid(entry, framebuffer, fbInfo, channel);
return true; return true;
} }
} else { } else {
@ -792,9 +798,11 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi
if (!framebufferManager_->UseBufferedRendering()) if (!framebufferManager_->UseBufferedRendering())
return false; return false;
// Check works for D16 too (???)
const bool matchingClutFormat = const bool matchingClutFormat =
(framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) || (channel != NOTIFY_FB_COLOR && entry->format == GE_TFMT_CLUT16) ||
(framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16); (channel == NOTIFY_FB_COLOR && framebuffer->format == GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT32) ||
(channel == NOTIFY_FB_COLOR && framebuffer->format != GE_FORMAT_8888 && entry->format == GE_TFMT_CLUT16);
const bool clutFormat = IsClutFormat((GETextureFormat)(entry->format)); const bool clutFormat = IsClutFormat((GETextureFormat)(entry->format));
@ -809,7 +817,7 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi
WARN_LOG_ONCE(diffStrides2, G3D, "Texturing from framebuffer (matching_clut=%s) different strides %d != %d", matchingClutFormat ? "yes" : "no", entry->bufw, framebuffer->fb_stride); WARN_LOG_ONCE(diffStrides2, G3D, "Texturing from framebuffer (matching_clut=%s) different strides %d != %d", matchingClutFormat ? "yes" : "no", entry->bufw, framebuffer->fb_stride);
} else { } else {
// Assume any render-to-tex with different bufw + offset is a render from ram. // Assume any render-to-tex with different bufw + offset is a render from ram.
DetachFramebuffer(entry, address, framebuffer); DetachFramebuffer(entry, address, framebuffer, channel);
return false; return false;
} }
} }
@ -817,13 +825,13 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi
// Check if it's in bufferWidth (which might be higher than width and may indicate the framebuffer includes the data.) // Check if it's in bufferWidth (which might be higher than width and may indicate the framebuffer includes the data.)
if (fbInfo.xOffset >= framebuffer->bufferWidth && fbInfo.xOffset + w <= (u32)framebuffer->fb_stride) { if (fbInfo.xOffset >= framebuffer->bufferWidth && fbInfo.xOffset + w <= (u32)framebuffer->fb_stride) {
// This happens in Brave Story, see #10045 - the texture is in the space between strides, with matching stride. // This happens in Brave Story, see #10045 - the texture is in the space between strides, with matching stride.
DetachFramebuffer(entry, address, framebuffer); DetachFramebuffer(entry, address, framebuffer, channel);
return false; return false;
} }
if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) { if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) {
// Can't be inside the framebuffer then, ram. Detach to be safe. // Can't be inside the framebuffer then, ram. Detach to be safe.
DetachFramebuffer(entry, address, framebuffer); DetachFramebuffer(entry, address, framebuffer, channel);
return false; return false;
} }
@ -831,7 +839,7 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi
// TODO: Maybe we can reduce this check and find a better way above 0x04110000? // TODO: Maybe we can reduce this check and find a better way above 0x04110000?
if (fbInfo.yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000) { if (fbInfo.yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000) {
WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", address, fbInfo.xOffset, fbInfo.yOffset, framebuffer->width, framebuffer->height); WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", address, fbInfo.xOffset, fbInfo.yOffset, framebuffer->width, framebuffer->height);
DetachFramebuffer(entry, address, framebuffer); DetachFramebuffer(entry, address, framebuffer, channel);
return false; return false;
} }
@ -841,13 +849,13 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi
if (!noOffset) { if (!noOffset) {
WARN_LOG_REPORT_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset); WARN_LOG_REPORT_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset);
} }
AttachFramebufferValid(entry, framebuffer, fbInfo); AttachFramebufferValid(entry, framebuffer, fbInfo, channel);
entry->status |= TexCacheEntry::STATUS_DEPALETTIZE; entry->status |= TexCacheEntry::STATUS_DEPALETTIZE;
// We'll validate it compiles later. // We'll validate it compiles later.
return true; return true;
} else if (IsClutFormat((GETextureFormat)(entry->format)) || IsDXTFormat((GETextureFormat)(entry->format))) { } else if (IsClutFormat((GETextureFormat)(entry->format)) || IsDXTFormat((GETextureFormat)(entry->format))) {
WARN_LOG_ONCE(fourEightBit, G3D, "%s format not supported when texturing from framebuffer of format %s", GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format)); WARN_LOG_ONCE(fourEightBit, G3D, "%s format not supported when texturing from framebuffer of format %s", GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format));
DetachFramebuffer(entry, address, framebuffer); DetachFramebuffer(entry, address, framebuffer, channel);
return false; return false;
} }
@ -856,18 +864,18 @@ bool TextureCacheCommon::AttachFramebuffer(TexCacheEntry *entry, u32 address, Vi
if (framebuffer->format != entry->format) { if (framebuffer->format != entry->format) {
WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Texturing from framebuffer with different formats %s != %s at %08x", WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Texturing from framebuffer with different formats %s != %s at %08x",
GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), address); GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), address);
AttachFramebufferValid(entry, framebuffer, fbInfo); AttachFramebufferValid(entry, framebuffer, fbInfo, channel);
return true; return true;
} else { } else {
WARN_LOG_ONCE(subarea, G3D, "Render to area containing texture at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset); WARN_LOG_ONCE(subarea, G3D, "Render to area containing texture at %08x +%dx%d", address, fbInfo.xOffset, fbInfo.yOffset);
// If "AttachFramebufferValid" , God of War Ghost of Sparta/Chains of Olympus will be missing special effect. // If "AttachFramebufferValid" , God of War Ghost of Sparta/Chains of Olympus will be missing special effect.
AttachFramebufferInvalid(entry, framebuffer, fbInfo); AttachFramebufferInvalid(entry, framebuffer, fbInfo, channel);
return true; return true;
} }
} else { } else {
WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible format %s != %s at %08x", WARN_LOG_REPORT_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible format %s != %s at %08x",
GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), address); GeTextureFormatToString((GETextureFormat)entry->format), GeBufferFormatToString(framebuffer->format), address);
DetachFramebuffer(entry, address, framebuffer); DetachFramebuffer(entry, address, framebuffer, channel);
return false; return false;
} }
} }
@ -944,7 +952,7 @@ bool TextureCacheCommon::SetOffsetTexture(u32 yOffset) {
bool success = false; bool success = false;
for (size_t i = 0, n = fbCache_.size(); i < n; ++i) { for (size_t i = 0, n = fbCache_.size(); i < n; ++i) {
auto framebuffer = fbCache_[i]; auto framebuffer = fbCache_[i];
if (AttachFramebuffer(entry, framebuffer->fb_address, framebuffer, texaddrOffset)) { if (AttachFramebuffer(entry, framebuffer->fb_address, framebuffer, texaddrOffset, (entry->status & TexCacheEntry::STATUS_DEPTH) ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR)) {
success = true; success = true;
} }
} }

View file

@ -41,6 +41,11 @@ enum FramebufferNotification {
NOTIFY_FB_DESTROYED, NOTIFY_FB_DESTROYED,
}; };
// Identifies which attachment of a VirtualFramebuffer a texture cache notification is about.
// Color notifications are normally issued with the framebuffer's fb_address, depth ones with its z_address.
enum FramebufferNotificationChannel {
	// Color attachment of the framebuffer.
	NOTIFY_FB_COLOR = 0,
	// Depth attachment of the framebuffer (implicitly 1, the value following NOTIFY_FB_COLOR).
	NOTIFY_FB_DEPTH,
};
// Changes more frequent than this will be considered "frequent" and prevent texture scaling. // Changes more frequent than this will be considered "frequent" and prevent texture scaling.
#define TEXCACHE_FRAME_CHANGE_FREQUENT 6 #define TEXCACHE_FRAME_CHANGE_FREQUENT 6
// Note: only used when hash backoff is disabled. // Note: only used when hash backoff is disabled.
@ -200,7 +205,7 @@ public:
virtual void Clear(bool delete_them); virtual void Clear(bool delete_them);
// FramebufferManager keeps TextureCache updated about what regions of memory are being rendered to. // FramebufferManager keeps TextureCache updated about what regions of memory are being rendered to.
void NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg); void NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer, FramebufferNotification msg, FramebufferNotificationChannel channel);
virtual void NotifyConfigChanged(); virtual void NotifyConfigChanged();
void NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt); void NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt);
@ -251,10 +256,10 @@ protected:
void UpdateSamplingParams(TexCacheEntry &entry, SamplerCacheKey &key); // Used by D3D11 and Vulkan. void UpdateSamplingParams(TexCacheEntry &entry, SamplerCacheKey &key); // Used by D3D11 and Vulkan.
void UpdateMaxSeenV(TexCacheEntry *entry, bool throughMode); void UpdateMaxSeenV(TexCacheEntry *entry, bool throughMode);
bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset = 0); bool AttachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel);
void AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo); void AttachFramebufferValid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo, FramebufferNotificationChannel channel);
void AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo); void AttachFramebufferInvalid(TexCacheEntry *entry, VirtualFramebuffer *framebuffer, const AttachedFramebufferInfo &fbInfo, FramebufferNotificationChannel channel);
void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer); void DetachFramebuffer(TexCacheEntry *entry, u32 address, VirtualFramebuffer *framebuffer, FramebufferNotificationChannel channel);
void SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer); void SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffer *framebuffer);

View file

@ -429,7 +429,7 @@ void GeDisassembleOp(u32 pc, u32 op, u32 prev, char *buffer, int bufsize) {
case GE_CMD_LOADCLUT: case GE_CMD_LOADCLUT:
// This could be used to "dirty" textures with clut. // This could be used to "dirty" textures with clut.
if (data) if (data)
snprintf(buffer, bufsize, "Clut load: %06x", data); snprintf(buffer, bufsize, "Clut load: %08x, %d bytes, %06x", gstate.getClutAddress(), (data & 0x3F) << 5, data & 0xFFFFC0);
else else
snprintf(buffer, bufsize, "Clut load"); snprintf(buffer, bufsize, "Clut load");
break; break;

View file

@ -103,6 +103,7 @@ DepalShaderVulkan *DepalShaderCacheVulkan::GetDepalettizeShader(uint32_t clutMod
std::string error; std::string error;
VkShaderModule fshader = CompileShaderModule(vulkan_, VK_SHADER_STAGE_FRAGMENT_BIT, buffer, &error); VkShaderModule fshader = CompileShaderModule(vulkan_, VK_SHADER_STAGE_FRAGMENT_BIT, buffer, &error);
if (fshader == VK_NULL_HANDLE) { if (fshader == VK_NULL_HANDLE) {
INFO_LOG(G3D, "Source:\n%s\n\n", buffer);
Crash(); Crash();
delete[] buffer; delete[] buffer;
return nullptr; return nullptr;
@ -111,6 +112,7 @@ DepalShaderVulkan *DepalShaderCacheVulkan::GetDepalettizeShader(uint32_t clutMod
VkPipeline pipeline = vulkan2D_->GetPipeline(rp, vshader_, fshader); VkPipeline pipeline = vulkan2D_->GetPipeline(rp, vshader_, fshader);
// Can delete the shader module now that the pipeline has been created. // Can delete the shader module now that the pipeline has been created.
// Maybe don't even need to queue it.. // Maybe don't even need to queue it..
// "true" keeps the pipeline itself alive, forgetting the fshader.
vulkan2D_->PurgeFragmentShader(fshader, true); vulkan2D_->PurgeFragmentShader(fshader, true);
vulkan_->Delete().QueueDeleteShaderModule(fshader); vulkan_->Delete().QueueDeleteShaderModule(fshader);

View file

@ -361,7 +361,7 @@ VulkanFragmentShader *ShaderManagerVulkan::GetFragmentShaderFromModule(VkShaderM
// instantaneous. // instantaneous.
#define CACHE_HEADER_MAGIC 0xff51f420 #define CACHE_HEADER_MAGIC 0xff51f420
#define CACHE_VERSION 17 #define CACHE_VERSION 18
struct VulkanCacheHeader { struct VulkanCacheHeader {
uint32_t magic; uint32_t magic;
uint32_t version; uint32_t version;

View file

@ -558,8 +558,9 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFr
DepalShaderVulkan *depalShader = nullptr; DepalShaderVulkan *depalShader = nullptr;
uint32_t clutMode = gstate.clutformat & 0xFFFFFF; uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer;
bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS); bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS);
bool depth = (entry->status & TexCacheEntry::STATUS_DEPTH) != 0;
bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth;
if ((entry->status & TexCacheEntry::STATUS_DEPALETTIZE) && !g_Config.bDisableSlowFramebufEffects) { if ((entry->status & TexCacheEntry::STATUS_DEPALETTIZE) && !g_Config.bDisableSlowFramebufEffects) {
if (useShaderDepal) { if (useShaderDepal) {
@ -584,7 +585,7 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFr
imageView_ = framebufferManagerVulkan_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET); imageView_ = framebufferManagerVulkan_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
return; return;
} else { } else {
depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, framebuffer->drawnFormat); depalShader = depalShaderCache_->GetDepalettizeShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
drawEngine_->SetDepalTexture(VK_NULL_HANDLE); drawEngine_->SetDepalTexture(VK_NULL_HANDLE);
gstate_c.SetUseShaderDepal(false); gstate_c.SetUseShaderDepal(false);
} }
@ -652,12 +653,24 @@ void TextureCacheVulkan::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFr
VkBuffer pushed; VkBuffer pushed;
uint32_t offset = push_->PushAligned(verts, sizeof(verts), 4, &pushed); uint32_t offset = push_->PushAligned(verts, sizeof(verts), 4, &pushed);
draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, Draw::FB_COLOR_BIT, 0); draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0);
VkImageView fbo = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE0_IMAGEVIEW); VkImageView fbo = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE0_IMAGEVIEW);
VkDescriptorSet descSet = vulkan2D_->GetDescriptorSet(fbo, samplerNearest_, clutTexture->GetImageView(), samplerNearest_); VkDescriptorSet descSet = vulkan2D_->GetDescriptorSet(fbo, samplerNearest_, clutTexture->GetImageView(), samplerNearest_);
VulkanRenderManager *renderManager = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER); VulkanRenderManager *renderManager = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
renderManager->BindPipeline(depalShader->pipeline); renderManager->BindPipeline(depalShader->pipeline);
// When texturing from the depth buffer, hand the depth scale factors to the
// depal shader as push constants.
if (depth) {
DepthScaleFactors scaleFactors = GetDepthScaleFactors();
// Field order mirrors the push_constant block the depal shader generator
// emits for GE_FORMAT_DEPTH16: "float z_scale; float z_offset;".
struct DepthPushConstants {
float z_scale;
float z_offset;
};
DepthPushConstants push;
push.z_scale = scaleFactors.scale;
push.z_offset = scaleFactors.offset;
// Pushed at offset 0 with both vertex and fragment stage flags.
// NOTE(review): the stage flags presumably have to match the push constant
// range declared by the 2D pipeline layout - confirm against Vulkan2D.
renderManager->PushConstants(vulkan2D_->GetPipelineLayout(), VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(DepthPushConstants), &push);
}
renderManager->SetScissor(VkRect2D{ {0, 0}, { framebuffer->renderWidth, framebuffer->renderHeight} }); renderManager->SetScissor(VkRect2D{ {0, 0}, { framebuffer->renderWidth, framebuffer->renderHeight} });
renderManager->SetViewport(VkViewport{ 0.f, 0.f, (float)framebuffer->renderWidth, (float)framebuffer->renderHeight, 0.f, 1.f }); renderManager->SetViewport(VkViewport{ 0.f, 0.f, (float)framebuffer->renderWidth, (float)framebuffer->renderHeight, 0.f, 1.f });
renderManager->Draw(vulkan2D_->GetPipelineLayout(), descSet, 0, nullptr, pushed, offset, 4); renderManager->Draw(vulkan2D_->GetPipelineLayout(), descSet, 0, nullptr, pushed, offset, 4);

View file

@ -282,6 +282,7 @@ enum GEBufferFormat
GE_FORMAT_5551 = 1, GE_FORMAT_5551 = 1,
GE_FORMAT_4444 = 2, GE_FORMAT_4444 = 2,
GE_FORMAT_8888 = 3, GE_FORMAT_8888 = 3,
GE_FORMAT_DEPTH16 = 4, // Virtual format, just used to pass into Depal
GE_FORMAT_INVALID = 0xFF, GE_FORMAT_INVALID = 0xFF,
}; };

View file

@ -2,6 +2,7 @@
#include "base/timeutil.h" #include "base/timeutil.h"
#include "DataFormat.h" #include "DataFormat.h"
#include "Common/Log.h"
#include "VulkanQueueRunner.h" #include "VulkanQueueRunner.h"
#include "VulkanRenderManager.h" #include "VulkanRenderManager.h"
@ -12,7 +13,7 @@ void VulkanQueueRunner::CreateDeviceObjects() {
InitBackbufferRenderPass(); InitBackbufferRenderPass();
framebufferRenderPass_ = GetRenderPass(VKRRenderPassAction::CLEAR, VKRRenderPassAction::CLEAR, VKRRenderPassAction::CLEAR, framebufferRenderPass_ = GetRenderPass(VKRRenderPassAction::CLEAR, VKRRenderPassAction::CLEAR, VKRRenderPassAction::CLEAR,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
#if 0 #if 0
// Just to check whether it makes sense to split some of these. drawidx is way bigger than the others... // Just to check whether it makes sense to split some of these. drawidx is way bigger than the others...
@ -238,7 +239,7 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) {
attachments[1].finalLayout = VK_IMAGE_LAYOUT_GENERAL; attachments[1].finalLayout = VK_IMAGE_LAYOUT_GENERAL;
#else #else
attachments[1].initialLayout = key.prevDepthLayout; attachments[1].initialLayout = key.prevDepthLayout;
attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; attachments[1].finalLayout = key.finalDepthStencilLayout;
#endif #endif
attachments[1].flags = 0; attachments[1].flags = 0;
@ -387,11 +388,14 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &st
for (int j = 0; j < (int)steps.size(); j++) { for (int j = 0; j < (int)steps.size(); j++) {
if (steps[j]->stepType == VKRStepType::RENDER && if (steps[j]->stepType == VKRStepType::RENDER &&
steps[j]->render.framebuffer && steps[j]->render.framebuffer) {
steps[j]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) { if (steps[j]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
// Just leave it at color_optimal.
steps[j]->render.finalColorLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; steps[j]->render.finalColorLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
} }
if (steps[j]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
steps[j]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
}
}
} }
for (int j = 0; j < (int)steps.size() - 1; j++) { for (int j = 0; j < (int)steps.size() - 1; j++) {
@ -1043,20 +1047,44 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
return; return;
} }
if (step.render.framebuffer && step.render.framebuffer->color.layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { // Write-after-write hazards. Fixed flicker in God of War on ARM (before we added another fix that removed these).
VkImageMemoryBarrier barrier{}; if (step.render.framebuffer) {
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; int n = 0;
barrier.oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; int stage = 0;
barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkImageMemoryBarrier barriers[2]{};
barrier.subresourceRange.layerCount = 1; if (step.render.framebuffer->color.layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
barrier.subresourceRange.levelCount = 1; barriers[n].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.image = step.render.framebuffer->color.image; barriers[n].oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; barriers[n].newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; barriers[n].subresourceRange.layerCount = 1;
barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; barriers[n].subresourceRange.levelCount = 1;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barriers[n].image = step.render.framebuffer->color.image;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barriers[n].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); barriers[n].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
barriers[n].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
barriers[n].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barriers[n].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
n++;
}
// Write-after-write barrier for the depth/stencil attachment.
// Render passes leave the depth image in DEPTH_STENCIL_ATTACHMENT_OPTIMAL (the
// default final depth layout), so that is the layout to test for. Checking
// VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL would never match a layout this
// renderer stores, and the barrier would silently never be issued.
if (step.render.framebuffer->depth.layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
	barriers[n].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	// Pure memory/execution dependency: the layout must not change, so old and
	// new layout are identical (and must be a depth layout, not a color one).
	barriers[n].oldLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
	barriers[n].newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
	barriers[n].subresourceRange.layerCount = 1;
	barriers[n].subresourceRange.levelCount = 1;
	barriers[n].image = step.render.framebuffer->depth.image;
	barriers[n].srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
	barriers[n].dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
	// The depth image carries both depth and stencil aspects; a color aspect
	// mask is invalid for it.
	barriers[n].subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
	barriers[n].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	barriers[n].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	// Depth/stencil writes happen in the early and late fragment test stages.
	stage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
	n++;
}
if (stage) {
vkCmdPipelineBarrier(cmd, stage, stage, 0, 0, nullptr, 0, nullptr, n, barriers);
}
} }
// This is supposed to bind a vulkan render pass to the command buffer. // This is supposed to bind a vulkan render pass to the command buffer.
@ -1213,6 +1241,7 @@ void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer c
// The renderpass handles the layout transition. // The renderpass handles the layout transition.
if (fb) { if (fb) {
fb->color.layout = step.render.finalColorLayout; fb->color.layout = step.render.finalColorLayout;
fb->depth.layout = step.render.finalDepthStencilLayout;
} }
} }
@ -1225,6 +1254,7 @@ void VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKRStep &step
int h; int h;
if (step.render.framebuffer) { if (step.render.framebuffer) {
_dbg_assert_(step.render.finalColorLayout != VK_IMAGE_LAYOUT_UNDEFINED); _dbg_assert_(step.render.finalColorLayout != VK_IMAGE_LAYOUT_UNDEFINED);
_dbg_assert_(step.render.finalDepthStencilLayout != VK_IMAGE_LAYOUT_UNDEFINED);
VKRFramebuffer *fb = step.render.framebuffer; VKRFramebuffer *fb = step.render.framebuffer;
framebuf = fb->framebuf; framebuf = fb->framebuf;
@ -1249,7 +1279,9 @@ void VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKRStep &step
renderPass = GetRenderPass( renderPass = GetRenderPass(
step.render.color, step.render.depth, step.render.stencil, step.render.color, step.render.depth, step.render.stencil,
fb->color.layout, fb->depth.layout, step.render.finalColorLayout); fb->color.layout, fb->depth.layout,
step.render.finalColorLayout,
step.render.finalDepthStencilLayout);
// We now do any layout pretransitions as part of the render pass. // We now do any layout pretransitions as part of the render pass.
fb->color.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; fb->color.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
@ -1685,6 +1717,7 @@ void VulkanQueueRunner::CopyReadbackBuffer(int width, int height, Draw::DataForm
} else if (srcFormat == Draw::DataFormat::B8G8R8A8_UNORM) { } else if (srcFormat == Draw::DataFormat::B8G8R8A8_UNORM) {
ConvertFromBGRA8888(pixels, (const uint8_t *)mappedData, pixelStride, width, width, height, destFormat); ConvertFromBGRA8888(pixels, (const uint8_t *)mappedData, pixelStride, width, width, height, destFormat);
} else if (srcFormat == destFormat) { } else if (srcFormat == destFormat) {
// Can just memcpy when it matches no matter the format!
uint8_t *dst = pixels; uint8_t *dst = pixels;
const uint8_t *src = (const uint8_t *)mappedData; const uint8_t *src = (const uint8_t *)mappedData;
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
@ -1697,7 +1730,7 @@ void VulkanQueueRunner::CopyReadbackBuffer(int width, int height, Draw::DataForm
} else { } else {
// TODO: Maybe a depth conversion or something? // TODO: Maybe a depth conversion or something?
ELOG("CopyReadbackBuffer: Unknown format"); ELOG("CopyReadbackBuffer: Unknown format");
assert(false); _assert_msg_(false, "CopyReadbackBuffer: Unknown src format %d", (int)srcFormat);
} }
vkUnmapMemory(vulkan_->GetDevice(), readbackMemory_); vkUnmapMemory(vulkan_->GetDevice(), readbackMemory_);
} }

View file

@ -212,14 +212,14 @@ public:
VkImageLayout prevColorLayout; VkImageLayout prevColorLayout;
VkImageLayout prevDepthLayout; VkImageLayout prevDepthLayout;
VkImageLayout finalColorLayout; VkImageLayout finalColorLayout;
// TODO: Also pre-transition depth, for copies etc. VkImageLayout finalDepthStencilLayout;
}; };
// Only call this from the render thread! Also ok during initialization (LoadCache). // Only call this from the render thread! Also ok during initialization (LoadCache).
VkRenderPass GetRenderPass( VkRenderPass GetRenderPass(
VKRRenderPassAction colorLoadAction, VKRRenderPassAction depthLoadAction, VKRRenderPassAction stencilLoadAction, VKRRenderPassAction colorLoadAction, VKRRenderPassAction depthLoadAction, VKRRenderPassAction stencilLoadAction,
VkImageLayout prevColorLayout, VkImageLayout prevDepthLayout, VkImageLayout finalColorLayout) { VkImageLayout prevColorLayout, VkImageLayout prevDepthLayout, VkImageLayout finalColorLayout, VkImageLayout finalDepthStencilLayout) {
RPKey key{ colorLoadAction, depthLoadAction, stencilLoadAction, prevColorLayout, prevDepthLayout, finalColorLayout }; RPKey key{ colorLoadAction, depthLoadAction, stencilLoadAction, prevColorLayout, prevDepthLayout, finalColorLayout, finalDepthStencilLayout };
return GetRenderPass(key); return GetRenderPass(key);
} }

View file

@ -63,6 +63,7 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
res = vkBindImageMemory(vulkan->GetDevice(), img.image, img.memory, 0); res = vkBindImageMemory(vulkan->GetDevice(), img.image, img.memory, 0);
_dbg_assert_(res == VK_SUCCESS); _dbg_assert_(res == VK_SUCCESS);
VkImageAspectFlags viewAspects = color ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
VkImageAspectFlags aspects = color ? VK_IMAGE_ASPECT_COLOR_BIT : (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); VkImageAspectFlags aspects = color ? VK_IMAGE_ASPECT_COLOR_BIT : (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
VkImageViewCreateInfo ivci{ VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO }; VkImageViewCreateInfo ivci{ VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
@ -70,7 +71,7 @@ void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int
ivci.format = ici.format; ivci.format = ici.format;
ivci.image = img.image; ivci.image = img.image;
ivci.viewType = VK_IMAGE_VIEW_TYPE_2D; ivci.viewType = VK_IMAGE_VIEW_TYPE_2D;
ivci.subresourceRange.aspectMask = aspects; ivci.subresourceRange.aspectMask = viewAspects;
ivci.subresourceRange.layerCount = 1; ivci.subresourceRange.layerCount = 1;
ivci.subresourceRange.levelCount = 1; ivci.subresourceRange.levelCount = 1;
res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.imageView); res = vkCreateImageView(vulkan->GetDevice(), &ivci, nullptr, &img.imageView);
@ -548,6 +549,7 @@ void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRR
step->render.numDraws = 0; step->render.numDraws = 0;
step->render.numReads = 0; step->render.numReads = 0;
step->render.finalColorLayout = !fb ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED; step->render.finalColorLayout = !fb ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED;
step->render.finalDepthStencilLayout = !fb ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED;
step->tag = tag; step->tag = tag;
steps_.push_back(step); steps_.push_back(step);
@ -1016,10 +1018,10 @@ VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, in
curRenderStep_->preTransitions.back().fb == fb && curRenderStep_->preTransitions.back().fb == fb &&
curRenderStep_->preTransitions.back().targetLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { curRenderStep_->preTransitions.back().targetLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
// We're done. // We're done.
return fb->color.imageView; return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.imageView : fb->depth.imageView;
} else { } else {
curRenderStep_->preTransitions.push_back({ aspectBit, fb, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }); curRenderStep_->preTransitions.push_back({ aspectBit, fb, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL });
return fb->color.imageView; return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.imageView : fb->depth.imageView;
} }
} }