From a1efed31b9dee4a477506b45cf5ccced06a0b12d Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 3 Oct 2022 20:17:25 -0700 Subject: [PATCH 1/2] GPU: Use flags to fix triggered upload/download. No longer using mirror hacks. --- GPU/Common/FramebufferManagerCommon.cpp | 8 ++++---- GPU/Common/FramebufferManagerCommon.h | 2 +- GPU/Debugger/Record.cpp | 7 ++++--- GPU/GPUCommon.cpp | 17 +++++++---------- GPU/GPUCommon.h | 2 +- GPU/GPUInterface.h | 12 +++++++++++- GPU/Software/SoftGpu.cpp | 6 +++--- GPU/Software/SoftGpu.h | 2 +- 8 files changed, 32 insertions(+), 24 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index f31d8c79d1..878b677466 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1634,7 +1634,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, // This is called from detected memcopies and framebuffer initialization from VRAM. Not block transfers. // MotoGP goes this path so we need to catch those copies here. -bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset, u32 skipDrawReason) { +bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, GPUCopyFlag flags, u32 skipDrawReason) { if (size == 0) { return false; } @@ -1728,7 +1728,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, dstBuffer->last_frame_used = gpuStats.numFlips; } - if (dstBuffer && srcBuffer && !isMemset) { + if (dstBuffer && srcBuffer && !(flags & GPUCopyFlag::MEMSET) && !(flags & GPUCopyFlag::FORCE_SRC_MEM) && !(flags & GPUCopyFlag::FORCE_DST_MEM)) { if (srcBuffer == dstBuffer) { WARN_LOG_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x (size: %x)", src, dst, size); } else { @@ -1739,8 +1739,8 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, RebindFramebuffer("RebindFramebuffer - Inter-buffer memcpy"); } return false; - } else if (dstBuffer) { - if (isMemset) { + } else if (dstBuffer && !(flags & GPUCopyFlag::FORCE_DST_MEM)) { + if (flags & GPUCopyFlag::MEMSET) { gpuStats.numClears++; } WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x (size: %x)", src, dst, size); diff --git a/GPU/Common/FramebufferManagerCommon.h b/GPU/Common/FramebufferManagerCommon.h index d4bd905f0d..b3bd21d7c9 100644 --- a/GPU/Common/FramebufferManagerCommon.h +++ b/GPU/Common/FramebufferManagerCommon.h @@ -306,7 +306,7 @@ public: void CopyDisplayToOutput(bool reallyDirty); - bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset, u32 skipDrawReason); + bool NotifyFramebufferCopy(u32 src, u32 dest, int size, GPUCopyFlag flags, u32 skipDrawReason); void NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt); void UpdateFromMemory(u32 addr, int size); void ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor); diff --git a/GPU/Debugger/Record.cpp b/GPU/Debugger/Record.cpp index 9947519b5e..6058546b0f 100644 --- a/GPU/Debugger/Record.cpp +++ b/GPU/Debugger/Record.cpp @@ -691,10 +691,11 @@ void NotifyUpload(u32 dest, u32 sz) { return; } - CheckEdramTrans(); - NotifyMemcpy(dest, dest, sz); - if (Memory::IsVRAMAddress(dest)) + if (Memory::IsVRAMAddress(dest)) { + // This also checks the edram translation value. + NotifyMemcpy(dest, dest, sz); DirtyVRAM(dest, sz, DirtyVRAMFlag::DIRTY); + } } static bool HasDrawCommands() { diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index a44fb47572..4c43b0a992 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -3046,10 +3046,10 @@ void GPUCommon::DoBlockTransfer(u32 skipDrawReason) { cyclesExecuted += ((height * width * bpp) * 16) / 10; } -bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size) { +bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags) { // Track stray copies of a framebuffer in RAM. MotoGP does this. if (framebufferManager_->MayIntersectFramebuffer(src) || framebufferManager_->MayIntersectFramebuffer(dest)) { - if (!framebufferManager_->NotifyFramebufferCopy(src, dest, size, false, gstate_c.skipDrawReason)) { + if (!framebufferManager_->NotifyFramebufferCopy(src, dest, size, flags, gstate_c.skipDrawReason)) { // We use a little hack for PerformMemoryDownload/PerformMemoryUpload using a VRAM mirror. // Since they're identical we don't need to copy. if (!Memory::IsVRAMAddress(dest) || (dest ^ 0x00400000) != src) { @@ -3071,7 +3071,8 @@ bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size) { NotifyMemInfo(MemBlockFlags::WRITE, dest, size, tag.c_str(), tag.size()); } InvalidateCache(dest, size, GPU_INVALIDATE_HINT); - GPURecord::NotifyMemcpy(dest, src, size); + if (!(flags & GPUCopyFlag::DEBUG_NOTIFIED)) + GPURecord::NotifyMemcpy(dest, src, size); return false; } @@ -3079,7 +3080,7 @@ bool GPUCommon::PerformMemorySet(u32 dest, u8 v, int size) { // This may indicate a memset, usually to 0, of a framebuffer. if (framebufferManager_->MayIntersectFramebuffer(dest)) { Memory::Memset(dest, v, size, "GPUMemset"); - if (!framebufferManager_->NotifyFramebufferCopy(dest, dest, size, true, gstate_c.skipDrawReason)) { + if (!framebufferManager_->NotifyFramebufferCopy(dest, dest, size, GPUCopyFlag::MEMSET, gstate_c.skipDrawReason)) { InvalidateCache(dest, size, GPU_INVALIDATE_HINT); } return true; @@ -3093,20 +3094,16 @@ bool GPUCommon::PerformMemorySet(u32 dest, u8 v, int size) { } bool GPUCommon::PerformMemoryDownload(u32 dest, int size) { - // Cheat a bit to force a download of the framebuffer. - // VRAM + 0x00400000 is simply a VRAM mirror. if (Memory::IsVRAMAddress(dest)) { - return PerformMemoryCopy(dest ^ 0x00400000, dest, size); + return PerformMemoryCopy(dest, dest, size, GPUCopyFlag::FORCE_DST_MEM); } return false; } bool GPUCommon::PerformMemoryUpload(u32 dest, int size) { - // Cheat a bit to force an upload of the framebuffer. - // VRAM + 0x00400000 is simply a VRAM mirror. if (Memory::IsVRAMAddress(dest)) { GPURecord::NotifyUpload(dest, size); - return PerformMemoryCopy(dest, dest ^ 0x00400000, size); + return PerformMemoryCopy(dest, dest, size, GPUCopyFlag::FORCE_SRC_MEM | GPUCopyFlag::DEBUG_NOTIFIED); } return false; } diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index 78f7b15cef..3c478e2cec 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -124,7 +124,7 @@ public: void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) override; void CopyDisplayToOutput(bool reallyDirty) override = 0; void InitClear() override = 0; - bool PerformMemoryCopy(u32 dest, u32 src, int size) override; + bool PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags = GPUCopyFlag::NONE) override; bool PerformMemorySet(u32 dest, u8 v, int size) override; bool PerformMemoryDownload(u32 dest, int size) override; bool PerformMemoryUpload(u32 dest, int size) override; diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index e0c5931326..e7cf2f9af6 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -114,6 +114,16 @@ enum class StencilUpload { }; ENUM_CLASS_BITOPS(StencilUpload); +enum class GPUCopyFlag { + NONE = 0, + FORCE_SRC_MEM = 1, + FORCE_DST_MEM = 2, + // Note: implies src == dst and FORCE_SRC_MEM. + MEMSET = 4, + DEBUG_NOTIFIED = 8, +}; +ENUM_CLASS_BITOPS(GPUCopyFlag); + // Used for debug struct FramebufferInfo { u32 fb_address; @@ -222,7 +232,7 @@ public: virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type) = 0; virtual void NotifyVideoUpload(u32 addr, int size, int width, int format) = 0; // Update either RAM from VRAM, or VRAM from RAM... or even VRAM from VRAM. - virtual bool PerformMemoryCopy(u32 dest, u32 src, int size) = 0; + virtual bool PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags = GPUCopyFlag::NONE) = 0; virtual bool PerformMemorySet(u32 dest, u8 v, int size) = 0; virtual bool PerformMemoryDownload(u32 dest, int size) = 0; virtual bool PerformMemoryUpload(u32 dest, int size) = 0; diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index cddfa557d9..28fe4041fb 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -1265,11 +1265,11 @@ void SoftGPU::NotifyVideoUpload(u32 addr, int size, int width, int format) // Ignore. } -bool SoftGPU::PerformMemoryCopy(u32 dest, u32 src, int size) -{ +bool SoftGPU::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags) { // Nothing to update. InvalidateCache(dest, size, GPU_INVALIDATE_HINT); - GPURecord::NotifyMemcpy(dest, src, size); + if (!(flags & GPUCopyFlag::DEBUG_NOTIFIED)) + GPURecord::NotifyMemcpy(dest, src, size); // Let's just be safe. MarkDirty(dest, size, SoftGPUVRAMDirty::DIRTY | SoftGPUVRAMDirty::REALLY_DIRTY); return false; diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h index 29ce1e5934..84732eb43a 100644 --- a/GPU/Software/SoftGpu.h +++ b/GPU/Software/SoftGpu.h @@ -139,7 +139,7 @@ public: void GetStats(char *buffer, size_t bufsize) override; void InvalidateCache(u32 addr, int size, GPUInvalidationType type) override; void NotifyVideoUpload(u32 addr, int size, int width, int format) override; - bool PerformMemoryCopy(u32 dest, u32 src, int size) override; + bool PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags = GPUCopyFlag::NONE) override; bool PerformMemorySet(u32 dest, u8 v, int size) override; bool PerformMemoryDownload(u32 dest, int size) override; bool PerformMemoryUpload(u32 dest, int size) override; From 9ac4523fd2238d910d6756d80217e8088ad39d0b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 3 Oct 2022 20:22:27 -0700 Subject: [PATCH 2/2] GPU: Skip matching a framebuf for RAM. --- GPU/Common/FramebufferManagerCommon.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 878b677466..b688355706 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1650,8 +1650,11 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, // Or at least this should be like the other ones, gathering possible candidates // with the ability to list them out for debugging. - VirtualFramebuffer *dstBuffer = 0; - VirtualFramebuffer *srcBuffer = 0; + VirtualFramebuffer *dstBuffer = nullptr; + VirtualFramebuffer *srcBuffer = nullptr; + bool ignoreDstBuffer = flags & GPUCopyFlag::FORCE_DST_MEM; + bool ignoreSrcBuffer = flags & (GPUCopyFlag::FORCE_SRC_MEM | GPUCopyFlag::MEMSET); + u32 dstY = (u32)-1; u32 dstH = 0; u32 srcY = (u32)-1; @@ -1669,14 +1672,14 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, const int vfb_byteWidth = vfb->width * vfb_bpp; // Heuristic to try to prevent potential glitches with video playback. - if (vfb_address == dst && (size == 0x44000 && vfb_size == 0x88000 || size == 0x88000 && vfb_size == 0x44000)) { + if (!ignoreDstBuffer && vfb_address == dst && (size == 0x44000 && vfb_size == 0x88000 || size == 0x88000 && vfb_size == 0x44000)) { // Not likely to be a correct color format copy for this buffer. Ignore it, there will either be RAM // that can be displayed from, or another matching buffer with the right format if rendering is going on. WARN_LOG_N_TIMES(notify_copy_2x, 5, G3D, "Framebuffer size %08x conspicuously not matching copy size %08x in NotifyFramebufferCopy. Ignoring.", size, vfb_size); continue; } - if (dst >= vfb_address && (dst + size <= vfb_address + vfb_size || dst == vfb_address)) { + if (!ignoreDstBuffer && dst >= vfb_address && (dst + size <= vfb_address + vfb_size || dst == vfb_address)) { const u32 offset = dst - vfb_address; const u32 yOffset = offset / vfb_byteStride; if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < dstY) { @@ -1686,7 +1689,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, } } - if (src >= vfb_address && (src + size <= vfb_address + vfb_size || src == vfb_address)) { + if (!ignoreSrcBuffer && src >= vfb_address && (src + size <= vfb_address + vfb_size || src == vfb_address)) { const u32 offset = src - vfb_address; const u32 yOffset = offset / vfb_byteStride; if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0) && yOffset < srcY) { @@ -1728,7 +1731,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, dstBuffer->last_frame_used = gpuStats.numFlips; } - if (dstBuffer && srcBuffer && !(flags & GPUCopyFlag::MEMSET) && !(flags & GPUCopyFlag::FORCE_SRC_MEM) && !(flags & GPUCopyFlag::FORCE_DST_MEM)) { + if (dstBuffer && srcBuffer) { if (srcBuffer == dstBuffer) { WARN_LOG_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x (size: %x)", src, dst, size); } else { @@ -1739,7 +1742,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, RebindFramebuffer("RebindFramebuffer - Inter-buffer memcpy"); } return false; - } else if (dstBuffer && !(flags & GPUCopyFlag::FORCE_DST_MEM)) { + } else if (dstBuffer) { if (flags & GPUCopyFlag::MEMSET) { gpuStats.numClears++; }