From 93009a31783e6fcb8ec88b5e5f08dd9ff607d095 Mon Sep 17 00:00:00 2001 From: Katharine Chui Date: Sun, 8 Dec 2024 22:02:09 +0100 Subject: [PATCH 1/3] Work around metal buffer bug on MacOS + AMD GPU With VMA_MEMORY_USAGE_CPU_TO_GPU buffers, the Metal buffer appears zero-filled in a Metal trace during the MTLBlitCommandEncoder instance method triggered by vkCmdCopyBufferToImage. Allocate with VMA_MEMORY_USAGE_GPU_TO_CPU instead on MacOS + AMD GPU. --- Common/GPU/Vulkan/VulkanLoader.cpp | 4 ---- Common/GPU/Vulkan/VulkanMemory.cpp | 15 +++++++++++++++ Common/GPU/Vulkan/VulkanMemory.h | 3 +++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanLoader.cpp b/Common/GPU/Vulkan/VulkanLoader.cpp index 92e7e78659..ef911dd3bd 100644 --- a/Common/GPU/Vulkan/VulkanLoader.cpp +++ b/Common/GPU/Vulkan/VulkanLoader.cpp @@ -343,10 +343,6 @@ static VulkanLibraryHandle VulkanLoadLibrary(std::string *errorString) { return nullptr; #elif PPSSPP_PLATFORM(UWP) return nullptr; -#elif PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64) - // Disable Vulkan on Mac/x86. Too many configurations that don't work with MoltenVK - // for whatever reason. 
- return nullptr; #elif PPSSPP_PLATFORM(WINDOWS) return LoadLibrary(L"vulkan-1.dll"); #else diff --git a/Common/GPU/Vulkan/VulkanMemory.cpp b/Common/GPU/Vulkan/VulkanMemory.cpp index eba7476190..83c52e1db3 100644 --- a/Common/GPU/Vulkan/VulkanMemory.cpp +++ b/Common/GPU/Vulkan/VulkanMemory.cpp @@ -38,6 +38,16 @@ static const double PUSH_GARBAGE_COLLECTION_DELAY = 10.0; VulkanPushPool::VulkanPushPool(VulkanContext *vulkan, const char *name, size_t originalBlockSize, VkBufferUsageFlags usage) : vulkan_(vulkan), name_(name), originalBlockSize_(originalBlockSize), usage_(usage) { RegisterGPUMemoryManager(this); + + #if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64) + if (vulkan_->GetPhysicalDeviceProperties().properties.vendorID == VULKAN_VENDOR_AMD) { + INFO_LOG(Log::G3D, "MoltenVK with AMD, allocating buffers with VMA_MEMORY_USAGE_GPU_TO_CPU"); + allocation_usage_ = VMA_MEMORY_USAGE_GPU_TO_CPU; + } else { + allocation_usage_ = VMA_MEMORY_USAGE_CPU_TO_GPU; + } + #endif + for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) { blocks_.push_back(CreateBlock(originalBlockSize)); blocks_.back().original = true; @@ -67,7 +77,12 @@ VulkanPushPool::Block VulkanPushPool::CreateBlock(size_t size) { b.usage = usage_; b.sharingMode = VK_SHARING_MODE_EXCLUSIVE; VmaAllocationCreateInfo allocCreateInfo{}; + + #if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64) + allocCreateInfo.usage = allocation_usage_; + #else allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + #endif VmaAllocationInfo allocInfo{}; VkResult result = vmaCreateBuffer(vulkan_->Allocator(), &b, &allocCreateInfo, &block.buffer, &block.allocation, &allocInfo); diff --git a/Common/GPU/Vulkan/VulkanMemory.h b/Common/GPU/Vulkan/VulkanMemory.h index 14e3b78436..340a959abc 100644 --- a/Common/GPU/Vulkan/VulkanMemory.h +++ b/Common/GPU/Vulkan/VulkanMemory.h @@ -93,4 +93,7 @@ private: VkBufferUsageFlags usage_; int curBlockIndex_ = -1; const char *name_; + #if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64) + VmaMemoryUsage 
allocation_usage_; + #endif }; From 286580a6a05419e48c6ae99d7e53f97cd9a95d89 Mon Sep 17 00:00:00 2001 From: Katharine Chui Date: Mon, 9 Dec 2024 12:23:05 +0100 Subject: [PATCH 2/3] Switch to VMA_MEMORY_USAGE_CPU_ONLY on MacOS AMD The ideal allocation would be VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT like in 1.12.3, but VMA picked a memory type index that is not actually mappable. VMA_MEMORY_USAGE_GPU_TO_CPU selects VK_MEMORY_PROPERTY_HOST_CACHED_BIT, which seems to hurt performance. Selecting VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT with VMA_MEMORY_USAGE_CPU_ONLY suffices for the workaround and gives better performance. --- Common/GPU/Vulkan/VulkanMemory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanMemory.cpp b/Common/GPU/Vulkan/VulkanMemory.cpp index 83c52e1db3..336bb7dff4 100644 --- a/Common/GPU/Vulkan/VulkanMemory.cpp +++ b/Common/GPU/Vulkan/VulkanMemory.cpp @@ -41,8 +41,8 @@ VulkanPushPool::VulkanPushPool(VulkanContext *vulkan, const char *name, size_t o #if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64) if (vulkan_->GetPhysicalDeviceProperties().properties.vendorID == VULKAN_VENDOR_AMD) { - INFO_LOG(Log::G3D, "MoltenVK with AMD, allocating buffers with VMA_MEMORY_USAGE_GPU_TO_CPU"); - allocation_usage_ = VMA_MEMORY_USAGE_GPU_TO_CPU; + INFO_LOG(Log::G3D, "MoltenVK with AMD, allocating buffers with VMA_MEMORY_USAGE_CPU_ONLY"); + allocation_usage_ = VMA_MEMORY_USAGE_CPU_ONLY; // VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT in vma type index } else { allocation_usage_ = VMA_MEMORY_USAGE_CPU_TO_GPU; } From 49553bcf8dd1523bbc7b06c4cade4830d00adca1 Mon Sep 17 00:00:00 2001 From: Katharine Chui Date: Mon, 9 Dec 2024 13:41:17 +0100 Subject: [PATCH 3/3] Simplify MacOS AMD GPU workaround Based on https://github.com/KhronosGroup/MoltenVK/issues/960, expand the MacOS AMD GPU workaround to all dedicated GPUs and, instead of changing the usage, just add VK_MEMORY_PROPERTY_HOST_COHERENT_BIT to the required flags. --- 
Common/GPU/Vulkan/VulkanMemory.cpp | 15 +++++++-------- Common/GPU/Vulkan/VulkanMemory.h | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanMemory.cpp b/Common/GPU/Vulkan/VulkanMemory.cpp index 336bb7dff4..5482f8026e 100644 --- a/Common/GPU/Vulkan/VulkanMemory.cpp +++ b/Common/GPU/Vulkan/VulkanMemory.cpp @@ -40,11 +40,11 @@ VulkanPushPool::VulkanPushPool(VulkanContext *vulkan, const char *name, size_t o RegisterGPUMemoryManager(this); #if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64) - if (vulkan_->GetPhysicalDeviceProperties().properties.vendorID == VULKAN_VENDOR_AMD) { - INFO_LOG(Log::G3D, "MoltenVK with AMD, allocating buffers with VMA_MEMORY_USAGE_CPU_ONLY"); - allocation_usage_ = VMA_MEMORY_USAGE_CPU_ONLY; // VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT in vma type index - } else { - allocation_usage_ = VMA_MEMORY_USAGE_CPU_TO_GPU; + allocation_extra_flags_ = 0; + if (vulkan_->GetPhysicalDeviceProperties().properties.vendorID != VULKAN_VENDOR_INTEL) { + // ref https://github.com/KhronosGroup/MoltenVK/issues/960 + INFO_LOG(Log::G3D, "MoltenVK with dedicated gpu, adding VK_MEMORY_PROPERTY_HOST_COHERENT_BIT"); + allocation_extra_flags_ = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; } #endif @@ -78,10 +78,9 @@ VulkanPushPool::Block VulkanPushPool::CreateBlock(size_t size) { b.sharingMode = VK_SHARING_MODE_EXCLUSIVE; VmaAllocationCreateInfo allocCreateInfo{}; - #if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64) - allocCreateInfo.usage = allocation_usage_; - #else allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + #if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64) + allocCreateInfo.requiredFlags = allocation_extra_flags_; #endif VmaAllocationInfo allocInfo{}; diff --git a/Common/GPU/Vulkan/VulkanMemory.h b/Common/GPU/Vulkan/VulkanMemory.h index 340a959abc..2c01577e36 100644 --- a/Common/GPU/Vulkan/VulkanMemory.h +++ b/Common/GPU/Vulkan/VulkanMemory.h @@ -94,6 +94,6 @@ private: int curBlockIndex_ = -1; const 
char *name_; #if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64) - VmaMemoryUsage allocation_usage_; + VkMemoryPropertyFlags allocation_extra_flags_; #endif };