From 93009a31783e6fcb8ec88b5e5f08dd9ff607d095 Mon Sep 17 00:00:00 2001
From: Katharine Chui <kwchuiaa@connect.ust.hk>
Date: Sun, 8 Dec 2024 22:02:09 +0100
Subject: [PATCH 1/3] Work around metal buffer bug on MacOS + AMD GPU

With VMA_MEMORY_USAGE_CPU_TO_GPU buffers, the Metal buffer appears
zero-filled in a Metal trace at the MTLBlitCommandEncoder instance
method call triggered by vkCmdCopyBufferToImage.

Allocate with VMA_MEMORY_USAGE_GPU_TO_CPU instead on macOS + AMD GPU.
---
 Common/GPU/Vulkan/VulkanLoader.cpp |  4 ----
 Common/GPU/Vulkan/VulkanMemory.cpp | 15 +++++++++++++++
 Common/GPU/Vulkan/VulkanMemory.h   |  3 +++
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/Common/GPU/Vulkan/VulkanLoader.cpp b/Common/GPU/Vulkan/VulkanLoader.cpp
index 92e7e78659..ef911dd3bd 100644
--- a/Common/GPU/Vulkan/VulkanLoader.cpp
+++ b/Common/GPU/Vulkan/VulkanLoader.cpp
@@ -343,10 +343,6 @@ static VulkanLibraryHandle VulkanLoadLibrary(std::string *errorString) {
 	return nullptr;
 #elif PPSSPP_PLATFORM(UWP)
 	return nullptr;
-#elif PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64)
-	// Disable Vulkan on Mac/x86. Too many configurations that don't work with MoltenVK
-	// for whatever reason.
-	return nullptr;
 #elif PPSSPP_PLATFORM(WINDOWS)
 	return LoadLibrary(L"vulkan-1.dll");
 #else
diff --git a/Common/GPU/Vulkan/VulkanMemory.cpp b/Common/GPU/Vulkan/VulkanMemory.cpp
index eba7476190..83c52e1db3 100644
--- a/Common/GPU/Vulkan/VulkanMemory.cpp
+++ b/Common/GPU/Vulkan/VulkanMemory.cpp
@@ -38,6 +38,16 @@ static const double PUSH_GARBAGE_COLLECTION_DELAY = 10.0;
 VulkanPushPool::VulkanPushPool(VulkanContext *vulkan, const char *name, size_t originalBlockSize, VkBufferUsageFlags usage)
 	: vulkan_(vulkan), name_(name), originalBlockSize_(originalBlockSize), usage_(usage) {
 	RegisterGPUMemoryManager(this);
+
+	#if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64)
+	if (vulkan_->GetPhysicalDeviceProperties().properties.vendorID == VULKAN_VENDOR_AMD) {
+		INFO_LOG(Log::G3D, "MoltenVK with AMD, allocating buffers with VMA_MEMORY_USAGE_GPU_TO_CPU");
+		allocation_usage_ = VMA_MEMORY_USAGE_GPU_TO_CPU;
+	} else {
+		allocation_usage_ = VMA_MEMORY_USAGE_CPU_TO_GPU;
+	}
+	#endif
+
 	for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {
 		blocks_.push_back(CreateBlock(originalBlockSize));
 		blocks_.back().original = true;
@@ -67,7 +77,12 @@ VulkanPushPool::Block VulkanPushPool::CreateBlock(size_t size) {
 	b.usage = usage_;
 	b.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
 	VmaAllocationCreateInfo allocCreateInfo{};
+
+	#if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64)
+	allocCreateInfo.usage = allocation_usage_;
+	#else
 	allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
+	#endif
 	VmaAllocationInfo allocInfo{};
 	
 	VkResult result = vmaCreateBuffer(vulkan_->Allocator(), &b, &allocCreateInfo, &block.buffer, &block.allocation, &allocInfo);
diff --git a/Common/GPU/Vulkan/VulkanMemory.h b/Common/GPU/Vulkan/VulkanMemory.h
index 14e3b78436..340a959abc 100644
--- a/Common/GPU/Vulkan/VulkanMemory.h
+++ b/Common/GPU/Vulkan/VulkanMemory.h
@@ -93,4 +93,7 @@ private:
 	VkBufferUsageFlags usage_;
 	int curBlockIndex_ = -1;
 	const char *name_;
+	#if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64)
+	VmaMemoryUsage allocation_usage_;
+	#endif
 };

From 286580a6a05419e48c6ae99d7e53f97cd9a95d89 Mon Sep 17 00:00:00 2001
From: Katharine Chui <kwchuiaa@connect.ust.hk>
Date: Mon, 9 Dec 2024 12:23:05 +0100
Subject: [PATCH 2/3] Switch to VMA_MEMORY_USAGE_CPU_ONLY on MacOS AMD

The ideal allocation would be VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
as in 1.12.3, but VMA picked a memory type index that is not actually
mappable.

VMA_MEMORY_USAGE_GPU_TO_CPU selects VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
which seems to hurt performance.

Selecting VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT +
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT via VMA_MEMORY_USAGE_CPU_ONLY
would suffice for the workaround and gives better performance.
---
 Common/GPU/Vulkan/VulkanMemory.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Common/GPU/Vulkan/VulkanMemory.cpp b/Common/GPU/Vulkan/VulkanMemory.cpp
index 83c52e1db3..336bb7dff4 100644
--- a/Common/GPU/Vulkan/VulkanMemory.cpp
+++ b/Common/GPU/Vulkan/VulkanMemory.cpp
@@ -41,8 +41,8 @@ VulkanPushPool::VulkanPushPool(VulkanContext *vulkan, const char *name, size_t o
 
 	#if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64)
 	if (vulkan_->GetPhysicalDeviceProperties().properties.vendorID == VULKAN_VENDOR_AMD) {
-		INFO_LOG(Log::G3D, "MoltenVK with AMD, allocating buffers with VMA_MEMORY_USAGE_GPU_TO_CPU");
-		allocation_usage_ = VMA_MEMORY_USAGE_GPU_TO_CPU;
+		INFO_LOG(Log::G3D, "MoltenVK with AMD, allocating buffers with VMA_MEMORY_USAGE_CPU_ONLY");
+		allocation_usage_ = VMA_MEMORY_USAGE_CPU_ONLY; // VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT in vma type index
 	} else {
 		allocation_usage_ = VMA_MEMORY_USAGE_CPU_TO_GPU;
 	}

From 49553bcf8dd1523bbc7b06c4cade4830d00adca1 Mon Sep 17 00:00:00 2001
From: Katharine Chui <kwchuiaa@connect.ust.hk>
Date: Mon, 9 Dec 2024 13:41:17 +0100
Subject: [PATCH 3/3] Simplify MacOS AMD GPU workaround

Based on https://github.com/KhronosGroup/MoltenVK/issues/960, expand
the macOS AMD GPU workaround to all dGPUs (any non-Intel vendor).
Instead of changing the VMA usage, just add
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT to the allocation's required flags.
---
 Common/GPU/Vulkan/VulkanMemory.cpp | 15 +++++++--------
 Common/GPU/Vulkan/VulkanMemory.h   |  2 +-
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/Common/GPU/Vulkan/VulkanMemory.cpp b/Common/GPU/Vulkan/VulkanMemory.cpp
index 336bb7dff4..5482f8026e 100644
--- a/Common/GPU/Vulkan/VulkanMemory.cpp
+++ b/Common/GPU/Vulkan/VulkanMemory.cpp
@@ -40,11 +40,11 @@ VulkanPushPool::VulkanPushPool(VulkanContext *vulkan, const char *name, size_t o
 	RegisterGPUMemoryManager(this);
 
 	#if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64)
-	if (vulkan_->GetPhysicalDeviceProperties().properties.vendorID == VULKAN_VENDOR_AMD) {
-		INFO_LOG(Log::G3D, "MoltenVK with AMD, allocating buffers with VMA_MEMORY_USAGE_CPU_ONLY");
-		allocation_usage_ = VMA_MEMORY_USAGE_CPU_ONLY; // VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT in vma type index
-	} else {
-		allocation_usage_ = VMA_MEMORY_USAGE_CPU_TO_GPU;
+	allocation_extra_flags_ = 0;
+	if (vulkan_->GetPhysicalDeviceProperties().properties.vendorID != VULKAN_VENDOR_INTEL) {
+		// ref https://github.com/KhronosGroup/MoltenVK/issues/960
+		INFO_LOG(Log::G3D, "MoltenVK with dedicated gpu, adding VK_MEMORY_PROPERTY_HOST_COHERENT_BIT");
+		allocation_extra_flags_ = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
 	}
 	#endif
 
@@ -78,10 +78,9 @@ VulkanPushPool::Block VulkanPushPool::CreateBlock(size_t size) {
 	b.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
 	VmaAllocationCreateInfo allocCreateInfo{};
 
-	#if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64)
-	allocCreateInfo.usage = allocation_usage_;
-	#else
 	allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
+	#if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64)
+	allocCreateInfo.requiredFlags = allocation_extra_flags_;
 	#endif
 	VmaAllocationInfo allocInfo{};
 	
diff --git a/Common/GPU/Vulkan/VulkanMemory.h b/Common/GPU/Vulkan/VulkanMemory.h
index 340a959abc..2c01577e36 100644
--- a/Common/GPU/Vulkan/VulkanMemory.h
+++ b/Common/GPU/Vulkan/VulkanMemory.h
@@ -94,6 +94,6 @@ private:
 	int curBlockIndex_ = -1;
 	const char *name_;
 	#if PPSSPP_PLATFORM(MAC) && PPSSPP_ARCH(AMD64)
-	VmaMemoryUsage allocation_usage_;
+	VkMemoryPropertyFlags allocation_extra_flags_;
 	#endif
 };