From 0ac6cea34dd7f1594c3f9e9bf8c69e6f1300f79b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?=
Date: Fri, 13 Apr 2018 18:05:04 +0200
Subject: [PATCH] Add a queue processing hack for Sonic Rivals too. Now it's fast.

---
 Reviewer notes (text between the "---" marker and the first "diff --git"
 line is not part of the commit):

 * Line structure and tab indentation were rebuilt from a whitespace-collapsed
   copy of this patch. The number of lines in every hunk matches its @@ header
   and the diffstat below, but the exact whitespace of context lines could not
   be checked against the tree - TODO confirm before applying.
 * The element type of the std::vector parameters and locals was missing in
   that copy and is restored here as <VKRStep *> - TODO confirm against
   VulkanQueueRunner.h.
 * The author address on the From: line appears to have been lost as well and
   has not been reinvented.
 * ApplySonicHack, possible follow-ups (code below is left as committed):
   - the outer loop stops at steps.size() - 4 although the pattern check only
     reads up to steps[i + 3], so the last possible window is never examined;
   - the scanning switch only handles RENDER, so any other step type neither
     ends the run nor is excluded from the framebuffer-based split that
     follows;
   - if the run extends to the end of `steps`, `last` stays -1 and nothing is
     reordered.

 Core/Compatibility.cpp                  |  1 +
 Core/Compatibility.h                    |  1 +
 GPU/Vulkan/GPU_Vulkan.cpp               |  5 +-
 assets/compat.ini                       |  4 ++
 ext/native/thin3d/VulkanQueueRunner.cpp | 93 +++++++++++++++++++++++--
 ext/native/thin3d/VulkanQueueRunner.h   |  2 +
 6 files changed, 100 insertions(+), 6 deletions(-)

diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp
index 2fed762c0d..1cc0ccb219 100644
--- a/Core/Compatibility.cpp
+++ b/Core/Compatibility.cpp
@@ -58,6 +58,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
 	CheckSetting(iniFile, gameID, "DisableReadbacks", &flags_.DisableReadbacks);
 	CheckSetting(iniFile, gameID, "DisableAccurateDepth", &flags_.DisableAccurateDepth);
 	CheckSetting(iniFile, gameID, "MGS2AcidHack", &flags_.MGS2AcidHack);
+	CheckSetting(iniFile, gameID, "SonicRivalsHack", &flags_.SonicRivalsHack);
 }

 void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) {
diff --git a/Core/Compatibility.h b/Core/Compatibility.h
index 6f60504a40..f65c68efda 100644
--- a/Core/Compatibility.h
+++ b/Core/Compatibility.h
@@ -58,6 +58,7 @@ struct CompatFlags {
 	bool DisableReadbacks;
 	bool DisableAccurateDepth;
 	bool MGS2AcidHack;
+	bool SonicRivalsHack;
 };

 class IniFile;
diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp
index e69bf7454a..e774e735bc 100644
--- a/GPU/Vulkan/GPU_Vulkan.cpp
+++ b/GPU/Vulkan/GPU_Vulkan.cpp
@@ -461,9 +461,10 @@ void GPU_Vulkan::InitDeviceObjects() {

 	VulkanRenderManager *rm = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
 	uint32_t hacks = 0;
-	if (PSP_CoreParameter().compat.flags().MGS2AcidHack) {
+	if (PSP_CoreParameter().compat.flags().MGS2AcidHack)
 		hacks |= QUEUE_HACK_MGS2_ACID;
-	}
+	if (PSP_CoreParameter().compat.flags().SonicRivalsHack)
+		hacks |= QUEUE_HACK_SONIC;
 	if (hacks) {
 		rm->GetQueueRunner()->EnableHacks(hacks);
 	}
diff --git a/assets/compat.ini b/assets/compat.ini
index 8f9d668b05..6d0de1a99f 100644
--- a/assets/compat.ini
+++ b/assets/compat.ini
@@ -334,3 +334,7 @@ ULJM05001 = true
 ULAS42007 = true
 ULUS10006 = true
 ULUS10077 = true
+
+[SonicRivalsHack]
+ULES00622 = true
+ULUS10195 = true
diff --git a/ext/native/thin3d/VulkanQueueRunner.cpp b/ext/native/thin3d/VulkanQueueRunner.cpp
index 465159ddc7..eba6f52dc2 100644
--- a/ext/native/thin3d/VulkanQueueRunner.cpp
+++ b/ext/native/thin3d/VulkanQueueRunner.cpp
@@ -398,9 +398,14 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &st
 	}

 	// Queue hacks.
-	if (hacksEnabled_ & QUEUE_HACK_MGS2_ACID) {
-		// Massive speedup.
-		ApplyMGSHack(steps);
+	if (hacksEnabled_) {
+		if (hacksEnabled_ & QUEUE_HACK_MGS2_ACID) {
+			// Massive speedup.
+			ApplyMGSHack(steps);
+		}
+		if (hacksEnabled_ & QUEUE_HACK_SONIC) {
+			ApplySonicHack(steps);
+		}
 	}

 	for (size_t i = 0; i < steps.size(); i++) {
@@ -460,6 +465,8 @@ void VulkanQueueRunner::ApplyMGSHack(std::vector<VKRStep *> &steps) {
 			// First, let's sort it, keeping the same length.
 			std::vector<VKRStep *> copies;
 			std::vector<VKRStep *> renders;
+			copies.reserve((last - i) / 2);
+			renders.reserve((last - i) / 2);
 			for (int n = i; n <= last; n++) {
 				if (steps[n]->stepType == VKRStepType::COPY)
 					copies.push_back(steps[n]);
@@ -474,7 +481,7 @@ void VulkanQueueRunner::ApplyMGSHack(std::vector<VKRStep *> &steps) {
 			for (int j = 0; j < (int)renders.size(); j++) {
 				steps[i + j + copies.size()] = renders[j];
 			}
-			assert(steps[i + j + copies.size()]->stepType == VKRStepType::RENDER);
+			assert(steps[i + copies.size()]->stepType == VKRStepType::RENDER);
 			// Combine the renders.
 			for (int j = 1; j < (int)renders.size(); j++) {
 				for (int k = 0; k < renders[j]->commands.size(); k++) {
@@ -488,6 +495,84 @@ void VulkanQueueRunner::ApplyMGSHack(std::vector<VKRStep *> &steps) {
 	}
 }

+void VulkanQueueRunner::ApplySonicHack(std::vector<VKRStep *> &steps) {
+	// We want to turn a sequence of render(3),render(1),render(6),render(1),render(6),render(1),render(3) to
+	// render(1), render(1), render(1), render(6), render(6), render(6)
+
+	for (int i = 0; i < (int)steps.size() - 4; i++) {
+		int last = -1;
+		if (!(steps[i]->stepType == VKRStepType::RENDER &&
+			steps[i + 1]->stepType == VKRStepType::RENDER &&
+			steps[i + 2]->stepType == VKRStepType::RENDER &&
+			steps[i + 3]->stepType == VKRStepType::RENDER &&
+			steps[i]->render.numDraws == 3 &&
+			steps[i + 1]->render.numDraws == 1 &&
+			steps[i + 2]->render.numDraws == 6 &&
+			steps[i + 3]->render.numDraws == 1 &&
+			steps[i]->render.framebuffer == steps[i + 2]->render.framebuffer &&
+			steps[i + 1]->render.framebuffer == steps[i + 3]->render.framebuffer))
+			continue;
+		// Looks promising! Let's start by finding the last one.
+		for (int j = i; j < (int)steps.size(); j++) {
+			switch (steps[j]->stepType) {
+			case VKRStepType::RENDER:
+				if ((j - i) & 1) {
+					if (steps[j]->render.framebuffer != steps[i + 1]->render.framebuffer)
+						last = j - 1;
+					if (steps[j]->render.numDraws != 1)
+						last = j - 1;
+				} else {
+					if (steps[j]->render.framebuffer != steps[i]->render.framebuffer)
+						last = j - 1;
+					if (steps[j]->render.numDraws != 3 && steps[j]->render.numDraws != 6)
+						last = j - 1;
+				}
+			}
+			if (last != -1)
+				break;
+		}
+
+		if (last != -1) {
+			// We've got a sequence from i to last that needs reordering.
+			// First, let's sort it, keeping the same length.
+			std::vector<VKRStep *> type1;
+			std::vector<VKRStep *> type2;
+			type1.reserve((last - i) / 2);
+			type2.reserve((last - i) / 2);
+			for (int n = i; n <= last; n++) {
+				if (steps[n]->render.framebuffer == steps[i]->render.framebuffer)
+					type1.push_back(steps[n]);
+				else
+					type2.push_back(steps[n]);
+			}
+
+			// Write the renders back in order. Same amount, so deletion will work fine.
+			for (int j = 0; j < (int)type1.size(); j++) {
+				steps[i + j] = type1[j];
+			}
+			for (int j = 0; j < (int)type2.size(); j++) {
+				steps[i + j + type1.size()] = type2[j];
+			}
+
+			// Combine the renders.
+			for (int j = 1; j < (int)type1.size(); j++) {
+				for (int k = 0; k < (int)type1[j]->commands.size(); k++) {
+					steps[i]->commands.push_back(type1[j]->commands[k]);
+				}
+				steps[i + j]->stepType = VKRStepType::RENDER_SKIP;
+			}
+			for (int j = 1; j < (int)type2.size(); j++) {
+				for (int k = 0; k < (int)type2[j]->commands.size(); k++) {
+					steps[i + type1.size()]->commands.push_back(type2[j]->commands[k]);
+				}
+				steps[i + j + type1.size()]->stepType = VKRStepType::RENDER_SKIP;
+			}
+			// We're done.
+			break;
+		}
+	}
+}
+
 void VulkanQueueRunner::LogSteps(const std::vector<VKRStep *> &steps) {
 	ILOG("=======================================");
 	for (size_t i = 0; i < steps.size(); i++) {
diff --git a/ext/native/thin3d/VulkanQueueRunner.h b/ext/native/thin3d/VulkanQueueRunner.h
index e7f6df1fa2..d743bda730 100644
--- a/ext/native/thin3d/VulkanQueueRunner.h
+++ b/ext/native/thin3d/VulkanQueueRunner.h
@@ -12,6 +12,7 @@ struct VKRImage;

 enum {
 	QUEUE_HACK_MGS2_ACID = 1,
+	QUEUE_HACK_SONIC = 2,
 };

 enum class VKRRenderCommand : uint8_t {
@@ -234,6 +235,7 @@ private:
 	void ResizeReadbackBuffer(VkDeviceSize requiredSize);

 	void ApplyMGSHack(std::vector<VKRStep *> &steps);
+	void ApplySonicHack(std::vector<VKRStep *> &steps);

 	static void SetupTransitionToTransferSrc(VKRImage &img, VkImageMemoryBarrier &barrier, VkPipelineStageFlags &stage, VkImageAspectFlags aspect);
 	static void SetupTransitionToTransferDst(VKRImage &img, VkImageMemoryBarrier &barrier, VkPipelineStageFlags &stage, VkImageAspectFlags aspect);