diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index c9889156b0..089c588aeb 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -1012,6 +1012,7 @@ private: bool readyForFence = true; bool readyForRun = false; bool readyForSubmit = false; + bool skipSwap = false; GLRRunType type = GLRRunType::END; diff --git a/Common/GPU/Vulkan/VulkanFrameData.cpp b/Common/GPU/Vulkan/VulkanFrameData.cpp index f5d26ef992..adac358f8f 100644 --- a/Common/GPU/Vulkan/VulkanFrameData.cpp +++ b/Common/GPU/Vulkan/VulkanFrameData.cpp @@ -49,22 +49,35 @@ void FrameData::Destroy(VulkanContext *vulkan) { } void FrameData::AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared) { + _dbg_assert_(!hasAcquired); + // Get the index of the next available swapchain image, and a semaphore to block command buffer execution on. VkResult res = vkAcquireNextImageKHR(vulkan->GetDevice(), vulkan->GetSwapchain(), UINT64_MAX, shared.acquireSemaphore, (VkFence)VK_NULL_HANDLE, &curSwapchainImage); - if (res == VK_SUBOPTIMAL_KHR) { + switch (res) { + case VK_SUCCESS: + hasAcquired = true; + break; + case VK_SUBOPTIMAL_KHR: + hasAcquired = true; // Hopefully the resize will happen shortly. Ignore - one frame might look bad or something. WARN_LOG(G3D, "VK_SUBOPTIMAL_KHR returned - ignoring"); - } else if (res == VK_ERROR_OUT_OF_DATE_KHR) { - WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned - processing the frame, but not presenting"); + break; + case VK_ERROR_OUT_OF_DATE_KHR: + // We do not set hasAcquired here! + WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned from AcquireNextImage - processing the frame, but not presenting"); skipSwap = true; - } else { - _assert_msg_(res == VK_SUCCESS, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res)); + break; + default: + // Weird, shouldn't get any other values. Maybe lost device? + _assert_msg_(false, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res)); + break; } } VkResult FrameData::QueuePresent(VulkanContext *vulkan, FrameDataShared &shared) { _dbg_assert_(hasAcquired); hasAcquired = false; + _dbg_assert_(!skipSwap); VkSwapchainKHR swapchain = vulkan->GetSwapchain(); VkPresentInfoKHR present = { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR }; @@ -145,6 +158,7 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO }; VkPipelineStageFlags waitStage[1]{ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT }; if (type == FrameSubmitType::Present && !skipSwap) { + _dbg_assert_(hasAcquired); submit_info.waitSemaphoreCount = 1; submit_info.pWaitSemaphores = &sharedData.acquireSemaphore; submit_info.pWaitDstStageMask = waitStage; @@ -162,11 +176,17 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame _assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main)! result=%s", VulkanResultToString(res)); } + if (type == FrameSubmitType::Sync) { + // Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback. + vkWaitForFences(vulkan->GetDevice(), 1, &readbackFence, true, UINT64_MAX); + vkResetFences(vulkan->GetDevice(), 1, &readbackFence); + } + // When !triggerFence, we notify after syncing with Vulkan. if (type == FrameSubmitType::Present || type == FrameSubmitType::Sync) { VERBOSE_LOG(G3D, "PULL: Frame %d.readyForFence = true", index); std::unique_lock lock(push_mutex); - readyForFence = true; + readyForFence = true; // misnomer in sync mode! push_condVar.notify_all(); } } diff --git a/Common/GPU/Vulkan/VulkanFrameData.h b/Common/GPU/Vulkan/VulkanFrameData.h index 764f681421..bc503bb18a 100644 --- a/Common/GPU/Vulkan/VulkanFrameData.h +++ b/Common/GPU/Vulkan/VulkanFrameData.h @@ -50,9 +50,8 @@ struct FrameData { std::condition_variable pull_condVar; bool readyForFence = true; - bool readyForRun = false; + bool readyForRun = false; // protected by pull_mutex bool skipSwap = false; - VKRRunType type = VKRRunType::END; VkFence fence; VkFence readbackFence; // Strictly speaking we might only need one global of these. @@ -81,9 +80,6 @@ struct FrameData { QueueProfileContext profile; bool profilingEnabled_; - // Metadata for logging etc - int index; - void Init(VulkanContext *vulkan, int index); void Destroy(VulkanContext *vulkan); @@ -93,4 +89,14 @@ struct FrameData { // This will only submit if we are actually recording init commands. void SubmitPending(VulkanContext *vulkan, FrameSubmitType type, FrameDataShared &shared); + + VKRRunType RunType() const { + return runType_; + } + + VKRRunType runType_ = VKRRunType::END; + +private: + // Metadata for logging etc + int index; }; diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index dfe09ecaea..c3f78b810f 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -563,7 +563,6 @@ void VulkanQueueRunner::RunSteps(FrameData &frameData, FrameDataShared &frameDat if (!step.render.framebuffer) { // When stepping in the GE debugger, we can end up here multiple times in a "frame". if (!frameData.hasAcquired) { - frameData.hasAcquired = true; frameData.AcquireNextImage(vulkan_, frameDataShared); SetBackbuffer(framebuffers_[frameData.curSwapchainImage], swapchainImages_[frameData.curSwapchainImage].image); } diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 3f1778bb7d..3e34c6f162 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -491,8 +491,7 @@ void VulkanRenderManager::ThreadFunc() { // but that created a race condition where frames could end up not finished properly on resize etc. // Only increment next time if we're done. - nextFrame = frameData.type == VKRRunType::END; - _dbg_assert_(frameData.type == VKRRunType::END || frameData.type == VKRRunType::SYNC); + nextFrame = frameData.RunType() == VKRRunType::END; } VLOG("PULL: Running frame %d", threadFrame); if (firstFrame) { @@ -658,6 +657,8 @@ void VulkanRenderManager::EndCurRenderStep() { curRenderStep_->render.colorStore, curRenderStep_->render.depthStore, curRenderStep_->render.stencilStore, }; RenderPassType rpType = RP_TYPE_COLOR_DEPTH; + // Save the accumulated pipeline flags so we can use that to configure the render pass. + // We'll often be able to avoid loading/saving the depth/stencil buffer. curRenderStep_->render.pipelineFlags = curPipelineFlags_; if (!curRenderStep_->render.framebuffer) { rpType = RP_TYPE_BACKBUFFER; @@ -665,12 +666,11 @@ void VulkanRenderManager::EndCurRenderStep() { // Not allowed on backbuffers. rpType = RP_TYPE_COLOR_DEPTH_INPUT; } + // TODO: Also add render pass types for depth/stencil-less. VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key); curRenderStep_->render.renderPassType = rpType; - // Save the accumulated pipeline flags so we can use that to configure the render pass. - // We'll often be able to avoid loading/saving the depth/stencil buffer. compileMutex_.lock(); bool needsCompile = false; for (VKRGraphicsPipeline *pipeline : pipelinesToCheck_) { @@ -1162,6 +1162,9 @@ VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, in } } +// Called on main thread. +// Sends the collected commands to the render thread. Submit-latency should be +// measured from here, probably. void VulkanRenderManager::Finish() { EndCurRenderStep(); @@ -1174,13 +1177,14 @@ void VulkanRenderManager::Finish() { int curFrame = vulkan_->GetCurFrame(); FrameData &frameData = frameData_[curFrame]; + { std::unique_lock lock(frameData.pull_mutex); VLOG("PUSH: Frame[%d].readyForRun = true", curFrame); frameData.steps = std::move(steps_); steps_.clear(); frameData.readyForRun = true; - frameData.type = VKRRunType::END; + frameData.runType_ = VKRRunType::END; frameData.pull_condVar.notify_all(); } vulkan_->EndFrame(); @@ -1204,7 +1208,6 @@ void VulkanRenderManager::BeginSubmitFrame(int frame) { FrameData &frameData = frameData_[frame]; // Should only have at most the init command buffer pending here (that one came from the other thread). - _dbg_assert_(!frameData.hasMainCommands); _dbg_assert_(!frameData.hasPresentCommands); frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared_); @@ -1259,6 +1262,28 @@ void VulkanRenderManager::EndSubmitFrame(int frame) { } } +void VulkanRenderManager::EndSyncFrame(int frame) { + FrameData &frameData = frameData_[frame]; + + // The submit will trigger the readbackFence, and wait. + Submit(frame, FrameSubmitType::Sync); + + // At this point we can resume filling the command buffers for the current frame since + // we know the device is idle - and thus all previously enqueued command buffers have been processed. + // No need to switch to the next frame number. + VkCommandBufferBeginInfo begin{ + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + nullptr, + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT + }; + _dbg_assert_(!frameData.hasPresentCommands); // Readbacks should happen before we try to submit any present commands. + frameData.hasBegun = false; // We're gonna record a new main command buffer. + + std::unique_lock lock(frameData.push_mutex); + frameData.readyForFence = true; + frameData.push_condVar.notify_all(); +} + void VulkanRenderManager::Run(int frame) { BeginSubmitFrame(frame); @@ -1267,7 +1292,7 @@ void VulkanRenderManager::Run(int frame) { //queueRunner_.LogSteps(stepsOnThread, false); queueRunner_.RunSteps(frameData, frameDataShared_); - switch (frameData.type) { + switch (frameData.runType_) { case VKRRunType::END: EndSubmitFrame(frame); break; @@ -1283,35 +1308,6 @@ void VulkanRenderManager::Run(int frame) { VLOG("PULL: Finished running frame %d", frame); } -void VulkanRenderManager::EndSyncFrame(int frame) { - FrameData &frameData = frameData_[frame]; - - // The submit will trigger the readbackFence. - Submit(frame, FrameSubmitType::Sync); - - // Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback. - vkWaitForFences(vulkan_->GetDevice(), 1, &frameData.readbackFence, true, UINT64_MAX); - vkResetFences(vulkan_->GetDevice(), 1, &frameData.readbackFence); - - // At this point we can resume filling the command buffers for the current frame since - // we know the device is idle - and thus all previously enqueued command buffers have been processed. - // No need to switch to the next frame number. - VkCommandBufferBeginInfo begin{ - VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - nullptr, - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT - }; - _dbg_assert_(!frameData.hasPresentCommands); // Readbacks should happen before we try to submit any present commands. - vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolMain, 0); - VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin); - frameData.hasMainCommands = true; - _assert_(res == VK_SUCCESS); - - std::unique_lock lock(frameData.push_mutex); - frameData.readyForFence = true; - frameData.push_condVar.notify_all(); -} - void VulkanRenderManager::FlushSync() { renderStepOffset_ += (int)steps_.size(); @@ -1325,7 +1321,7 @@ void VulkanRenderManager::FlushSync() { steps_.clear(); frameData.readyForRun = true; _dbg_assert_(!frameData.readyForFence); - frameData.type = VKRRunType::SYNC; + frameData.runType_ = VKRRunType::SYNC; frameData.pull_condVar.notify_all(); }