diff --git a/Common/GPU/Vulkan/VulkanFrameData.cpp b/Common/GPU/Vulkan/VulkanFrameData.cpp index 2c9c2487fc..fbd04d131f 100644 --- a/Common/GPU/Vulkan/VulkanFrameData.cpp +++ b/Common/GPU/Vulkan/VulkanFrameData.cpp @@ -146,8 +146,11 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame hasPresentCommands = false; if (type == FrameSubmitType::Present) { + NOTICE_LOG(G3D, "Has present commands, Triggering fence"); fenceToTrigger = fence; } + } else { + NOTICE_LOG(G3D, "No present commands, not triggering fence"); } if (!numCmdBufs && fenceToTrigger == VK_NULL_HANDLE) { @@ -181,14 +184,6 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame vkWaitForFences(vulkan->GetDevice(), 1, &readbackFence, true, UINT64_MAX); vkResetFences(vulkan->GetDevice(), 1, &readbackFence); } - - // When !triggerFence, we notify after syncing with Vulkan. - if (type == FrameSubmitType::Present || type == FrameSubmitType::Sync) { - VERBOSE_LOG(G3D, "PULL: Frame %d.readyForFence = true", index); - std::unique_lock lock(push_mutex); - readyForFence = true; // misnomer in sync mode! - push_condVar.notify_all(); - } } void FrameDataShared::Init(VulkanContext *vulkan) { diff --git a/Common/GPU/Vulkan/VulkanFrameData.h b/Common/GPU/Vulkan/VulkanFrameData.h index 53de1e8f44..a80210e81c 100644 --- a/Common/GPU/Vulkan/VulkanFrameData.h +++ b/Common/GPU/Vulkan/VulkanFrameData.h @@ -7,15 +7,14 @@ #include "Common/GPU/Vulkan/VulkanContext.h" -struct VKRStep; - enum { MAX_TIMESTAMP_QUERIES = 128, }; enum class VKRRunType { - END, + PRESENT, SYNC, + EXIT, }; struct QueueProfileContext { @@ -43,14 +42,6 @@ enum class FrameSubmitType { // Per-frame data, round-robin so we can overlap submission with execution of the previous frame. struct FrameData { - std::mutex push_mutex; - std::condition_variable push_condVar; - - std::mutex pull_mutex; - std::condition_variable pull_condVar; - - bool readyForFence = true; - bool readyForRun = false; // protected by pull_mutex bool skipSwap = false; VkFence fence; @@ -68,9 +59,10 @@ struct FrameData { bool hasMainCommands = false; bool hasPresentCommands = false; + bool hasFencePending = false; bool hasAcquired = false; - std::vector steps; + bool syncDone = false; // Swapchain. uint32_t curSwapchainImage = -1; @@ -89,12 +81,6 @@ struct FrameData { // This will only submit if we are actually recording init commands. void SubmitPending(VulkanContext *vulkan, FrameSubmitType type, FrameDataShared &shared); - VKRRunType RunType() const { - return runType_; - } - - VKRRunType runType_ = VKRRunType::END; - private: // Metadata for logging etc int index; diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index 8d739277b1..542ba7a1e2 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -554,7 +554,7 @@ void VulkanQueueRunner::PreprocessSteps(std::vector &steps) { } } -void VulkanQueueRunner::RunSteps(FrameData &frameData, FrameDataShared &frameDataShared) { +void VulkanQueueRunner::RunSteps(std::vector &steps, FrameData &frameData, FrameDataShared &frameDataShared) { QueueProfileContext *profile = frameData.profilingEnabled_ ? &frameData.profile : nullptr; if (profile) @@ -564,8 +564,8 @@ void VulkanQueueRunner::RunSteps(FrameData &frameData, FrameDataShared &frameDat VkCommandBuffer cmd = frameData.hasPresentCommands ? frameData.presentCmd : frameData.mainCmd; - for (size_t i = 0; i < frameData.steps.size(); i++) { - const VKRStep &step = *frameData.steps[i]; + for (size_t i = 0; i < steps.size(); i++) { + const VKRStep &step = *steps[i]; if (emitLabels) { VkDebugUtilsLabelEXT labelInfo{ VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT }; @@ -625,11 +625,11 @@ void VulkanQueueRunner::RunSteps(FrameData &frameData, FrameDataShared &frameDat // Deleting all in one go should be easier on the instruction cache than deleting // them as we go - and easier to debug because we can look backwards in the frame. - for (auto step : frameData.steps) { + for (auto step : steps) { delete step; } - frameData.steps.clear(); + steps.clear(); if (profile) profile->cpuEndTime = time_now_d(); diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h index 0962df3eb7..c767e498fb 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.h +++ b/Common/GPU/Vulkan/VulkanQueueRunner.h @@ -247,6 +247,14 @@ private: RPKey key_; }; +// These are enqueued from the main thread, +// and the render thread pops them off +struct VKRRenderThreadTask { + std::vector steps; + int frame; + VKRRunType runType; +}; + class VulkanQueueRunner { public: VulkanQueueRunner(VulkanContext *vulkan) : vulkan_(vulkan), renderPasses_(16) {} @@ -257,7 +265,7 @@ public: } void PreprocessSteps(std::vector &steps); - void RunSteps(FrameData &frameData, FrameDataShared &frameDataShared); + void RunSteps(std::vector &steps, FrameData &frameData, FrameDataShared &frameDataShared); void LogSteps(const std::vector &steps, bool verbose); std::string StepToString(const VKRStep &step) const; diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 9bdb1cdaee..4229ed5d86 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -14,7 +14,7 @@ #include "Common/Thread/ThreadUtil.h" #if 0 // def _DEBUG -#define VLOG(...) INFO_LOG(G3D, __VA_ARGS__) +#define VLOG(...) NOTICE_LOG(G3D, __VA_ARGS__) #else #define VLOG(...) #endif @@ -333,10 +333,9 @@ bool VulkanRenderManager::CreateBackbuffers() { // Start the thread. if (HasBackbuffers()) { - run_ = true; - // Won't necessarily be 0. - threadInitFrame_ = vulkan_->GetCurFrame(); - INFO_LOG(G3D, "Starting Vulkan submission thread (threadInitFrame_ = %d)", vulkan_->GetCurFrame()); + run_ = true; // For controlling the compiler thread's exit + + INFO_LOG(G3D, "Starting Vulkan submission thread"); thread_ = std::thread(&VulkanRenderManager::ThreadFunc, this); INFO_LOG(G3D, "Starting Vulkan compiler thread"); compileThread_ = std::thread(&VulkanRenderManager::CompileThreadFunc, this); @@ -344,29 +343,32 @@ bool VulkanRenderManager::CreateBackbuffers() { return true; } +// Called from main thread. void VulkanRenderManager::StopThread() { - if (!run_) { - INFO_LOG(G3D, "Vulkan submission thread was already stopped."); - return; + { + // Tell the render thread to quit when it's done. + std::unique_lock lock(pushMutex_); + VKRRenderThreadTask task; + task.frame = vulkan_->GetCurFrame(); + task.runType = VKRRunType::EXIT; + renderThreadQueue_.push(task); + pushCondVar_.notify_one(); } + // Compiler thread still relies on this. run_ = false; + // Stop the thread. + thread_.join(); + for (int i = 0; i < vulkan_->GetInflightFrames(); i++) { auto &frameData = frameData_[i]; - { - std::unique_lock lock(frameData.push_mutex); - frameData.push_condVar.notify_all(); - } - { - std::unique_lock lock(frameData.pull_mutex); - frameData.pull_condVar.notify_all(); - } // Zero the queries so we don't try to pull them later. frameData.profile.timestampDescriptions.clear(); } - thread_.join(); + INFO_LOG(G3D, "Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame()); + compileCond_.notify_all(); compileThread_.join(); INFO_LOG(G3D, "Vulkan compiler thread joined."); @@ -374,28 +376,24 @@ void VulkanRenderManager::StopThread() { // Eat whatever has been queued up for this frame if anything. Wipe(); - // Wait for any fences to finish and be resignaled, so we don't have sync issues. - // Also clean out any queued data, which might refer to things that might not be valid - // when we restart... + // Clean out any remaining queued data, which might refer to things that might not be valid + // when we restart the thread... + + // Not sure if this is still needed for (int i = 0; i < vulkan_->GetInflightFrames(); i++) { auto &frameData = frameData_[i]; - _assert_(!frameData.readyForRun); - _assert_(frameData.steps.empty()); if (frameData.hasInitCommands) { // Clear 'em out. This can happen on restart sometimes. vkEndCommandBuffer(frameData.initCmd); frameData.hasInitCommands = false; } - frameData.readyForRun = false; - for (size_t i = 0; i < frameData.steps.size(); i++) { - delete frameData.steps[i]; + if (frameData.hasMainCommands) { + vkEndCommandBuffer(frameData.mainCmd); + frameData.hasMainCommands = false; } - frameData.steps.clear(); - - std::unique_lock lock(frameData.push_mutex); - while (!frameData.readyForFence) { - VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (stop)", i); - frameData.push_condVar.wait(lock); + if (frameData.hasPresentCommands) { + vkEndCommandBuffer(frameData.presentCmd); + frameData.hasPresentCommands = false; } } } @@ -468,45 +466,30 @@ void VulkanRenderManager::DrainCompileQueue() { void VulkanRenderManager::ThreadFunc() { SetCurrentThreadName("RenderMan"); - int threadFrame = threadInitFrame_; - bool nextFrame = false; - bool firstFrame = true; while (true) { + // Pop a task of the queue and execute it. + VKRRenderThreadTask task; { - if (nextFrame) { - threadFrame++; - if (threadFrame >= vulkan_->GetInflightFrames()) - threadFrame = 0; + std::unique_lock lock(pushMutex_); + while (renderThreadQueue_.empty()) { + pushCondVar_.wait(lock); } - FrameData &frameData = frameData_[threadFrame]; - - std::unique_lock lock(frameData.pull_mutex); - while (!frameData.readyForRun && run_) { - VLOG("PULL: Waiting for frame[%d].readyForRun", threadFrame); - frameData.pull_condVar.wait(lock); - } - if (!frameData.readyForRun && !run_) { - // This means we're out of frames to render and run_ is false, so bail. - break; - } - VLOG("PULL: frame[%d].readyForRun = false", threadFrame); - frameData.readyForRun = false; - // Previously we had a quick exit here that avoided calling Run() if run_ was suddenly false, - // but that created a race condition where frames could end up not finished properly on resize etc. - - // Only increment next time if we're done. - nextFrame = frameData.RunType() == VKRRunType::END; + task = renderThreadQueue_.front(); + renderThreadQueue_.pop(); } - VLOG("PULL: Running frame %d", threadFrame); - if (firstFrame) { - INFO_LOG(G3D, "Running first frame (%d)", threadFrame); - firstFrame = false; + + // Oh, we got a task! We can now have pushMutex_ unlocked, allowing the host to + // push more work when it feels like it, and just start working. + if (task.runType == VKRRunType::EXIT) { + // Oh, host wanted out. Let's leave. + break; } - Run(threadFrame); - VLOG("PULL: Finished frame %d", threadFrame); + + Run(task); } // Wait for the device to be done with everything, before tearing stuff down. + // TODO: Do we need this? vkDeviceWaitIdle(vulkan_->GetDevice()); VLOG("PULL: Quitting"); @@ -519,19 +502,10 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile int curFrame = vulkan_->GetCurFrame(); FrameData &frameData = frameData_[curFrame]; - // Make sure the very last command buffer from the frame before the previous has been fully executed. - { - std::unique_lock lock(frameData.push_mutex); - while (!frameData.readyForFence) { - VLOG("PUSH: Waiting for frame[%d].readyForFence = 1", curFrame); - frameData.push_condVar.wait(lock); - } - frameData.readyForFence = false; - } - VLOG("PUSH: Fencing %d", curFrame); // This must be the very first Vulkan call we do in a new frame. + // Makes sure the very last command buffer from the frame before the previous has been fully executed. if (vkWaitForFences(device, 1, &frameData.fence, true, UINT64_MAX) == VK_ERROR_DEVICE_LOST) { _assert_msg_(false, "Device lost in vkWaitForFences"); } @@ -578,9 +552,6 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile // Must be after the fence - this performs deletes. VLOG("PUSH: BeginFrame %d", curFrame); - if (!run_) { - WARN_LOG(G3D, "BeginFrame while !run_!"); - } vulkan_->BeginFrame(enableLogProfiler ? GetInitCmd() : VK_NULL_HANDLE); @@ -1085,7 +1056,6 @@ void VulkanRenderManager::CopyFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, if (dstPos.x != 0 || dstPos.y != 0 || !fillsDst) step->dependencies.insert(dst); - std::unique_lock lock(mutex_); steps_.push_back(step); } @@ -1131,7 +1101,6 @@ void VulkanRenderManager::BlitFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, if (!fillsDst) step->dependencies.insert(dst); - std::unique_lock lock(mutex_); steps_.push_back(step); } @@ -1191,16 +1160,18 @@ void VulkanRenderManager::Finish() { FrameData &frameData = frameData_[curFrame]; { - std::unique_lock lock(frameData.pull_mutex); - VLOG("PUSH: Frame[%d].readyForRun = true", curFrame); - frameData.steps = std::move(steps_); - steps_.clear(); - frameData.readyForRun = true; - frameData.runType_ = VKRRunType::END; - frameData.pull_condVar.notify_all(); + VLOG("PUSH: Frame[%d]", curFrame); + std::unique_lock lock(pushMutex_); + VKRRenderThreadTask task; + task.frame = curFrame; + task.runType = VKRRunType::PRESENT; + renderThreadQueue_.push(task); + renderThreadQueue_.back().steps = std::move(steps_); + pushCondVar_.notify_one(); } - vulkan_->EndFrame(); + steps_.clear(); + vulkan_->EndFrame(); insideFrame_ = false; } @@ -1214,8 +1185,8 @@ void VulkanRenderManager::Wipe() { // Called on the render thread. // // Can be called again after a VKRRunType::SYNC on the same frame. -void VulkanRenderManager::Run(int frame) { - FrameData &frameData = frameData_[frame]; +void VulkanRenderManager::Run(VKRRenderThreadTask &task) { + FrameData &frameData = frameData_[task.frame]; _dbg_assert_(!frameData.hasPresentCommands); frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared_); @@ -1232,16 +1203,15 @@ void VulkanRenderManager::Run(int frame) { _assert_msg_(res == VK_SUCCESS, "vkBeginCommandBuffer failed! result=%s", VulkanResultToString(res)); } - queueRunner_.PreprocessSteps(frameData.steps); + queueRunner_.PreprocessSteps(task.steps); // Likely during shutdown, happens in headless. - if (frameData.steps.empty() && !frameData.hasAcquired) + if (task.steps.empty() && !frameData.hasAcquired) frameData.skipSwap = true; //queueRunner_.LogSteps(stepsOnThread, false); - queueRunner_.RunSteps(frameData, frameDataShared_); - - switch (frameData.runType_) { - case VKRRunType::END: + queueRunner_.RunSteps(task.steps, frameData, frameDataShared_); + switch (task.runType) { + case VKRRunType::PRESENT: frameData.SubmitPending(vulkan_, FrameSubmitType::Present, frameDataShared_); if (!frameData.skipSwap) { @@ -1268,6 +1238,12 @@ void VulkanRenderManager::Run(int frame) { case VKRRunType::SYNC: // The submit will trigger the readbackFence, and also do the wait for it. frameData.SubmitPending(vulkan_, FrameSubmitType::Sync, frameDataShared_); + + { + std::unique_lock lock(syncMutex_); + syncCondVar_.notify_one(); + } + // At this point the GPU is idle, and we can resume filling the command buffers for the // current frame since and thus all previously enqueued command buffers have been // processed. No need to switch to the next frame number, would just be confusing. @@ -1277,7 +1253,7 @@ void VulkanRenderManager::Run(int frame) { _dbg_assert_(false); } - VLOG("PULL: Finished running frame %d", frame); + VLOG("PULL: Finished running frame %d", task.frame); } // Called from main thread. @@ -1288,23 +1264,23 @@ void VulkanRenderManager::FlushSync() { FrameData &frameData = frameData_[curFrame]; { - std::unique_lock lock(frameData.pull_mutex); - VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame); - frameData.steps = std::move(steps_); - steps_.clear(); - frameData.readyForRun = true; - _dbg_assert_(!frameData.readyForFence); - frameData.runType_ = VKRRunType::SYNC; - frameData.pull_condVar.notify_all(); + VLOG("PUSH: Frame[%d]", curFrame); + std::unique_lock lock(pushMutex_); + VKRRenderThreadTask task; + task.frame = curFrame; + task.runType = VKRRunType::SYNC; + renderThreadQueue_.push(task); + renderThreadQueue_.back().steps = std::move(steps_); + pushCondVar_.notify_one(); } { - std::unique_lock lock(frameData.push_mutex); + std::unique_lock lock(syncMutex_); // Wait for the flush to be hit, since we're syncing. - while (!frameData.readyForFence) { + while (!frameData.syncDone) { VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame); - frameData.push_condVar.wait(lock); + syncCondVar_.wait(lock); } - frameData.readyForFence = false; + frameData.syncDone = false; } } diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index 26eccb94d7..ed9f0d16e5 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -466,7 +466,7 @@ private: void CompileThreadFunc(); void DrainCompileQueue(); - void Run(int frame); + void Run(VKRRenderThreadTask &task); void BeginSubmitFrame(int frame); // Bad for performance but sometimes necessary for synchronous CPU readbacks (screenshots and whatnot). @@ -492,6 +492,8 @@ private: int curHeight_ = -1; bool insideFrame_ = false; + bool run_ = false; + // This is the offset within this frame, in case of a mid-frame sync. int renderStepOffset_ = 0; VKRStep *curRenderStep_ = nullptr; @@ -503,13 +505,20 @@ private: std::vector steps_; // Execution time state - bool run_ = true; VulkanContext *vulkan_; std::thread thread_; - std::mutex mutex_; - int threadInitFrame_ = 0; VulkanQueueRunner queueRunner_; + // For pushing data on the queue. + std::mutex pushMutex_; + std::condition_variable pushCondVar_; + + std::queue renderThreadQueue_; + + // For readbacks and other reasons we need to sync with the render thread. + std::mutex syncMutex_; + std::condition_variable syncCondVar_; + // Shader compilation thread to compile while emulating the rest of the frame. // Only one right now but we could use more. std::thread compileThread_;