Rewrite some present logic for slightly more clarity. Fixes bad logic and a minor race condition.

This commit is contained in:
Henrik Rydgård 2022-09-20 16:27:05 +02:00
parent c7322edf7b
commit b190c33cc7
5 changed files with 71 additions and 49 deletions

View file

@ -1012,6 +1012,7 @@ private:
bool readyForFence = true;
bool readyForRun = false;
bool readyForSubmit = false;
bool skipSwap = false;
GLRRunType type = GLRRunType::END;

View file

@ -49,22 +49,35 @@ void FrameData::Destroy(VulkanContext *vulkan) {
}
void FrameData::AcquireNextImage(VulkanContext *vulkan, FrameDataShared &shared) {
_dbg_assert_(!hasAcquired);
// Get the index of the next available swapchain image, and a semaphore to block command buffer execution on.
VkResult res = vkAcquireNextImageKHR(vulkan->GetDevice(), vulkan->GetSwapchain(), UINT64_MAX, shared.acquireSemaphore, (VkFence)VK_NULL_HANDLE, &curSwapchainImage);
if (res == VK_SUBOPTIMAL_KHR) {
switch (res) {
case VK_SUCCESS:
hasAcquired = true;
break;
case VK_SUBOPTIMAL_KHR:
hasAcquired = true;
// Hopefully the resize will happen shortly. Ignore - one frame might look bad or something.
WARN_LOG(G3D, "VK_SUBOPTIMAL_KHR returned - ignoring");
} else if (res == VK_ERROR_OUT_OF_DATE_KHR) {
WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned - processing the frame, but not presenting");
break;
case VK_ERROR_OUT_OF_DATE_KHR:
// We do not set hasAcquired here!
WARN_LOG(G3D, "VK_ERROR_OUT_OF_DATE_KHR returned from AcquireNextImage - processing the frame, but not presenting");
skipSwap = true;
} else {
_assert_msg_(res == VK_SUCCESS, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res));
break;
default:
// Weird, shouldn't get any other values. Maybe lost device?
_assert_msg_(false, "vkAcquireNextImageKHR failed! result=%s", VulkanResultToString(res));
break;
}
}
VkResult FrameData::QueuePresent(VulkanContext *vulkan, FrameDataShared &shared) {
_dbg_assert_(hasAcquired);
hasAcquired = false;
_dbg_assert_(!skipSwap);
VkSwapchainKHR swapchain = vulkan->GetSwapchain();
VkPresentInfoKHR present = { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR };
@ -145,6 +158,7 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame
VkSubmitInfo submit_info{ VK_STRUCTURE_TYPE_SUBMIT_INFO };
VkPipelineStageFlags waitStage[1]{ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };
if (type == FrameSubmitType::Present && !skipSwap) {
_dbg_assert_(hasAcquired);
submit_info.waitSemaphoreCount = 1;
submit_info.pWaitSemaphores = &sharedData.acquireSemaphore;
submit_info.pWaitDstStageMask = waitStage;
@ -162,11 +176,17 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame
_assert_msg_(res == VK_SUCCESS, "vkQueueSubmit failed (main)! result=%s", VulkanResultToString(res));
}
if (type == FrameSubmitType::Sync) {
// Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback.
vkWaitForFences(vulkan->GetDevice(), 1, &readbackFence, true, UINT64_MAX);
vkResetFences(vulkan->GetDevice(), 1, &readbackFence);
}
// When !triggerFence, we notify after syncing with Vulkan.
if (type == FrameSubmitType::Present || type == FrameSubmitType::Sync) {
VERBOSE_LOG(G3D, "PULL: Frame %d.readyForFence = true", index);
std::unique_lock<std::mutex> lock(push_mutex);
readyForFence = true;
readyForFence = true; // misnomer in sync mode!
push_condVar.notify_all();
}
}

View file

@ -50,9 +50,8 @@ struct FrameData {
std::condition_variable pull_condVar;
bool readyForFence = true;
bool readyForRun = false;
bool readyForRun = false; // protected by pull_mutex
bool skipSwap = false;
VKRRunType type = VKRRunType::END;
VkFence fence;
VkFence readbackFence; // Strictly speaking we might only need one global of these.
@ -81,9 +80,6 @@ struct FrameData {
QueueProfileContext profile;
bool profilingEnabled_;
// Metadata for logging etc
int index;
void Init(VulkanContext *vulkan, int index);
void Destroy(VulkanContext *vulkan);
@ -93,4 +89,14 @@ struct FrameData {
// This will only submit if we are actually recording init commands.
void SubmitPending(VulkanContext *vulkan, FrameSubmitType type, FrameDataShared &shared);
VKRRunType RunType() const {
return runType_;
}
VKRRunType runType_ = VKRRunType::END;
private:
// Metadata for logging etc
int index;
};

View file

@ -563,7 +563,6 @@ void VulkanQueueRunner::RunSteps(FrameData &frameData, FrameDataShared &frameDat
if (!step.render.framebuffer) {
// When stepping in the GE debugger, we can end up here multiple times in a "frame".
if (!frameData.hasAcquired) {
frameData.hasAcquired = true;
frameData.AcquireNextImage(vulkan_, frameDataShared);
SetBackbuffer(framebuffers_[frameData.curSwapchainImage], swapchainImages_[frameData.curSwapchainImage].image);
}

View file

@ -491,8 +491,7 @@ void VulkanRenderManager::ThreadFunc() {
// but that created a race condition where frames could end up not finished properly on resize etc.
// Only increment next time if we're done.
nextFrame = frameData.type == VKRRunType::END;
_dbg_assert_(frameData.type == VKRRunType::END || frameData.type == VKRRunType::SYNC);
nextFrame = frameData.RunType() == VKRRunType::END;
}
VLOG("PULL: Running frame %d", threadFrame);
if (firstFrame) {
@ -658,6 +657,8 @@ void VulkanRenderManager::EndCurRenderStep() {
curRenderStep_->render.colorStore, curRenderStep_->render.depthStore, curRenderStep_->render.stencilStore,
};
RenderPassType rpType = RP_TYPE_COLOR_DEPTH;
// Save the accumulated pipeline flags so we can use that to configure the render pass.
// We'll often be able to avoid loading/saving the depth/stencil buffer.
curRenderStep_->render.pipelineFlags = curPipelineFlags_;
if (!curRenderStep_->render.framebuffer) {
rpType = RP_TYPE_BACKBUFFER;
@ -665,12 +666,11 @@ void VulkanRenderManager::EndCurRenderStep() {
// Not allowed on backbuffers.
rpType = RP_TYPE_COLOR_DEPTH_INPUT;
}
// TODO: Also add render pass types for depth/stencil-less.
VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key);
curRenderStep_->render.renderPassType = rpType;
// Save the accumulated pipeline flags so we can use that to configure the render pass.
// We'll often be able to avoid loading/saving the depth/stencil buffer.
compileMutex_.lock();
bool needsCompile = false;
for (VKRGraphicsPipeline *pipeline : pipelinesToCheck_) {
@ -1162,6 +1162,9 @@ VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, in
}
}
// Called on main thread.
// Sends the collected commands to the render thread. Submit-latency should be
// measured from here, probably.
void VulkanRenderManager::Finish() {
EndCurRenderStep();
@ -1174,13 +1177,14 @@ void VulkanRenderManager::Finish() {
int curFrame = vulkan_->GetCurFrame();
FrameData &frameData = frameData_[curFrame];
{
std::unique_lock<std::mutex> lock(frameData.pull_mutex);
VLOG("PUSH: Frame[%d].readyForRun = true", curFrame);
frameData.steps = std::move(steps_);
steps_.clear();
frameData.readyForRun = true;
frameData.type = VKRRunType::END;
frameData.runType_ = VKRRunType::END;
frameData.pull_condVar.notify_all();
}
vulkan_->EndFrame();
@ -1204,7 +1208,6 @@ void VulkanRenderManager::BeginSubmitFrame(int frame) {
FrameData &frameData = frameData_[frame];
// Should only have at most the init command buffer pending here (that one came from the other thread).
_dbg_assert_(!frameData.hasMainCommands);
_dbg_assert_(!frameData.hasPresentCommands);
frameData.SubmitPending(vulkan_, FrameSubmitType::Pending, frameDataShared_);
@ -1259,6 +1262,28 @@ void VulkanRenderManager::EndSubmitFrame(int frame) {
}
}
void VulkanRenderManager::EndSyncFrame(int frame) {
FrameData &frameData = frameData_[frame];
// The submit will trigger the readbackFence, and wait.
Submit(frame, FrameSubmitType::Sync);
// At this point we can resume filling the command buffers for the current frame since
// we know the device is idle - and thus all previously enqueued command buffers have been processed.
// No need to switch to the next frame number.
VkCommandBufferBeginInfo begin{
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
nullptr,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
};
_dbg_assert_(!frameData.hasPresentCommands); // Readbacks should happen before we try to submit any present commands.
frameData.hasBegun = false; // We're gonna record a new main command buffer.
std::unique_lock<std::mutex> lock(frameData.push_mutex);
frameData.readyForFence = true;
frameData.push_condVar.notify_all();
}
void VulkanRenderManager::Run(int frame) {
BeginSubmitFrame(frame);
@ -1267,7 +1292,7 @@ void VulkanRenderManager::Run(int frame) {
//queueRunner_.LogSteps(stepsOnThread, false);
queueRunner_.RunSteps(frameData, frameDataShared_);
switch (frameData.type) {
switch (frameData.runType_) {
case VKRRunType::END:
EndSubmitFrame(frame);
break;
@ -1283,35 +1308,6 @@ void VulkanRenderManager::Run(int frame) {
VLOG("PULL: Finished running frame %d", frame);
}
void VulkanRenderManager::EndSyncFrame(int frame) {
FrameData &frameData = frameData_[frame];
// The submit will trigger the readbackFence.
Submit(frame, FrameSubmitType::Sync);
// Hard stall of the GPU, not ideal, but necessary so the CPU has the contents of the readback.
vkWaitForFences(vulkan_->GetDevice(), 1, &frameData.readbackFence, true, UINT64_MAX);
vkResetFences(vulkan_->GetDevice(), 1, &frameData.readbackFence);
// At this point we can resume filling the command buffers for the current frame since
// we know the device is idle - and thus all previously enqueued command buffers have been processed.
// No need to switch to the next frame number.
VkCommandBufferBeginInfo begin{
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
nullptr,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
};
_dbg_assert_(!frameData.hasPresentCommands); // Readbacks should happen before we try to submit any present commands.
vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolMain, 0);
VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin);
frameData.hasMainCommands = true;
_assert_(res == VK_SUCCESS);
std::unique_lock<std::mutex> lock(frameData.push_mutex);
frameData.readyForFence = true;
frameData.push_condVar.notify_all();
}
void VulkanRenderManager::FlushSync() {
renderStepOffset_ += (int)steps_.size();
@ -1325,7 +1321,7 @@ void VulkanRenderManager::FlushSync() {
steps_.clear();
frameData.readyForRun = true;
_dbg_assert_(!frameData.readyForFence);
frameData.type = VKRRunType::SYNC;
frameData.runType_ = VKRRunType::SYNC;
frameData.pull_condVar.notify_all();
}