From 2467fddc01adcd0748d23d8a9b2ae6e249f09f43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 18 Sep 2019 00:16:08 +0200 Subject: [PATCH] Vulkan GPU profiler: Measure the CPU time spent on the render thread recording command buffers. --- ext/native/thin3d/VulkanQueueRunner.cpp | 15 ++++++--- ext/native/thin3d/VulkanQueueRunner.h | 10 +++++- ext/native/thin3d/VulkanRenderManager.cpp | 39 ++++++++++++----------- ext/native/thin3d/VulkanRenderManager.h | 6 ++-- 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/ext/native/thin3d/VulkanQueueRunner.cpp b/ext/native/thin3d/VulkanQueueRunner.cpp index 8be1ca7120..19b9e1c6a3 100644 --- a/ext/native/thin3d/VulkanQueueRunner.cpp +++ b/ext/native/thin3d/VulkanQueueRunner.cpp @@ -1,4 +1,6 @@ #include + +#include "base/timeutil.h" #include "DataFormat.h" #include "VulkanQueueRunner.h" #include "VulkanRenderManager.h" @@ -375,7 +377,9 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) { return pass; } -void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector &steps, VkQueryPool queryPool, std::vector *timestampDescriptions) { +void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector &steps, QueueProfileContext *profile) { + if (profile) + profile->cpuStartTime = real_time_now(); // Optimizes renderpasses, then sequences them. // Planned optimizations: // * Create copies of render target that are rendered to multiple times and textured from in sequence, and push those render passes @@ -465,9 +469,9 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector &st break; } - if (queryPool) { - vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, queryPool, (uint32_t)timestampDescriptions->size()); - timestampDescriptions->push_back(StepToString(step)); + if (profile) { + vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, profile->queryPool, (uint32_t)profile->timestampDescriptions.size()); + profile->timestampDescriptions.push_back(StepToString(step)); } } @@ -476,6 +480,9 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector &st for (size_t i = 0; i < steps.size(); i++) { delete steps[i]; } + + if (profile) + profile->cpuEndTime = real_time_now(); } void VulkanQueueRunner::ApplyMGSHack(std::vector &steps) { diff --git a/ext/native/thin3d/VulkanQueueRunner.h b/ext/native/thin3d/VulkanQueueRunner.h index 3c840856c6..8bb1adfbdf 100644 --- a/ext/native/thin3d/VulkanQueueRunner.h +++ b/ext/native/thin3d/VulkanQueueRunner.h @@ -173,6 +173,14 @@ struct TransitionRequest { VkImageLayout targetLayout; }; +struct QueueProfileContext { + VkQueryPool queryPool; + std::vector timestampDescriptions; + std::string profileSummary; + double cpuStartTime; + double cpuEndTime; +}; + struct VKRStep { VKRStep(VKRStepType _type) : stepType(_type) {} ~VKRStep() {} @@ -232,7 +240,7 @@ public: } // RunSteps can modify steps but will leave it in a valid state. - void RunSteps(VkCommandBuffer cmd, std::vector &steps, VkQueryPool queryPool, std::vector *timestampDescriptions); + void RunSteps(VkCommandBuffer cmd, std::vector &steps, QueueProfileContext *profile); void LogSteps(const std::vector &steps); std::string StepToString(const VKRStep &step) const; diff --git a/ext/native/thin3d/VulkanRenderManager.cpp b/ext/native/thin3d/VulkanRenderManager.cpp index f9428cc462..f6c4778c62 100644 --- a/ext/native/thin3d/VulkanRenderManager.cpp +++ b/ext/native/thin3d/VulkanRenderManager.cpp @@ -137,7 +137,7 @@ VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan) : vulkan_(vulkan VkQueryPoolCreateInfo query_ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO }; query_ci.queryCount = MAX_TIMESTAMP_QUERIES; query_ci.queryType = VK_QUERY_TYPE_TIMESTAMP; - res = vkCreateQueryPool(vulkan_->GetDevice(), &query_ci, nullptr, &frameData_[i].timestampQueryPool_); + res = vkCreateQueryPool(vulkan_->GetDevice(), &query_ci, nullptr, &frameData_[i].profile.queryPool); } queueRunner_.CreateDeviceObjects(); @@ -227,7 +227,7 @@ void VulkanRenderManager::StopThread() { frameData.pull_condVar.notify_all(); } // Zero the queries so we don't try to pull them later. - frameData.timestampDescriptions.clear(); + frameData.profile.timestampDescriptions.clear(); } thread_.join(); ILOG("Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame()); @@ -298,7 +298,7 @@ VulkanRenderManager::~VulkanRenderManager() { vkDestroyCommandPool(device, frameData_[i].cmdPoolInit, nullptr); vkDestroyCommandPool(device, frameData_[i].cmdPoolMain, nullptr); vkDestroyFence(device, frameData_[i].fence, nullptr); - vkDestroyQueryPool(device, frameData_[i].timestampQueryPool_, nullptr); + vkDestroyQueryPool(device, frameData_[i].profile.queryPool, nullptr); } queueRunner_.DestroyDeviceObjects(); } @@ -375,11 +375,11 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling) { if (frameData.profilingEnabled_) { // Pull the profiling results from last time and produce a summary! - if (!frameData.timestampDescriptions.empty()) { - int numQueries = (int)frameData.timestampDescriptions.size(); + if (!frameData.profile.timestampDescriptions.empty()) { + int numQueries = (int)frameData.profile.timestampDescriptions.size(); VkResult res = vkGetQueryPoolResults( vulkan_->GetDevice(), - frameData.timestampQueryPool_, 0, numQueries, sizeof(uint64_t) * numQueries, &queryResults[0], sizeof(uint64_t), + frameData.profile.queryPool, 0, numQueries, sizeof(uint64_t) * numQueries, &queryResults[0], sizeof(uint64_t), VK_QUERY_RESULT_64_BIT); if (res == VK_SUCCESS) { double timestampConversionFactor = (double)vulkan_->GetPhysicalDeviceProperties().properties.limits.timestampPeriod * (1.0 / 1000000.0); @@ -390,19 +390,20 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling) { char line[256]; snprintf(line, sizeof(line), "Total GPU time: %0.3f ms\n", ((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor)); str << line; + snprintf(line, sizeof(line), "Render CPU time: %0.3f ms\n", (frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0); + str << line; for (int i = 0; i < numQueries - 1; i++) { uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask; double milliseconds = (double)diff * timestampConversionFactor; - snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.timestampDescriptions[i + 1].c_str(), milliseconds); + snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds); str << line; } - frameData.profileSummary = str.str(); + frameData.profile.profileSummary = str.str(); } else { - frameData.profileSummary = "(error getting GPU profile - not ready?)"; + frameData.profile.profileSummary = "(error getting GPU profile - not ready?)"; } - } - else { - frameData.profileSummary = "(no GPU profile data collected)"; + } else { + frameData.profile.profileSummary = "(no GPU profile data collected)"; } } @@ -415,16 +416,16 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling) { insideFrame_ = true; - frameData.timestampDescriptions.clear(); + frameData.profile.timestampDescriptions.clear(); if (frameData_->profilingEnabled_) { // For various reasons, we need to always use an init cmd buffer in this case to perform the vkCmdResetQueryPool, // unless we want to limit ourselves to only measure the main cmd buffer. // Reserve the first two queries for initCmd. - frameData.timestampDescriptions.push_back("initCmd Begin"); - frameData.timestampDescriptions.push_back("initCmd"); + frameData.profile.timestampDescriptions.push_back("initCmd Begin"); + frameData.profile.timestampDescriptions.push_back("initCmd"); VkCommandBuffer initCmd = GetInitCmd(); - vkCmdResetQueryPool(initCmd, frameData.timestampQueryPool_, 0, MAX_TIMESTAMP_QUERIES); - vkCmdWriteTimestamp(initCmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, frameData.timestampQueryPool_, 0); + vkCmdResetQueryPool(initCmd, frameData.profile.queryPool, 0, MAX_TIMESTAMP_QUERIES); + vkCmdWriteTimestamp(initCmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, frameData.profile.queryPool, 0); } } @@ -960,7 +961,7 @@ void VulkanRenderManager::Submit(int frame, bool triggerFence) { if (frameData.hasInitCommands) { if (frameData.profilingEnabled_ && triggerFence) { // Pre-allocated query ID 1. - vkCmdWriteTimestamp(frameData.initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frameData.timestampQueryPool_, 1); + vkCmdWriteTimestamp(frameData.initCmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, frameData.profile.queryPool, 1); } VkResult res = vkEndCommandBuffer(frameData.initCmd); _assert_msg_(G3D, res == VK_SUCCESS, "vkEndCommandBuffer failed (init)! result=%s", VulkanResultToString(res)); @@ -1061,7 +1062,7 @@ void VulkanRenderManager::Run(int frame) { auto &stepsOnThread = frameData_[frame].steps; VkCommandBuffer cmd = frameData.mainCmd; // queueRunner_.LogSteps(stepsOnThread); - queueRunner_.RunSteps(cmd, stepsOnThread, frameData.profilingEnabled_ ? frameData.timestampQueryPool_ : VK_NULL_HANDLE, &frameData.timestampDescriptions); + queueRunner_.RunSteps(cmd, stepsOnThread, frameData.profilingEnabled_ ? &frameData.profile : nullptr); stepsOnThread.clear(); switch (frameData.type) { diff --git a/ext/native/thin3d/VulkanRenderManager.h b/ext/native/thin3d/VulkanRenderManager.h index 56a3fa1cf3..d0b0d74793 100644 --- a/ext/native/thin3d/VulkanRenderManager.h +++ b/ext/native/thin3d/VulkanRenderManager.h @@ -258,7 +258,7 @@ public: } std::string GetGpuProfileString() const { - return frameData_[vulkan_->GetCurFrame()].profileSummary; + return frameData_[vulkan_->GetCurFrame()].profile.profileSummary; } private: @@ -305,10 +305,8 @@ private: uint32_t curSwapchainImage = -1; // Profiling. - VkQueryPool timestampQueryPool_ = VK_NULL_HANDLE; + QueueProfileContext profile; bool profilingEnabled_; - std::vector timestampDescriptions; - std::string profileSummary; }; FrameData frameData_[VulkanContext::MAX_INFLIGHT_FRAMES];