From 55fe21db1e418b141e8d3ccb737d3026e8ee6336 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 12 Dec 2021 11:34:05 +0100 Subject: [PATCH] Add a simple GPU profiler to profile individual events, rather than full passes. --- CMakeLists.txt | 2 + Common/Common.vcxproj | 2 + Common/Common.vcxproj.filters | 6 ++ Common/GPU/Vulkan/VulkanContext.cpp | 13 ++- Common/GPU/Vulkan/VulkanContext.h | 10 ++- Common/GPU/Vulkan/VulkanImage.cpp | 1 + Common/GPU/Vulkan/VulkanImage.h | 4 +- Common/GPU/Vulkan/VulkanProfiler.cpp | 98 +++++++++++++++++++++++ Common/GPU/Vulkan/VulkanProfiler.h | 49 ++++++++++++ Common/GPU/Vulkan/VulkanRenderManager.cpp | 2 +- GPU/Vulkan/TextureCacheVulkan.cpp | 2 + android/jni/Android.mk | 1 + libretro/Makefile.common | 1 + 13 files changed, 187 insertions(+), 4 deletions(-) create mode 100644 Common/GPU/Vulkan/VulkanProfiler.cpp create mode 100644 Common/GPU/Vulkan/VulkanProfiler.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 82cf012edf..62ced8270e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -589,6 +589,8 @@ add_library(Common STATIC Common/GPU/Vulkan/VulkanLoader.h Common/GPU/Vulkan/VulkanMemory.cpp Common/GPU/Vulkan/VulkanMemory.h + Common/GPU/Vulkan/VulkanProfiler.cpp + Common/GPU/Vulkan/VulkanProfiler.h Common/GPU/Vulkan/thin3d_vulkan.cpp Common/GPU/Vulkan/VulkanRenderManager.cpp Common/GPU/Vulkan/VulkanRenderManager.h diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj index 2bdf069582..7e61f89776 100644 --- a/Common/Common.vcxproj +++ b/Common/Common.vcxproj @@ -443,6 +443,7 @@ + @@ -870,6 +871,7 @@ + diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters index 32c392cf6a..65b6eaccab 100644 --- a/Common/Common.vcxproj.filters +++ b/Common/Common.vcxproj.filters @@ -412,6 +412,9 @@ GPU\Vulkan + + GPU\Vulkan + @@ -786,6 +789,9 @@ ext\vma + + GPU\Vulkan + diff --git a/Common/GPU/Vulkan/VulkanContext.cpp b/Common/GPU/Vulkan/VulkanContext.cpp index 94c8bb2b06..2cb0e228af 100644 --- a/Common/GPU/Vulkan/VulkanContext.cpp +++ b/Common/GPU/Vulkan/VulkanContext.cpp @@ -289,14 +289,16 @@ void VulkanContext::DestroyInstance() { instance_ = VK_NULL_HANDLE; } -void VulkanContext::BeginFrame() { +void VulkanContext::BeginFrame(VkCommandBuffer firstCommandBuffer) { FrameData *frame = &frame_[curFrame_]; // Process pending deletes. frame->deleteList.PerformDeletes(device_, allocator_); + frame->profiler.BeginFrame(this, firstCommandBuffer); } void VulkanContext::EndFrame() { frame_[curFrame_].deleteList.Take(globalDeleteList_); + frame_[curFrame_].profiler.EndFrame(); curFrame_++; if (curFrame_ >= inflightFrames_) { curFrame_ = 0; @@ -675,6 +677,11 @@ VkResult VulkanContext::CreateDevice() { allocatorInfo.device = device_; allocatorInfo.instance = instance_; vmaCreateAllocator(&allocatorInfo, &allocator_); + + for (int i = 0; i < GetInflightFrames(); i++) { + frame_[i].profiler.Init(this); + } + return res; } @@ -1122,6 +1129,10 @@ void VulkanContext::DestroyDevice() { INFO_LOG(G3D, "VulkanContext::DestroyDevice (performing deletes)"); PerformPendingDeletes(); + for (int i = 0; i < GetInflightFrames(); i++) { + frame_[i].profiler.Shutdown(); + } + vmaDestroyAllocator(allocator_); allocator_ = VK_NULL_HANDLE; diff --git a/Common/GPU/Vulkan/VulkanContext.h b/Common/GPU/Vulkan/VulkanContext.h index c119dc6393..04d163f76f 100644 --- a/Common/GPU/Vulkan/VulkanContext.h +++ b/Common/GPU/Vulkan/VulkanContext.h @@ -9,6 +9,7 @@ #include "Common/GPU/Vulkan/VulkanLoader.h" #include "Common/GPU/Vulkan/VulkanDebug.h" #include "Common/GPU/Vulkan/VulkanAlloc.h" +#include "Common/GPU/Vulkan/VulkanProfiler.h" enum { VULKAN_FLAG_VALIDATE = 1, @@ -62,6 +63,8 @@ struct VulkanPhysicalDeviceInfo { bool canBlitToPreferredDepthStencilFormat; }; +class VulkanProfiler; + // This is a bit repetitive... class VulkanDeleteList { struct BufferWithAlloc { @@ -188,9 +191,13 @@ public: int GetBackbufferWidth() { return (int)swapChainExtent_.width; } int GetBackbufferHeight() { return (int)swapChainExtent_.height; } - void BeginFrame(); + void BeginFrame(VkCommandBuffer firstCommandBuffer); void EndFrame(); + VulkanProfiler *GetProfiler() { + return &frame_[curFrame_].profiler; + } + // Simple workaround for the casting warning. template void SetDebugName(T handle, VkObjectType type, const char *name) { @@ -369,6 +376,7 @@ private: struct FrameData { FrameData() {} VulkanDeleteList deleteList; + VulkanProfiler profiler; }; FrameData frame_[MAX_INFLIGHT_FRAMES]; int curFrame_ = 0; diff --git a/Common/GPU/Vulkan/VulkanImage.cpp b/Common/GPU/Vulkan/VulkanImage.cpp index 3a6cd2ce88..7a4f9bf016 100644 --- a/Common/GPU/Vulkan/VulkanImage.cpp +++ b/Common/GPU/Vulkan/VulkanImage.cpp @@ -1,6 +1,7 @@ #include #include "Common/Log.h" +#include "Common/GPU/Vulkan/VulkanContext.h" #include "Common/GPU/Vulkan/VulkanAlloc.h" #include "Common/GPU/Vulkan/VulkanImage.h" #include "Common/GPU/Vulkan/VulkanMemory.h" diff --git a/Common/GPU/Vulkan/VulkanImage.h b/Common/GPU/Vulkan/VulkanImage.h index 356a517494..adbd167cf5 100644 --- a/Common/GPU/Vulkan/VulkanImage.h +++ b/Common/GPU/Vulkan/VulkanImage.h @@ -1,7 +1,9 @@ #pragma once -#include "Common/GPU/Vulkan/VulkanContext.h" +#include +#include "VulkanLoader.h" +class VulkanContext; class VulkanDeviceAllocator; VK_DEFINE_HANDLE(VmaAllocation); diff --git a/Common/GPU/Vulkan/VulkanProfiler.cpp b/Common/GPU/Vulkan/VulkanProfiler.cpp new file mode 100644 index 0000000000..e23a3b4a86 --- /dev/null +++ b/Common/GPU/Vulkan/VulkanProfiler.cpp @@ -0,0 +1,98 @@ +#include "VulkanProfiler.h" +#include "VulkanContext.h" + +using namespace PPSSPP_VK; + +void VulkanProfiler::Init(VulkanContext *vulkan) { + vulkan_ = vulkan; + + for (int i = 0; i < vulkan->GetInflightFrames(); i++) { + VkQueryPoolCreateInfo ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO }; + ci.queryCount = MAX_QUERY_COUNT; + ci.queryType = VK_QUERY_TYPE_TIMESTAMP; + vkCreateQueryPool(vulkan->GetDevice(), &ci, nullptr, &queryPool_); + } +} + +void VulkanProfiler::Shutdown() { + for (int i = 0; i < vulkan_->GetInflightFrames(); i++) { + vkDestroyQueryPool(vulkan_->GetDevice(), queryPool_, nullptr); + } +} + +void VulkanProfiler::BeginFrame(VulkanContext *vulkan, VkCommandBuffer firstCommandBuf) { + vulkan_ = vulkan; + + // Check for old queries belonging to this frame context that we can log out - these are now + // guaranteed to be done. + if (numQueries_ > 0) { + std::vector results(numQueries_); + vkGetQueryPoolResults(vulkan->GetDevice(), queryPool_, 0, numQueries_, sizeof(uint64_t) * numQueries_, results.data(), sizeof(uint64_t), VK_QUERY_RESULT_64_BIT); + + double timestampConversionFactor = (double)vulkan_->GetPhysicalDeviceProperties().properties.limits.timestampPeriod * (1.0 / 1000000.0); + int validBits = vulkan_->GetQueueFamilyProperties(vulkan_->GetGraphicsQueueFamilyIndex()).timestampValidBits; + uint64_t timestampDiffMask = validBits == 64 ? 0xFFFFFFFFFFFFFFFFULL : ((1ULL << validBits) - 1); + + static const char * const indent[4] = { "", " ", " ", " " }; + // Log it all out. + for (auto &scope : scopes_) { + if (scope.endQueryId == -1) { + NOTICE_LOG(G3D, "Unclosed scope: %s", scope.name.c_str()); + continue; + } + uint64_t startTime = results[scope.startQueryId]; + uint64_t endTime = results[scope.endQueryId]; + + uint64_t delta = (endTime - startTime) & timestampDiffMask; + + double milliseconds = (double)delta * timestampConversionFactor; + + NOTICE_LOG(G3D, "%s%s (%0.3f ms)", indent[scope.level & 3], scope.name.c_str(), milliseconds); + } + } + + // Only need to reset all on the first frame. + if (firstFrame_) { + numQueries_ = MAX_QUERY_COUNT; + firstFrame_ = false; + } + vkCmdResetQueryPool(firstCommandBuf, queryPool_, 0, numQueries_); + numQueries_ = 0; +} + +void VulkanProfiler::EndFrame() { + // Not much to do here really except check that all scopes are closed. +} + +void VulkanProfiler::Begin(VkCommandBuffer cmdBuf, std::string scopeName, VkPipelineStageFlagBits stageFlags) { + if (numQueries_ >= MAX_QUERY_COUNT - 1) { + return; + } + + ProfilerScope scope; + scope.name = scopeName; + scope.startQueryId = numQueries_; + scope.endQueryId = -1; + scope.level = (int)scopeStack_.size(); + + scopeStack_.push_back(scopes_.size()); + scopes_.push_back(scope); + + vkCmdWriteTimestamp(cmdBuf, stageFlags, queryPool_, numQueries_); + numQueries_++; +} + +void VulkanProfiler::End(VkCommandBuffer cmdBuf, VkPipelineStageFlagBits stageFlags) { + if (numQueries_ >= MAX_QUERY_COUNT - 1) { + return; + } + + size_t scopeId = scopeStack_.back(); + scopeStack_.pop_back(); + + ProfilerScope &scope = scopes_[scopeId]; + scope.endQueryId = numQueries_; + + vkCmdWriteTimestamp(cmdBuf, stageFlags, queryPool_, numQueries_); + numQueries_++; +} diff --git a/Common/GPU/Vulkan/VulkanProfiler.h b/Common/GPU/Vulkan/VulkanProfiler.h new file mode 100644 index 0000000000..ab2092aa90 --- /dev/null +++ b/Common/GPU/Vulkan/VulkanProfiler.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include + +#include "Common/Log.h" +#include "VulkanLoader.h" + +// Simple scoped based profiler, initially meant for instant one-time tasks like texture uploads +// etc. Supports recursive scopes. Scopes are not yet tracked separately for each command buffer. +// For the pass profiler in VulkanQueueRunner, a purpose-built separate profiler that can take only +// one measurement between each pass makes more sense. +// +// Put the whole thing in a FrameData to allow for overlap. + +struct ProfilerScope { + std::string name; + size_t startQueryId; + size_t endQueryId; + int level; +}; + +class VulkanContext; + + +class VulkanProfiler { +public: + void Init(VulkanContext *vulkan); + void Shutdown(); + + void BeginFrame(VulkanContext *vulkan, VkCommandBuffer firstCommandBuffer); + + void EndFrame(); + + void Begin(VkCommandBuffer cmdBuf, std::string scopeName, VkPipelineStageFlagBits stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); + void End(VkCommandBuffer cmdBuf, VkPipelineStageFlagBits stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); + +private: + VulkanContext *vulkan_; + + VkQueryPool queryPool_ = VK_NULL_HANDLE; + std::vector scopes_; + int numQueries_ = 0; + bool firstFrame_ = true; + + std::vector scopeStack_; + + const int MAX_QUERY_COUNT = 1024; +}; diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 90e7a759e1..aa47c06782 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -583,7 +583,7 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling) { if (!run_) { WARN_LOG(G3D, "BeginFrame while !run_!"); } - vulkan_->BeginFrame(); + vulkan_->BeginFrame(GetInitCmd()); insideFrame_ = true; renderStepOffset_ = 0; diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index c57b2a7c3f..206f238185 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -835,10 +835,12 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { VkImageView view = entry->vkTex->CreateViewForMip(i); VkDescriptorSet descSet = computeShaderManager_.GetDescriptorSet(view, texBuf, bufferOffset, srcSize); struct Params { int x; int y; } params{ mipUnscaledWidth, mipUnscaledHeight }; + vulkan->GetProfiler()->Begin(cmdInit, StringFromFormat("Compute Upload: %dx%d", mipUnscaledWidth, mipUnscaledHeight), VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); vkCmdBindPipeline(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, computeShaderManager_.GetPipeline(uploadCS_)); vkCmdBindDescriptorSets(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, computeShaderManager_.GetPipelineLayout(), 0, 1, &descSet, 0, nullptr); vkCmdPushConstants(cmdInit, computeShaderManager_.GetPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(params), ¶ms); vkCmdDispatch(cmdInit, (mipUnscaledWidth + 7) / 8, (mipUnscaledHeight + 7) / 8, 1); + vulkan->GetProfiler()->End(cmdInit, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); vulkan->Delete().QueueDeleteImageView(view); } else { data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment); diff --git a/android/jni/Android.mk b/android/jni/Android.mk index c62de2889f..6e255a8c36 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -131,6 +131,7 @@ VULKAN_FILES := \ $(SRC)/Common/GPU/Vulkan/VulkanDebug.cpp \ $(SRC)/Common/GPU/Vulkan/VulkanImage.cpp \ $(SRC)/Common/GPU/Vulkan/VulkanMemory.cpp \ + $(SRC)/Common/GPU/Vulkan/VulkanProfiler.cpp \ $(SRC)/GPU/Vulkan/DrawEngineVulkan.cpp \ $(SRC)/GPU/Vulkan/FramebufferManagerVulkan.cpp \ $(SRC)/GPU/Vulkan/GPU_Vulkan.cpp \ diff --git a/libretro/Makefile.common b/libretro/Makefile.common index 32c10ee8db..9d4a0dcbd1 100644 --- a/libretro/Makefile.common +++ b/libretro/Makefile.common @@ -262,6 +262,7 @@ SOURCES_CXX += \ $(COMMONDIR)/GPU/Vulkan/VulkanDebug.cpp \ $(COMMONDIR)/GPU/Vulkan/VulkanImage.cpp \ $(COMMONDIR)/GPU/Vulkan/VulkanMemory.cpp \ + $(COMMONDIR)/GPU/Vulkan/VulkanProfiler.cpp \ $(COMMONDIR)/Input/GestureDetector.cpp \ $(COMMONDIR)/Input/InputState.cpp \ $(COMMONDIR)/Math/curves.cpp \