Add a simple GPU profiler to profile individual events, rather than full passes.

This commit is contained in:
Henrik Rydgård 2021-12-12 11:34:05 +01:00
parent e1ff73061a
commit 55fe21db1e
13 changed files with 187 additions and 4 deletions

View file

@ -589,6 +589,8 @@ add_library(Common STATIC
Common/GPU/Vulkan/VulkanLoader.h
Common/GPU/Vulkan/VulkanMemory.cpp
Common/GPU/Vulkan/VulkanMemory.h
Common/GPU/Vulkan/VulkanProfiler.cpp
Common/GPU/Vulkan/VulkanProfiler.h
Common/GPU/Vulkan/thin3d_vulkan.cpp
Common/GPU/Vulkan/VulkanRenderManager.cpp
Common/GPU/Vulkan/VulkanRenderManager.h

View file

@ -443,6 +443,7 @@
<ClInclude Include="GPU\Vulkan\VulkanImage.h" />
<ClInclude Include="GPU\Vulkan\VulkanLoader.h" />
<ClInclude Include="GPU\Vulkan\VulkanMemory.h" />
<ClInclude Include="GPU\Vulkan\VulkanProfiler.h" />
<ClInclude Include="GPU\Vulkan\VulkanQueueRunner.h" />
<ClInclude Include="GPU\Vulkan\VulkanRenderManager.h" />
<ClInclude Include="Input\GestureDetector.h" />
@ -870,6 +871,7 @@
<ClCompile Include="GPU\Vulkan\VulkanImage.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanLoader.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanMemory.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanProfiler.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanQueueRunner.cpp" />
<ClCompile Include="GPU\Vulkan\VulkanRenderManager.cpp" />
<ClCompile Include="Input\GestureDetector.cpp" />

View file

@ -412,6 +412,9 @@
<ClInclude Include="GPU\Vulkan\VulkanAlloc.h">
<Filter>GPU\Vulkan</Filter>
</ClInclude>
<ClInclude Include="GPU\Vulkan\VulkanProfiler.h">
<Filter>GPU\Vulkan</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="ABI.cpp" />
@ -786,6 +789,9 @@
<ClCompile Include="..\ext\vma\vk_mem_alloc.cpp">
<Filter>ext\vma</Filter>
</ClCompile>
<ClCompile Include="GPU\Vulkan\VulkanProfiler.cpp">
<Filter>GPU\Vulkan</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="Crypto">

View file

@ -289,14 +289,16 @@ void VulkanContext::DestroyInstance() {
instance_ = VK_NULL_HANDLE;
}
// Per-frame setup for the current frame slot: runs that slot's pending deletes, then starts
// the slot's profiler frame on the supplied command buffer.
// NOTE(review): the parameterless signature on the next line looks like the pre-change line
// of this diff hunk, with the line after it being its replacement - confirm.
void VulkanContext::BeginFrame() {
void VulkanContext::BeginFrame(VkCommandBuffer firstCommandBuffer) {
FrameData *frame = &frame_[curFrame_];
// Process pending deletes.
frame->deleteList.PerformDeletes(device_, allocator_);
// Let the profiler of this frame slot start its frame, using firstCommandBuffer.
frame->profiler.BeginFrame(this, firstCommandBuffer);
}
void VulkanContext::EndFrame() {
frame_[curFrame_].deleteList.Take(globalDeleteList_);
frame_[curFrame_].profiler.EndFrame();
curFrame_++;
if (curFrame_ >= inflightFrames_) {
curFrame_ = 0;
@ -675,6 +677,11 @@ VkResult VulkanContext::CreateDevice() {
allocatorInfo.device = device_;
allocatorInfo.instance = instance_;
vmaCreateAllocator(&allocatorInfo, &allocator_);
for (int i = 0; i < GetInflightFrames(); i++) {
frame_[i].profiler.Init(this);
}
return res;
}
@ -1122,6 +1129,10 @@ void VulkanContext::DestroyDevice() {
INFO_LOG(G3D, "VulkanContext::DestroyDevice (performing deletes)");
PerformPendingDeletes();
for (int i = 0; i < GetInflightFrames(); i++) {
frame_[i].profiler.Shutdown();
}
vmaDestroyAllocator(allocator_);
allocator_ = VK_NULL_HANDLE;

View file

@ -9,6 +9,7 @@
#include "Common/GPU/Vulkan/VulkanLoader.h"
#include "Common/GPU/Vulkan/VulkanDebug.h"
#include "Common/GPU/Vulkan/VulkanAlloc.h"
#include "Common/GPU/Vulkan/VulkanProfiler.h"
enum {
VULKAN_FLAG_VALIDATE = 1,
@ -62,6 +63,8 @@ struct VulkanPhysicalDeviceInfo {
bool canBlitToPreferredDepthStencilFormat;
};
class VulkanProfiler;
// This is a bit repetitive...
class VulkanDeleteList {
struct BufferWithAlloc {
@ -188,9 +191,13 @@ public:
int GetBackbufferWidth() { return (int)swapChainExtent_.width; }
int GetBackbufferHeight() { return (int)swapChainExtent_.height; }
void BeginFrame();
void BeginFrame(VkCommandBuffer firstCommandBuffer);
void EndFrame();
// Returns the profiler stored in the frame slot currently selected by curFrame_.
VulkanProfiler *GetProfiler() {
	auto &currentFrame = frame_[curFrame_];
	return &currentFrame.profiler;
}
// Simple workaround for the casting warning.
template <class T>
void SetDebugName(T handle, VkObjectType type, const char *name) {
@ -369,6 +376,7 @@ private:
struct FrameData {
FrameData() {}
VulkanDeleteList deleteList;
VulkanProfiler profiler;
};
FrameData frame_[MAX_INFLIGHT_FRAMES];
int curFrame_ = 0;

View file

@ -1,6 +1,7 @@
#include <algorithm>
#include "Common/Log.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanAlloc.h"
#include "Common/GPU/Vulkan/VulkanImage.h"
#include "Common/GPU/Vulkan/VulkanMemory.h"

View file

@ -1,7 +1,9 @@
#pragma once
#include "Common/GPU/Vulkan/VulkanContext.h"
#include <string>
#include "VulkanLoader.h"
class VulkanContext;
class VulkanDeviceAllocator;
VK_DEFINE_HANDLE(VmaAllocation);

View file

@ -0,0 +1,98 @@
#include "VulkanProfiler.h"
#include "VulkanContext.h"
using namespace PPSSPP_VK;
// Creates the timestamp query pool used by this profiler.
//
// vulkan: context whose device the pool is created on; it is remembered in vulkan_ so that
// Shutdown() can destroy the pool on the same device.
void VulkanProfiler::Init(VulkanContext *vulkan) {
	vulkan_ = vulkan;

	// Exactly one pool per profiler. VulkanContext already keeps a separate VulkanProfiler in
	// each in-flight FrameData and calls Init() on every one of them, so looping over the
	// in-flight frames here would only overwrite queryPool_ and leak all pools but the last.
	VkQueryPoolCreateInfo ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
	ci.queryCount = MAX_QUERY_COUNT;
	ci.queryType = VK_QUERY_TYPE_TIMESTAMP;
	vkCreateQueryPool(vulkan->GetDevice(), &ci, nullptr, &queryPool_);
}
void VulkanProfiler::Shutdown() {
for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
vkDestroyQueryPool(vulkan_->GetDevice(), queryPool_, nullptr);
}
}
// Starts a new profiling frame.
//
// Reads back and logs the scopes recorded the previous time this profiler was used, forgets
// them, and records a reset of the used query range into firstCommandBuf so that the queries
// can be written again by Begin()/End().
//
// vulkan: context providing the device, the timestamp period and the valid timestamp bits.
// firstCommandBuf: command buffer the query reset is recorded into.
//   NOTE(review): presumably this executes before any command buffer later passed to
//   Begin()/End() in the same frame - confirm against the caller.
void VulkanProfiler::BeginFrame(VulkanContext *vulkan, VkCommandBuffer firstCommandBuf) {
	vulkan_ = vulkan;

	// Check for old queries belonging to this frame context that we can log out - these are
	// expected to be done by now.
	if (numQueries_ > 0) {
		std::vector<uint64_t> results(numQueries_);
		// No WAIT flag is passed, so if some query never became available the call reports that
		// through its return value instead of blocking, and results must not be trusted.
		const VkResult res = vkGetQueryPoolResults(vulkan->GetDevice(), queryPool_, 0, numQueries_, sizeof(uint64_t) * numQueries_, results.data(), sizeof(uint64_t), VK_QUERY_RESULT_64_BIT);
		const bool resultsValid = (res == VK_SUCCESS);

		// timestampPeriod is the number of nanoseconds per timestamp tick; scale to milliseconds.
		const double timestampConversionFactor = (double)vulkan_->GetPhysicalDeviceProperties().properties.limits.timestampPeriod * (1.0 / 1000000.0);
		// Only the low validBits bits of a timestamp are meaningful, so differences are masked
		// to that width (this also handles counter wrap-around).
		const int validBits = vulkan_->GetQueueFamilyProperties(vulkan_->GetGraphicsQueueFamilyIndex()).timestampValidBits;
		const uint64_t timestampDiffMask = validBits == 64 ? 0xFFFFFFFFFFFFFFFFULL : ((1ULL << validBits) - 1);

		// Two more spaces per nesting level, so nested scopes are visibly indented in the log.
		static const char * const indent[4] = { "", "  ", "    ", "      " };

		// Log it all out.
		for (const auto &scope : scopes_) {
			if (scope.endQueryId == static_cast<size_t>(-1)) {
				NOTICE_LOG(G3D, "Unclosed scope: %s", scope.name.c_str());
				continue;
			}
			// Skip timings we cannot compute: results unavailable, or ids outside what was read.
			if (!resultsValid || scope.startQueryId >= results.size() || scope.endQueryId >= results.size()) {
				continue;
			}

			const uint64_t startTime = results[scope.startQueryId];
			const uint64_t endTime = results[scope.endQueryId];
			const uint64_t delta = (endTime - startTime) & timestampDiffMask;
			const double milliseconds = (double)delta * timestampConversionFactor;

			// Clamp rather than wrap: deeper nesting keeps the deepest indent instead of
			// jumping back to no indent.
			const int indentLevel = scope.level > 3 ? 3 : (scope.level < 0 ? 0 : scope.level);
			NOTICE_LOG(G3D, "%s%s (%0.3f ms)", indent[indentLevel], scope.name.c_str(), milliseconds);
		}
	}

	// The recorded scopes refer to query ids of the frame that was just read back. They must
	// not survive into the new frame: otherwise the list grows every frame and stale entries
	// index past the end of the next frame's results.
	scopes_.clear();
	scopeStack_.clear();

	// Only need to reset all on the first frame.
	if (firstFrame_) {
		numQueries_ = MAX_QUERY_COUNT;
		firstFrame_ = false;
	}
	// Nothing was written last frame, so there is nothing to reset.
	if (numQueries_ > 0) {
		vkCmdResetQueryPool(firstCommandBuf, queryPool_, 0, numQueries_);
	}
	numQueries_ = 0;
}
// Ends the profiling frame. Currently a no-op: nothing is read back or reset here.
void VulkanProfiler::EndFrame() {
// Not much to do here really except check that all scopes are closed.
// (That check is not implemented in this function.)
}
// Opens a named profiling scope and records its start timestamp.
//
// cmdBuf: command buffer the timestamp write is recorded into.
// scopeName: label stored with the scope.
// stageFlags: pipeline stage passed to vkCmdWriteTimestamp.
//
// Silently does nothing once the query budget is (nearly) used up; End() applies the same
// limit, so the two stay in step.
void VulkanProfiler::Begin(VkCommandBuffer cmdBuf, std::string scopeName, VkPipelineStageFlagBits stageFlags) {
	if (numQueries_ < MAX_QUERY_COUNT - 1) {
		// Index this scope is about to get in scopes_; End() uses it to find the scope again.
		const size_t newScopeIndex = scopes_.size();

		ProfilerScope opened;
		opened.name = scopeName;
		opened.startQueryId = numQueries_;
		// -1 marks the scope as not yet closed.
		opened.endQueryId = -1;
		// Nesting depth equals the number of scopes that are currently open.
		opened.level = (int)scopeStack_.size();

		scopeStack_.push_back(newScopeIndex);
		scopes_.push_back(opened);

		vkCmdWriteTimestamp(cmdBuf, stageFlags, queryPool_, numQueries_);
		++numQueries_;
	}
}
// Closes the most recently opened scope and records its end timestamp.
//
// cmdBuf: command buffer the timestamp write is recorded into.
// stageFlags: pipeline stage passed to vkCmdWriteTimestamp.
//
// Does nothing when the query budget is used up (same limit as Begin()) or when no scope
// is open.
void VulkanProfiler::End(VkCommandBuffer cmdBuf, VkPipelineStageFlagBits stageFlags) {
	if (numQueries_ >= MAX_QUERY_COUNT - 1) {
		return;
	}
	// An End() without a matching Begin() would otherwise call back() on an empty vector,
	// which is undefined behavior.
	if (scopeStack_.empty()) {
		return;
	}

	const size_t scopeId = scopeStack_.back();
	scopeStack_.pop_back();

	ProfilerScope &scope = scopes_[scopeId];
	scope.endQueryId = numQueries_;

	vkCmdWriteTimestamp(cmdBuf, stageFlags, queryPool_, numQueries_);
	numQueries_++;
}

View file

@ -0,0 +1,49 @@
#pragma once
#include <vector>
#include <string>
#include "Common/Log.h"
#include "VulkanLoader.h"
// Simple scope-based profiler, initially meant for instant one-time tasks like texture uploads
// etc. Supports recursive scopes. Scopes are not yet tracked separately for each command buffer.
// For the pass profiler in VulkanQueueRunner, a purpose-built separate profiler that can take only
// one measurement between each pass makes more sense.
//
// Put the whole thing in a FrameData to allow for overlap.
// Bookkeeping for one profiled scope.
//
// All members have defaults so a default-constructed scope is well-defined and reads as
// "not closed yet" instead of holding indeterminate values.
struct ProfilerScope {
	// Label shown when the scope is logged.
	std::string name;
	// Index of the query holding the start timestamp.
	size_t startQueryId = 0;
	// Index of the query holding the end timestamp; (size_t)-1 while the scope is still open.
	size_t endQueryId = static_cast<size_t>(-1);
	// Nesting depth: number of scopes that were open when this one was begun.
	int level = 0;
};
class VulkanContext;
// GPU timestamp profiler for named, nestable scopes.
//
// Typical use: Init() once, then per frame BeginFrame(), any number of balanced
// Begin()/End() pairs, and EndFrame(); Shutdown() when done.
class VulkanProfiler {
public:
	// Creates the timestamp query pool on the device of the given context.
	void Init(VulkanContext *vulkan);
	// Destroys the query pool.
	void Shutdown();

	// Logs the scopes recorded the previous time this profiler was used and records a reset
	// of the used queries into firstCommandBuffer.
	void BeginFrame(VulkanContext *vulkan, VkCommandBuffer firstCommandBuffer);
	void EndFrame();

	// Opens a scope named scopeName and writes its start timestamp into cmdBuf at the given
	// pipeline stage. Ignored once the per-frame query limit is reached.
	void Begin(VkCommandBuffer cmdBuf, std::string scopeName, VkPipelineStageFlagBits stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
	// Closes the innermost open scope and writes its end timestamp into cmdBuf at the given
	// pipeline stage. Ignored once the per-frame query limit is reached.
	void End(VkCommandBuffer cmdBuf, VkPipelineStageFlagBits stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);

private:
	// Size of the query pool, i.e. the upper bound on timestamps per frame. A class-level
	// constant rather than a per-instance const member, so it takes no space in each object
	// and does not make the class non-assignable.
	static constexpr int MAX_QUERY_COUNT = 1024;

	// Context set by Init()/BeginFrame(); null until then.
	VulkanContext *vulkan_ = nullptr;

	VkQueryPool queryPool_ = VK_NULL_HANDLE;
	// Scopes recorded so far, in the order they were begun.
	std::vector<ProfilerScope> scopes_;
	// Number of queries written since the last BeginFrame().
	int numQueries_ = 0;
	// True until the first BeginFrame(), which resets the whole pool.
	bool firstFrame_ = true;

	// Indices into scopes_ of the scopes that are currently open, innermost last.
	std::vector<size_t> scopeStack_;
};

View file

@ -583,7 +583,7 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling) {
if (!run_) {
WARN_LOG(G3D, "BeginFrame while !run_!");
}
vulkan_->BeginFrame();
vulkan_->BeginFrame(GetInitCmd());
insideFrame_ = true;
renderStepOffset_ = 0;

View file

@ -835,10 +835,12 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
VkImageView view = entry->vkTex->CreateViewForMip(i);
VkDescriptorSet descSet = computeShaderManager_.GetDescriptorSet(view, texBuf, bufferOffset, srcSize);
struct Params { int x; int y; } params{ mipUnscaledWidth, mipUnscaledHeight };
vulkan->GetProfiler()->Begin(cmdInit, StringFromFormat("Compute Upload: %dx%d", mipUnscaledWidth, mipUnscaledHeight), VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
vkCmdBindPipeline(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, computeShaderManager_.GetPipeline(uploadCS_));
vkCmdBindDescriptorSets(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, computeShaderManager_.GetPipelineLayout(), 0, 1, &descSet, 0, nullptr);
vkCmdPushConstants(cmdInit, computeShaderManager_.GetPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(params), &params);
vkCmdDispatch(cmdInit, (mipUnscaledWidth + 7) / 8, (mipUnscaledHeight + 7) / 8, 1);
vulkan->GetProfiler()->End(cmdInit, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
vulkan->Delete().QueueDeleteImageView(view);
} else {
data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment);

View file

@ -131,6 +131,7 @@ VULKAN_FILES := \
$(SRC)/Common/GPU/Vulkan/VulkanDebug.cpp \
$(SRC)/Common/GPU/Vulkan/VulkanImage.cpp \
$(SRC)/Common/GPU/Vulkan/VulkanMemory.cpp \
$(SRC)/Common/GPU/Vulkan/VulkanProfiler.cpp \
$(SRC)/GPU/Vulkan/DrawEngineVulkan.cpp \
$(SRC)/GPU/Vulkan/FramebufferManagerVulkan.cpp \
$(SRC)/GPU/Vulkan/GPU_Vulkan.cpp \

View file

@ -262,6 +262,7 @@ SOURCES_CXX += \
$(COMMONDIR)/GPU/Vulkan/VulkanDebug.cpp \
$(COMMONDIR)/GPU/Vulkan/VulkanImage.cpp \
$(COMMONDIR)/GPU/Vulkan/VulkanMemory.cpp \
$(COMMONDIR)/GPU/Vulkan/VulkanProfiler.cpp \
$(COMMONDIR)/Input/GestureDetector.cpp \
$(COMMONDIR)/Input/InputState.cpp \
$(COMMONDIR)/Math/curves.cpp \