diff --git a/CMakeLists.txt b/CMakeLists.txt
index 82cf012edf..62ced8270e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -589,6 +589,8 @@ add_library(Common STATIC
Common/GPU/Vulkan/VulkanLoader.h
Common/GPU/Vulkan/VulkanMemory.cpp
Common/GPU/Vulkan/VulkanMemory.h
+ Common/GPU/Vulkan/VulkanProfiler.cpp
+ Common/GPU/Vulkan/VulkanProfiler.h
Common/GPU/Vulkan/thin3d_vulkan.cpp
Common/GPU/Vulkan/VulkanRenderManager.cpp
Common/GPU/Vulkan/VulkanRenderManager.h
diff --git a/Common/Common.vcxproj b/Common/Common.vcxproj
index 2bdf069582..7e61f89776 100644
--- a/Common/Common.vcxproj
+++ b/Common/Common.vcxproj
@@ -443,6 +443,7 @@
+
@@ -870,6 +871,7 @@
+
diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters
index 32c392cf6a..65b6eaccab 100644
--- a/Common/Common.vcxproj.filters
+++ b/Common/Common.vcxproj.filters
@@ -412,6 +412,9 @@
GPU\Vulkan
+
+ GPU\Vulkan
+
@@ -786,6 +789,9 @@
ext\vma
+
+ GPU\Vulkan
+
diff --git a/Common/GPU/Vulkan/VulkanContext.cpp b/Common/GPU/Vulkan/VulkanContext.cpp
index 94c8bb2b06..2cb0e228af 100644
--- a/Common/GPU/Vulkan/VulkanContext.cpp
+++ b/Common/GPU/Vulkan/VulkanContext.cpp
@@ -289,14 +289,16 @@ void VulkanContext::DestroyInstance() {
instance_ = VK_NULL_HANDLE;
}
-void VulkanContext::BeginFrame() {
+void VulkanContext::BeginFrame(VkCommandBuffer firstCommandBuffer) {
FrameData *frame = &frame_[curFrame_];
// Process pending deletes.
frame->deleteList.PerformDeletes(device_, allocator_);
+ // Let this frame slot's profiler log its previous timings and record its query reset into firstCommandBuffer.
+ frame->profiler.BeginFrame(this, firstCommandBuffer);
}
void VulkanContext::EndFrame() {
frame_[curFrame_].deleteList.Take(globalDeleteList_);
+ frame_[curFrame_].profiler.EndFrame();
curFrame_++;
if (curFrame_ >= inflightFrames_) {
curFrame_ = 0;
@@ -675,6 +677,11 @@ VkResult VulkanContext::CreateDevice() {
allocatorInfo.device = device_;
allocatorInfo.instance = instance_;
vmaCreateAllocator(&allocatorInfo, &allocator_);
+
+ for (int i = 0; i < GetInflightFrames(); i++) {
+ frame_[i].profiler.Init(this);
+ }
+
return res;
}
@@ -1122,6 +1129,10 @@ void VulkanContext::DestroyDevice() {
INFO_LOG(G3D, "VulkanContext::DestroyDevice (performing deletes)");
PerformPendingDeletes();
+ for (int i = 0; i < GetInflightFrames(); i++) {
+ frame_[i].profiler.Shutdown();
+ }
+
vmaDestroyAllocator(allocator_);
allocator_ = VK_NULL_HANDLE;
diff --git a/Common/GPU/Vulkan/VulkanContext.h b/Common/GPU/Vulkan/VulkanContext.h
index c119dc6393..04d163f76f 100644
--- a/Common/GPU/Vulkan/VulkanContext.h
+++ b/Common/GPU/Vulkan/VulkanContext.h
@@ -9,6 +9,7 @@
#include "Common/GPU/Vulkan/VulkanLoader.h"
#include "Common/GPU/Vulkan/VulkanDebug.h"
#include "Common/GPU/Vulkan/VulkanAlloc.h"
+#include "Common/GPU/Vulkan/VulkanProfiler.h"
enum {
VULKAN_FLAG_VALIDATE = 1,
@@ -62,6 +63,8 @@ struct VulkanPhysicalDeviceInfo {
bool canBlitToPreferredDepthStencilFormat;
};
+class VulkanProfiler;
+
// This is a bit repetitive...
class VulkanDeleteList {
struct BufferWithAlloc {
@@ -188,9 +191,13 @@ public:
int GetBackbufferWidth() { return (int)swapChainExtent_.width; }
int GetBackbufferHeight() { return (int)swapChainExtent_.height; }
- void BeginFrame();
+ void BeginFrame(VkCommandBuffer firstCommandBuffer);
void EndFrame();
+ // Returns the profiler of the frame slot currently selected by curFrame_.
+ VulkanProfiler *GetProfiler() {
+ return &frame_[curFrame_].profiler;
+ }
+
// Simple workaround for the casting warning.
template
void SetDebugName(T handle, VkObjectType type, const char *name) {
@@ -369,6 +376,7 @@ private:
struct FrameData {
FrameData() {}
VulkanDeleteList deleteList;
+ // GPU timestamp profiler for this frame slot; each slot has its own, with its own query pool.
+ VulkanProfiler profiler;
};
FrameData frame_[MAX_INFLIGHT_FRAMES];
int curFrame_ = 0;
diff --git a/Common/GPU/Vulkan/VulkanImage.cpp b/Common/GPU/Vulkan/VulkanImage.cpp
index 3a6cd2ce88..7a4f9bf016 100644
--- a/Common/GPU/Vulkan/VulkanImage.cpp
+++ b/Common/GPU/Vulkan/VulkanImage.cpp
@@ -1,6 +1,7 @@
#include
#include "Common/Log.h"
+#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanAlloc.h"
#include "Common/GPU/Vulkan/VulkanImage.h"
#include "Common/GPU/Vulkan/VulkanMemory.h"
diff --git a/Common/GPU/Vulkan/VulkanImage.h b/Common/GPU/Vulkan/VulkanImage.h
index 356a517494..adbd167cf5 100644
--- a/Common/GPU/Vulkan/VulkanImage.h
+++ b/Common/GPU/Vulkan/VulkanImage.h
@@ -1,7 +1,9 @@
#pragma once
-#include "Common/GPU/Vulkan/VulkanContext.h"
+#include
+#include "VulkanLoader.h"
+class VulkanContext;
class VulkanDeviceAllocator;
VK_DEFINE_HANDLE(VmaAllocation);
diff --git a/Common/GPU/Vulkan/VulkanProfiler.cpp b/Common/GPU/Vulkan/VulkanProfiler.cpp
new file mode 100644
index 0000000000..e23a3b4a86
--- /dev/null
+++ b/Common/GPU/Vulkan/VulkanProfiler.cpp
@@ -0,0 +1,98 @@
+#include "VulkanProfiler.h"
+#include "VulkanContext.h"
+
+using namespace PPSSPP_VK;
+
+// Creates the timestamp query pool (MAX_QUERY_COUNT queries) backing this profiler.
+//
+// vulkan: context whose device owns the pool; also remembered for later calls.
+//
+// Exactly one pool is created: queryPool_ is a single handle, and VulkanContext already keeps a
+// separate VulkanProfiler per in-flight frame, so creating one pool per in-flight frame here
+// would overwrite the handle each time and leak all but the last pool.
+void VulkanProfiler::Init(VulkanContext *vulkan) {
+	vulkan_ = vulkan;
+
+	VkQueryPoolCreateInfo ci{ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
+	ci.queryCount = MAX_QUERY_COUNT;
+	ci.queryType = VK_QUERY_TYPE_TIMESTAMP;
+	// NOTE(review): the VkResult of the creation call is still ignored.
+	vkCreateQueryPool(vulkan->GetDevice(), &ci, nullptr, &queryPool_);
+}
+
+// Destroys the query pool and forgets all per-frame bookkeeping.
+//
+// The pool is destroyed exactly once (destroying the same handle once per in-flight frame would
+// be a repeated destroy of an already-destroyed object). Calling this without a prior Init(), or
+// calling it twice, is a no-op for the pool.
+void VulkanProfiler::Shutdown() {
+	if (queryPool_ != VK_NULL_HANDLE) {
+		vkDestroyQueryPool(vulkan_->GetDevice(), queryPool_, nullptr);
+		queryPool_ = VK_NULL_HANDLE;
+	}
+
+	// Start from scratch if Init() is called again: a fresh pool needs the full first-frame reset
+	// and must not be asked for results of queries that were written to the old pool.
+	scopes_.clear();
+	scopeStack_.clear();
+	numQueries_ = 0;
+	firstFrame_ = true;
+}
+
+// Starts a new profiling frame on this profiler.
+//
+// Reads back and logs the timings of all scopes recorded the last time this profiler was used,
+// discards that bookkeeping, and records a reset of the used queries into firstCommandBuf.
+//
+// vulkan:          context supplying the device and the timestamp properties.
+// firstCommandBuf: command buffer the query pool reset is recorded into.
+void VulkanProfiler::BeginFrame(VulkanContext *vulkan, VkCommandBuffer firstCommandBuf) {
+	vulkan_ = vulkan;
+
+	// Check for old queries belonging to this frame context that we can log out - these are
+	// expected to be done by now, so the results are fetched without a WAIT flag.
+	if (numQueries_ > 0) {
+		std::vector<uint64_t> results(numQueries_);
+		const VkResult res = vkGetQueryPoolResults(vulkan->GetDevice(), queryPool_, 0, numQueries_, sizeof(uint64_t) * numQueries_, results.data(), sizeof(uint64_t), VK_QUERY_RESULT_64_BIT);
+
+		if (res != VK_SUCCESS) {
+			// Without complete results the deltas below would be computed from garbage.
+			NOTICE_LOG(G3D, "Profiler query results unavailable (VkResult %d)", (int)res);
+		} else {
+			// timestampPeriod is nanoseconds per tick; the extra factor turns ticks into milliseconds.
+			const double timestampConversionFactor = static_cast<double>(vulkan_->GetPhysicalDeviceProperties().properties.limits.timestampPeriod) * (1.0 / 1000000.0);
+			// Only the low timestampValidBits bits of a timestamp are meaningful, so the
+			// difference is masked to that width.
+			const int validBits = vulkan_->GetQueueFamilyProperties(vulkan_->GetGraphicsQueueFamilyIndex()).timestampValidBits;
+			const uint64_t timestampDiffMask = validBits == 64 ? 0xFFFFFFFFFFFFFFFFULL : ((1ULL << validBits) - 1);
+
+			// NOTE(review): levels 1-3 currently share the same indent string - confirm whether
+			// deeper levels were meant to be indented further.
+			static const char * const indent[4] = { "", " ", " ", " " };
+			// Log it all out.
+			for (const auto &scope : scopes_) {
+				if (scope.endQueryId == static_cast<size_t>(-1)) {
+					NOTICE_LOG(G3D, "Unclosed scope: %s", scope.name.c_str());
+					continue;
+				}
+				const uint64_t startTime = results[scope.startQueryId];
+				const uint64_t endTime = results[scope.endQueryId];
+
+				const uint64_t delta = (endTime - startTime) & timestampDiffMask;
+
+				const double milliseconds = static_cast<double>(delta) * timestampConversionFactor;
+
+				NOTICE_LOG(G3D, "%s%s (%0.3f ms)", indent[scope.level & 3], scope.name.c_str(), milliseconds);
+			}
+		}
+	}
+
+	// The scopes above belong to the frame that was just logged. Dropping them here keeps them
+	// from being logged again, and from indexing past a shorter results array, next time.
+	scopes_.clear();
+	scopeStack_.clear();
+
+	// Only need to reset all on the first frame; afterwards just the queries that were written.
+	if (firstFrame_) {
+		numQueries_ = MAX_QUERY_COUNT;
+		firstFrame_ = false;
+	}
+	if (numQueries_ > 0) {
+		vkCmdResetQueryPool(firstCommandBuf, queryPool_, 0, numQueries_);
+	}
+	numQueries_ = 0;
+}
+
+// End-of-frame hook, called from VulkanContext::EndFrame(). Currently performs no work.
+void VulkanProfiler::EndFrame() {
+ // Not much to do here really except check that all scopes are closed (not implemented yet).
+}
+
+// Opens a named scope and records its start timestamp into cmdBuf.
+//
+// cmdBuf:     command buffer the timestamp write is recorded into.
+// scopeName:  label printed when the scope's timing is logged.
+// stageFlags: pipeline stage at which the timestamp is taken.
+//
+// Scopes may nest; the nesting depth at this point is stored as the scope's level. When the
+// query budget is (almost) used up the call is silently ignored.
+void VulkanProfiler::Begin(VkCommandBuffer cmdBuf, std::string scopeName, VkPipelineStageFlagBits stageFlags) {
+	if (numQueries_ >= MAX_QUERY_COUNT - 1) {
+		return;
+	}
+
+	// Depth must be sampled before this scope is pushed onto the stack.
+	const int level = (int)scopeStack_.size();
+	scopeStack_.push_back(scopes_.size());
+
+	// Build the scope in place and take over the caller's by-value string instead of copying
+	// the name twice (once into a temporary scope, once into the vector).
+	scopes_.emplace_back();
+	ProfilerScope &scope = scopes_.back();
+	scope.name.swap(scopeName);
+	scope.startQueryId = numQueries_;
+	scope.endQueryId = static_cast<size_t>(-1);  // Marks the scope as still open.
+	scope.level = level;
+
+	vkCmdWriteTimestamp(cmdBuf, stageFlags, queryPool_, numQueries_);
+	numQueries_++;
+}
+
+// Closes the innermost open scope and records its end timestamp into cmdBuf.
+//
+// cmdBuf:     command buffer the timestamp write is recorded into.
+// stageFlags: pipeline stage at which the timestamp is taken.
+//
+// Ignored when the query budget is (almost) used up, using the same check as Begin(); a scope
+// that is already open then never receives an end query. Also ignored when no scope is open.
+void VulkanProfiler::End(VkCommandBuffer cmdBuf, VkPipelineStageFlagBits stageFlags) {
+	if (numQueries_ >= MAX_QUERY_COUNT - 1) {
+		return;
+	}
+
+	// End() without a matching Begin(): calling back() on an empty vector is undefined behavior.
+	if (scopeStack_.empty()) {
+		return;
+	}
+
+	const size_t scopeId = scopeStack_.back();
+	scopeStack_.pop_back();
+
+	ProfilerScope &scope = scopes_[scopeId];
+	scope.endQueryId = numQueries_;
+
+	vkCmdWriteTimestamp(cmdBuf, stageFlags, queryPool_, numQueries_);
+	numQueries_++;
+}
diff --git a/Common/GPU/Vulkan/VulkanProfiler.h b/Common/GPU/Vulkan/VulkanProfiler.h
new file mode 100644
index 0000000000..ab2092aa90
--- /dev/null
+++ b/Common/GPU/Vulkan/VulkanProfiler.h
@@ -0,0 +1,49 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "Common/Log.h"
+#include "VulkanLoader.h"
+
+// Simple scope-based profiler, initially meant for instant one-time tasks like texture uploads
+// etc. Supports recursive scopes. Scopes are not yet tracked separately for each command buffer.
+// For the pass profiler in VulkanQueueRunner, a purpose-built separate profiler that can take only
+// one measurement between each pass makes more sense.
+//
+// Put the whole thing in a FrameData to allow for overlap.
+
+// One timed region recorded by VulkanProfiler. The query ids are indices into the profiler's
+// timestamp query pool.
+struct ProfilerScope {
+	// Label printed when the timing is logged.
+	std::string name;
+	// Query that receives the timestamp written by Begin().
+	size_t startQueryId = 0;
+	// Query that receives the timestamp written by End(); stays at -1 (all bits set) while open.
+	size_t endQueryId = static_cast<size_t>(-1);
+	// Nesting depth at Begin() time; selects the indentation of the log line.
+	int level = 0;
+};
+
+class VulkanContext;
+
+
+// GPU timestamp profiler built on a Vulkan timestamp query pool.
+//
+// Usage per frame: BeginFrame(), any number of (possibly nested) Begin()/End() pairs, EndFrame().
+// The timings of a frame are logged by the next BeginFrame() call on the same instance.
+class VulkanProfiler {
+public:
+	// Creates the query pool on the context's device.
+	void Init(VulkanContext *vulkan);
+	// Destroys the query pool.
+	void Shutdown();
+
+	// Logs the scopes recorded since the previous BeginFrame() and records a reset of the used
+	// queries into firstCommandBuffer.
+	void BeginFrame(VulkanContext *vulkan, VkCommandBuffer firstCommandBuffer);
+
+	// Currently does nothing.
+	void EndFrame();
+
+	// Opens a named scope / closes the innermost open scope, each writing one timestamp into
+	// cmdBuf at the given pipeline stage. Both are ignored once the query budget is used up.
+	void Begin(VkCommandBuffer cmdBuf, std::string scopeName, VkPipelineStageFlagBits stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
+	void End(VkCommandBuffer cmdBuf, VkPipelineStageFlagBits stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
+
+private:
+	// Set by Init() and refreshed by every BeginFrame().
+	VulkanContext *vulkan_ = nullptr;
+
+	VkQueryPool queryPool_ = VK_NULL_HANDLE;
+	// Every scope opened since the last BeginFrame(), in Begin() order.
+	std::vector<ProfilerScope> scopes_;
+	// Number of queries written since the last BeginFrame(); also the next free query index.
+	int numQueries_ = 0;
+	// True until the first BeginFrame(), which resets the whole pool rather than just the used part.
+	bool firstFrame_ = true;
+
+	// Indices into scopes_ of the currently open scopes, innermost last.
+	std::vector<size_t> scopeStack_;
+
+	// Size of the query pool. A class-level constant, so no per-instance storage is needed and
+	// the class stays assignable.
+	static constexpr int MAX_QUERY_COUNT = 1024;
+};
diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp
index 90e7a759e1..aa47c06782 100644
--- a/Common/GPU/Vulkan/VulkanRenderManager.cpp
+++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp
@@ -583,7 +583,7 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling) {
if (!run_) {
WARN_LOG(G3D, "BeginFrame while !run_!");
}
- vulkan_->BeginFrame();
+ vulkan_->BeginFrame(GetInitCmd());
insideFrame_ = true;
renderStepOffset_ = 0;
diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp
index c57b2a7c3f..206f238185 100644
--- a/GPU/Vulkan/TextureCacheVulkan.cpp
+++ b/GPU/Vulkan/TextureCacheVulkan.cpp
@@ -835,10 +835,12 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
VkImageView view = entry->vkTex->CreateViewForMip(i);
VkDescriptorSet descSet = computeShaderManager_.GetDescriptorSet(view, texBuf, bufferOffset, srcSize);
struct Params { int x; int y; } params{ mipUnscaledWidth, mipUnscaledHeight };
+ vulkan->GetProfiler()->Begin(cmdInit, StringFromFormat("Compute Upload: %dx%d", mipUnscaledWidth, mipUnscaledHeight), VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
vkCmdBindPipeline(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, computeShaderManager_.GetPipeline(uploadCS_));
vkCmdBindDescriptorSets(cmdInit, VK_PIPELINE_BIND_POINT_COMPUTE, computeShaderManager_.GetPipelineLayout(), 0, 1, &descSet, 0, nullptr);
vkCmdPushConstants(cmdInit, computeShaderManager_.GetPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(params), ¶ms);
vkCmdDispatch(cmdInit, (mipUnscaledWidth + 7) / 8, (mipUnscaledHeight + 7) / 8, 1);
+ vulkan->GetProfiler()->End(cmdInit, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
vulkan->Delete().QueueDeleteImageView(view);
} else {
data = drawEngine_->GetPushBufferForTextureData()->PushAligned(size, &bufferOffset, &texBuf, pushAlignment);
diff --git a/android/jni/Android.mk b/android/jni/Android.mk
index c62de2889f..6e255a8c36 100644
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@@ -131,6 +131,7 @@ VULKAN_FILES := \
$(SRC)/Common/GPU/Vulkan/VulkanDebug.cpp \
$(SRC)/Common/GPU/Vulkan/VulkanImage.cpp \
$(SRC)/Common/GPU/Vulkan/VulkanMemory.cpp \
+ $(SRC)/Common/GPU/Vulkan/VulkanProfiler.cpp \
$(SRC)/GPU/Vulkan/DrawEngineVulkan.cpp \
$(SRC)/GPU/Vulkan/FramebufferManagerVulkan.cpp \
$(SRC)/GPU/Vulkan/GPU_Vulkan.cpp \
diff --git a/libretro/Makefile.common b/libretro/Makefile.common
index 32c10ee8db..9d4a0dcbd1 100644
--- a/libretro/Makefile.common
+++ b/libretro/Makefile.common
@@ -262,6 +262,7 @@ SOURCES_CXX += \
$(COMMONDIR)/GPU/Vulkan/VulkanDebug.cpp \
$(COMMONDIR)/GPU/Vulkan/VulkanImage.cpp \
$(COMMONDIR)/GPU/Vulkan/VulkanMemory.cpp \
+ $(COMMONDIR)/GPU/Vulkan/VulkanProfiler.cpp \
$(COMMONDIR)/Input/GestureDetector.cpp \
$(COMMONDIR)/Input/InputState.cpp \
$(COMMONDIR)/Math/curves.cpp \