/* Copyright (c) 2017-2023 Hans-Kristian Arntzen * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "query_pool.hpp" #include "device.hpp" #include namespace Vulkan { static const char *storage_to_str(VkPerformanceCounterStorageKHR storage) { switch (storage) { case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR: return "float32"; case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR: return "float64"; case VK_PERFORMANCE_COUNTER_STORAGE_INT32_KHR: return "int32"; case VK_PERFORMANCE_COUNTER_STORAGE_INT64_KHR: return "int64"; case VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR: return "uint32"; case VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR: return "uint64"; default: return "???"; } } static const char *scope_to_str(VkPerformanceCounterScopeKHR scope) { switch (scope) { case VK_QUERY_SCOPE_COMMAND_BUFFER_KHR: return "command buffer"; case VK_QUERY_SCOPE_RENDER_PASS_KHR: return "render pass"; case VK_QUERY_SCOPE_COMMAND_KHR: return "command"; default: return "???"; } } static const char *unit_to_str(VkPerformanceCounterUnitKHR unit) { switch (unit) { case VK_PERFORMANCE_COUNTER_UNIT_AMPS_KHR: return "A"; case VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR: return "bytes"; case VK_PERFORMANCE_COUNTER_UNIT_BYTES_PER_SECOND_KHR: return "bytes / second"; case VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR: return "cycles"; case VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR: return "units"; case VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR: return "Hz"; case VK_PERFORMANCE_COUNTER_UNIT_KELVIN_KHR: return "K"; case VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR: return "ns"; case VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR: return "%"; case VK_PERFORMANCE_COUNTER_UNIT_VOLTS_KHR: return "V"; case VK_PERFORMANCE_COUNTER_UNIT_WATTS_KHR: return "W"; default: return "???"; } } void PerformanceQueryPool::log_available_counters(const VkPerformanceCounterKHR *counters, const VkPerformanceCounterDescriptionKHR *descs, uint32_t count) { for (uint32_t i = 0; i < count; i++) { LOGI(" %s: %s\n", descs[i].name, descs[i].description); LOGI(" Storage: %s\n", storage_to_str(counters[i].storage)); LOGI(" Scope: %s\n", scope_to_str(counters[i].scope)); LOGI(" Unit: %s\n", unit_to_str(counters[i].unit)); } } void PerformanceQueryPool::init_device(Device *device_, uint32_t queue_family_index_) { device = device_; queue_family_index = queue_family_index_; if (!device->get_device_features().performance_query_features.performanceCounterQueryPools) return; uint32_t num_counters = 0; if (vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( device->get_physical_device(), queue_family_index, &num_counters, nullptr, nullptr) != VK_SUCCESS) { LOGE("Failed to enumerate performance counters.\n"); return; } counters.resize(num_counters, { VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_KHR }); counter_descriptions.resize(num_counters, { VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_DESCRIPTION_KHR }); if (vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( device->get_physical_device(), queue_family_index, &num_counters, counters.data(), counter_descriptions.data()) != VK_SUCCESS) { LOGE("Failed to enumerate performance counters.\n"); return; } } PerformanceQueryPool::~PerformanceQueryPool() { if (pool) device->get_device_table().vkDestroyQueryPool(device->get_device(), pool, nullptr); } void PerformanceQueryPool::begin_command_buffer(VkCommandBuffer cmd) { if (!pool) return; auto &table = device->get_device_table(); table.vkResetQueryPoolEXT(device->get_device(), pool, 0, 1); table.vkCmdBeginQuery(cmd, pool, 0, 0); VkMemoryBarrier barrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER }; barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT; table.vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr); } void PerformanceQueryPool::end_command_buffer(VkCommandBuffer cmd) { if (!pool) return; auto &table = device->get_device_table(); VkMemoryBarrier barrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER }; barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; barrier.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT; table.vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &barrier, 0, nullptr, 0, nullptr); table.vkCmdEndQuery(cmd, pool, 0); } void PerformanceQueryPool::report() { if (pool == VK_NULL_HANDLE) { LOGE("No query pool is set up.\n"); return; } auto &table = device->get_device_table(); if (table.vkGetQueryPoolResults(device->get_device(), pool, 0, 1, results.size() * sizeof(VkPerformanceCounterResultKHR), results.data(), sizeof(VkPerformanceCounterResultKHR), VK_QUERY_RESULT_WAIT_BIT) != VK_SUCCESS) { LOGE("Getting performance counters did not succeed.\n"); } size_t num_counters = results.size(); LOGI("\n=== Profiling result ===\n"); for (size_t i = 0; i < num_counters; i++) { auto &counter = counters[active_indices[i]]; auto &desc = counter_descriptions[active_indices[i]]; switch (counter.storage) { case VK_PERFORMANCE_COUNTER_STORAGE_INT32_KHR: LOGI(" %s (%s): %d %s\n", desc.name, desc.description, results[i].int32, unit_to_str(counter.unit)); break; case VK_PERFORMANCE_COUNTER_STORAGE_INT64_KHR: LOGI(" %s (%s): %lld %s\n", desc.name, desc.description, static_cast(results[i].int64), unit_to_str(counter.unit)); break; case VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR: LOGI(" %s (%s): %u %s\n", desc.name, desc.description, results[i].uint32, unit_to_str(counter.unit)); break; case VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR: LOGI(" %s (%s): %llu %s\n", desc.name, desc.description, static_cast(results[i].uint64), unit_to_str(counter.unit)); break; case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR: LOGI(" %s (%s): %g %s\n", desc.name, desc.description, results[i].float32, unit_to_str(counter.unit)); break; case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR: LOGI(" %s (%s): %g %s\n", desc.name, desc.description, results[i].float64, unit_to_str(counter.unit)); break; default: break; } } LOGI("================================\n\n"); } uint32_t PerformanceQueryPool::get_num_counters() const { return uint32_t(counters.size()); } const VkPerformanceCounterKHR *PerformanceQueryPool::get_available_counters() const { return counters.data(); } const VkPerformanceCounterDescriptionKHR *PerformanceQueryPool::get_available_counter_descs() const { return counter_descriptions.data(); } bool PerformanceQueryPool::init_counters(const std::vector &counter_names) { if (!device->get_device_features().performance_query_features.performanceCounterQueryPools) { LOGE("Device does not support VK_KHR_performance_query.\n"); return false; } if (!device->get_device_features().vk12_features.hostQueryReset) { LOGE("Device does not support host query reset.\n"); return false; } auto &table = device->get_device_table(); if (pool) table.vkDestroyQueryPool(device->get_device(), pool, nullptr); pool = VK_NULL_HANDLE; VkQueryPoolPerformanceCreateInfoKHR performance_info = { VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR }; VkQueryPoolCreateInfo info = { VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO }; info.pNext = &performance_info; info.queryType = VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR; info.queryCount = 1; active_indices.clear(); for (auto &name : counter_names) { auto itr = find_if(begin(counter_descriptions), end(counter_descriptions), [&](const VkPerformanceCounterDescriptionKHR &desc) { return name == desc.name; }); if (itr != end(counter_descriptions)) { LOGI("Found counter %s: %s\n", itr->name, itr->description); active_indices.push_back(itr - begin(counter_descriptions)); } } if (active_indices.empty()) { LOGW("No performance counters were enabled.\n"); return false; } performance_info.queueFamilyIndex = queue_family_index; performance_info.counterIndexCount = active_indices.size(); performance_info.pCounterIndices = active_indices.data(); results.resize(active_indices.size()); uint32_t num_passes = 0; vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(device->get_physical_device(), &performance_info, &num_passes); if (num_passes != 1) { LOGE("Implementation requires %u passes to query performance counters. Cannot create query pool.\n", num_passes); return false; } if (table.vkCreateQueryPool(device->get_device(), &info, nullptr, &pool) != VK_SUCCESS) { LOGE("Failed to create performance query pool.\n"); return false; } return true; } QueryPool::QueryPool(Device *device_) : device(device_) , table(device_->get_device_table()) { supports_timestamp = device->get_gpu_properties().limits.timestampComputeAndGraphics && device->get_device_features().vk12_features.hostQueryReset; // Ignore timestampValidBits and friends for now. if (supports_timestamp) add_pool(); } QueryPool::~QueryPool() { for (auto &pool : pools) table.vkDestroyQueryPool(device->get_device(), pool.pool, nullptr); } void QueryPool::begin() { for (unsigned i = 0; i <= pool_index; i++) { if (i >= pools.size()) continue; auto &pool = pools[i]; if (pool.index == 0) continue; table.vkGetQueryPoolResults(device->get_device(), pool.pool, 0, pool.index, pool.index * sizeof(uint64_t), pool.query_results.data(), sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); for (unsigned j = 0; j < pool.index; j++) pool.cookies[j]->signal_timestamp_ticks(pool.query_results[j]); table.vkResetQueryPool(device->get_device(), pool.pool, 0, pool.index); } pool_index = 0; for (auto &pool : pools) pool.index = 0; } void QueryPool::add_pool() { VkQueryPoolCreateInfo pool_info = { VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO }; pool_info.queryType = VK_QUERY_TYPE_TIMESTAMP; pool_info.queryCount = 64; Pool pool; table.vkCreateQueryPool(device->get_device(), &pool_info, nullptr, &pool.pool); pool.size = pool_info.queryCount; pool.index = 0; pool.query_results.resize(pool.size); pool.cookies.resize(pool.size); table.vkResetQueryPool(device->get_device(), pool.pool, 0, pool.size); pools.push_back(std::move(pool)); } QueryPoolHandle QueryPool::write_timestamp(VkCommandBuffer cmd, VkPipelineStageFlags2 stage) { if (!supports_timestamp) { LOGI("Timestamps are not supported on this implementation.\n"); return {}; } VK_ASSERT((stage & (stage - 1)) == 0); if (pools[pool_index].index >= pools[pool_index].size) pool_index++; if (pool_index >= pools.size()) add_pool(); auto &pool = pools[pool_index]; auto cookie = QueryPoolHandle(device->handle_pool.query.allocate(device, true)); pool.cookies[pool.index] = cookie; if (device->get_device_features().vk13_features.synchronization2) table.vkCmdWriteTimestamp2(cmd, stage, pool.pool, pool.index); else { table.vkCmdWriteTimestamp(cmd, static_cast(convert_vk_src_stage2(stage)), pool.pool, pool.index); } pool.index++; return cookie; } void QueryPoolResultDeleter::operator()(QueryPoolResult *query) { query->device->handle_pool.query.free(query); } void TimestampInterval::mark_end_of_frame_context() { if (total_time > 0.0) total_frame_iterations++; } uint64_t TimestampInterval::get_total_accumulations() const { return total_accumulations; } uint64_t TimestampInterval::get_total_frame_iterations() const { return total_frame_iterations; } double TimestampInterval::get_total_time() const { return total_time; } void TimestampInterval::accumulate_time(double t) { total_time += t; total_accumulations++; } double TimestampInterval::get_time_per_iteration() const { if (total_frame_iterations) return total_time / double(total_frame_iterations); else return 0.0; } double TimestampInterval::get_time_per_accumulation() const { if (total_accumulations) return total_time / double(total_accumulations); else return 0.0; } const std::string &TimestampInterval::get_tag() const { return tag; } void TimestampInterval::reset() { total_time = 0.0; total_accumulations = 0; total_frame_iterations = 0; } TimestampInterval::TimestampInterval(std::string tag_) : tag(std::move(tag_)) { } TimestampInterval *TimestampIntervalManager::get_timestamp_tag(const char *tag) { Util::Hasher h; h.string(tag); return timestamps.emplace_yield(h.get(), tag); } void TimestampIntervalManager::mark_end_of_frame_context() { for (auto ×tamp : timestamps) timestamp.mark_end_of_frame_context(); } void TimestampIntervalManager::reset() { for (auto ×tamp : timestamps) timestamp.reset(); } void TimestampIntervalManager::log_simple(const TimestampIntervalReportCallback &func) const { for (auto ×tamp : timestamps) { if (timestamp.get_total_frame_iterations()) { TimestampIntervalReport report = {}; report.time_per_accumulation = timestamp.get_time_per_accumulation(); report.time_per_frame_context = timestamp.get_time_per_iteration(); report.accumulations_per_frame_context = double(timestamp.get_total_accumulations()) / double(timestamp.get_total_frame_iterations()); if (func) { func(timestamp.get_tag(), report); } else { LOGI("Timestamp tag report: %s\n", timestamp.get_tag().c_str()); LOGI(" %.3f ms / iteration\n", 1000.0 * report.time_per_accumulation); LOGI(" %.3f ms / frame context\n", 1000.0 * report.time_per_frame_context); LOGI(" %.3f iterations / frame context\n", report.accumulations_per_frame_context); } } } } }