// Kaizen/external/parallel-rdp/parallel-rdp-standalone/vulkan/device.hpp
/* Copyright (c) 2017-2023 Hans-Kristian Arntzen
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "buffer.hpp"
#include "command_buffer.hpp"
#include "command_pool.hpp"
#include "fence.hpp"
#include "fence_manager.hpp"
#include "image.hpp"
#include "memory_allocator.hpp"
#include "render_pass.hpp"
#include "sampler.hpp"
#include "semaphore.hpp"
#include "semaphore_manager.hpp"
#include "event_manager.hpp"
#include "shader.hpp"
#include "context.hpp"
#include "query_pool.hpp"
#include "buffer_pool.hpp"
#include "indirect_layout.hpp"
#include <memory>
#include <vector>
#include <functional>
#include <unordered_map>
#include <stdio.h>
#ifdef GRANITE_VULKAN_SYSTEM_HANDLES
#include "shader_manager.hpp"
#include "resource_manager.hpp"
#endif
#include <atomic>
#include <mutex>
#include <condition_variable>
#ifdef GRANITE_VULKAN_FOSSILIZE
#include "fossilize.hpp"
#endif
#include "quirks.hpp"
#include "small_vector.hpp"
namespace Util
{
class TimelineTraceFile;
}
namespace Granite
{
struct TaskGroup;
}
namespace Vulkan
{
enum class SwapchainRenderPass
{
ColorOnly,
Depth,
DepthStencil
};
struct InitialImageBuffer
{
BufferHandle buffer;
Util::SmallVector<VkBufferImageCopy, 32> blits;
};
struct HandlePool
{
VulkanObjectPool<Buffer> buffers;
VulkanObjectPool<Image> images;
VulkanObjectPool<LinearHostImage> linear_images;
VulkanObjectPool<ImageView> image_views;
VulkanObjectPool<BufferView> buffer_views;
VulkanObjectPool<Sampler> samplers;
VulkanObjectPool<FenceHolder> fences;
VulkanObjectPool<SemaphoreHolder> semaphores;
VulkanObjectPool<EventHolder> events;
VulkanObjectPool<QueryPoolResult> query;
VulkanObjectPool<CommandBuffer> command_buffers;
VulkanObjectPool<BindlessDescriptorPool> bindless_descriptor_pool;
VulkanObjectPool<DeviceAllocationOwner> allocations;
};
class DebugChannelInterface
{
public:
union Word
{
uint32_t u32;
int32_t s32;
float f32;
};
virtual void message(const std::string &tag, uint32_t code, uint32_t x, uint32_t y, uint32_t z,
uint32_t word_count, const Word *words) = 0;
};
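// A minimal sketch of a DebugChannelInterface implementation which just logs
// to stderr; purely illustrative, not part of the API.
//
//   struct StderrDebugChannel final : DebugChannelInterface
//   {
//       void message(const std::string &tag, uint32_t code, uint32_t x, uint32_t y, uint32_t z,
//                    uint32_t word_count, const Word *words) override
//       {
//           fprintf(stderr, "[%s] code %u @ (%u, %u, %u), %u words\n",
//                   tag.c_str(), code, x, y, z, word_count);
//           (void)words;
//       }
//   };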
namespace Helper
{
struct WaitSemaphores
{
Util::SmallVector<VkSemaphoreSubmitInfo> binary_waits;
Util::SmallVector<VkSemaphoreSubmitInfo> timeline_waits;
};
class BatchComposer
{
public:
enum { MaxSubmissions = 8 };
BatchComposer();
void add_wait_submissions(WaitSemaphores &sem);
void add_wait_semaphore(SemaphoreHolder &sem, VkPipelineStageFlags2 stage);
void add_wait_semaphore(VkSemaphore sem, VkPipelineStageFlags2 stage);
void add_signal_semaphore(VkSemaphore sem, VkPipelineStageFlags2 stage, uint64_t count);
void add_command_buffer(VkCommandBuffer cmd);
void begin_batch();
Util::SmallVector<VkSubmitInfo2, MaxSubmissions> &bake(int profiling_iteration = -1);
private:
Util::SmallVector<VkSubmitInfo2, MaxSubmissions> submits;
VkPerformanceQuerySubmitInfoKHR profiling_infos[Helper::BatchComposer::MaxSubmissions];
Util::SmallVector<VkSemaphoreSubmitInfo> waits[MaxSubmissions];
Util::SmallVector<VkSemaphoreSubmitInfo> signals[MaxSubmissions];
Util::SmallVector<VkCommandBufferSubmitInfo> cmds[MaxSubmissions];
unsigned submit_index = 0;
};
}
class Device
: public Util::IntrusivePtrEnabled<Device, std::default_delete<Device>, HandleCounter>
#ifdef GRANITE_VULKAN_FOSSILIZE
, public Fossilize::StateCreatorInterface
#endif
{
public:
// Device-based objects which need to poke at internal data structures when their lifetimes end.
// Don't want to expose a lot of internal guts to make this work.
friend class QueryPool;
friend struct QueryPoolResultDeleter;
friend class EventHolder;
friend struct EventHolderDeleter;
friend class SemaphoreHolder;
friend struct SemaphoreHolderDeleter;
friend class FenceHolder;
friend struct FenceHolderDeleter;
friend class Sampler;
friend struct SamplerDeleter;
friend class ImmutableSampler;
friend class ImmutableYcbcrConversion;
friend class Buffer;
friend struct BufferDeleter;
friend class BufferView;
friend struct BufferViewDeleter;
friend class ImageView;
friend struct ImageViewDeleter;
friend class Image;
friend struct ImageDeleter;
friend struct LinearHostImageDeleter;
friend class CommandBuffer;
friend struct CommandBufferDeleter;
friend class BindlessDescriptorPool;
friend struct BindlessDescriptorPoolDeleter;
friend class Program;
friend class WSI;
friend class Cookie;
friend class Framebuffer;
friend class PipelineLayout;
friend class FramebufferAllocator;
friend class RenderPass;
friend class Texture;
friend class DescriptorSetAllocator;
friend class Shader;
friend class ImageResourceHolder;
friend class DeviceAllocationOwner;
friend struct DeviceAllocationDeleter;
Device();
~Device();
// Neither movable nor copyable.
void operator=(Device &&) = delete;
Device(Device &&) = delete;
// Only called by main thread, during setup phase.
void set_context(const Context &context);
// This is asynchronous in nature. See query_initialization_progress().
// Kicks off Fossilize and shader manager caching.
void begin_shader_caches();
// For debug or trivial applications, blocks until all shader cache work is done.
void wait_shader_caches();
void init_swapchain(const std::vector<VkImage> &swapchain_images, unsigned width, unsigned height, VkFormat format,
VkSurfaceTransformFlagBitsKHR transform, VkImageUsageFlags usage);
void set_swapchain_queue_family_support(uint32_t queue_family_support);
bool can_touch_swapchain_in_command_buffer(CommandBuffer::Type type) const;
void init_external_swapchain(const std::vector<ImageHandle> &swapchain_images);
void init_frame_contexts(unsigned count);
const VolkDeviceTable &get_device_table() const;
// Profiling
bool init_performance_counters(CommandBuffer::Type type, const std::vector<std::string> &names);
bool acquire_profiling();
void release_profiling();
void query_available_performance_counters(CommandBuffer::Type type,
uint32_t *count,
const VkPerformanceCounterKHR **counters,
const VkPerformanceCounterDescriptionKHR **desc);
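// Illustrative sketch of the profiling interface above; the counter name is
// hypothetical and should come from query_available_performance_counters().
//
//   std::vector<std::string> names = { "SOME_COUNTER_NAME" };
//   if (device.init_performance_counters(CommandBuffer::Type::Generic, names) &&
//       device.acquire_profiling())
//   {
//       auto cmd = device.request_profiled_command_buffer();
//       // ... record and submit as usual ...
//       device.submit(cmd);
//       device.release_profiling();
//   }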
ImageView &get_swapchain_view();
ImageView &get_swapchain_view(unsigned index);
unsigned get_num_swapchain_images() const;
unsigned get_num_frame_contexts() const;
unsigned get_swapchain_index() const;
unsigned get_current_frame_context() const;
size_t get_pipeline_cache_size();
bool get_pipeline_cache_data(uint8_t *data, size_t size);
bool init_pipeline_cache(const uint8_t *data, size_t size);
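// Sketch of persisting the pipeline cache across runs, assuming the
// application does its own disk I/O:
//
//   size_t size = device.get_pipeline_cache_size();
//   std::vector<uint8_t> blob(size);
//   if (device.get_pipeline_cache_data(blob.data(), blob.size()))
//   {
//       // Write blob to disk; feed it back on the next run with
//       // device.init_pipeline_cache(blob.data(), blob.size());
//   }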
// Frame-pushing interface.
void next_frame_context();
// Normally, the main thread ensures forward progress of the frame context
// so that async tasks don't have to care about it,
// but in the case where async threads are continuously pumping Vulkan work
// in the background, they need to reclaim memory if WSI goes to sleep for a long period of time.
void next_frame_context_in_async_thread();
void set_enable_async_thread_frame_context(bool enable);
void wait_idle();
void end_frame_context();
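// A typical per-frame flow driven from the main thread might look like the
// following sketch (WSI integration normally drives this):
//
//   auto cmd = device.request_command_buffer();
//   // ... record rendering commands ...
//   device.submit(cmd);
//   device.next_frame_context(); // Reclaims per-frame resources when safe.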
// RenderDoc integration API for app-guided captures.
static bool init_renderdoc_capture();
// Calls next_frame_context() and begins a renderdoc capture.
void begin_renderdoc_capture();
// Calls next_frame_context() and ends the renderdoc capture.
void end_renderdoc_capture();
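// Sketch of an app-guided capture using the API above:
//
//   if (Device::init_renderdoc_capture())
//   {
//       device.begin_renderdoc_capture();
//       // ... submit the work to be captured ...
//       device.end_renderdoc_capture();
//   }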
// Set names for objects for debuggers and profilers.
void set_name(const Buffer &buffer, const char *name);
void set_name(const Image &image, const char *name);
void set_name(const CommandBuffer &cmd, const char *name);
// Generic version.
void set_name(uint64_t object, VkObjectType type, const char *name);
// Submission interface, may be called from any thread at any time.
void flush_frame();
CommandBufferHandle request_command_buffer(CommandBuffer::Type type = CommandBuffer::Type::Generic);
CommandBufferHandle request_command_buffer_for_thread(unsigned thread_index, CommandBuffer::Type type = CommandBuffer::Type::Generic);
CommandBufferHandle request_profiled_command_buffer(CommandBuffer::Type type = CommandBuffer::Type::Generic);
CommandBufferHandle request_profiled_command_buffer_for_thread(unsigned thread_index, CommandBuffer::Type type = CommandBuffer::Type::Generic);
void submit(CommandBufferHandle &cmd, Fence *fence = nullptr,
unsigned semaphore_count = 0, Semaphore *semaphore = nullptr);
void submit_empty(CommandBuffer::Type type,
Fence *fence = nullptr,
SemaphoreHolder *semaphore = nullptr);
// Mark that work has been submitted in this frame context outside our control,
// and that it accesses resources Vulkan::Device owns.
void submit_external(CommandBuffer::Type type);
void submit_discard(CommandBufferHandle &cmd);
QueueIndices get_physical_queue_type(CommandBuffer::Type queue_type) const;
void register_time_interval(std::string tid, QueryPoolHandle start_ts, QueryPoolHandle end_ts,
const std::string &tag);
// Request shaders and programs. These objects are owned by the Device.
Shader *request_shader(const uint32_t *code, size_t size, const ResourceLayout *layout = nullptr);
Shader *request_shader_by_hash(Util::Hash hash);
Program *request_program(const uint32_t *task_data, size_t task_size,
const uint32_t *mesh_data, size_t mesh_size,
const uint32_t *fragment_data, size_t fragment_size,
const ResourceLayout *task_layout = nullptr,
const ResourceLayout *mesh_layout = nullptr,
const ResourceLayout *fragment_layout = nullptr);
Program *request_program(const uint32_t *vertex_data, size_t vertex_size,
const uint32_t *fragment_data, size_t fragment_size,
const ResourceLayout *vertex_layout = nullptr,
const ResourceLayout *fragment_layout = nullptr);
Program *request_program(const uint32_t *compute_data, size_t compute_size,
const ResourceLayout *layout = nullptr);
Program *request_program(Shader *task, Shader *mesh, Shader *fragment, const ImmutableSamplerBank *sampler_bank = nullptr);
Program *request_program(Shader *vertex, Shader *fragment, const ImmutableSamplerBank *sampler_bank = nullptr);
Program *request_program(Shader *compute, const ImmutableSamplerBank *sampler_bank = nullptr);
const IndirectLayout *request_indirect_layout(const IndirectLayoutToken *tokens,
uint32_t num_tokens, uint32_t stride);
const ImmutableYcbcrConversion *request_immutable_ycbcr_conversion(const VkSamplerYcbcrConversionCreateInfo &info);
const ImmutableSampler *request_immutable_sampler(const SamplerCreateInfo &info, const ImmutableYcbcrConversion *ycbcr);
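// Sketch of building a graphics program from raw SPIR-V blobs;
// vs_spirv/fs_spirv and their sizes are assumed inputs:
//
//   Shader *vert = device.request_shader(vs_spirv, vs_size);
//   Shader *frag = device.request_shader(fs_spirv, fs_size);
//   Program *program = device.request_program(vert, frag);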
// Map and unmap buffer objects.
void *map_host_buffer(const Buffer &buffer, MemoryAccessFlags access);
void unmap_host_buffer(const Buffer &buffer, MemoryAccessFlags access);
void *map_host_buffer(const Buffer &buffer, MemoryAccessFlags access, VkDeviceSize offset, VkDeviceSize length);
void unmap_host_buffer(const Buffer &buffer, MemoryAccessFlags access, VkDeviceSize offset, VkDeviceSize length);
void *map_linear_host_image(const LinearHostImage &image, MemoryAccessFlags access);
void unmap_linear_host_image_and_sync(const LinearHostImage &image, MemoryAccessFlags access);
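// Sketch of a map/write/unmap round trip on a host-visible buffer;
// buffer/data/data_size are assumed inputs, and MEMORY_ACCESS_WRITE_BIT is
// assumed to be a MemoryAccessFlags bit declared elsewhere:
//
//   void *mapped = device.map_host_buffer(*buffer, MEMORY_ACCESS_WRITE_BIT);
//   memcpy(mapped, data, data_size);
//   device.unmap_host_buffer(*buffer, MEMORY_ACCESS_WRITE_BIT);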
// Create buffers and images.
BufferHandle create_buffer(const BufferCreateInfo &info, const void *initial = nullptr);
BufferHandle create_imported_host_buffer(const BufferCreateInfo &info, VkExternalMemoryHandleTypeFlagBits type, void *host_buffer);
ImageHandle create_image(const ImageCreateInfo &info, const ImageInitialData *initial = nullptr);
ImageHandle create_image_from_staging_buffer(const ImageCreateInfo &info, const InitialImageBuffer *buffer);
LinearHostImageHandle create_linear_host_image(const LinearHostImageCreateInfo &info);
// Does not create any default image views. Only wraps the VkImage
// as a non-owned handle for purposes of API interop.
ImageHandle wrap_image(const ImageCreateInfo &info, VkImage img);
DeviceAllocationOwnerHandle take_device_allocation_ownership(Image &image);
DeviceAllocationOwnerHandle allocate_memory(const MemoryAllocateInfo &info);
// Create staging buffers for images.
InitialImageBuffer create_image_staging_buffer(const ImageCreateInfo &info, const ImageInitialData *initial);
InitialImageBuffer create_image_staging_buffer(const TextureFormatLayout &layout);
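// Sketch of creating a device-local vertex buffer with initial data;
// vertices is an assumed input and the BufferCreateInfo field names are
// assumed from buffer.hpp:
//
//   BufferCreateInfo info = {};
//   info.size = sizeof(vertices);
//   info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
//   info.domain = BufferDomain::Device;
//   BufferHandle vbo = device.create_buffer(info, vertices);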
// Create image views, buffer views and samplers.
ImageViewHandle create_image_view(const ImageViewCreateInfo &view_info);
BufferViewHandle create_buffer_view(const BufferViewCreateInfo &view_info);
SamplerHandle create_sampler(const SamplerCreateInfo &info);
BindlessDescriptorPoolHandle create_bindless_descriptor_pool(BindlessResourceType type,
unsigned num_sets, unsigned num_descriptors);
// Render pass helpers.
bool image_format_is_supported(VkFormat format, VkFormatFeatureFlags2KHR required, VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL) const;
void get_format_properties(VkFormat format, VkFormatProperties3KHR *properties) const;
bool get_image_format_properties(VkFormat format, VkImageType type, VkImageTiling tiling,
VkImageUsageFlags usage, VkImageCreateFlags flags,
const void *pNext,
VkImageFormatProperties2 *properties2) const;
VkFormat get_default_depth_stencil_format() const;
VkFormat get_default_depth_format() const;
ImageHandle get_transient_attachment(unsigned width, unsigned height, VkFormat format,
unsigned index = 0, unsigned samples = 1, unsigned layers = 1);
RenderPassInfo get_swapchain_render_pass(SwapchainRenderPass style);
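// Sketch of rendering to the swapchain using the helper above;
// begin_render_pass()/end_render_pass() are assumed from command_buffer.hpp:
//
//   auto cmd = device.request_command_buffer();
//   auto rp = device.get_swapchain_render_pass(SwapchainRenderPass::ColorOnly);
//   cmd->begin_render_pass(rp);
//   // ... draw ...
//   cmd->end_render_pass();
//   device.submit(cmd);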
// Semaphore API:
// Semaphores in Granite are abstracted to support both binary and timeline semaphores
// internally.
// In practice this means that semaphores behave like single-use binary semaphores,
// with one signal and one wait.
// A single semaphore handle is not reused for multiple submissions; handles must be recycled through
// the device. The intended use is device.submit(&sem); device.add_wait_semaphore(sem); dispose(sem);
// For timeline semaphores, the semaphore is just a proxy object which
// holds the internally owned VkSemaphore + timeline value and is otherwise lightweight.
//
// However, there are various use cases where we explicitly need semaphore objects:
// - Interoperate with other code that only accepts VkSemaphore.
// - Interoperate with external objects. We need to know whether to use binary or timeline.
// For timelines, we need to know which handle type to use (OPAQUE or ID3D12Fence).
// Binary external semaphore is always opaque with TEMPORARY semantics.
void add_wait_semaphore(CommandBuffer::Type type, Semaphore semaphore, VkPipelineStageFlags2 stages, bool flush);
// If transfer_ownership is set, the Semaphore owns the VkSemaphore. Otherwise, the application must
// free the semaphore once the GPU is done using it.
Semaphore request_semaphore(VkSemaphoreTypeKHR type, VkSemaphore handle = VK_NULL_HANDLE, bool transfer_ownership = false);
// Requests a binary or timeline semaphore that can be used to import/export.
// These semaphores cannot be used directly by add_wait_semaphore() and submit_empty().
// See request_timeline_semaphore_as_binary() for how to use timelines.
Semaphore request_semaphore_external(VkSemaphoreTypeKHR type,
VkExternalSemaphoreHandleTypeFlagBits handle_type);
// The created semaphore does not hold ownership of the VkSemaphore object.
// This is used when we want to wait on or signal an external timeline semaphore at a specific timeline value.
// We must collapse the timeline to a "binary" semaphore before we can call submit_empty or add_wait_semaphore().
Semaphore request_timeline_semaphore_as_binary(const SemaphoreHolder &holder, uint64_t value);
// A proxy semaphore which lets us grab a semaphore handle before we signal it.
// Move assignment can be used to move a payload.
// Mostly useful to deal better with the render graph implementation.
// For the time being, however, we'll support moving the payload over to the proxy object.
Semaphore request_proxy_semaphore();
// For compat with existing code that uses this entry point.
inline Semaphore request_legacy_semaphore() { return request_semaphore(VK_SEMAPHORE_TYPE_BINARY_KHR); }
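// The single-use flow described above, as a sketch; cmd is a previously
// recorded CommandBufferHandle, and AsyncCompute is assumed from
// CommandBuffer::Type in command_buffer.hpp:
//
//   Semaphore sem;
//   device.submit(cmd, nullptr, 1, &sem);
//   device.add_wait_semaphore(CommandBuffer::Type::AsyncCompute, sem,
//                             VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, true);
//   sem.reset(); // Dispose; the VkSemaphore is recycled through the device.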
inline VkDevice get_device() const
{
return device;
}
inline VkPhysicalDevice get_physical_device() const
{
return gpu;
}
inline VkInstance get_instance() const
{
return instance;
}
inline const VkPhysicalDeviceMemoryProperties &get_memory_properties() const
{
return mem_props;
}
inline const VkPhysicalDeviceProperties &get_gpu_properties() const
{
return gpu_props;
}
void get_memory_budget(HeapBudget *budget);
const Sampler &get_stock_sampler(StockSampler sampler) const;
#ifdef GRANITE_VULKAN_SYSTEM_HANDLES
// Before obtaining the ShaderManager, the ShaderModules stage must be observed
// to be complete via query_initialization_progress().
ShaderManager &get_shader_manager();
ResourceManager &get_resource_manager();
#endif
// Useful for loading screens or otherwise figuring out
// when we can start rendering in a stable state.
enum class InitializationStage
{
CacheMaintenance,
// When this is done, shader modules and the shader manager have been populated.
// At this stage it is safe to use shaders in a configuration where we
// don't have SPIRV-Cross and/or shaderc to do on-the-fly compilation,
// i.e. shipping configurations. We can still compile pipelines, but it may stutter.
ShaderModules,
// When this is done, pipelines should never stutter if Fossilize knows about the pipeline.
Pipelines
};
// 0 -> not started
// [1, 99] rough percentage of completion
// >= 100 done
unsigned query_initialization_progress(InitializationStage status) const;
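// Sketch of a loading-screen poll against the stages above:
//
//   while (device.query_initialization_progress(Device::InitializationStage::Pipelines) < 100)
//   {
//       // ... render loading screen, pump frame contexts ...
//   }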
// On some platforms, the device and queue might be shared, possibly across threads, so we need
// some mechanism to lock the global device and queue.
void set_queue_lock(std::function<void ()> lock_callback,
std::function<void ()> unlock_callback);
// Alternative form, when we have to provide lock callbacks to external APIs.
void external_queue_lock();
void external_queue_unlock();
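// Sketch, assuming a single global mutex is sufficient:
//
//   static std::mutex global_queue_mutex;
//   device.set_queue_lock([]() { global_queue_mutex.lock(); },
//                         []() { global_queue_mutex.unlock(); });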
const ImplementationWorkarounds &get_workarounds() const
{
return workarounds;
}
const DeviceFeatures &get_device_features() const
{
return ext;
}
bool consumes_debug_markers() const
{
return debug_marker_sensitive;
}
bool swapchain_touched() const;
double convert_device_timestamp_delta(uint64_t start_ticks, uint64_t end_ticks) const;
// Writes a timestamp on the host side, which is calibrated to the GPU timebase.
QueryPoolHandle write_calibrated_timestamp();
// A split version of VkEvent handling which lets us record a wait command before the signal is recorded.
PipelineEvent begin_signal_event();
const Context::SystemHandles &get_system_handles() const
{
return system_handles;
}
void configure_default_geometry_samplers(float max_aniso, float lod_bias);
bool supports_subgroup_size_log2(bool subgroup_full_group,
uint8_t subgroup_minimum_size_log2,
uint8_t subgroup_maximum_size_log2,
VkShaderStageFlagBits stage = VK_SHADER_STAGE_COMPUTE_BIT) const;
const QueueInfo &get_queue_info() const;
void timestamp_log_reset();
void timestamp_log(const TimestampIntervalReportCallback &cb) const;
private:
VkInstance instance = VK_NULL_HANDLE;
VkPhysicalDevice gpu = VK_NULL_HANDLE;
VkDevice device = VK_NULL_HANDLE;
const VolkDeviceTable *table = nullptr;
const Context *ctx = nullptr;
QueueInfo queue_info;
unsigned num_thread_indices = 1;
std::atomic_uint64_t cookie;
uint64_t allocate_cookie();
void bake_program(Program &program, const ImmutableSamplerBank *sampler_bank);
void merge_combined_resource_layout(CombinedResourceLayout &layout, const Program &program);
void request_vertex_block(BufferBlock &block, VkDeviceSize size);
void request_index_block(BufferBlock &block, VkDeviceSize size);
void request_uniform_block(BufferBlock &block, VkDeviceSize size);
void request_staging_block(BufferBlock &block, VkDeviceSize size);
QueryPoolHandle write_timestamp(VkCommandBuffer cmd, VkPipelineStageFlags2 stage);
void set_acquire_semaphore(unsigned index, Semaphore acquire);
Semaphore consume_release_semaphore();
VkQueue get_current_present_queue() const;
CommandBuffer::Type get_current_present_queue_type() const;
const PipelineLayout *request_pipeline_layout(const CombinedResourceLayout &layout,
const ImmutableSamplerBank *immutable_samplers);
DescriptorSetAllocator *request_descriptor_set_allocator(const DescriptorSetLayout &layout,
const uint32_t *stages_for_sets,
const ImmutableSampler * const *immutable_samplers);
const Framebuffer &request_framebuffer(const RenderPassInfo &info);
const RenderPass &request_render_pass(const RenderPassInfo &info, bool compatible);
VkPhysicalDeviceMemoryProperties mem_props;
VkPhysicalDeviceProperties gpu_props;
DeviceFeatures ext;
bool debug_marker_sensitive = false;
void init_stock_samplers();
void init_stock_sampler(StockSampler sampler, float max_aniso, float lod_bias);
void init_timeline_semaphores();
void deinit_timeline_semaphores();
uint64_t update_wrapped_device_timestamp(uint64_t ts);
int64_t convert_timestamp_to_absolute_nsec(const QueryPoolResult &handle);
Context::SystemHandles system_handles;
QueryPoolHandle write_timestamp_nolock(VkCommandBuffer cmd, VkPipelineStageFlags2 stage);
QueryPoolHandle write_calibrated_timestamp_nolock();
void register_time_interval_nolock(std::string tid, QueryPoolHandle start_ts, QueryPoolHandle end_ts,
const std::string &tag);
// Make sure this is deleted last.
HandlePool handle_pool;
// Calibrated timestamps.
void init_calibrated_timestamps();
void recalibrate_timestamps_fallback();
void recalibrate_timestamps();
bool resample_calibrated_timestamps();
VkTimeDomainEXT calibrated_time_domain = VK_TIME_DOMAIN_DEVICE_EXT;
int64_t calibrated_timestamp_device = 0;
int64_t calibrated_timestamp_host = 0;
int64_t calibrated_timestamp_device_accum = 0;
unsigned timestamp_calibration_counter = 0;
Vulkan::QueryPoolHandle frame_context_begin_ts;
struct Managers
{
DeviceAllocator memory;
FenceManager fence;
SemaphoreManager semaphore;
EventManager event;
BufferPool vbo, ibo, ubo, staging;
TimestampIntervalManager timestamps;
};
Managers managers;
struct
{
std::mutex memory_lock;
std::mutex lock;
std::condition_variable cond;
Util::RWSpinLock read_only_cache;
unsigned counter = 0;
bool async_frame_context = false;
} lock;
struct PerFrame
{
PerFrame(Device *device, unsigned index);
~PerFrame();
void operator=(const PerFrame &) = delete;
PerFrame(const PerFrame &) = delete;
void begin();
void trim_command_pools();
Device &device;
unsigned frame_index;
const VolkDeviceTable &table;
Managers &managers;
std::vector<CommandPool> cmd_pools[QUEUE_INDEX_COUNT];
VkSemaphore timeline_semaphores[QUEUE_INDEX_COUNT] = {};
uint64_t timeline_fences[QUEUE_INDEX_COUNT] = {};
QueryPool query_pool;
std::vector<BufferBlock> vbo_blocks;
std::vector<BufferBlock> ibo_blocks;
std::vector<BufferBlock> ubo_blocks;
std::vector<BufferBlock> staging_blocks;
std::vector<VkFence> wait_and_recycle_fences;
std::vector<DeviceAllocation> allocations;
std::vector<VkFramebuffer> destroyed_framebuffers;
std::vector<VkSampler> destroyed_samplers;
std::vector<VkImageView> destroyed_image_views;
std::vector<VkBufferView> destroyed_buffer_views;
std::vector<VkImage> destroyed_images;
std::vector<VkBuffer> destroyed_buffers;
std::vector<VkDescriptorPool> destroyed_descriptor_pools;
Util::SmallVector<CommandBufferHandle> submissions[QUEUE_INDEX_COUNT];
std::vector<VkSemaphore> recycled_semaphores;
std::vector<VkEvent> recycled_events;
std::vector<VkSemaphore> destroyed_semaphores;
std::vector<VkSemaphore> consumed_semaphores;
struct DebugChannel
{
DebugChannelInterface *iface;
std::string tag;
BufferHandle buffer;
};
std::vector<DebugChannel> debug_channels;
struct TimestampIntervalHandles
{
std::string tid;
QueryPoolHandle start_ts;
QueryPoolHandle end_ts;
TimestampInterval *timestamp_tag;
};
std::vector<TimestampIntervalHandles> timestamp_intervals;
bool in_destructor = false;
};
// The per frame structure must be destroyed after
// the hashmap data structures below, so it must be declared before.
std::vector<std::unique_ptr<PerFrame>> per_frame;
struct
{
Semaphore acquire;
Semaphore release;
std::vector<ImageHandle> swapchain;
VkQueue present_queue = VK_NULL_HANDLE;
Vulkan::CommandBuffer::Type present_queue_type = {};
uint32_t queue_family_support_mask = 0;
unsigned index = 0;
bool consumed = false;
} wsi;
bool can_touch_swapchain_in_command_buffer(QueueIndices physical_type) const;
struct QueueData
{
Util::SmallVector<Semaphore> wait_semaphores;
Util::SmallVector<VkPipelineStageFlags2> wait_stages;
bool need_fence = false;
VkSemaphore timeline_semaphore = VK_NULL_HANDLE;
uint64_t current_timeline = 0;
PerformanceQueryPool performance_query_pool;
} queue_data[QUEUE_INDEX_COUNT];
struct InternalFence
{
VkFence fence;
VkSemaphore timeline;
uint64_t value;
};
void submit_queue(QueueIndices physical_type, InternalFence *fence,
SemaphoreHolder *external_semaphore = nullptr,
unsigned semaphore_count = 0,
Semaphore *semaphore = nullptr,
int profiled_iteration = -1);
PerFrame &frame()
{
VK_ASSERT(frame_context_index < per_frame.size());
VK_ASSERT(per_frame[frame_context_index]);
return *per_frame[frame_context_index];
}
const PerFrame &frame() const
{
VK_ASSERT(frame_context_index < per_frame.size());
VK_ASSERT(per_frame[frame_context_index]);
return *per_frame[frame_context_index];
}
unsigned frame_context_index = 0;
uint32_t find_memory_type(BufferDomain domain, uint32_t mask) const;
uint32_t find_memory_type(ImageDomain domain, uint32_t mask) const;
uint32_t find_memory_type(uint32_t required, uint32_t mask) const;
bool memory_type_is_device_optimal(uint32_t type) const;
bool memory_type_is_host_visible(uint32_t type) const;
const ImmutableSampler *samplers[static_cast<unsigned>(StockSampler::Count)] = {};
VulkanCache<PipelineLayout> pipeline_layouts;
VulkanCache<DescriptorSetAllocator> descriptor_set_allocators;
VulkanCache<RenderPass> render_passes;
VulkanCache<Shader> shaders;
VulkanCache<Program> programs;
VulkanCache<ImmutableSampler> immutable_samplers;
VulkanCache<ImmutableYcbcrConversion> immutable_ycbcr_conversions;
VulkanCache<IndirectLayout> indirect_layouts;
FramebufferAllocator framebuffer_allocator;
TransientAttachmentAllocator transient_allocator;
VkPipelineCache pipeline_cache = VK_NULL_HANDLE;
void init_pipeline_cache();
void flush_pipeline_cache();
PerformanceQueryPool &get_performance_query_pool(QueueIndices physical_type);
void clear_wait_semaphores();
void submit_staging(CommandBufferHandle &cmd, bool flush);
PipelineEvent request_pipeline_event();
std::function<void ()> queue_lock_callback;
std::function<void ()> queue_unlock_callback;
void flush_frame(QueueIndices physical_type);
void submit_empty_inner(QueueIndices type, InternalFence *fence,
SemaphoreHolder *external_semaphore,
unsigned semaphore_count,
Semaphore *semaphore);
void collect_wait_semaphores(QueueData &data, Helper::WaitSemaphores &semaphores);
void emit_queue_signals(Helper::BatchComposer &composer,
SemaphoreHolder *external_semaphore,
VkSemaphore sem, uint64_t timeline, InternalFence *fence,
unsigned semaphore_count, Semaphore *semaphores);
VkResult submit_batches(Helper::BatchComposer &composer, VkQueue queue, VkFence fence,
int profiling_iteration = -1);
VkResult queue_submit(VkQueue queue, uint32_t count, const VkSubmitInfo2 *submits, VkFence fence);
void destroy_buffer(VkBuffer buffer);
void destroy_image(VkImage image);
void destroy_image_view(VkImageView view);
void destroy_buffer_view(VkBufferView view);
void destroy_sampler(VkSampler sampler);
void destroy_framebuffer(VkFramebuffer framebuffer);
void destroy_semaphore(VkSemaphore semaphore);
void consume_semaphore(VkSemaphore semaphore);
void recycle_semaphore(VkSemaphore semaphore);
void destroy_event(VkEvent event);
void free_memory(const DeviceAllocation &alloc);
void reset_fence(VkFence fence, bool observed_wait);
void destroy_descriptor_pool(VkDescriptorPool desc_pool);
void destroy_buffer_nolock(VkBuffer buffer);
void destroy_image_nolock(VkImage image);
void destroy_image_view_nolock(VkImageView view);
void destroy_buffer_view_nolock(VkBufferView view);
void destroy_sampler_nolock(VkSampler sampler);
void destroy_framebuffer_nolock(VkFramebuffer framebuffer);
void destroy_semaphore_nolock(VkSemaphore semaphore);
void consume_semaphore_nolock(VkSemaphore semaphore);
void recycle_semaphore_nolock(VkSemaphore semaphore);
void destroy_event_nolock(VkEvent event);
void free_memory_nolock(const DeviceAllocation &alloc);
void destroy_descriptor_pool_nolock(VkDescriptorPool desc_pool);
void reset_fence_nolock(VkFence fence, bool observed_wait);
void flush_frame_nolock();
CommandBufferHandle request_command_buffer_nolock(unsigned thread_index, CommandBuffer::Type type, bool profiled);
void submit_discard_nolock(CommandBufferHandle &cmd);
void submit_nolock(CommandBufferHandle cmd, Fence *fence,
unsigned semaphore_count, Semaphore *semaphore);
void submit_empty_nolock(QueueIndices physical_type, Fence *fence,
SemaphoreHolder *semaphore, int profiling_iteration);
void add_wait_semaphore_nolock(QueueIndices type, Semaphore semaphore,
VkPipelineStageFlags2 stages, bool flush);
void request_vertex_block_nolock(BufferBlock &block, VkDeviceSize size);
void request_index_block_nolock(BufferBlock &block, VkDeviceSize size);
void request_uniform_block_nolock(BufferBlock &block, VkDeviceSize size);
void request_staging_block_nolock(BufferBlock &block, VkDeviceSize size);
CommandBufferHandle request_secondary_command_buffer_for_thread(unsigned thread_index,
const Framebuffer *framebuffer,
unsigned subpass,
CommandBuffer::Type type = CommandBuffer::Type::Generic);
void add_frame_counter_nolock();
void decrement_frame_counter_nolock();
void submit_secondary(CommandBuffer &primary, CommandBuffer &secondary);
void wait_idle_nolock();
void end_frame_nolock();
void add_debug_channel_buffer(DebugChannelInterface *iface, std::string tag, BufferHandle buffer);
void parse_debug_channel(const PerFrame::DebugChannel &channel);
Fence request_legacy_fence();
#ifdef GRANITE_VULKAN_SYSTEM_HANDLES
ShaderManager shader_manager;
ResourceManager resource_manager;
void init_shader_manager_cache();
void flush_shader_manager_cache();
#endif
#ifdef GRANITE_VULKAN_FOSSILIZE
bool enqueue_create_sampler(Fossilize::Hash hash, const VkSamplerCreateInfo *create_info, VkSampler *sampler) override;
bool enqueue_create_descriptor_set_layout(Fossilize::Hash hash, const VkDescriptorSetLayoutCreateInfo *create_info, VkDescriptorSetLayout *layout) override;
bool enqueue_create_pipeline_layout(Fossilize::Hash hash, const VkPipelineLayoutCreateInfo *create_info, VkPipelineLayout *layout) override;
bool enqueue_create_shader_module(Fossilize::Hash hash, const VkShaderModuleCreateInfo *create_info, VkShaderModule *module) override;
bool enqueue_create_render_pass(Fossilize::Hash hash, const VkRenderPassCreateInfo *create_info, VkRenderPass *render_pass) override;
bool enqueue_create_render_pass2(Fossilize::Hash hash, const VkRenderPassCreateInfo2 *create_info, VkRenderPass *render_pass) override;
bool enqueue_create_compute_pipeline(Fossilize::Hash hash, const VkComputePipelineCreateInfo *create_info, VkPipeline *pipeline) override;
bool enqueue_create_graphics_pipeline(Fossilize::Hash hash, const VkGraphicsPipelineCreateInfo *create_info, VkPipeline *pipeline) override;
bool enqueue_create_raytracing_pipeline(Fossilize::Hash hash, const VkRayTracingPipelineCreateInfoKHR *create_info, VkPipeline *pipeline) override;
bool fossilize_replay_graphics_pipeline(Fossilize::Hash hash, VkGraphicsPipelineCreateInfo &info);
bool fossilize_replay_compute_pipeline(Fossilize::Hash hash, VkComputePipelineCreateInfo &info);
void replay_tag_simple(Fossilize::ResourceTag tag);
void register_graphics_pipeline(Fossilize::Hash hash, const VkGraphicsPipelineCreateInfo &info);
void register_compute_pipeline(Fossilize::Hash hash, const VkComputePipelineCreateInfo &info);
void register_render_pass(VkRenderPass render_pass, Fossilize::Hash hash, const VkRenderPassCreateInfo2KHR &info);
void register_descriptor_set_layout(VkDescriptorSetLayout layout, Fossilize::Hash hash, const VkDescriptorSetLayoutCreateInfo &info);
void register_pipeline_layout(VkPipelineLayout layout, Fossilize::Hash hash, const VkPipelineLayoutCreateInfo &info);
void register_shader_module(VkShaderModule module, Fossilize::Hash hash, const VkShaderModuleCreateInfo &info);
void register_sampler(VkSampler sampler, Fossilize::Hash hash, const VkSamplerCreateInfo &info);
void register_sampler_ycbcr_conversion(VkSamplerYcbcrConversion ycbcr, const VkSamplerYcbcrConversionCreateInfo &info);
struct RecorderState;
std::unique_ptr<RecorderState> recorder_state;
struct ReplayerState;
std::unique_ptr<ReplayerState> replayer_state;
void promote_write_cache_to_readonly() const;
void promote_readonly_db_from_assets() const;
void init_pipeline_state(const Fossilize::FeatureFilter &filter,
const VkPhysicalDeviceFeatures2 &pdf2,
const VkApplicationInfo &application_info);
void flush_pipeline_state();
void block_until_shader_module_ready();
void block_until_pipeline_ready();
#endif
ImplementationWorkarounds workarounds;
void init_workarounds();
void fill_buffer_sharing_indices(VkBufferCreateInfo &create_info, uint32_t *sharing_indices);
bool allocate_image_memory(DeviceAllocation *allocation, const ImageCreateInfo &info,
VkImage image, VkImageTiling tiling);
void promote_read_write_caches_to_read_only();
};
// A fairly complex helper used for async queue readbacks.
// Typically used for things like a headless backend which emulates WSI through readbacks + encode.
struct OwnershipTransferInfo
{
CommandBuffer::Type old_queue;
CommandBuffer::Type new_queue;
VkImageLayout old_image_layout;
VkImageLayout new_image_layout;
VkPipelineStageFlags2 dst_pipeline_stage;
VkAccessFlags2 dst_access;
};
// For an image which was last accessed in old_queue, requests a command buffer
// for new_queue. Commands will be enqueued as necessary in new_queue to ensure that a complete ownership
// transfer has taken place.
// If queue family for old_queue differs from new_queue, a release barrier is enqueued in old_queue.
// In new_queue we perform either an acquire barrier or a simple pipeline barrier to change layout if required.
// If semaphore is a valid handle, it will be waited on either in old_queue (to perform the release
// barrier) or in new_queue, depending on what is required.
// If the image uses CONCURRENT sharing mode, acquire/release barriers are skipped.
CommandBufferHandle request_command_buffer_with_ownership_transfer(
Device &device,
const Vulkan::Image &image,
const OwnershipTransferInfo &info,
const Vulkan::Semaphore &semaphore);
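// Illustrative sketch: handing a rendered image over to the transfer queue for
// readback. image and semaphore are assumed inputs, and the queue, layout,
// stage and access choices here are assumptions:
//
//   OwnershipTransferInfo info = {};
//   info.old_queue = CommandBuffer::Type::Generic;
//   info.new_queue = CommandBuffer::Type::AsyncTransfer;
//   info.old_image_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
//   info.new_image_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
//   info.dst_pipeline_stage = VK_PIPELINE_STAGE_2_COPY_BIT;
//   info.dst_access = VK_ACCESS_2_TRANSFER_READ_BIT;
//   auto cmd = request_command_buffer_with_ownership_transfer(device, *image, info, semaphore);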
using DeviceHandle = Util::IntrusivePtr<Device>;
}