From c80127d29d04949a34e646e05275e785ee0664f1 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen
Date: Mon, 27 Jun 2016 20:54:03 +0200
Subject: [PATCH] Vulkan: Hook up async compute test more properly.

Supports multiple queues and queues from different families.
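
When the async compute queue comes from a different queue family than the
frontend's graphics queue, the core releases ownership of the image with a
queue family ownership transfer barrier and reports the source family
through set_image(), so that the frontend can record the matching acquire
barrier before sampling the image. For reference, the frontend-side acquire
would look roughly like the sketch below (illustrative only;
acquire_image_from_core() is a hypothetical helper, not part of this patch):

    #include <vulkan/vulkan.h>

    /* Hypothetical matching acquire on the frontend's graphics queue.
     * Mirrors the release barrier the core records on its async queue:
     * same queue family pair, same GENERAL -> GENERAL layout. */
    static void acquire_image_from_core(VkCommandBuffer cmd, VkImage image,
          uint32_t src_queue_family, uint32_t dst_queue_family)
    {
       VkImageMemoryBarrier acquire = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
       acquire.srcAccessMask       = 0; /* ignored for the acquire half */
       acquire.dstAccessMask       = VK_ACCESS_SHADER_READ_BIT;
       acquire.oldLayout           = VK_IMAGE_LAYOUT_GENERAL;
       acquire.newLayout           = VK_IMAGE_LAYOUT_GENERAL;
       acquire.srcQueueFamilyIndex = src_queue_family; /* core's async family */
       acquire.dstQueueFamilyIndex = dst_queue_family; /* frontend's gfx family */
       acquire.image               = image;
       acquire.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
       acquire.subresourceRange.levelCount = 1;
       acquire.subresourceRange.layerCount = 1;

       vkCmdPipelineBarrier(cmd,
             VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
             VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
             0, 0, NULL, 0, NULL, 1, &acquire);
    }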
---
 command.c                                 |   3 +
 .../libretro-test.c                       | 204 +++++++++++++++++-
 gfx/common/vulkan_common.c                |   7 +-
 libretro-common/include/libretro_vulkan.h |   9 +-
 4 files changed, 210 insertions(+), 13 deletions(-)

diff --git a/command.c b/command.c
index 125370dc85..adf5568e08 100644
--- a/command.c
+++ b/command.c
@@ -2150,15 +2150,18 @@ bool command_event(enum event_command cmd, void *data)
          /* RARCH_DRIVER_CTL_UNINIT clears the callback struct so we
           * need to make sure to keep a copy */
          struct retro_hw_render_callback *hwr = NULL;
+         const struct retro_hw_render_context_negotiation_interface *iface = NULL;
          struct retro_hw_render_callback hwr_copy;
          int flags = DRIVERS_CMD_ALL;

          hwr = video_driver_get_hw_context();
+         iface = video_driver_get_context_negotiation_interface();
          memcpy(&hwr_copy, hwr, sizeof(hwr_copy));

          driver_ctl(RARCH_DRIVER_CTL_UNINIT, &flags);

          memcpy(hwr, &hwr_copy, sizeof(*hwr));
+         video_driver_set_context_negotiation_interface(iface);

          driver_ctl(RARCH_DRIVER_CTL_INIT, &flags);
       }

diff --git a/cores/libretro-test-vulkan-async-compute/libretro-test.c b/cores/libretro-test-vulkan-async-compute/libretro-test.c
index a09a35049a..ce4bbb5b3a 100644
--- a/cores/libretro-test-vulkan-async-compute/libretro-test.c
+++ b/cores/libretro-test-vulkan-async-compute/libretro-test.c
@@ -11,6 +11,8 @@
 static struct retro_hw_render_callback hw_render;
 static const struct retro_hw_render_interface_vulkan *vulkan;
 static unsigned frame_count;
+static VkQueue async_queue;
+static uint32_t async_queue_index;

 #define BASE_WIDTH 640
 #define BASE_HEIGHT 360
@@ -44,6 +46,8 @@ struct vulkan_data
    VkCommandPool cmd_pool[MAX_SYNC];
    VkCommandBuffer cmd[MAX_SYNC];
    VkSemaphore acquire_semaphores[MAX_SYNC];
+
+   bool need_acquire[MAX_SYNC];
 };
 static struct vulkan_data vk;
@@ -161,8 +165,17 @@ static void vulkan_test_render(void)
    prepare_rendering.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
    prepare_rendering.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    prepare_rendering.newLayout = VK_IMAGE_LAYOUT_GENERAL;
-   prepare_rendering.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-   prepare_rendering.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+
+   if (vk.need_acquire[vk.index])
+   {
+      prepare_rendering.srcQueueFamilyIndex = vulkan->queue_index;
+      prepare_rendering.dstQueueFamilyIndex = async_queue_index;
+   }
+   else
+   {
+      prepare_rendering.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+      prepare_rendering.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+   }
    prepare_rendering.image = vk.images[vk.index].create_info.image;
    prepare_rendering.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    prepare_rendering.subresourceRange.levelCount = 1;
@@ -196,8 +209,20 @@ static void vulkan_test_render(void)
    prepare_presentation.dstAccessMask = 0;
    prepare_presentation.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
    prepare_presentation.newLayout = VK_IMAGE_LAYOUT_GENERAL;
-   prepare_presentation.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-   prepare_presentation.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+
+   if (async_queue && vulkan->queue_index != async_queue_index)
+   {
+      prepare_presentation.srcQueueFamilyIndex = async_queue_index;
+      prepare_presentation.dstQueueFamilyIndex = vulkan->queue_index;
+      vk.need_acquire[vk.index] = true;
+   }
+   else
+   {
+      prepare_presentation.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+      prepare_presentation.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+      vk.need_acquire[vk.index] = false;
+   }
+
    prepare_presentation.image = vk.images[vk.index].create_info.image;
    prepare_presentation.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    prepare_presentation.subresourceRange.levelCount = 1;
@@ -211,14 +236,19 @@ static void vulkan_test_render(void)

    vkEndCommandBuffer(cmd);

-   vulkan->lock_queue(vulkan->handle);
+   if (!async_queue)
+      vulkan->lock_queue(vulkan->handle);
+
    VkSubmitInfo submit = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
    submit.commandBufferCount = 1;
    submit.pCommandBuffers = &cmd;
    submit.signalSemaphoreCount = 1;
    submit.pSignalSemaphores = &vk.acquire_semaphores[vk.index];
-   vkQueueSubmit(vulkan->queue, 1, &submit, VK_NULL_HANDLE);
-   vulkan->unlock_queue(vulkan->handle);
+   vkQueueSubmit(async_queue != VK_NULL_HANDLE ? async_queue : vulkan->queue,
+         1, &submit, VK_NULL_HANDLE);
+
+   if (!async_queue)
+      vulkan->unlock_queue(vulkan->handle);
 }

 static VkShaderModule create_shader_module(const uint32_t *data, size_t size)
@@ -340,6 +370,13 @@ static void init_swapchain(void)
       image.mipLevels = 1;
       image.arrayLayers = 1;

+      uint32_t share_queues[] = { async_queue_index, vulkan->queue_index };
+      if (async_queue && async_queue_index != vulkan->queue_index)
+      {
+         image.queueFamilyIndexCount = 2;
+         image.pQueueFamilyIndices = share_queues;
+      }
+
       vkCreateImage(device, &image, NULL, &vk.images[i].create_info.image);

       VkMemoryAllocateInfo alloc = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
@@ -468,7 +505,10 @@ void retro_run(void)
    vk.index = vulkan->get_sync_index(vulkan->handle);

    vulkan_test_render();
-   vulkan->set_image(vulkan->handle, &vk.images[vk.index], 1, &vk.acquire_semaphores[vk.index], VK_QUEUE_FAMILY_IGNORED);
+   vulkan->set_image(vulkan->handle, &vk.images[vk.index],
+         1, &vk.acquire_semaphores[vk.index],
+         async_queue && async_queue_index != vulkan->queue_index ?
+         async_queue_index : VK_QUEUE_FAMILY_IGNORED);
    video_cb(RETRO_HW_FRAME_BUFFER_VALID, BASE_WIDTH, BASE_HEIGHT, 0);
 }
@@ -517,6 +557,152 @@ static const VkApplicationInfo *get_application_info(void)
    return &info;
 }

+static bool create_device(struct retro_vulkan_context *context,
+      VkInstance instance,
+      VkPhysicalDevice gpu,
+      VkSurfaceKHR surface,
+      PFN_vkGetInstanceProcAddr get_instance_proc_addr,
+      const char **required_device_extensions,
+      unsigned num_required_device_extensions,
+      const char **required_device_layers,
+      unsigned num_required_device_layers,
+      const VkPhysicalDeviceFeatures *required_features)
+{
+   async_queue = VK_NULL_HANDLE;
+   vulkan_symbol_wrapper_init(get_instance_proc_addr);
+   vulkan_symbol_wrapper_load_core_symbols(instance);
+
+   if (gpu == VK_NULL_HANDLE)
+   {
+      uint32_t gpu_count;
+      vkEnumeratePhysicalDevices(instance, &gpu_count, NULL);
+      if (!gpu_count)
+         return false;
+
+      VkPhysicalDevice *gpus = calloc(gpu_count, sizeof(*gpus));
+      if (!gpus)
+         return false;
+
+      vkEnumeratePhysicalDevices(instance, &gpu_count, gpus);
+      gpu = gpus[0];
+      free(gpus);
+   }
+
+   context->gpu = gpu;
+
+   uint32_t queue_count;
+   VkQueueFamilyProperties *queue_properties = NULL;
+   vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_count, NULL);
+   if (queue_count < 1)
+      return false;
+
+   queue_properties = calloc(queue_count, sizeof(*queue_properties));
+   if (!queue_properties)
+      return false;
+   vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_count, queue_properties);
+
+   if (surface != VK_NULL_HANDLE)
+   {
+      VULKAN_SYMBOL_WRAPPER_LOAD_INSTANCE_EXTENSION_SYMBOL(instance,
+            vkGetPhysicalDeviceSurfaceSupportKHR);
+   }
+
+   bool found_queue = false;
+   for (uint32_t i = 0; i < queue_count; i++)
+   {
+      VkBool32 supported = surface == VK_NULL_HANDLE;
+
+      if (surface != VK_NULL_HANDLE)
+      {
+         vkGetPhysicalDeviceSurfaceSupportKHR(
+               gpu, i, surface, &supported);
+      }
+
+      VkQueueFlags required = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
+      if (supported && ((queue_properties[i].queueFlags & required) == required))
+      {
+         context->queue_family_index = i;
+         found_queue = true;
+         break;
+      }
+   }
+
+   if (!found_queue)
+   {
+      free(queue_properties);
+      return false;
+   }
+
+   /* If the graphics family has a spare queue, take the async compute
+    * queue from the same family, otherwise look for a separate
+    * COMPUTE-capable family. */
+   bool same_queue_async = false;
+   if (queue_properties[context->queue_family_index].queueCount >= 2)
+      same_queue_async = true;
+
+   if (!same_queue_async)
+   {
+      found_queue = false;
+      for (uint32_t i = 0; i < queue_count; i++)
+      {
+         if (i == context->queue_family_index)
+            continue;
+
+         VkQueueFlags required = VK_QUEUE_COMPUTE_BIT;
+         if ((queue_properties[i].queueFlags & required) == required)
+         {
+            async_queue_index = i;
+            found_queue = true;
+            break;
+         }
+      }
+   }
+   else
+      async_queue_index = context->queue_family_index;
+
+   free(queue_properties);
+   if (!found_queue)
+      return false;
+
+   const float prios[] = { 0.5f, 0.5f };
+   VkDeviceQueueCreateInfo queues[2] = {
+      { VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO },
+      { VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO },
+   };
+
+   if (same_queue_async)
+   {
+      queues[0].queueFamilyIndex = context->queue_family_index;
+      queues[0].queueCount = 2;
+      queues[0].pQueuePriorities = prios;
+   }
+   else
+   {
+      queues[0].queueFamilyIndex = context->queue_family_index;
+      queues[0].queueCount = 1;
+      queues[0].pQueuePriorities = &prios[0];
+
+      queues[1].queueFamilyIndex = async_queue_index;
+      queues[1].queueCount = 1;
+      queues[1].pQueuePriorities = &prios[1];
+   }
+
+   VkDeviceCreateInfo device_info = { VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO };
+   device_info.enabledExtensionCount = num_required_device_extensions;
+   device_info.ppEnabledExtensionNames = required_device_extensions;
+   device_info.enabledLayerCount = num_required_device_layers;
+   device_info.ppEnabledLayerNames = required_device_layers;
+   device_info.queueCreateInfoCount = same_queue_async ? 1 : 2;
+   device_info.pQueueCreateInfos = queues;
+
+   if (vkCreateDevice(gpu, &device_info, NULL, &context->device) != VK_SUCCESS)
+      return false;
+
+   vkGetDeviceQueue(context->device, context->queue_family_index, 0, &context->queue);
+   if (same_queue_async)
+      vkGetDeviceQueue(context->device, context->queue_family_index, 1, &async_queue);
+   else
+      vkGetDeviceQueue(context->device, async_queue_index, 0, &async_queue);
+
+   context->presentation_queue = context->queue;
+   context->presentation_queue_family_index = context->queue_family_index;
+   return true;
+}
+
 static bool retro_init_hw_context(void)
 {
    hw_render.context_type = RETRO_HW_CONTEXT_VULKAN;
@@ -533,7 +719,7 @@ static bool retro_init_hw_context(void)
       RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_VULKAN_VERSION,

       get_application_info,
-      NULL,
+      create_device,
    };

    environ_cb(RETRO_ENVIRONMENT_SET_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE, (void*)&iface);
diff --git a/gfx/common/vulkan_common.c b/gfx/common/vulkan_common.c
index c3342623ec..dbc896a9e9 100644
--- a/gfx/common/vulkan_common.c
+++ b/gfx/common/vulkan_common.c
@@ -1246,6 +1246,7 @@ static bool vulkan_context_init_device(gfx_ctx_vulkan_data_t *vk)
       const VkPhysicalDeviceFeatures features = { 0 };

       bool ret = iface->create_device(&context, vk->context.instance,
+            VK_NULL_HANDLE,
             vk->vk_surface,
             vulkan_symbol_wrapper_instance_proc_addr(),
             device_extensions,
@@ -1348,7 +1349,7 @@ static bool vulkan_context_init_device(gfx_ctx_vulkan_data_t *vk)
       {
          VkBool32 supported = VK_FALSE;
          vkGetPhysicalDeviceSurfaceSupportKHR(
-               vk->context.gpu, vk->context.graphics_queue_index,
+               vk->context.gpu, i,
                vk->vk_surface, &supported);

          VkQueueFlags required = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
@@ -1804,8 +1805,8 @@ void vulkan_context_destroy(gfx_ctx_vulkan_data_t *vk,
    if (!vk->context.instance)
       return;

-   if (vk->context.queue)
-      vkQueueWaitIdle(vk->context.queue);
+   if (vk->context.device)
+      vkDeviceWaitIdle(vk->context.device);

    if (vk->swapchain)
       vkDestroySwapchainKHR(vk->context.device, vk->swapchain, NULL);

diff --git a/libretro-common/include/libretro_vulkan.h b/libretro-common/include/libretro_vulkan.h
index 03dc6ea457..7fb36ec106 100644
--- a/libretro-common/include/libretro_vulkan.h
+++ b/libretro-common/include/libretro_vulkan.h
@@ -26,7 +26,7 @@
 #include <libretro.h>
 #include <vulkan/vulkan.h>

-#define RETRO_HW_RENDER_INTERFACE_VULKAN_VERSION 4
+#define RETRO_HW_RENDER_INTERFACE_VULKAN_VERSION 5
 #define RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_VULKAN_VERSION 1

 struct retro_vulkan_image
@@ -67,6 +67,7 @@ struct retro_vulkan_context
 typedef bool (*retro_vulkan_create_device_t)(
       struct retro_vulkan_context *context,
       VkInstance instance,
+      VkPhysicalDevice gpu,
       VkSurfaceKHR surface,
       PFN_vkGetInstanceProcAddr get_instance_proc_addr,
       const char **required_device_extensions,
@@ -101,11 +102,17 @@ struct retro_hw_render_context_negotiation_interface_vulkan
  * The core must prepare a designated PhysicalDevice, Device, Queue and queue family index
  * which the frontend will use for its internal operation.
  *
+ * If gpu is not VK_NULL_HANDLE, the physical device provided to the frontend must be this PhysicalDevice.
+ * The core is still free to use other physical devices.
+ *
  * The frontend will request certain extensions and layers for a device which is created.
  * The core must ensure that the queue and queue_family_index support GRAPHICS and COMPUTE.
  *
+ * If surface is not VK_NULL_HANDLE, the core must consider presentation when creating the queues.
  * If presentation to "surface" is supported on the queue, presentation_queue must be equal to queue.
  * If not, a second queue must be provided in presentation_queue and presentation_queue_index.
+ * If surface is not VK_NULL_HANDLE, the instance from the frontend will have been created with support
+ * for the VK_KHR_surface extension.
  *
  * The core is free to set its own queue priorities.
  * Device provided to frontend is owned by the frontend, but any additional device resources must be freed by core
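
Note on the presentation rules above: the test core in this patch only
accepts a GRAPHICS|COMPUTE family that can also present, so it always sets
presentation_queue equal to queue. A core that has to honor the fallback
rule could pick the presentation family roughly as sketched below
(pick_presentation_family() is an illustrative helper under that
assumption, not part of the interface):

    #include <stdbool.h>
    #include <vulkan/vulkan.h>

    /* Return the family to present from: the graphics family when it can
     * present to the surface, otherwise any family that can. In the
     * fallback case the core must create a second VkQueue from the
     * returned family and report it in presentation_queue. */
    static bool pick_presentation_family(VkPhysicalDevice gpu,
          VkSurfaceKHR surface, uint32_t queue_count,
          uint32_t graphics_family, uint32_t *present_family)
    {
       VkBool32 supported = VK_FALSE;
       vkGetPhysicalDeviceSurfaceSupportKHR(gpu, graphics_family,
             surface, &supported);
       if (supported)
       {
          /* Common case: presentation_queue == queue. */
          *present_family = graphics_family;
          return true;
       }

       for (uint32_t i = 0; i < queue_count; i++)
       {
          vkGetPhysicalDeviceSurfaceSupportKHR(gpu, i, surface, &supported);
          if (supported)
          {
             *present_family = i;
             return true;
          }
       }
       return false;
    }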