mirror of
https://github.com/libretro/RetroArch.git
synced 2025-04-02 10:51:52 -04:00
Vulkan: Hook up async compute test more properly.
Supports multiple queues and queues from different families.
This commit is contained in:
parent
4abb0ef331
commit
c80127d29d
4 changed files with 210 additions and 13 deletions
|
@ -2150,15 +2150,18 @@ bool command_event(enum event_command cmd, void *data)
|
|||
/* RARCH_DRIVER_CTL_UNINIT clears the callback struct so we
|
||||
* need to make sure to keep a copy */
|
||||
struct retro_hw_render_callback *hwr = NULL;
|
||||
const struct retro_hw_render_context_negotiation_interface *iface = NULL;
|
||||
struct retro_hw_render_callback hwr_copy;
|
||||
int flags = DRIVERS_CMD_ALL;
|
||||
|
||||
hwr = video_driver_get_hw_context();
|
||||
iface = video_driver_get_context_negotiation_interface();
|
||||
memcpy(&hwr_copy, hwr, sizeof(hwr_copy));
|
||||
|
||||
driver_ctl(RARCH_DRIVER_CTL_UNINIT, &flags);
|
||||
|
||||
memcpy(hwr, &hwr_copy, sizeof(*hwr));
|
||||
video_driver_set_context_negotiation_interface(iface);
|
||||
|
||||
driver_ctl(RARCH_DRIVER_CTL_INIT, &flags);
|
||||
}
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
static struct retro_hw_render_callback hw_render;
|
||||
static const struct retro_hw_render_interface_vulkan *vulkan;
|
||||
static unsigned frame_count;
|
||||
static VkQueue async_queue;
|
||||
static uint32_t async_queue_index;
|
||||
|
||||
#define BASE_WIDTH 640
|
||||
#define BASE_HEIGHT 360
|
||||
|
@ -44,6 +46,8 @@ struct vulkan_data
|
|||
VkCommandPool cmd_pool[MAX_SYNC];
|
||||
VkCommandBuffer cmd[MAX_SYNC];
|
||||
VkSemaphore acquire_semaphores[MAX_SYNC];
|
||||
|
||||
bool need_acquire[MAX_SYNC];
|
||||
};
|
||||
static struct vulkan_data vk;
|
||||
|
||||
|
@ -161,8 +165,17 @@ static void vulkan_test_render(void)
|
|||
prepare_rendering.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
prepare_rendering.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
prepare_rendering.newLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
prepare_rendering.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
prepare_rendering.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
|
||||
if (vk.need_acquire[vk.index])
|
||||
{
|
||||
prepare_rendering.srcQueueFamilyIndex = vulkan->queue_index;
|
||||
prepare_rendering.dstQueueFamilyIndex = async_queue_index;
|
||||
}
|
||||
else
|
||||
{
|
||||
prepare_rendering.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
prepare_rendering.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
}
|
||||
prepare_rendering.image = vk.images[vk.index].create_info.image;
|
||||
prepare_rendering.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
prepare_rendering.subresourceRange.levelCount = 1;
|
||||
|
@ -196,8 +209,20 @@ static void vulkan_test_render(void)
|
|||
prepare_presentation.dstAccessMask = 0;
|
||||
prepare_presentation.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
prepare_presentation.newLayout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
prepare_presentation.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
prepare_presentation.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
|
||||
if (async_queue && vulkan->queue_index != async_queue_index)
|
||||
{
|
||||
prepare_presentation.srcQueueFamilyIndex = async_queue_index;
|
||||
prepare_presentation.dstQueueFamilyIndex = vulkan->queue_index;
|
||||
vk.need_acquire[vk.index] = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
prepare_presentation.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
prepare_presentation.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
vk.need_acquire[vk.index] = false;
|
||||
}
|
||||
|
||||
prepare_presentation.image = vk.images[vk.index].create_info.image;
|
||||
prepare_presentation.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
prepare_presentation.subresourceRange.levelCount = 1;
|
||||
|
@ -211,14 +236,19 @@ static void vulkan_test_render(void)
|
|||
|
||||
vkEndCommandBuffer(cmd);
|
||||
|
||||
vulkan->lock_queue(vulkan->handle);
|
||||
if (!async_queue)
|
||||
vulkan->lock_queue(vulkan->handle);
|
||||
|
||||
VkSubmitInfo submit = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
|
||||
submit.commandBufferCount = 1;
|
||||
submit.pCommandBuffers = &cmd;
|
||||
submit.signalSemaphoreCount = 1;
|
||||
submit.pSignalSemaphores = &vk.acquire_semaphores[vk.index];
|
||||
vkQueueSubmit(vulkan->queue, 1, &submit, VK_NULL_HANDLE);
|
||||
vulkan->unlock_queue(vulkan->handle);
|
||||
vkQueueSubmit(async_queue != VK_NULL_HANDLE ? async_queue : vulkan->queue,
|
||||
1, &submit, VK_NULL_HANDLE);
|
||||
|
||||
if (!async_queue)
|
||||
vulkan->unlock_queue(vulkan->handle);
|
||||
}
|
||||
|
||||
static VkShaderModule create_shader_module(const uint32_t *data, size_t size)
|
||||
|
@ -340,6 +370,13 @@ static void init_swapchain(void)
|
|||
image.mipLevels = 1;
|
||||
image.arrayLayers = 1;
|
||||
|
||||
uint32_t share_queues[] = { async_queue_index, vulkan->queue_index };
|
||||
if (async_queue && async_queue_index != vulkan->queue_index)
|
||||
{
|
||||
image.queueFamilyIndexCount = 2;
|
||||
image.pQueueFamilyIndices = share_queues;
|
||||
}
|
||||
|
||||
vkCreateImage(device, &image, NULL, &vk.images[i].create_info.image);
|
||||
|
||||
VkMemoryAllocateInfo alloc = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
|
||||
|
@ -468,7 +505,10 @@ void retro_run(void)
|
|||
|
||||
vk.index = vulkan->get_sync_index(vulkan->handle);
|
||||
vulkan_test_render();
|
||||
vulkan->set_image(vulkan->handle, &vk.images[vk.index], 1, &vk.acquire_semaphores[vk.index], VK_QUEUE_FAMILY_IGNORED);
|
||||
vulkan->set_image(vulkan->handle, &vk.images[vk.index],
|
||||
1, &vk.acquire_semaphores[vk.index],
|
||||
async_queue && async_queue_index != vulkan->queue_index ?
|
||||
async_queue_index : VK_QUEUE_FAMILY_IGNORED);
|
||||
video_cb(RETRO_HW_FRAME_BUFFER_VALID, BASE_WIDTH, BASE_HEIGHT, 0);
|
||||
}
|
||||
|
||||
|
@ -517,6 +557,152 @@ static const VkApplicationInfo *get_application_info(void)
|
|||
return &info;
|
||||
}
|
||||
|
||||
static bool create_device(struct retro_vulkan_context *context,
|
||||
VkInstance instance,
|
||||
VkPhysicalDevice gpu,
|
||||
VkSurfaceKHR surface,
|
||||
PFN_vkGetInstanceProcAddr get_instance_proc_addr,
|
||||
const char **required_device_extensions,
|
||||
unsigned num_required_device_extensions,
|
||||
const char **required_device_layers,
|
||||
unsigned num_required_device_layers,
|
||||
const VkPhysicalDeviceFeatures *required_features)
|
||||
{
|
||||
async_queue = VK_NULL_HANDLE;
|
||||
vulkan_symbol_wrapper_init(get_instance_proc_addr);
|
||||
vulkan_symbol_wrapper_load_core_symbols(instance);
|
||||
|
||||
if (gpu == VK_NULL_HANDLE)
|
||||
{
|
||||
uint32_t gpu_count;
|
||||
vkEnumeratePhysicalDevices(instance, &gpu_count, NULL);
|
||||
if (!gpu_count)
|
||||
return false;
|
||||
VkPhysicalDevice *gpus = calloc(gpu_count, sizeof(*gpus));
|
||||
if (!gpus)
|
||||
return false;
|
||||
|
||||
vkEnumeratePhysicalDevices(instance, &gpu_count, gpus);
|
||||
gpu = gpus[0];
|
||||
free(gpus);
|
||||
}
|
||||
|
||||
context->gpu = gpu;
|
||||
|
||||
uint32_t queue_count;
|
||||
VkQueueFamilyProperties *queue_properties = NULL;
|
||||
vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_count, NULL);
|
||||
if (queue_count < 1)
|
||||
return false;
|
||||
queue_properties = calloc(queue_count, sizeof(*queue_properties));
|
||||
if (!queue_properties)
|
||||
return false;
|
||||
vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_count, queue_properties);
|
||||
|
||||
if (surface != VK_NULL_HANDLE)
|
||||
{
|
||||
VULKAN_SYMBOL_WRAPPER_LOAD_INSTANCE_EXTENSION_SYMBOL(instance,
|
||||
vkGetPhysicalDeviceSurfaceSupportKHR);
|
||||
}
|
||||
|
||||
bool found_queue = false;
|
||||
for (uint32_t i = 0; i < queue_count; i++)
|
||||
{
|
||||
VkBool32 supported = surface == VK_NULL_HANDLE;
|
||||
|
||||
if (surface != VK_NULL_HANDLE)
|
||||
{
|
||||
vkGetPhysicalDeviceSurfaceSupportKHR(
|
||||
gpu, i, surface, &supported);
|
||||
}
|
||||
|
||||
VkQueueFlags required = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
|
||||
if (supported && ((queue_properties[i].queueFlags & required) == required))
|
||||
{
|
||||
context->queue_family_index = i;
|
||||
found_queue = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found_queue)
|
||||
{
|
||||
free(queue_properties);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool same_queue_async = false;
|
||||
if (queue_properties[context->queue_family_index].queueCount >= 2)
|
||||
same_queue_async = true;
|
||||
|
||||
if (!same_queue_async)
|
||||
{
|
||||
found_queue = false;
|
||||
for (uint32_t i = 0; i < queue_count; i++)
|
||||
{
|
||||
if (i == context->queue_family_index)
|
||||
continue;
|
||||
|
||||
VkQueueFlags required = VK_QUEUE_COMPUTE_BIT;
|
||||
if ((queue_properties[i].queueFlags & required) == required)
|
||||
{
|
||||
async_queue_index = i;
|
||||
found_queue = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
async_queue_index = context->queue_family_index;
|
||||
|
||||
free(queue_properties);
|
||||
if (!found_queue)
|
||||
return false;
|
||||
|
||||
const float prios[] = { 0.5f, 0.5f };
|
||||
VkDeviceQueueCreateInfo queues[2] = {
|
||||
{ VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO },
|
||||
{ VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO },
|
||||
};
|
||||
|
||||
if (same_queue_async)
|
||||
{
|
||||
queues[0].queueFamilyIndex = context->queue_family_index;
|
||||
queues[0].queueCount = 2;
|
||||
queues[0].pQueuePriorities = prios;
|
||||
}
|
||||
else
|
||||
{
|
||||
queues[0].queueFamilyIndex = context->queue_family_index;
|
||||
queues[0].queueCount = 1;
|
||||
queues[0].pQueuePriorities = &prios[0];
|
||||
queues[1].queueFamilyIndex = async_queue_index;
|
||||
queues[1].queueCount = 1;
|
||||
queues[1].pQueuePriorities = &prios[1];
|
||||
}
|
||||
|
||||
VkDeviceCreateInfo device_info = { VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO };
|
||||
device_info.enabledExtensionCount = num_required_device_extensions;
|
||||
device_info.ppEnabledExtensionNames = required_device_extensions;
|
||||
device_info.enabledLayerCount = num_required_device_layers;
|
||||
device_info.ppEnabledLayerNames = required_device_layers;
|
||||
device_info.queueCreateInfoCount = same_queue_async ? 1 : 2;
|
||||
device_info.pQueueCreateInfos = queues;
|
||||
|
||||
if (vkCreateDevice(gpu, &device_info, NULL, &context->device) != VK_SUCCESS)
|
||||
return false;
|
||||
|
||||
vkGetDeviceQueue(context->device, context->queue_family_index, 0, &context->queue);
|
||||
if (same_queue_async)
|
||||
vkGetDeviceQueue(context->device, context->queue_family_index, 1, &async_queue);
|
||||
else
|
||||
vkGetDeviceQueue(context->device, async_queue_index, 0, &async_queue);
|
||||
|
||||
context->presentation_queue = context->queue;
|
||||
context->presentation_queue_family_index = context->queue_family_index;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool retro_init_hw_context(void)
|
||||
{
|
||||
hw_render.context_type = RETRO_HW_CONTEXT_VULKAN;
|
||||
|
@ -533,7 +719,7 @@ static bool retro_init_hw_context(void)
|
|||
RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_VULKAN_VERSION,
|
||||
|
||||
get_application_info,
|
||||
NULL,
|
||||
create_device,
|
||||
};
|
||||
|
||||
environ_cb(RETRO_ENVIRONMENT_SET_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE, (void*)&iface);
|
||||
|
|
|
@ -1246,6 +1246,7 @@ static bool vulkan_context_init_device(gfx_ctx_vulkan_data_t *vk)
|
|||
const VkPhysicalDeviceFeatures features = { 0 };
|
||||
|
||||
bool ret = iface->create_device(&context, vk->context.instance,
|
||||
VK_NULL_HANDLE,
|
||||
vk->vk_surface,
|
||||
vulkan_symbol_wrapper_instance_proc_addr(),
|
||||
device_extensions,
|
||||
|
@ -1348,7 +1349,7 @@ static bool vulkan_context_init_device(gfx_ctx_vulkan_data_t *vk)
|
|||
{
|
||||
VkBool32 supported = VK_FALSE;
|
||||
vkGetPhysicalDeviceSurfaceSupportKHR(
|
||||
vk->context.gpu, vk->context.graphics_queue_index,
|
||||
vk->context.gpu, i,
|
||||
vk->vk_surface, &supported);
|
||||
|
||||
VkQueueFlags required = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
|
||||
|
@ -1804,8 +1805,8 @@ void vulkan_context_destroy(gfx_ctx_vulkan_data_t *vk,
|
|||
if (!vk->context.instance)
|
||||
return;
|
||||
|
||||
if (vk->context.queue)
|
||||
vkQueueWaitIdle(vk->context.queue);
|
||||
if (vk->context.device)
|
||||
vkDeviceWaitIdle(vk->context.device);
|
||||
if (vk->swapchain)
|
||||
vkDestroySwapchainKHR(vk->context.device,
|
||||
vk->swapchain, NULL);
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
#include <libretro.h>
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
#define RETRO_HW_RENDER_INTERFACE_VULKAN_VERSION 4
|
||||
#define RETRO_HW_RENDER_INTERFACE_VULKAN_VERSION 5
|
||||
#define RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_VULKAN_VERSION 1
|
||||
|
||||
struct retro_vulkan_image
|
||||
|
@ -67,6 +67,7 @@ struct retro_vulkan_context
|
|||
typedef bool (*retro_vulkan_create_device_t)(
|
||||
struct retro_vulkan_context *context,
|
||||
VkInstance instance,
|
||||
VkPhysicalDevice gpu,
|
||||
VkSurfaceKHR surface,
|
||||
PFN_vkGetInstanceProcAddr get_instance_proc_addr,
|
||||
const char **required_device_extensions,
|
||||
|
@ -101,11 +102,17 @@ struct retro_hw_render_context_negotiation_interface_vulkan
|
|||
* The core must prepare a designated PhysicalDevice, Device, Queue and queue family index
|
||||
* which the frontend will use for its internal operation.
|
||||
*
|
||||
* If gpu is not VK_NULL_HANDLE, the physical device provided to the frontend must be this PhysicalDevice.
|
||||
* The core is still free to use other physical devices.
|
||||
*
|
||||
* The frontend will request certain extensions and layers for a device which is created.
|
||||
* The core must ensure that the queue and queue_family_index support GRAPHICS and COMPUTE.
|
||||
*
|
||||
* If surface is not VK_NULL_HANDLE, the core must consider presentation when creating the queues.
|
||||
* If presentation to "surface" is supported on the queue, presentation_queue must be equal to queue.
|
||||
* If not, a second queue must be provided in presentation_queue and presentation_queue_index.
|
||||
* If surface is not VK_NULL_HANDLE, the instance from frontend will have been created with support for the
|
||||
* VK_KHR_surface extension.
|
||||
*
|
||||
* The core is free to set its own queue priorities.
|
||||
* Device provided to frontend is owned by the frontend, but any additional device resources must be freed by core
|
||||
|
|
Loading…
Add table
Reference in a new issue