Vulkan: Make multithreaded rendering an option.

Multi-threaded rendering is good for smoothing out performance spikes
and increasing performance on low-power devices. However, the way we use
it doesn't benefit latency (input lag) at all, rather the opposite.

So make it an option.
This commit is contained in:
Henrik Rydgård 2023-07-23 19:20:55 +02:00
parent 95c5550071
commit b118e25b6a
14 changed files with 95 additions and 44 deletions

View file

@ -4,6 +4,12 @@
#include "Common/Log.h"
#include "Common/StringUtils.h"
// VLOG: verbose trace logging for this file.
// Currently compiled out: the condition is hard-coded to 0, with the intended
// "#ifdef _DEBUG" form left in the trailing comment. When the first branch is
// active, VLOG forwards its arguments to NOTICE_LOG on the G3D channel;
// otherwise it expands to nothing, so its arguments are never evaluated.
#if 0 // def _DEBUG
#define VLOG(...) NOTICE_LOG(G3D, __VA_ARGS__)
#else
#define VLOG(...)
#endif
void CachedReadback::Destroy(VulkanContext *vulkan) {
if (buffer) {
vulkan->Delete().QueueDeleteBufferAllocation(buffer, allocation);
@ -196,12 +202,16 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame
VkResult res;
if (fenceToTrigger == fence) {
VLOG("Doing queue submit, fencing frame %d", this->index);
// The fence is waited on by the main thread, they are not allowed to access it simultaneously.
res = vkQueueSubmit(vulkan->GetGraphicsQueue(), 1, &submit_info, fenceToTrigger);
std::lock_guard<std::mutex> lock(fenceMutex);
readyForFence = true;
fenceCondVar.notify_one();
if (sharedData.useMultiThreading) {
std::lock_guard<std::mutex> lock(fenceMutex);
readyForFence = true;
fenceCondVar.notify_one();
}
} else {
VLOG("Doing queue submit, fencing something (%p)", fenceToTrigger);
res = vkQueueSubmit(vulkan->GetGraphicsQueue(), 1, &submit_info, fenceToTrigger);
}
@ -219,7 +229,7 @@ void FrameData::SubmitPending(VulkanContext *vulkan, FrameSubmitType type, Frame
}
}
void FrameDataShared::Init(VulkanContext *vulkan) {
void FrameDataShared::Init(VulkanContext *vulkan, bool useMultiThreading) {
VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
semaphoreCreateInfo.flags = 0;
VkResult res = vkCreateSemaphore(vulkan->GetDevice(), &semaphoreCreateInfo, nullptr, &acquireSemaphore);
@ -230,6 +240,8 @@ void FrameDataShared::Init(VulkanContext *vulkan) {
// This fence is used for synchronizing readbacks. Does not need preinitialization.
readbackFence = vulkan->CreateFence(false);
vulkan->SetDebugName(readbackFence, VK_OBJECT_TYPE_FENCE, "readbackFence");
this->useMultiThreading = useMultiThreading;
}
void FrameDataShared::Destroy(VulkanContext *vulkan) {

View file

@ -53,8 +53,9 @@ struct FrameDataShared {
// For synchronous readbacks.
VkFence readbackFence = VK_NULL_HANDLE;
bool useMultiThreading;
void Init(VulkanContext *vulkan);
void Init(VulkanContext *vulkan, bool useMultiThreading);
void Destroy(VulkanContext *vulkan);
};

View file

@ -247,15 +247,16 @@ bool VKRComputePipeline::CreateAsync(VulkanContext *vulkan) {
return true;
}
VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan)
VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan, bool useThread)
: vulkan_(vulkan), queueRunner_(vulkan),
initTimeMs_("initTimeMs"),
totalGPUTimeMs_("totalGPUTimeMs"),
renderCPUTimeMs_("renderCPUTimeMs")
renderCPUTimeMs_("renderCPUTimeMs"),
useRenderThread_(useThread)
{
inflightFramesAtStart_ = vulkan_->GetInflightFrames();
frameDataShared_.Init(vulkan);
frameDataShared_.Init(vulkan, useThread);
for (int i = 0; i < inflightFramesAtStart_; i++) {
frameData_[i].Init(vulkan, i);
@ -292,12 +293,14 @@ bool VulkanRenderManager::CreateBackbuffers() {
outOfDateFrames_ = 0;
// Start the thread.
// Start the thread(s).
if (HasBackbuffers()) {
run_ = true; // For controlling the compiler thread's exit
INFO_LOG(G3D, "Starting Vulkan submission thread");
thread_ = std::thread(&VulkanRenderManager::ThreadFunc, this);
if (useRenderThread_) {
INFO_LOG(G3D, "Starting Vulkan submission thread");
thread_ = std::thread(&VulkanRenderManager::ThreadFunc, this);
}
INFO_LOG(G3D, "Starting Vulkan compiler thread");
compileThread_ = std::thread(&VulkanRenderManager::CompileThreadFunc, this);
}
@ -306,7 +309,8 @@ bool VulkanRenderManager::CreateBackbuffers() {
// Called from main thread.
void VulkanRenderManager::StopThread() {
{
if (useRenderThread_) {
_dbg_assert_(thread_.joinable());
// Tell the render thread to quit when it's done.
VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::EXIT);
task->frame = vulkan_->GetCurFrame();
@ -319,7 +323,9 @@ void VulkanRenderManager::StopThread() {
run_ = false;
// Stop the thread.
thread_.join();
if (useRenderThread_) {
thread_.join();
}
for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
auto &frameData = frameData_[i];
@ -492,6 +498,8 @@ void VulkanRenderManager::DrainCompileQueue() {
void VulkanRenderManager::ThreadFunc() {
SetCurrentThreadName("RenderMan");
while (true) {
_dbg_assert_(useRenderThread_);
// Pop a task of the queue and execute it.
VKRRenderThreadTask *task = nullptr;
{
@ -534,7 +542,7 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
// Makes sure the submission from the previous time around has happened. Otherwise
// we are not allowed to wait from another thread here..
{
if (useRenderThread_) {
std::unique_lock<std::mutex> lock(frameData.fenceMutex);
while (!frameData.readyForFence) {
frameData.fenceCondVar.wait(lock);
@ -1263,11 +1271,16 @@ void VulkanRenderManager::Finish() {
VLOG("PUSH: Frame[%d]", curFrame);
VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::PRESENT);
task->frame = curFrame;
{
if (useRenderThread_) {
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(task);
renderThreadQueue_.back()->steps = std::move(steps_);
pushCondVar_.notify_one();
} else {
// Just do it!
task->steps = std::move(steps_);
Run(*task);
delete task;
}
steps_.clear();
@ -1348,7 +1361,7 @@ void VulkanRenderManager::Run(VKRRenderThreadTask &task) {
// The submit will trigger the readbackFence, and also do the wait for it.
frameData.SubmitPending(vulkan_, FrameSubmitType::Sync, frameDataShared_);
{
if (useRenderThread_) {
std::unique_lock<std::mutex> lock(syncMutex_);
syncCondVar_.notify_one();
}
@ -1374,24 +1387,34 @@ void VulkanRenderManager::FlushSync() {
int curFrame = vulkan_->GetCurFrame();
FrameData &frameData = frameData_[curFrame];
{
VLOG("PUSH: Frame[%d]", curFrame);
if (useRenderThread_) {
{
VLOG("PUSH: Frame[%d]", curFrame);
VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SYNC);
task->frame = curFrame;
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(task);
renderThreadQueue_.back()->steps = std::move(steps_);
pushCondVar_.notify_one();
steps_.clear();
}
{
std::unique_lock<std::mutex> lock(syncMutex_);
// Wait for the flush to be hit, since we're syncing.
while (!frameData.syncDone) {
VLOG("PUSH: Waiting for frame[%d].syncDone = 1 (sync)", curFrame);
syncCondVar_.wait(lock);
}
frameData.syncDone = false;
}
} else {
VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SYNC);
task->frame = curFrame;
std::unique_lock<std::mutex> lock(pushMutex_);
renderThreadQueue_.push(task);
renderThreadQueue_.back()->steps = std::move(steps_);
pushCondVar_.notify_one();
}
{
std::unique_lock<std::mutex> lock(syncMutex_);
// Wait for the flush to be hit, since we're syncing.
while (!frameData.syncDone) {
VLOG("PUSH: Waiting for frame[%d].syncDone = 1 (sync)", curFrame);
syncCondVar_.wait(lock);
}
frameData.syncDone = false;
task->steps = std::move(steps_);
Run(*task);
delete task;
steps_.clear();
}
}

View file

@ -181,7 +181,7 @@ struct CompileQueueEntry {
class VulkanRenderManager {
public:
VulkanRenderManager(VulkanContext *vulkan);
VulkanRenderManager(VulkanContext *vulkan, bool useThread);
~VulkanRenderManager();
// Makes sure that the GPU has caught up enough that we can start writing buffers of this frame again.
@ -489,6 +489,8 @@ private:
bool insideFrame_ = false;
bool run_ = false;
bool useRenderThread_ = true;
// This is the offset within this frame, in case of a mid-frame sync.
VKRStep *curRenderStep_ = nullptr;
bool curStepHasViewport_ = false;

View file

@ -384,7 +384,7 @@ class VKFramebuffer;
class VKContext : public DrawContext {
public:
VKContext(VulkanContext *vulkan);
VKContext(VulkanContext *vulkan, bool useRenderThread);
~VKContext();
void DebugAnnotate(const char *annotation) override;
@ -857,8 +857,8 @@ static DataFormat DataFormatFromVulkanDepth(VkFormat fmt) {
return DataFormat::UNDEFINED;
}
VKContext::VKContext(VulkanContext *vulkan)
: vulkan_(vulkan), renderManager_(vulkan) {
VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
: vulkan_(vulkan), renderManager_(vulkan, useRenderThread) {
shaderLanguageDesc_.Init(GLSL_VULKAN);
VkFormat depthStencilFormat = vulkan->GetDeviceInfo().preferredDepthStencilFormat;
@ -1582,8 +1582,8 @@ void VKContext::Clear(int clearMask, uint32_t colorval, float depthVal, int sten
renderManager_.Clear(colorval, depthVal, stencilVal, mask);
}
DrawContext *T3DCreateVulkanContext(VulkanContext *vulkan) {
return new VKContext(vulkan);
DrawContext *T3DCreateVulkanContext(VulkanContext *vulkan, bool useRenderThread) {
return new VKContext(vulkan, useRenderThread);
}
void AddFeature(std::vector<std::string> &features, const char *name, VkBool32 available, VkBool32 enabled) {

View file

@ -31,6 +31,6 @@ DrawContext *T3DCreateDX9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapt
DrawContext *T3DCreateD3D11Context(ID3D11Device *device, ID3D11DeviceContext *context, ID3D11Device1 *device1, ID3D11DeviceContext1 *context1, D3D_FEATURE_LEVEL featureLevel, HWND hWnd, std::vector<std::string> adapterNames);
#endif
DrawContext *T3DCreateVulkanContext(VulkanContext *context);
DrawContext *T3DCreateVulkanContext(VulkanContext *context, bool useRenderThread);
} // namespace Draw

View file

@ -611,6 +611,8 @@ static const ConfigSetting graphicsSettings[] = {
ConfigSetting("InflightFrames", &g_Config.iInflightFrames, 3, CfgFlag::DEFAULT),
ConfigSetting("RenderDuplicateFrames", &g_Config.bRenderDuplicateFrames, false, CfgFlag::PER_GAME),
ConfigSetting("MultiThreading", &g_Config.bRenderMultiThreading, true, CfgFlag::DEFAULT),
ConfigSetting("ShaderCache", &g_Config.bShaderCache, true, CfgFlag::DONT_SAVE), // Doesn't save. Ini-only.
ConfigSetting("GpuLogProfiler", &g_Config.bGpuLogProfiler, false, CfgFlag::DEFAULT),
};

View file

@ -238,6 +238,7 @@ public:
bool bGfxDebugOutput;
int iInflightFrames;
bool bRenderDuplicateFrames;
bool bRenderMultiThreading;
// Sound
bool bEnableSound;

View file

@ -136,7 +136,7 @@ bool SDLVulkanGraphicsContext::Init(SDL_Window *&window, int x, int y, int w, in
return false;
}
draw_ = Draw::T3DCreateVulkanContext(vulkan_);
draw_ = Draw::T3DCreateVulkanContext(vulkan_, g_Config.bRenderMultiThreading);
SetGPUBackend(GPUBackend::VULKAN);
bool success = draw_->CreatePresets();
_assert_(success);
@ -144,7 +144,6 @@ bool SDLVulkanGraphicsContext::Init(SDL_Window *&window, int x, int y, int w, in
renderManager_ = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
renderManager_->SetInflightFrames(g_Config.iInflightFrames);
return true;
}

View file

@ -31,6 +31,7 @@
#include "Common/System/Display.h" // Only to check screen aspect ratio with pixel_yres/pixel_xres
#include "Common/System/Request.h"
#include "Common/System/OSD.h"
#include "Common/Battery/Battery.h"
#include "Common/System/NativeApp.h"
#include "Common/Data/Color/RGBAUtil.h"
@ -1667,6 +1668,15 @@ void DeveloperToolsScreen::CreateViews() {
cpuTests->SetEnabled(TestsAvailable());
#endif
if (g_Config.iGPUBackend == (int)GPUBackend::VULKAN) {
list->Add(new CheckBox(&g_Config.bRenderMultiThreading, dev->T("Multi-threaded rendering"), ""))->OnClick.Add([](UI::EventParams &e) {
// TODO: Not translating yet. Will combine with other translations of settings that need restart.
g_OSD.Show(OSDType::MESSAGE_WARNING, "Restart required");
return UI::EVENT_DONE;
});
}
// For now, we only implement GPU driver tests for Vulkan and OpenGL. This is simply
// because the D3D drivers are generally solid enough to not need this type of investigation.
if (g_Config.iGPUBackend == (int)GPUBackend::VULKAN || g_Config.iGPUBackend == (int)GPUBackend::OPENGL) {

View file

@ -131,7 +131,7 @@ bool WindowsVulkanContext::Init(HINSTANCE hInst, HWND hWnd, std::string *error_m
return false;
}
draw_ = Draw::T3DCreateVulkanContext(vulkan_);
draw_ = Draw::T3DCreateVulkanContext(vulkan_, g_Config.bRenderMultiThreading);
SetGPUBackend(GPUBackend::VULKAN, vulkan_->GetPhysicalDeviceProperties(deviceNum).properties.deviceName);
bool success = draw_->CreatePresets();
_assert_msg_(success, "Failed to compile preset shaders");

View file

@ -113,7 +113,7 @@ bool AndroidVulkanContext::InitFromRenderThread(ANativeWindow *wnd, int desiredB
bool success = true;
if (g_Vulkan->InitSwapchain()) {
draw_ = Draw::T3DCreateVulkanContext(g_Vulkan);
draw_ = Draw::T3DCreateVulkanContext(g_Vulkan, g_Config.bRenderMultiThreading);
SetGPUBackend(GPUBackend::VULKAN);
success = draw_->CreatePresets(); // Doesn't fail, we ship the compiler.
_assert_msg_(success, "Failed to compile preset shaders");

View file

@ -308,6 +308,7 @@ Log Dropped Frame Statistics = Log dropped frame statistics
Log Level = Log level
Log View = Log view
Logging Channels = Logging channels
Multi-threaded rendering = Multi-threaded rendering
Next = Next
No block = No block
Prev = Previous

View file

@ -139,7 +139,7 @@ void LibretroVulkanContext::CreateDrawContext() {
return;
}
draw_ = Draw::T3DCreateVulkanContext(vk);
draw_ = Draw::T3DCreateVulkanContext(vk, true);
((VulkanRenderManager*)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER))->SetInflightFrames(g_Config.iInflightFrames);
SetGPUBackend(GPUBackend::VULKAN);
}