Vulkan: Rough untested implementation of synchronous readbacks.

This commit is contained in:
Henrik Rydgård 2017-10-28 18:03:27 +02:00
parent 717ec2387b
commit b98d4e5c9d
7 changed files with 187 additions and 50 deletions

View file

@ -61,8 +61,7 @@ void CGEDebugger::Init() {
}
CGEDebugger::CGEDebugger(HINSTANCE _hInstance, HWND _hParent)
: Dialog((LPCSTR)IDD_GEDEBUGGER, _hInstance, _hParent), primaryWindow(nullptr), secondWindow(nullptr),
textureLevel_(0), showClut_(false), primaryBuffer_(nullptr), secondBuffer_(nullptr) {
: Dialog((LPCSTR)IDD_GEDEBUGGER, _hInstance, _hParent) {
GPUBreakpoints::Init();
Core_ListenShutdown(ForceUnpause);

View file

@ -85,25 +85,25 @@ private:
u32 TexturePreviewFlags(const GPUgstate &state);
CtrlDisplayListView *displayList;
TabDisplayLists *lists;
TabStateFlags *flags;
TabStateLighting *lighting;
TabStateTexture *textureState;
TabStateSettings *settings;
TabVertices *vertices;
TabMatrices *matrices;
SimpleGLWindow *primaryWindow;
SimpleGLWindow *secondWindow;
TabStateWatch *watch;
TabControl *tabs;
TabControl *fbTabs;
int textureLevel_;
bool showClut_;
bool forceOpaque_;
CtrlDisplayListView *displayList = nullptr;
TabDisplayLists *lists = nullptr;
TabStateFlags *flags = nullptr;
TabStateLighting *lighting = nullptr;
TabStateTexture *textureState = nullptr;
TabStateSettings *settings = nullptr;
TabVertices *vertices = nullptr;
TabMatrices *matrices = nullptr;
SimpleGLWindow *primaryWindow = nullptr;
SimpleGLWindow *secondWindow = nullptr;
TabStateWatch *watch = nullptr;
TabControl *tabs = nullptr;
TabControl *fbTabs = nullptr;
int textureLevel_ = 0;
bool showClut_ = false;
bool forceOpaque_ = false;
// The most recent primary/framebuffer and texture buffers.
const GPUDebugBuffer *primaryBuffer_;
const GPUDebugBuffer *secondBuffer_;
const GPUDebugBuffer *primaryBuffer_ = nullptr;
const GPUDebugBuffer *secondBuffer_ = nullptr;
int minWidth_;
int minHeight_;

View file

@ -1,13 +1,40 @@
#include <cstring>

#include "VulkanQueueRunner.h"
#include "VulkanRenderManager.h"
// Size in bytes of the readback buffer: 2048 * 2048 pixels at 4 bytes per pixel (16 MiB).
const uint32_t readbackBufferSize = 2048 * 2048 * 4;
void VulkanQueueRunner::CreateDeviceObjects() {
InitBackbufferRenderPass();
InitRenderpasses();
VkDevice device = vulkan_->GetDevice();
VkBufferCreateInfo buf{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
buf.size = readbackBufferSize;
buf.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
vkCreateBuffer(device, &buf, nullptr, &readbackBuffer_);
VkMemoryRequirements reqs{};
vkGetBufferMemoryRequirements(device, readbackBuffer_, &reqs);
VkMemoryAllocateInfo alloc{ VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
alloc.allocationSize = reqs.size;
VkFlags typeReqs = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
bool success = vulkan_->MemoryTypeFromProperties(reqs.memoryTypeBits, typeReqs, &alloc.memoryTypeIndex);
assert(success);
vkAllocateMemory(device, &alloc, nullptr, &readbackMemory_);
uint32_t offset = 0;
vkBindBufferMemory(device, readbackBuffer_, readbackMemory_, offset);
}
void VulkanQueueRunner::DestroyDeviceObjects() {
VkDevice device = vulkan_->GetDevice();
vkFreeMemory(device, readbackMemory_, nullptr);
vulkan_->Delete().QueueDeleteBuffer(readbackBuffer_);
for (int i = 0; i < ARRAY_SIZE(renderPasses_); i++) {
assert(renderPasses_[i] != VK_NULL_HANDLE);
vkDestroyRenderPass(device, renderPasses_[i], nullptr);
@ -16,7 +43,6 @@ void VulkanQueueRunner::DestroyDeviceObjects() {
vkDestroyRenderPass(device, backbufferRenderPass_, nullptr);
}
void VulkanQueueRunner::InitBackbufferRenderPass() {
VkResult U_ASSERT_ONLY res;
@ -167,7 +193,7 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, const std::vector<VKRStep
PerformBlit(step, cmd);
break;
case VKRStepType::READBACK:
// PerformReadback
PerformReadback(step, cmd);
break;
}
delete steps[i];
@ -688,3 +714,39 @@ void VulkanQueueRunner::SetupTransitionToTransferDst(VKRImage &img, VkImageMemor
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
img.layout = barrier.newLayout;
}
// Records a copy of the rectangle step.readback.srcRect from the selected framebuffer image
// into readbackBuffer_, starting at buffer offset 0.
// This only records commands into cmd. The data must not be read by the CPU until the command
// buffer has been submitted and has finished executing (see CopyReadbackBuffer).
void VulkanQueueRunner::PerformReadback(const VKRStep &step, VkCommandBuffer cmd) {
	// Color takes priority; depth and/or stencil are read from the depth image.
	VKRImage *srcImage = nullptr;
	if (step.readback.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
		srcImage = &step.readback.src->color;
	} else if (step.readback.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
		srcImage = &step.readback.src->depth;
	} else {
		// No usable aspect requested. Bail out so that builds with assert() compiled out
		// don't go on to dereference an unset image pointer.
		assert(false);
		return;
	}

	VkBufferImageCopy region{};
	region.imageOffset = { step.readback.srcRect.offset.x, step.readback.srcRect.offset.y, 0 };
	region.imageExtent = { step.readback.srcRect.extent.width, step.readback.srcRect.extent.height, 1 };
	region.imageSubresource.aspectMask = step.readback.aspectMask;
	// Zero-initialization already selects mip level 0 and array layer 0, but a layer count
	// of 0 is not valid for a copy - exactly one layer is read.
	region.imageSubresource.layerCount = 1;
	region.bufferOffset = 0;
	// 0 means rows are tightly packed in the buffer according to imageExtent.width.
	region.bufferRowLength = 0;
	region.bufferImageHeight = step.readback.srcRect.extent.height;

	// NOTE(review): the image's current layout is passed through unchanged. The copy needs a
	// layout that is valid as a transfer source, so presumably a transition is required
	// before this point - confirm against the callers / SetupTransitionToTransferSrc.
	vkCmdCopyImageToBuffer(cmd, srcImage->image, srcImage->layout, readbackBuffer_, 1, &region);

	// NOTE: Can't read the buffer using the CPU here - need to sync first.
}
// Copies the image previously read back into the readback buffer out to step.readback.destPtr,
// one row at a time. Source rows are tightly packed; destination rows are
// step.readback.pixelStride pixels apart.
// Must only be called after the GPU has finished executing the copy recorded by PerformReadback.
void VulkanQueueRunner::CopyReadbackBuffer(const VKRStep &step) {
	const int pixelSize = 4;  // TODO: Fix.
	const int width = (int)step.readback.srcRect.extent.width;
	const int height = (int)step.readback.srcRect.extent.height;
	if (width <= 0 || height <= 0) {
		// Nothing to copy, and mapping a zero-sized range is not valid.
		return;
	}
	const size_t rowBytes = (size_t)width * pixelSize;

	// Read back to the requested address in ram from buffer.
	void *mappedData = nullptr;
	VkResult res = vkMapMemory(vulkan_->GetDevice(), readbackMemory_, 0, (VkDeviceSize)rowBytes * height, 0, &mappedData);
	assert(res == VK_SUCCESS);
	if (res != VK_SUCCESS || !mappedData) {
		return;
	}

	for (int y = 0; y < height; y++) {
		// The source offset is in bytes, so it needs the pixel size factor just like the destination.
		const uint8_t *src = (const uint8_t *)mappedData + rowBytes * y;
		uint8_t *dst = (uint8_t *)step.readback.destPtr + step.readback.pixelStride * pixelSize * y;
		memcpy(dst, src, rowBytes);
	}

	vkUnmapMemory(vulkan_->GetDevice(), readbackMemory_);
}

View file

@ -127,8 +127,10 @@ struct VKRStep {
VkFilter filter;
} blit;
struct {
int aspectMask;
VKRFramebuffer *src;
void *destPtr;
uint8_t *destPtr;
int pixelStride;
VkRect2D srcRect;
} readback;
};
@ -156,6 +158,8 @@ public:
return (int)depth * 3 + (int)color;
}
void CopyReadbackBuffer(const VKRStep &step);
private:
void InitBackbufferRenderPass();
void InitRenderpasses();
@ -164,6 +168,7 @@ private:
void PerformRenderPass(const VKRStep &pass, VkCommandBuffer cmd);
void PerformCopy(const VKRStep &pass, VkCommandBuffer cmd);
void PerformBlit(const VKRStep &pass, VkCommandBuffer cmd);
void PerformReadback(const VKRStep &pass, VkCommandBuffer cmd);
static void SetupTransitionToTransferSrc(VKRImage &img, VkImageMemoryBarrier &barrier, VkPipelineStageFlags &stage, VkImageAspectFlags aspect);
static void SetupTransitionToTransferDst(VKRImage &img, VkImageMemoryBarrier &barrier, VkPipelineStageFlags &stage, VkImageAspectFlags aspect);
@ -177,4 +182,9 @@ private:
// Renderpasses, all combinations of preserving or clearing or dont-care-ing fb contents.
// TODO: Create these on demand.
VkRenderPass renderPasses_[9]{};
};
// Readback buffer. Currently we only support synchronous readback, so we only really need one.
// We size it generously.
VkDeviceMemory readbackMemory_;
VkBuffer readbackBuffer_;
};

View file

@ -287,9 +287,11 @@ VkCommandBuffer VulkanRenderManager::GetInitCmd() {
int curFrame = vulkan_->GetCurFrame();
FrameData &frameData = frameData_[curFrame];
if (!frameData.hasInitCommands) {
VkCommandBufferBeginInfo begin = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
begin.pInheritanceInfo = nullptr;
VkCommandBufferBeginInfo begin = {
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
nullptr,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
};
VkResult res = vkBeginCommandBuffer(frameData.initCmd, &begin);
assert(res == VK_SUCCESS);
frameData.hasInitCommands = true;
@ -297,10 +299,6 @@ VkCommandBuffer VulkanRenderManager::GetInitCmd() {
return frameData_[curFrame].initCmd;
}
void VulkanRenderManager::Sync() {
}
void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassAction color, VKRRenderPassAction depth, uint32_t clearColor, float clearDepth, uint8_t clearStencil) {
// Eliminate dupes.
if (steps_.size() && steps_.back()->stepType == VKRStepType::RENDER && steps_.back()->render.framebuffer == fb) {
@ -327,6 +325,25 @@ void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRR
curHeight_ = fb ? fb->height : vulkan_->GetBackbufferHeight();
}
// Synchronously reads back the rectangle (x, y, w, h) of the given aspect(s) of src into pixels.
// pixelStride is the destination row pitch, in pixels.
// Queues a READBACK step, flushes with FlushSync(), then copies the result out of the
// readback buffer.
void VulkanRenderManager::CopyFramebufferToMemorySync(VKRFramebuffer *src, int aspectBits, int x, int y, int w, int h, uint8_t *pixels, int pixelStride) {
	VKRStep *step = new VKRStep{ VKRStepType::READBACK };
	step->readback.aspectMask = aspectBits;
	step->readback.destPtr = pixels;
	step->readback.pixelStride = pixelStride;
	step->readback.src = src;
	step->readback.srcRect.offset = { x, y };
	step->readback.srcRect.extent = { (uint32_t)w, (uint32_t)h };

	// Keep our own copy of the parameters. Once the step is pushed it belongs to the queue,
	// and the queue runner deletes steps after executing them, so the pointer must not be
	// dereferenced after the flush.
	VKRStep readbackParams{ VKRStepType::READBACK };
	readbackParams.readback = step->readback;

	steps_.push_back(step);
	curRenderStep_ = nullptr;

	FlushSync();

	// Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM.
	queueRunner_.CopyReadbackBuffer(readbackParams);
}
void VulkanRenderManager::InitBackbufferFramebuffers(int width, int height) {
VkResult U_ASSERT_ONLY res;
// We share the same depth buffer but have multiple color buffers, see the loop below.
@ -513,9 +530,6 @@ void VulkanRenderManager::Finish() {
curRenderStep_ = nullptr;
int curFrame = vulkan_->GetCurFrame();
FrameData &frameData = frameData_[curFrame];
if (frameData.hasInitCommands) {
vkEndCommandBuffer(frameData.initCmd);
}
if (!useThread) {
frameData.steps = std::move(steps_);
Run(curFrame);
@ -530,7 +544,7 @@ void VulkanRenderManager::Finish() {
}
// Can be called multiple times with no bad side effects. This is so that we can either begin a frame the normal way,
void VulkanRenderManager::BeginFrame(int frame) {
void VulkanRenderManager::BeginSubmitFrame(int frame) {
FrameData &frameData = frameData_[frame];
if (!frameData.hasBegun) {
// Get the index of the next available swapchain image, and a semaphore to block command buffer execution on.
@ -555,18 +569,19 @@ void VulkanRenderManager::BeginFrame(int frame) {
}
}
void VulkanRenderManager::EndFrame(int frame) {
void VulkanRenderManager::Submit(int frame) {
FrameData &frameData = frameData_[frame];
frameData.hasBegun = false;
insideFrame_ = false;
TransitionToPresent(frameData.mainCmd, swapchainImages_[frameData.curSwapchainImage].image);
if (frameData.hasInitCommands) {
VkResult res = vkEndCommandBuffer(frameData.initCmd);
assert(res == VK_SUCCESS);
}
VkResult res = vkEndCommandBuffer(frameData.mainCmd);
assert(res == VK_SUCCESS);
int numCmdBufs = 0;
std::vector<VkCommandBuffer> cmdBufs;
cmdBufs.reserve(2);
if (frameData.hasInitCommands) {
cmdBufs.push_back(frameData.initCmd);
frameData.hasInitCommands = false;
@ -593,6 +608,16 @@ void VulkanRenderManager::EndFrame(int frame) {
frameData.readyForFence = true;
frameData.push_condVar.notify_all();
}
}
void VulkanRenderManager::EndSubmitFrame(int frame) {
FrameData &frameData = frameData_[frame];
frameData.hasBegun = false;
insideFrame_ = false;
TransitionToPresent(frameData.mainCmd, swapchainImages_[frameData.curSwapchainImage].image);
Submit(frame);
VkSwapchainKHR swapchain = vulkan_->GetSwapchain();
VkPresentInfoKHR present = { VK_STRUCTURE_TYPE_PRESENT_INFO_KHR };
@ -602,7 +627,8 @@ void VulkanRenderManager::EndFrame(int frame) {
present.pWaitSemaphores = &renderingCompleteSemaphore_;
present.waitSemaphoreCount = 1;
present.pResults = nullptr;
res = vkQueuePresentKHR(vulkan_->GetGraphicsQueue(), &present);
VkResult res = vkQueuePresentKHR(vulkan_->GetGraphicsQueue(), &present);
// TODO: Deal with the VK_SUBOPTIMAL_KHR and VK_ERROR_OUT_OF_DATE_KHR
// return codes
if (res == VK_ERROR_OUT_OF_DATE_KHR) {
@ -615,7 +641,7 @@ void VulkanRenderManager::EndFrame(int frame) {
void VulkanRenderManager::Run(int frame) {
VkDevice device = vulkan_->GetDevice();
BeginFrame(frame);
BeginSubmitFrame(frame);
FrameData &frameData = frameData_[frame];
auto &stepsOnThread = frameData_[frame].steps;
@ -623,8 +649,33 @@ void VulkanRenderManager::Run(int frame) {
queueRunner_.RunSteps(cmd, stepsOnThread);
stepsOnThread.clear();
EndFrame(frame);
EndSubmitFrame(frame);
VLOG("PULL: Finished running frame %d", frame);
}
void VulkanRenderManager::FlushSync() {
int frame = vulkan_->GetCurFrame();
BeginSubmitFrame(frame);
FrameData &frameData = frameData_[frame];
auto &stepsOnThread = frameData_[frame].steps;
VkCommandBuffer cmd = frameData.mainCmd;
queueRunner_.RunSteps(cmd, stepsOnThread);
stepsOnThread.clear();
Submit(frame);
vkDeviceWaitIdle(vulkan_->GetDevice());
// At this point we can resume filling the command buffers for the current frame since
// we know the device is idle - and thus all previously enqueued command buffers have been processed.
// No need to switch to the next frame number.
VkCommandBufferBeginInfo begin = {
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
nullptr,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
};
vkBeginCommandBuffer(frameData_->mainCmd, &begin);
}

View file

@ -78,16 +78,13 @@ public:
// Makes sure that the GPU has caught up enough that we can start writing buffers of this frame again.
void BeginFrame();
void EndFrame();
// Can run on a different thread! Just make sure to use BeginFrameWrites.
// Can run on a different thread!
void Finish();
void Run(int frame);
// Bad for performance but sometimes necessary for synchronous CPU readbacks (screenshots and whatnot).
void Sync();
void BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassAction color, VKRRenderPassAction depth, uint32_t clearColor, float clearDepth, uint8_t clearStencil);
VkImageView BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, int aspectBit, int attachment);
void CopyFramebufferToMemorySync(VKRFramebuffer *src, int aspectBits, int x, int y, int w, int h, uint8_t *pixels, int pixelStride);
void CopyFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, VKRFramebuffer *dst, VkOffset2D dstPos, int aspectMask);
void BlitFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, VKRFramebuffer *dst, VkRect2D dstRect, int aspectMask, VkFilter filter);
@ -187,8 +184,12 @@ public:
private:
void InitBackbufferFramebuffers(int width, int height);
void InitDepthStencilBuffer(VkCommandBuffer cmd); // Used for non-buffered rendering.
void BeginFrame(int frame);
void EndFrame(int frame);
void BeginSubmitFrame(int frame);
void EndSubmitFrame(int frame);
void Submit(int frame);
// Bad for performance but sometimes necessary for synchronous CPU readbacks (screenshots and whatnot).
void FlushSync();
// Permanent objects
VkSemaphore acquireSemaphore_;

View file

@ -372,6 +372,7 @@ public:
void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits) override;
bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) override;
bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride);
// These functions should be self explanatory.
void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) override;
@ -1284,6 +1285,19 @@ bool VKContext::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int sr
return true;
}
// Synchronous framebuffer readback: translates the FBChannel bits in channelBits into the
// matching Vulkan image aspect bits and hands the request to the render manager.
// The format argument is not consulted here. Always returns true.
bool VKContext::CopyFramebufferToMemorySync(Framebuffer *srcfb, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride) {
	const int colorAspect = (channelBits & FBChannel::FB_COLOR_BIT) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
	const int depthAspect = (channelBits & FBChannel::FB_DEPTH_BIT) ? VK_IMAGE_ASPECT_DEPTH_BIT : 0;
	const int stencilAspect = (channelBits & FBChannel::FB_STENCIL_BIT) ? VK_IMAGE_ASPECT_STENCIL_BIT : 0;
	const int vkAspects = colorAspect | depthAspect | stencilAspect;

	VKFramebuffer *vkSrc = (VKFramebuffer *)srcfb;
	uint8_t *dest = (uint8_t *)pixels;
	renderManager_.CopyFramebufferToMemorySync(vkSrc->GetFB(), vkAspects, x, y, w, h, dest, pixelStride);
	return true;
}
void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) {
VKFramebuffer *fb = (VKFramebuffer *)fbo;
VKRRenderPassAction color = (VKRRenderPassAction)rp.color; // same values.