Be more economical with UBO pushbuffer space by reusing the last data when possible.

This commit is contained in:
Henrik Rydgard 2016-03-20 16:33:34 +01:00
parent 5d19f3dfb8
commit 0b1cfaf751
9 changed files with 98 additions and 46 deletions

View file

@ -56,6 +56,7 @@ public:
size_t Allocate(size_t numBytes, VkBuffer *vkbuf) {
size_t out = offset_;
offset_ += (numBytes + 3) & ~3; // Round up to 4 bytes.
if (offset_ >= size_) {
// TODO: Allocate a second buffer, then combine them on the next frame.
#ifdef _WIN32

View file

@ -544,6 +544,7 @@ float FromScaledDepth(float z) {
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {
bool throughmode = gstate.isModeThrough();
out.dirtyProj = false;
out.dirtyDepth = false;
float renderWidthFactor, renderHeightFactor;
float renderX = 0.0f, renderY = 0.0f;

View file

@ -130,7 +130,7 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan)
assert(VK_SUCCESS == res);
VkDescriptorPoolSize dpTypes[2];
dpTypes[0].descriptorCount = 800;
dpTypes[0].descriptorCount = 2048;
dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
dpTypes[1].descriptorCount = 200;
dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
@ -213,6 +213,8 @@ void DrawEngineVulkan::BeginFrame() {
frame->pushUBO->Begin(vulkan_->GetDevice());
frame->pushVertex->Begin(vulkan_->GetDevice());
frame->pushIndex->Begin(vulkan_->GetDevice());
DirtyAllUBOs();
}
void DrawEngineVulkan::EndFrame() {
@ -411,6 +413,9 @@ VkDescriptorSet DrawEngineVulkan::GetDescriptorSet(VkImageView imageView, VkSamp
key.base_ = base;
key.light_ = light;
key.bone_ = bone;
assert(base != VK_NULL_HANDLE);
assert(light != VK_NULL_HANDLE);
assert(bone != VK_NULL_HANDLE);
FrameData *frame = &frame_[curFrame_ & 1];
auto iter = frame->descSets.find(key);
@ -455,19 +460,24 @@ VkDescriptorSet DrawEngineVulkan::GetDescriptorSet(VkImageView imageView, VkSamp
// Skipping 2nd texture for now.
// Uniform buffer objects
VkDescriptorBufferInfo buf[3];
buf[0].buffer = base;
buf[0].offset = 0;
buf[0].range = sizeof(UB_VS_FS_Base);
buf[1].buffer = light;
buf[1].offset = 0;
buf[1].range = sizeof(UB_VS_Lights);
buf[2].buffer = bone;
buf[2].offset = 0;
buf[2].range = sizeof(UB_VS_Bones);
for (int i = 0; i < 3; i++) {
int count = 0;
buf[count].buffer = base;
buf[count].offset = 0;
buf[count].range = sizeof(UB_VS_FS_Base);
count++;
buf[count].buffer = light;
buf[count].offset = 0;
buf[count].range = sizeof(UB_VS_Lights);
count++;
buf[count].buffer = bone;
buf[count].offset = 0;
buf[count].range = sizeof(UB_VS_Bones);
count++;
for (int i = 0; i < count; i++) {
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[n].pNext = nullptr;
writes[n].dstBinding = DRAW_BINDING_DYNUBO_BASE + i;
writes[n].dstArrayElement = 0;
writes[n].pBufferInfo = &buf[i];
writes[n].dstSet = desc;
writes[n].descriptorCount = 1;
@ -481,6 +491,16 @@ VkDescriptorSet DrawEngineVulkan::GetDescriptorSet(VkImageView imageView, VkSamp
return desc;
}
// Forgets every cached uniform-buffer binding: flags the base, light and bone
// blocks as dirty, drops the remembered buffer handles and zeroes the
// remembered dynamic offsets. DoFlush pushes a block again when its dirty bit
// is set or its buffer handle is VK_NULL_HANDLE.
void DrawEngineVulkan::DirtyAllUBOs() {
	dirtyUniforms_ = DIRTY_BASE_UNIFORMS | DIRTY_LIGHT_UNIFORMS | DIRTY_BONE_UNIFORMS;

	// No buffer is considered bound any more.
	baseBuf = lightBuf = boneBuf = VK_NULL_HANDLE;

	// Offsets are meaningless without a buffer; reset them as well.
	baseUBOOffset = lightUBOOffset = boneUBOOffset = 0;
}
// The inline wrapper in the header checks for numDrawCalls == 0
void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
gpuStats.numFlushes++;
@ -514,11 +534,6 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
VulkanVertexShader *vshader = nullptr;
VulkanFragmentShader *fshader = nullptr;
// TODO: Keep these between calls if not dirty.
uint32_t baseUBOOffset = 0;
uint32_t lightUBOOffset = 0;
uint32_t boneUBOOffset = 0;
uint32_t ibOffset = 0;
uint32_t vbOffset = 0;
@ -560,7 +575,9 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
float bc[4];
Uint8x4ToFloat4(bc, dynState.blendColor);
vkCmdSetBlendConstants(cmd_, bc);
shaderManager_->UpdateUniforms();
dirtyUniforms_ |= shaderManager_->UpdateUniforms();
shaderManager_->GetShaders(prim, lastVTypeID_, &vshader, &fshader, useHWTransform);
VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(pipelineLayout_, pipelineKey, dec_, vshader, fshader, true);
if (!pipeline) {
@ -569,16 +586,17 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
}
vkCmdBindPipeline(cmd_, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline); // TODO: Avoid if same as last draw.
// TODO: Make only one allocation so all three end up in the same buffer.
VkBuffer baseBuf, lightBuf, boneBuf;
if (pipeline->uniformBlocks & UB_VS_FS_BASE) {
if ((dirtyUniforms_ & DIRTY_BASE_UNIFORMS) || baseBuf == VK_NULL_HANDLE) {
baseUBOOffset = shaderManager_->PushBaseBuffer(frame->pushUBO, &baseBuf);
dirtyUniforms_ &= ~DIRTY_BASE_UNIFORMS;
}
if (pipeline->uniformBlocks & UB_VS_LIGHTS) {
if ((dirtyUniforms_ & DIRTY_LIGHT_UNIFORMS) || lightBuf == VK_NULL_HANDLE) {
lightUBOOffset = shaderManager_->PushLightBuffer(frame->pushUBO, &lightBuf);
dirtyUniforms_ &= ~DIRTY_LIGHT_UNIFORMS;
}
if (pipeline->uniformBlocks & UB_VS_BONES) {
if ((dirtyUniforms_ & DIRTY_BONE_UNIFORMS) || boneBuf == VK_NULL_HANDLE) {
boneUBOOffset = shaderManager_->PushBoneBuffer(frame->pushUBO, &boneBuf);
dirtyUniforms_ &= ~DIRTY_BONE_UNIFORMS;
}
VkDescriptorSet ds = GetDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf);
@ -656,7 +674,6 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
vkCmdSetStencilCompareMask(cmd_, VK_STENCIL_FRONT_AND_BACK, dynState.stencilCompareMask);
}
if (result.setStencil) {
// hey, dynamic state!
vkCmdSetStencilReference(cmd_, VK_STENCIL_FRONT_AND_BACK, result.stencilValue);
} else if (dynState.useStencil) {
vkCmdSetStencilReference(cmd_, VK_STENCIL_FRONT_AND_BACK, dynState.stencilRef);
@ -665,7 +682,8 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
float bc[4];
Uint8x4ToFloat4(bc, dynState.blendColor);
vkCmdSetBlendConstants(cmd_, bc);
shaderManager_->UpdateUniforms();
dirtyUniforms_ |= shaderManager_->UpdateUniforms();
shaderManager_->GetShaders(prim, lastVTypeID_, &vshader, &fshader, useHWTransform);
VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(pipelineLayout_, pipelineKey, dec_, vshader, fshader, false);
if (!pipeline) {
@ -674,12 +692,21 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
}
vkCmdBindPipeline(cmd_, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->pipeline); // TODO: Avoid if same as last draw.
VkBuffer uboBuf;
if (pipeline->uniformBlocks & UB_VS_FS_BASE) {
baseUBOOffset = shaderManager_->PushBaseBuffer(frame->pushUBO, &uboBuf);
if ((dirtyUniforms_ & DIRTY_BASE_UNIFORMS) || baseBuf == VK_NULL_HANDLE) {
baseUBOOffset = shaderManager_->PushBaseBuffer(frame->pushUBO, &baseBuf);
dirtyUniforms_ &= ~DIRTY_BASE_UNIFORMS;
}
// Even if the first draw is through-mode, make sure we at least have one copy of these uniforms buffered
if ((dirtyUniforms_ & DIRTY_LIGHT_UNIFORMS) || lightBuf == VK_NULL_HANDLE) {
lightUBOOffset = shaderManager_->PushLightBuffer(frame->pushUBO, &lightBuf);
dirtyUniforms_ &= ~DIRTY_LIGHT_UNIFORMS;
}
if ((dirtyUniforms_ & DIRTY_BONE_UNIFORMS) || boneBuf == VK_NULL_HANDLE) {
boneUBOOffset = shaderManager_->PushBoneBuffer(frame->pushUBO, &boneBuf);
dirtyUniforms_ &= ~DIRTY_BONE_UNIFORMS;
}
VkDescriptorSet ds = GetDescriptorSet(imageView, sampler, uboBuf, uboBuf, uboBuf);
VkDescriptorSet ds = GetDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf);
const uint32_t dynamicUBOOffsets[3] = {
baseUBOOffset, lightUBOOffset, boneUBOOffset,
};
@ -690,7 +717,6 @@ void DrawEngineVulkan::DoFlush(VkCommandBuffer cmd) {
vbOffset = (uint32_t)frame->pushVertex->Push(drawBuffer, maxIndex * sizeof(TransformedVertex), &vbuf);
ibOffset = (uint32_t)frame->pushIndex->Push(inds, sizeof(short) * numTrans, &ibuf);
VkDeviceSize offsets[1] = { vbOffset };
// TODO: Have a buffer per frame, use a walking buffer pointer
// TODO: Avoid rebinding if the vertex size stays the same by using the offset arguments
vkCmdBindVertexBuffers(cmd_, 0, 1, &vbuf, offsets);
vkCmdBindIndexBuffer(cmd_, ibuf, ibOffset, VK_INDEX_TYPE_UINT16);

View file

@ -141,6 +141,8 @@ public:
void BeginFrame();
void EndFrame();
void DirtyAllUBOs();
private:
void DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf);
void DoFlush(VkCommandBuffer cmd);
@ -223,6 +225,12 @@ private:
enum { MAX_DEFERRED_DRAW_CALLS = 128 };
uint32_t dirtyUniforms_;
uint32_t baseUBOOffset;
uint32_t lightUBOOffset;
uint32_t boneUBOOffset;
VkBuffer baseBuf, lightBuf, boneBuf;
DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS];
int numDrawCalls;
int vertexCountInDrawCalls;

View file

@ -98,7 +98,9 @@ int SetupVertexAttribsPretransformed(VkVertexInputAttributeDescription attrs[],
return count;
}
static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pipelineCache, VkPipelineLayout layout, VkRenderPass renderPass, const VulkanPipelineRasterStateKey &key, const VertexDecoder *vtxDec, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform) {
static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pipelineCache,
VkPipelineLayout layout, VkRenderPass renderPass, const VulkanPipelineRasterStateKey &key,
const VertexDecoder *vtxDec, VulkanVertexShader *vs, VulkanFragmentShader *fs, bool useHwTransform) {
VkPipelineColorBlendAttachmentState blend0 = {};
blend0.blendEnable = key.blendEnable;
if (key.blendEnable) {
@ -262,8 +264,12 @@ static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pip
vulkanPipeline->pipeline = pipeline;
vulkanPipeline->uniformBlocks = UB_VS_FS_BASE;
if (useHwTransform) {
// TODO: Remove BONES and LIGHTS when those aren't used.
vulkanPipeline->uniformBlocks |= UB_VS_BONES | UB_VS_LIGHTS;
if (vs->HasLights()) {
vulkanPipeline->uniformBlocks |= UB_VS_LIGHTS;
}
if (vs->HasBones()) {
vulkanPipeline->uniformBlocks |= UB_VS_BONES;
}
}
return vulkanPipeline;
}

View file

@ -98,8 +98,8 @@ std::string VulkanFragmentShader::GetShaderString(DebugShaderStringType type) co
}
}
VulkanVertexShader::VulkanVertexShader(VulkanContext *vulkan, ShaderID id, const char *code, int vertType, bool useHWTransform)
: vulkan_(vulkan), id_(id), failed_(false), useHWTransform_(useHWTransform), module_(VK_NULL_HANDLE) {
VulkanVertexShader::VulkanVertexShader(VulkanContext *vulkan, ShaderID id, const char *code, int vertType, bool useHWTransform, bool usesLighting)
: vulkan_(vulkan), id_(id), failed_(false), useHWTransform_(useHWTransform), module_(VK_NULL_HANDLE), usesLighting_(usesLighting) {
source_ = code;
std::string errorMessage;
std::vector<uint32_t> spirv;
@ -462,13 +462,15 @@ void ShaderManagerVulkan::DirtyLastShader() { // disables vertex arrays
lastFShader_ = nullptr;
}
void ShaderManagerVulkan::UpdateUniforms() {
uint32_t ShaderManagerVulkan::UpdateUniforms() {
uint32_t dirty = globalDirty_;
if (globalDirty_) {
BaseUpdateUniforms(globalDirty_);
LightUpdateUniforms(globalDirty_);
BoneUpdateUniforms(globalDirty_);
globalDirty_ = 0;
BaseUpdateUniforms(dirty);
LightUpdateUniforms(dirty);
BoneUpdateUniforms(dirty);
}
globalDirty_ = 0;
return dirty;
}
void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader **vshader, VulkanFragmentShader **fshader, bool useHWTransform) {
@ -489,8 +491,9 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader
VulkanVertexShader *vs;
if (vsIter == vsCache_.end()) {
// Vertex shader not in cache. Let's compile it.
GenerateVulkanGLSLVertexShader(VSID, codeBuffer_);
vs = new VulkanVertexShader(vulkan_, VSID, codeBuffer_, vertType, useHWTransform);
bool usesLighting;
GenerateVulkanGLSLVertexShader(VSID, codeBuffer_, &usesLighting);
vs = new VulkanVertexShader(vulkan_, VSID, codeBuffer_, vertType, useHWTransform, usesLighting);
vsCache_[VSID] = vs;
} else {
vs = vsIter->second;

View file

@ -191,13 +191,19 @@ protected:
class VulkanVertexShader {
public:
VulkanVertexShader(VulkanContext *vulkan, ShaderID id, const char *code, int vertType, bool useHWTransform);
VulkanVertexShader(VulkanContext *vulkan, ShaderID id, const char *code, int vertType, bool useHWTransform, bool usesLighting);
~VulkanVertexShader();
const std::string &source() const { return source_; }
bool Failed() const { return failed_; }
bool UseHWTransform() const { return useHWTransform_; }
// True when this shader's ID has the VS_BIT_ENABLE_BONES bit set.
bool HasBones() const { return id_.Bit(VS_BIT_ENABLE_BONES); }
// Reports the stored usesLighting_ flag for this shader.
bool HasLights() const { return usesLighting_; }
std::string GetShaderString(DebugShaderStringType type) const;
VkShaderModule GetModule() const { return module_; }
@ -209,6 +215,7 @@ protected:
std::string source_;
bool failed_;
bool useHWTransform_;
bool usesLighting_;
ShaderID id_;
};
@ -230,7 +237,7 @@ public:
std::vector<std::string> DebugGetShaderIDs(DebugShaderType type);
std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType);
void UpdateUniforms();
uint32_t UpdateUniforms();
void DirtyUniform(u32 what) {
globalDirty_ |= what;

View file

@ -102,7 +102,7 @@ enum DoLightComputation {
// TODO: Skip all this if we can actually get a 16-bit depth buffer along with stencil, which
// is a bit of a rare configuration, although quite common on mobile.
bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer) {
bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *usesLighting) {
char *p = buffer;
// #define USE_FOR_LOOP
@ -140,7 +140,7 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer) {
// The uniforms are passed in as three "clumps" that may or may not be present.
// We will memcpy the parts into place in a big buffer so we can be quite dynamic about what parts
// are present and what parts aren't, but we will not be ultra detailed about it.
*usesLighting = enableLighting || doShadeMapping;
WRITE(p, "\n");
WRITE(p, "layout (std140, set = 0, binding = 2) uniform baseVars {\n%s} base;\n", ub_baseStr);
if (enableLighting || doShadeMapping)

View file

@ -2,4 +2,4 @@
#include "GPU/Common/ShaderId.h"
bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer);
bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *usesLighting);