Allow writing directly into the packed descriptor buffer, saving a memcpy.

This commit is contained in:
Henrik Rydgård 2023-10-11 10:48:25 +02:00
parent f931f85d57
commit 183d49329a
5 changed files with 53 additions and 42 deletions

View file

@ -124,6 +124,23 @@ public:
size_ += count;
}
// Reserves room for `count` additional elements at the end of the container and
// returns a pointer to the first of them. This function does not write to the new
// elements; the caller is expected to fill them in through the returned pointer.
// When the current capacity is too small, IncreaseCapacityTo is called before the
// size is bumped.
T *extend_uninitialized(size_t count) {
	const size_t firstNew = size_;
	if (firstNew + count > capacity_) {
		// Leave some extra room when growing in all cases: request space for twice
		// the incoming count on top of what is already stored...
		size_t wantedCapacity = firstNew + count * 2;
		// ...but never less than double the current capacity (standard amortized O(1)).
		if (capacity_ * 2 > wantedCapacity) {
			wantedCapacity = capacity_ * 2;
		}
		IncreaseCapacityTo(wantedCapacity);
	}
	size_ += count;
	// Index data_ only after any growth above, so the current data_ pointer is used.
	return &data_[firstNew];
}
void LockCapacity() {
#ifdef _DEBUG
capacityLocked_ = true;

View file

@ -1747,10 +1747,9 @@ void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueuePro
int numBuffers = 0;
int numImages = 0;
for (int i = 0; i < d.count; i++) {
if (!data[i].image.view) { // This automatically also checks for an null buffer.
if (!data[i].image.view) { // This automatically also checks for an null buffer due to the union.
continue;
}
switch (this->bindingTypes[i]) {
case BindingType::COMBINED_IMAGE_SAMPLER:
_dbg_assert_(data[i].image.sampler != VK_NULL_HANDLE);

View file

@ -451,12 +451,11 @@ public:
curRenderStep_->render.stencilStore = VKRRenderPassStoreAction::DONT_CARE;
}
private:
// Descriptors will match the current pipeline layout, set by the last call to BindPipeline.
// Count is the count of void*s. Two are needed for COMBINED_IMAGE_SAMPLER, everything else is a single one.
// The goal is to keep this function very small and fast, and do the expensive work on the render thread or
// another thread.
int BindDescriptors(const PackedDescriptor *desc, int count) {
PackedDescriptor *PushDescriptorSet(int count, int *descSetIndex) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);
int curFrame = vulkan_->GetCurFrame();
@ -464,25 +463,24 @@ private:
VKRPipelineLayout::FrameData &data = curPipelineLayout_->frameData[curFrame];
size_t offset = data.descData_.size();
data.descData_.extend(desc, count);
PackedDescriptor *retval = data.descData_.extend_uninitialized(count);
int setIndex = (int)data.descSets_.size();
PendingDescSet &descSet = data.descSets_.push_uninitialized();
descSet.offset = (uint32_t)offset;
descSet.count = count;
// descSet.set = VK_NULL_HANDLE; // to be filled in
return setIndex;
*descSetIndex = setIndex;
return retval;
}
public:
void Draw(const PackedDescriptor *desc, int descCount, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, int count, int offset = 0) {
void Draw(int descSetIndex, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, int count, int offset = 0) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_);
int setIndex = BindDescriptors(desc, descCount);
VkRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = VKRRenderCommand::DRAW;
data.draw.count = count;
data.draw.offset = offset;
data.draw.descSetIndex = setIndex;
data.draw.descSetIndex = descSetIndex;
data.draw.vbuffer = vbuffer;
data.draw.voffset = voffset;
data.draw.numUboOffsets = numUboOffsets;
@ -492,14 +490,13 @@ public:
curRenderStep_->render.numDraws++;
}
void DrawIndexed(const PackedDescriptor *desc, int descCount, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, VkBuffer ibuffer, int ioffset, int count, int numInstances) {
void DrawIndexed(int descSetIndex, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, VkBuffer ibuffer, int ioffset, int count, int numInstances) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_);
int setIndex = BindDescriptors(desc, descCount);
VkRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = VKRRenderCommand::DRAW_INDEXED;
data.drawIndexed.count = count;
data.drawIndexed.instances = numInstances;
data.drawIndexed.descSetIndex = setIndex;
data.drawIndexed.descSetIndex = descSetIndex;
data.drawIndexed.vbuffer = vbuffer;
data.drawIndexed.voffset = voffset;
data.drawIndexed.ibuffer = ibuffer;

View file

@ -1417,9 +1417,10 @@ void VKContext::Draw(int vertexCount, int offset) {
BindCurrentPipeline();
ApplyDynamicState();
PackedDescriptor descriptors[4];
int descSetIndex;
PackedDescriptor *descriptors = renderManager_.PushDescriptorSet(4, &descSetIndex);
BindDescriptors(vulkanUBObuf, descriptors);
renderManager_.Draw(descriptors, ARRAY_SIZE(descriptors), 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vertexCount, offset);
renderManager_.Draw(descSetIndex, 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vertexCount, offset);
}
void VKContext::DrawIndexed(int vertexCount, int offset) {
@ -1433,9 +1434,10 @@ void VKContext::DrawIndexed(int vertexCount, int offset) {
BindCurrentPipeline();
ApplyDynamicState();
PackedDescriptor descriptors[4];
int descSetIndex;
PackedDescriptor *descriptors = renderManager_.PushDescriptorSet(4, &descSetIndex);
BindDescriptors(vulkanUBObuf, descriptors);
renderManager_.DrawIndexed(descriptors, ARRAY_SIZE(descriptors), 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vulkanIbuf, (int)ibBindOffset + offset * sizeof(uint32_t), vertexCount, 1);
renderManager_.DrawIndexed(descSetIndex, 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vulkanIbuf, (int)ibBindOffset + offset * sizeof(uint32_t), vertexCount, 1);
}
void VKContext::DrawUP(const void *vdata, int vertexCount) {
@ -1455,9 +1457,10 @@ void VKContext::DrawUP(const void *vdata, int vertexCount) {
BindCurrentPipeline();
ApplyDynamicState();
PackedDescriptor descriptors[4];
int descSetIndex;
PackedDescriptor *descriptors = renderManager_.PushDescriptorSet(4, &descSetIndex);
BindDescriptors(vulkanUBObuf, descriptors);
renderManager_.Draw(descriptors, ARRAY_SIZE(descriptors), 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vertexCount);
renderManager_.Draw(descSetIndex, 1, &ubo_offset, vulkanVbuf, (int)vbBindOffset + curVBufferOffsets_[0], vertexCount);
}
void VKContext::BindCurrentPipeline() {

View file

@ -343,18 +343,17 @@ void DrawEngineVulkan::DoFlush() {
dirtyUniforms_ |= shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
UpdateUBOs();
PackedDescriptor descriptors[9]{};
int descCount = 6;
if (tess)
descCount = 9;
int descSetIndex;
PackedDescriptor *descriptors = renderManager->PushDescriptorSet(descCount, &descSetIndex);
descriptors[0].image.view = imageView;
descriptors[0].image.sampler = sampler;
if (boundSecondary_) {
descriptors[1].image.view = boundSecondary_;
descriptors[1].image.sampler = samplerSecondaryNearest_;
}
if (boundDepal_) {
descriptors[2].image.view = boundDepal_;
descriptors[2].image.sampler = boundDepalSmoothed_ ? samplerSecondaryLinear_ : samplerSecondaryNearest_;
}
descriptors[1].image.view = boundSecondary_;
descriptors[1].image.sampler = samplerSecondaryNearest_;
descriptors[2].image.view = boundDepal_;
descriptors[2].image.sampler = (boundDepal_ && boundDepalSmoothed_) ? samplerSecondaryLinear_ : samplerSecondaryNearest_;
descriptors[3].buffer.buffer = baseBuf;
descriptors[3].buffer.range = sizeof(UB_VS_FS_Base);
descriptors[4].buffer.buffer = lightBuf;
@ -368,7 +367,6 @@ void DrawEngineVulkan::DoFlush() {
descriptors[j + 6].buffer.offset = bufInfo[j].offset;
descriptors[j + 6].buffer.range = bufInfo[j].range;
}
descCount = 9;
}
// TODO: Can we avoid binding all three when not needed? Same below for hardware transform.
// Think this will require different descriptor set layouts.
@ -379,9 +377,9 @@ void DrawEngineVulkan::DoFlush() {
if (!ibuf) {
ibOffset = (uint32_t)pushIndex_->Push(decIndex_, sizeof(uint16_t) * indexGen.VertexCount(), 4, &ibuf);
}
renderManager->DrawIndexed(descriptors, descCount, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, 1);
renderManager->DrawIndexed(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, 1);
} else {
renderManager->Draw(descriptors, descCount, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, vertexCount);
renderManager->Draw(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, vertexCount);
}
} else {
PROFILE_THIS_SCOPE("soft");
@ -509,18 +507,15 @@ void DrawEngineVulkan::DoFlush() {
// Even if the first draw is through-mode, make sure we at least have one copy of these uniforms buffered
UpdateUBOs();
PackedDescriptor descriptors[9]{};
int descCount = 6;
int descSetIndex;
PackedDescriptor *descriptors = renderManager->PushDescriptorSet(descCount, &descSetIndex);
descriptors[0].image.view = imageView;
descriptors[0].image.sampler = sampler;
if (boundSecondary_) {
descriptors[1].image.view = boundSecondary_;
descriptors[1].image.sampler = samplerSecondaryNearest_;
}
if (boundDepal_) {
descriptors[2].image.view = boundDepal_;
descriptors[2].image.sampler = boundDepalSmoothed_ ? samplerSecondaryLinear_ : samplerSecondaryNearest_;
}
descriptors[1].image.view = boundSecondary_;
descriptors[1].image.sampler = samplerSecondaryNearest_;
descriptors[2].image.view = boundDepal_;
descriptors[2].image.sampler = (boundDepal_ && boundDepalSmoothed_) ? samplerSecondaryLinear_ : samplerSecondaryNearest_;
descriptors[3].buffer.buffer = baseBuf;
descriptors[3].buffer.range = sizeof(UB_VS_FS_Base);
descriptors[4].buffer.buffer = lightBuf;
@ -538,11 +533,11 @@ void DrawEngineVulkan::DoFlush() {
VkBuffer vbuf, ibuf;
vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, maxIndex * sizeof(TransformedVertex), 4, &vbuf);
ibOffset = (uint32_t)pushIndex_->Push(inds, sizeof(short) * result.drawNumTrans, 4, &ibuf);
renderManager->DrawIndexed(descriptors, descCount, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, result.drawNumTrans, 1);
renderManager->DrawIndexed(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, result.drawNumTrans, 1);
} else {
VkBuffer vbuf;
vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, result.drawNumTrans * sizeof(TransformedVertex), 4, &vbuf);
renderManager->Draw(descriptors, descCount, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, result.drawNumTrans);
renderManager->Draw(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, result.drawNumTrans);
}
} else if (result.action == SW_CLEAR) {
// Note: we won't get here if the clear is alpha but not color, or color but not alpha.