Implement vertex caching for Vulkan.

This commit is contained in:
Henrik Rydgård 2017-08-17 11:22:23 +02:00
parent cd43049788
commit 5e788ffadf
8 changed files with 326 additions and 56 deletions

View file

@ -114,6 +114,14 @@ void VulkanPushBuffer::Defragment(VulkanContext *vulkan) {
assert(res);
}
// Returns the total size accounted to this push buffer: every buffer except one
// is counted at the full per-buffer size (size_), and offset_ is added on top
// (presumably the fill level of the buffer currently being written - confirm).
size_t VulkanPushBuffer::GetTotalSize() const {
	const size_t bufferCount = buffers_.size();
	// With zero or one buffer there are no fully counted buffers.
	const size_t fullyCounted = (bufferCount > 1) ? (bufferCount - 1) : 0;
	return size_ * fullyCounted + offset_;
}
VulkanDeviceAllocator::VulkanDeviceAllocator(VulkanContext *vulkan, size_t minSlabSize, size_t maxSlabSize)
: vulkan_(vulkan), lastSlab_(0), minSlabSize_(minSlabSize), maxSlabSize_(maxSlabSize), memoryTypeIndex_(UNDEFINED_MEMORY_TYPE), destroyed_(false) {
assert((minSlabSize_ & (SLAB_GRAIN_SIZE - 1)) == 0);

View file

@ -38,13 +38,18 @@ public:
Map();
}
// Starts a write session by calling Map() only; as the name says, nothing is
// reset here. DrawEngineVulkan::BeginFrame uses this for the vertex cache.
void BeginNoReset() {
Map();
}
// Ends a write session by calling Unmap() (presumably releasing the mapping
// established by Map() - Unmap() is not visible here).
void End() {
Unmap();
}
// Maps the current buffer's device memory and stores the CPU pointer in writePtr_.
// Must not be called while a mapping is already active (asserted).
void Map() {
	assert(!writePtr_);
	// Map exactly once, starting at offset 0 of the allocation: writers address
	// the mapping with buffer-relative offsets (writePtr_ + off), so the mapped
	// range has to begin at the start of the buffer rather than at offset_.
	VkResult res = vkMapMemory(device_, buffers_[buf_].deviceMemory, 0, size_, 0, (void **)(&writePtr_));
	// Check the API result first, then that we actually got a pointer back.
	assert(VK_SUCCESS == res);
	assert(writePtr_);
}
@ -105,6 +110,8 @@ public:
return writePtr_ + off;
}
size_t GetTotalSize() const;
private:
bool AddBuffer();
void NextBuffer(size_t minSize);

View file

@ -59,7 +59,7 @@ public:
}
// Overwrites the index counter (index_) with the given value.
void SetIndex(int ind) { index_ = ind; }
// Really NextIndex rather than MaxIndex: the returned value (index_) is one more
// than the highest index generated.
int MaxIndex() const { return index_; }
// Returns the count_ member.
int VertexCount() const { return count_; }
// True while the index counter (index_) is still zero.
bool Empty() const { return index_ == 0; }
// Returns the seenPrims_ member.
int SeenPrims() const { return seenPrims_; }

View file

@ -72,7 +72,7 @@ public:
}
~VertexArrayInfoD3D11();
enum Status {
enum Status : uint8_t {
VAI_NEW,
VAI_HASHING,
VAI_RELIABLE, // cache, don't hash
@ -82,8 +82,6 @@ public:
ReliableHashType hash;
u32 minihash;
Status status;
ID3D11Buffer *vbo;
ID3D11Buffer *ebo;
@ -91,6 +89,7 @@ public:
u16 numVerts;
u16 maxIndex;
s8 prim;
Status status;
// ID information
int numDraws;

View file

@ -71,7 +71,7 @@ public:
}
~VertexArrayInfoDX9();
enum Status {
enum Status : uint8_t {
VAI_NEW,
VAI_HASHING,
VAI_RELIABLE, // cache, don't hash
@ -81,8 +81,6 @@ public:
ReliableHashType hash;
u32 minihash;
Status status;
LPDIRECT3DVERTEXBUFFER9 vbo;
LPDIRECT3DINDEXBUFFER9 ebo;
@ -90,6 +88,7 @@ public:
u16 numVerts;
u16 maxIndex;
s8 prim;
Status status;
// ID information
int numDraws;

View file

@ -71,7 +71,7 @@ public:
flags = 0;
}
enum Status {
enum Status : uint8_t {
VAI_NEW,
VAI_HASHING,
VAI_RELIABLE, // cache, don't hash
@ -81,8 +81,6 @@ public:
ReliableHashType hash;
u32 minihash;
Status status;
u32 vbo;
u32 ebo;
@ -90,6 +88,7 @@ public:
u16 numVerts;
u16 maxIndex;
s8 prim;
Status status;
// ID information
int numDraws;

View file

@ -49,6 +49,15 @@
#include "GPU/Vulkan/FramebufferVulkan.h"
#include "GPU/Vulkan/GPU_Vulkan.h"
// Size passed to the vertex cache push buffer at creation. BeginFrame wipes the
// whole cache once its total size grows past this (presumably bytes - confirm).
enum {
	VERTEX_CACHE_SIZE = 4 * 1024 * 1024,
};

// BeginFrame runs one eviction pass over the tracked vertex arrays every time
// its countdown, reloaded with this value, reaches zero.
#define VERTEXCACHE_DECIMATION_INTERVAL 17

// Eviction thresholds used by that pass, measured in gpuStats.numFlips.
enum {
	// Entries whose lastFrame is more than this many flips old are deleted.
	VAI_KILL_AGE = 120,
	// Same, but for entries marked VAI_UNRELIABLE.
	VAI_UNRELIABLE_KILL_AGE = 240,
	// At most this many unreliable entries are deleted per pass, to limit rehashing.
	VAI_UNRELIABLE_KILL_MAX = 4,
};
enum {
DRAW_BINDING_TEXTURE = 0,
DRAW_BINDING_2ND_TEXTURE = 1,
@ -190,6 +199,8 @@ void DrawEngineVulkan::InitDeviceObjects() {
res = vkCreateSampler(device, &samp, nullptr, &depalSampler_);
res = vkCreateSampler(device, &samp, nullptr, &nullSampler_);
assert(VK_SUCCESS == res);
vertexCache_ = new VulkanPushBuffer(vulkan_, VERTEX_CACHE_SIZE);
}
DrawEngineVulkan::~DrawEngineVulkan() {
@ -243,6 +254,9 @@ void DrawEngineVulkan::DestroyDeviceObjects() {
delete nullTexture_;
nullTexture_ = nullptr;
}
vertexCache_->Destroy(vulkan_);
delete vertexCache_;
vertexCache_ = nullptr;
}
void DrawEngineVulkan::DeviceLost() {
@ -297,6 +311,39 @@ void DrawEngineVulkan::BeginFrame() {
}
DirtyAllUBOs();
// Wipe the vertex cache if it's grown too large.
if (vertexCache_->GetTotalSize() > VERTEX_CACHE_SIZE) {
vertexCache_->Destroy(vulkan_);
delete vertexCache_; // orphans the buffers, they'll get deleted once no longer used by an in-flight frame.
vertexCache_ = new VulkanPushBuffer(vulkan_, VERTEX_CACHE_SIZE);
vai_.clear();
}
vertexCache_->BeginNoReset();
if (--decimationCounter_ <= 0) {
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
for (auto iter = vai_.begin(); iter != vai_.end(); ) {
bool kill;
if (iter->second->status == VertexArrayInfoVulkan::VAI_UNRELIABLE) {
// We limit killing unreliable so we don't rehash too often.
kill = iter->second->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
} else {
kill = iter->second->lastFrame < threshold;
}
if (kill) {
delete iter->second;
vai_.erase(iter++);
} else {
++iter;
}
}
}
}
void DrawEngineVulkan::EndFrame() {
@ -308,6 +355,7 @@ void DrawEngineVulkan::EndFrame() {
frame->pushVertex->End();
frame->pushIndex->End();
curFrame_++;
vertexCache_->End();
}
void DrawEngineVulkan::SetupVertexDecoder(u32 vertType) {
@ -393,11 +441,7 @@ void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
}
}
void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf) {
u8 *dest = decoded;
// Figure out how much pushbuffer space we need to allocate.
if (push) {
int DrawEngineVulkan::ComputeNumVertsToDecode() const {
int vertsToDecode = 0;
if (drawCalls[0].indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
for (int i = 0; i < numDrawCalls; i++) {
@ -424,6 +468,15 @@ void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset,
i = lastMatch;
}
}
return vertsToDecode;
}
void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf) {
u8 *dest = decoded;
// Figure out how much pushbuffer space we need to allocate.
if (push) {
int vertsToDecode = ComputeNumVertsToDecode();
dest = (u8 *)push->Push(vertsToDecode * dec_->GetDecVtxFmt().stride, bindOffset, vkbuf);
}
@ -442,6 +495,7 @@ void DrawEngineVulkan::DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset,
}
}
VkDescriptorSet DrawEngineVulkan::GetDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone) {
DescriptorSetKey key;
key.imageView_ = imageView;
@ -567,9 +621,17 @@ void DrawEngineVulkan::DirtyAllUBOs() {
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
}
// Moves a tracked vertex array into the VAI_UNRELIABLE ("never cache") state.
// DoFlush calls this when a mini hash or full hash no longer matches the stored one.
// Only the status changes; any data already pushed for this entry is left in place.
void MarkUnreliable(VertexArrayInfoVulkan *vai) {
vai->status = VertexArrayInfoVulkan::VAI_UNRELIABLE;
// TODO: If we change to a real allocator, free the data here.
// For now we just leave it in the pushbuffer.
}
// The inline wrapper in the header checks for numDrawCalls == 0.
void DrawEngineVulkan::DoFlush() {
gpuStats.numFlushes++;
// TODO: Should be enough to update this once per frame?
gpuStats.numTrackedVertexArrays = (int)vai_.size();
VkCommandBuffer cmd = (VkCommandBuffer)draw_->GetNativeObject(Draw::NativeObject::RENDERPASS_COMMANDBUFFER);
if (cmd != lastCmd_) {
@ -613,12 +675,164 @@ void DrawEngineVulkan::DoFlush() {
// We don't detect clears in this path, so here we can switch framebuffers if necessary.
int vertexCount = 0;
int maxIndex;
bool useElements = true;
// Cannot cache vertex data with morph enabled.
bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
// Also avoid caching when software skinning.
VkBuffer vbuf;
VkBuffer vbuf = VK_NULL_HANDLE;
VkBuffer ibuf = VK_NULL_HANDLE;
if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
useCache = false;
}
if (useCache) {
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
auto iter = vai_.find(id);
VertexArrayInfoVulkan *vai;
if (iter != vai_.end()) {
// We've seen this before. Could have been a cached draw.
vai = iter->second;
} else {
vai = new VertexArrayInfoVulkan();
vai_[id] = vai;
}
switch (vai->status) {
case VertexArrayInfoVulkan::VAI_NEW:
{
// Haven't seen this one before. We don't actually upload the vertex data yet.
ReliableHashType dataHash = ComputeHash();
vai->hash = dataHash;
vai->minihash = ComputeMiniHash();
vai->status = VertexArrayInfoVulkan::VAI_HASHING;
vai->drawsUntilNextFullHash = 0;
DecodeVerts(frame->pushVertex, &vbOffset, &vbuf); // writes to indexGen
vai->numVerts = indexGen.VertexCount();
vai->prim = indexGen.Prim();
vai->maxIndex = indexGen.MaxIndex();
vai->flags = gstate_c.vertexFullAlpha ? VAIVULKAN_FLAG_VERTEXFULLALPHA : 0;
goto rotateVBO;
}
// Hashing - still gaining confidence about the buffer.
// But if we get this far it's likely to be worth uploading the data.
case VertexArrayInfoVulkan::VAI_HASHING:
{
vai->numDraws++;
if (vai->lastFrame != gpuStats.numFlips) {
vai->numFrames++;
}
if (vai->drawsUntilNextFullHash == 0) {
// Let's try to skip a full hash if mini would fail.
const u32 newMiniHash = ComputeMiniHash();
ReliableHashType newHash = vai->hash;
if (newMiniHash == vai->minihash) {
newHash = ComputeHash();
}
if (newMiniHash != vai->minihash || newHash != vai->hash) {
MarkUnreliable(vai);
DecodeVerts(frame->pushVertex, &vbOffset, &vbuf);
goto rotateVBO;
}
if (vai->numVerts > 64) {
// exponential backoff up to 16 draws, then every 24
vai->drawsUntilNextFullHash = std::min(24, vai->numFrames);
} else {
// Lower numbers seem much more likely to change.
vai->drawsUntilNextFullHash = 0;
}
// TODO: tweak
//if (vai->numFrames > 1000) {
// vai->status = VertexArrayInfo::VAI_RELIABLE;
//}
} else {
vai->drawsUntilNextFullHash--;
u32 newMiniHash = ComputeMiniHash();
if (newMiniHash != vai->minihash) {
MarkUnreliable(vai);
DecodeVerts(frame->pushVertex, &vbOffset, &vbuf);
goto rotateVBO;
}
}
if (!vai->vb) {
// same as MaxIndex
int numVertsToDecode = ComputeNumVertsToDecode();
// Directly push to the vertex cache.
DecodeVerts(vertexCache_, &vai->vbOffset, &vai->vb);
_dbg_assert_msg_(G3D, gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, "Should not have checked UVs when caching.");
if (useElements) {
u32 size = sizeof(uint16_t) * indexGen.VertexCount();
void *dest = vertexCache_->Push(size, &vai->ibOffset, &vai->ib);
memcpy(dest, decIndex, size);
} else {
vai->ib = 0;
vai->ibOffset = 0;
}
vai->numVerts = indexGen.VertexCount();
vai->prim = indexGen.Prim();
vai->maxIndex = indexGen.MaxIndex();
_dbg_assert_msg_(G3D, vai->maxIndex == numVertsToDecode, "maxindex wrong.");
vai->flags = gstate_c.vertexFullAlpha ? VAIVULKAN_FLAG_VERTEXFULLALPHA : 0;
/*
useElements = !indexGen.SeenOnlyPurePrims();
if (!useElements && indexGen.PureCount()) {
vai->numVerts = indexGen.PureCount();
}*/
} else {
gpuStats.numCachedDrawCalls++;
useElements = vai->ib ? true : false;
gpuStats.numCachedVertsDrawn += vai->numVerts;
gstate_c.vertexFullAlpha = vai->flags & VAIVULKAN_FLAG_VERTEXFULLALPHA;
}
vbuf = vai->vb;
ibuf = vai->ib;
vbOffset = vai->vbOffset;
ibOffset = vai->ibOffset;
vertexCount = vai->numVerts;
maxIndex = vai->maxIndex;
prim = static_cast<GEPrimitiveType>(vai->prim);
break;
}
// Reliable - we don't even bother hashing anymore. Right now we don't go here until after a very long time.
case VertexArrayInfoVulkan::VAI_RELIABLE:
{
vai->numDraws++;
if (vai->lastFrame != gpuStats.numFlips) {
vai->numFrames++;
}
gpuStats.numCachedDrawCalls++;
gpuStats.numCachedVertsDrawn += vai->numVerts;
vbuf = vai->vb;
ibuf = vai->ib;
vbOffset = vai->vbOffset;
ibOffset = vai->ibOffset;
vertexCount = vai->numVerts;
maxIndex = vai->maxIndex;
prim = static_cast<GEPrimitiveType>(vai->prim);
gstate_c.vertexFullAlpha = vai->flags & VAIVULKAN_FLAG_VERTEXFULLALPHA;
break;
}
case VertexArrayInfoVulkan::VAI_UNRELIABLE:
{
vai->numDraws++;
if (vai->lastFrame != gpuStats.numFlips) {
vai->numFrames++;
}
DecodeVerts(frame->pushVertex, &vbOffset, &vbuf);
goto rotateVBO;
}
default:
break;
}
} else {
if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
// If software skinning, we've already predecoded into "decoded". So push that content.
VkDeviceSize size = decodedVerts_ * dec_->GetDecVtxFmt().stride;
@ -629,12 +843,7 @@ void DrawEngineVulkan::DoFlush() {
DecodeVerts(frame->pushVertex, &vbOffset, &vbuf);
}
useCache = false;
if (useCache) {
u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
// TODO: Actually support vertex caching
}
rotateVBO:
gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
useElements = !indexGen.SeenOnlyPurePrims();
vertexCount = indexGen.VertexCount();
@ -642,6 +851,7 @@ void DrawEngineVulkan::DoFlush() {
vertexCount = indexGen.PureCount();
}
prim = indexGen.Prim();
}
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
@ -693,8 +903,8 @@ void DrawEngineVulkan::DoFlush() {
VkDeviceSize offsets[1] = { vbOffset };
if (useElements) {
VkBuffer ibuf;
ibOffset = (uint32_t)frame->pushIndex->Push(decIndex, 2 * indexGen.VertexCount(), &ibuf);
if (!ibuf)
ibOffset = (uint32_t)frame->pushIndex->Push(decIndex, sizeof(uint16_t) * indexGen.VertexCount(), &ibuf);
// TODO (maybe): Avoid rebinding vertex/index buffers if the vertex size stays the same by using the offset arguments.
// Not sure if actually worth it, binding buffers should be fast.
vkCmdBindVertexBuffers(cmd, 0, 1, &vbuf, offsets);

View file

@ -68,6 +68,49 @@ struct DrawEngineVulkanStats {
int pushIndexSpaceUsed;
};
// Bit flags stored in VertexArrayInfoVulkan::flags.
enum {
	// Set when gstate_c.vertexFullAlpha was true at the time the entry was recorded.
	VAIVULKAN_FLAG_VERTEXFULLALPHA = 1 << 0,
};
// Bookkeeping for one tracked vertex array in the Vulkan vertex cache.
// DrawEngineVulkan keeps these in vai_, keyed by draw ID.
// Try to keep this POD.
class VertexArrayInfoVulkan {
public:
	// Records the flip count at creation time in lastFrame.
	VertexArrayInfoVulkan() : lastFrame(gpuStats.numFlips) {}
	// No destructor needed - we always fully wipe.

	enum Status : uint8_t {
		VAI_NEW,
		VAI_HASHING,
		VAI_RELIABLE,    // cache, don't hash
		VAI_UNRELIABLE,  // never cache
	};

	// Full hash (ComputeHash) and cheap hash (ComputeMiniHash) of the source data.
	// Both are filled in by DoFlush while the entry is still VAI_NEW.
	ReliableHashType hash;
	u32 minihash;

	// Buffers returned when the data was pushed into the vertex cache.
	// These will probably always be the same, but whatever.
	// ib stays null when the draw does not use an index buffer.
	VkBuffer vb = VK_NULL_HANDLE;
	VkBuffer ib = VK_NULL_HANDLE;

	// Offsets into the cache buffer.
	uint32_t vbOffset = 0;
	uint32_t ibOffset = 0;

	// Precalculated parameters for vkDrawIndexed, captured from the index generator.
	u16 numVerts = 0;
	u16 maxIndex = 0;
	s8 prim = GE_PRIM_INVALID;
	Status status = VAI_NEW;

	// ID information
	int numDraws = 0;
	int numFrames = 0;
	// Flip count used for aging; BeginFrame compares it against the kill-age
	// thresholds. So that we can forget.
	int lastFrame;
	// Draws left that only check the mini hash before the next full hash.
	u16 drawsUntilNextFullHash = 0;
	// VAIVULKAN_FLAG_* bits.
	u8 flags = 0;
};
// Handles transform, lighting and drawing.
class DrawEngineVulkan : public DrawEngineCommon {
public:
@ -134,6 +177,7 @@ private:
void InitDeviceObjects();
void DestroyDeviceObjects();
int ComputeNumVertsToDecode() const;
void DecodeVerts(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf);
void DoFlush();
@ -150,6 +194,10 @@ private:
VkCommandBuffer lastCmd_ = VK_NULL_HANDLE;
VulkanPipeline *lastPipeline_;
std::unordered_map<u32, VertexArrayInfoVulkan *> vai_;
VulkanPushBuffer *vertexCache_;
int decimationCounter_ = 0;
struct DescriptorSetKey {
VkImageView imageView_;
VkImageView secondaryImageView_;