From 186b0f105cbd91193725a0f82d9077aa4ea10c40 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 5 Jun 2023 09:40:08 +0200
Subject: [PATCH 1/5] Simplify the vertex cache ID handling

---
 GPU/Common/DrawEngineCommon.cpp | 9 ---------
 GPU/Common/DrawEngineCommon.h   | 1 -
 GPU/D3D11/DrawEngineD3D11.cpp   | 8 ++++----
 GPU/Directx9/DrawEngineDX9.cpp  | 8 ++++----
 GPU/GLES/DrawEngineGLES.cpp     | 2 --
 GPU/Vulkan/DrawEngineVulkan.cpp | 9 +++++----
 6 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp
index 234d2af223..554dd72bb3 100644
--- a/GPU/Common/DrawEngineCommon.cpp
+++ b/GPU/Common/DrawEngineCommon.cpp
@@ -821,15 +821,6 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
 	if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > GE_PRIM_LINE_STRIP && prim != GE_PRIM_RECTANGLES))
 		return;
 
-	if (g_Config.bVertexCache) {
-		u32 dhash = dcid_;
-		dhash = __rotl(dhash ^ (u32)(uintptr_t)verts, 13);
-		dhash = __rotl(dhash ^ (u32)(uintptr_t)inds, 19);
-		dhash = __rotl(dhash ^ (u32)vertTypeID, 7);
-		dhash = __rotl(dhash ^ (u32)vertexCount, 11);
-		dcid_ = lowbias32_r(dhash ^ (u32)prim);
-	}
-
 	DeferredDrawCall &dc = drawCalls_[numDrawCalls_];
 	dc.verts = verts;
 	dc.inds = inds;
diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h
index c9b51ff020..0798c454b5 100644
--- a/GPU/Common/DrawEngineCommon.h
+++ b/GPU/Common/DrawEngineCommon.h
@@ -218,7 +218,6 @@ protected:
 
 	int decimationCounter_ = 0;
 	int decodeCounter_ = 0;
-	u32 dcid_ = 0;
 
 	// Vertex collector state
 	IndexGenerator indexGen;
diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp
index aab67928bd..119607d76d 100644
--- a/GPU/D3D11/DrawEngineD3D11.cpp
+++ b/GPU/D3D11/DrawEngineD3D11.cpp
@@ -363,12 +363,13 @@ void DrawEngineD3D11::DoFlush() {
 			useCache = false;
 
 		if (useCache) {
-			u32 id = dcid_ ^ gstate.getUVGenMode();  // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
+			// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
+			u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
 
-			VertexArrayInfoD3D11 *vai = vai_.Get(id);
+			VertexArrayInfoD3D11 *vai = vai_.Get(dcid);
 			if (!vai) {
 				vai = new VertexArrayInfoD3D11();
-				vai_.Insert(id, vai);
+				vai_.Insert(dcid, vai);
 			}
 
 			switch (vai->status) {
@@ -724,7 +725,6 @@ rotateVBO:
 	numDrawCalls_ = 0;
 	vertexCountInDrawCalls_ = 0;
 	decodeCounter_ = 0;
-	dcid_ = 0;
 	gstate_c.vertexFullAlpha = true;
 	framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
 
diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp
index b098898d21..95767740a4 100644
--- a/GPU/Directx9/DrawEngineDX9.cpp
+++ b/GPU/Directx9/DrawEngineDX9.cpp
@@ -345,11 +345,12 @@ void DrawEngineDX9::DoFlush() {
 			useCache = false;
 
 		if (useCache) {
-			u32 id = dcid_ ^ gstate.getUVGenMode();  // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
-			VertexArrayInfoDX9 *vai = vai_.Get(id);
+			// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
+			u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
+			VertexArrayInfoDX9 *vai = vai_.Get(dcid);
 			if (!vai) {
 				vai = new VertexArrayInfoDX9();
-				vai_.Insert(id, vai);
+				vai_.Insert(dcid, vai);
 			}
 
 			switch (vai->status) {
@@ -666,7 +667,6 @@ rotateVBO:
 	numDrawCalls_ = 0;
 	vertexCountInDrawCalls_ = 0;
 	decodeCounter_ = 0;
-	dcid_ = 0;
 	gstate_c.vertexFullAlpha = true;
 	framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
 
diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp
index 1f55ad16d3..e77728eb08 100644
--- a/GPU/GLES/DrawEngineGLES.cpp
+++ b/GPU/GLES/DrawEngineGLES.cpp
@@ -248,7 +248,6 @@ void DrawEngineGLES::DoFlush() {
 		numDrawCalls_ = 0;
 		vertexCountInDrawCalls_ = 0;
 		decodeCounter_ = 0;
-		dcid_ = 0;
 		return;
 	}
 
@@ -483,7 +482,6 @@ bail:
 	numDrawCalls_ = 0;
 	vertexCountInDrawCalls_ = 0;
 	decodeCounter_ = 0;
-	dcid_ = 0;
 	gstate_c.vertexFullAlpha = true;
 	framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
 
diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp
index 62bf89de8d..8676ba9e87 100644
--- a/GPU/Vulkan/DrawEngineVulkan.cpp
+++ b/GPU/Vulkan/DrawEngineVulkan.cpp
@@ -597,12 +597,14 @@ void DrawEngineVulkan::DoFlush() {
 		}
 
 		if (useCache) {
+			// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
+			u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
+
 			PROFILE_THIS_SCOPE("vcache");
-			u32 id = dcid_ ^ gstate.getUVGenMode();  // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
-			VertexArrayInfoVulkan *vai = vai_.Get(id);
+			VertexArrayInfoVulkan *vai = vai_.Get(dcid);
 			if (!vai) {
 				vai = new VertexArrayInfoVulkan();
-				vai_.Insert(id, vai);
+				vai_.Insert(dcid, vai);
 			}
 
 			switch (vai->status) {
@@ -1011,7 +1013,6 @@ void DrawEngineVulkan::DoFlush() {
 	numDrawCalls_ = 0;
 	vertexCountInDrawCalls_ = 0;
 	decodeCounter_ = 0;
-	dcid_ = 0;
 	gstate_c.vertexFullAlpha = true;
 	framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
 

From d90671e87761e5bd527d9c12485a06edf3486adb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 5 Jun 2023 09:49:58 +0200
Subject: [PATCH 2/5] Add some comments.

---
 Common/GPU/Vulkan/VulkanMemory.h | 2 ++
 GPU/Vulkan/DrawEngineVulkan.cpp  | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/Common/GPU/Vulkan/VulkanMemory.h b/Common/GPU/Vulkan/VulkanMemory.h
index 53f96ae689..9f51990691 100644
--- a/Common/GPU/Vulkan/VulkanMemory.h
+++ b/Common/GPU/Vulkan/VulkanMemory.h
@@ -135,6 +135,8 @@ public:
 		return blocks_[curBlockIndex_].writePtr;
 	}
 
+	// NOTE: If you can avoid this by writing the data directly into memory returned from Allocate,
+	// do so. Savings from avoiding memcpy can be significant.
 	VkDeviceSize Push(const void *data, VkDeviceSize numBytes, int alignment, VkBuffer *vkbuf) {
 		uint32_t bindOffset;
 		uint8_t *ptr = Allocate(numBytes, alignment, vkbuf, &bindOffset);
diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp
index 8676ba9e87..254e6932ed 100644
--- a/GPU/Vulkan/DrawEngineVulkan.cpp
+++ b/GPU/Vulkan/DrawEngineVulkan.cpp
@@ -825,6 +825,8 @@ void DrawEngineVulkan::DoFlush() {
 
 		VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf, tess);
 
+		// TODO: Can we avoid binding all three when not needed? Same below for hardware transform.
+		// Think this will require different descriptor set layouts.
 		const uint32_t dynamicUBOOffsets[3] = {
 			baseUBOOffset, lightUBOOffset, boneUBOOffset,
 		};

From 468757b93a428e0141678786762e76c5eca99510 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 5 Jun 2023 10:07:36 +0200
Subject: [PATCH 3/5] Add comment about possible UV scale/offset bug. Move
 loop-max to local.

---
 GPU/Common/DrawEngineCommon.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp
index 554dd72bb3..eaf7b2ce52 100644
--- a/GPU/Common/DrawEngineCommon.cpp
+++ b/GPU/Common/DrawEngineCommon.cpp
@@ -74,17 +74,18 @@ VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) {
 
 int DrawEngineCommon::ComputeNumVertsToDecode() const {
 	int vertsToDecode = 0;
+	int numDrawCalls = numDrawCalls_;
 	if (drawCalls_[0].indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
-		for (int i = 0; i < numDrawCalls_; i++) {
+		for (int i = 0; i < numDrawCalls; i++) {
 			const DeferredDrawCall &dc = drawCalls_[i];
 			vertsToDecode += dc.vertexCount;
 		}
 	} else {
 		// TODO: Share this computation with DecodeVertsStep?
-		for (int i = 0; i < numDrawCalls_; i++) {
+		for (int i = 0; i < numDrawCalls; i++) {
 			const DeferredDrawCall &dc = drawCalls_[i];
 			int lastMatch = i;
-			const int total = numDrawCalls_;
+			const int total = numDrawCalls;
 			int indexLowerBound = dc.indexLowerBound;
 			int indexUpperBound = dc.indexUpperBound;
 			for (int j = i + 1; j < total; ++j) {
@@ -645,7 +646,7 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
 		for (int j = i + 1; j < total; ++j) {
 			if (drawCalls_[j].verts != dc.verts)
 				break;
-
+			// TODO: What if UV scale/offset changes between drawcalls here?
 			indexLowerBound = std::min(indexLowerBound, (int)drawCalls_[j].indexLowerBound);
 			indexUpperBound = std::max(indexUpperBound, (int)drawCalls_[j].indexUpperBound);
 			lastMatch = j;

From 2f90ec609391b6edd6c397a089fa82cabd42ba4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 5 Jun 2023 10:47:20 +0200
Subject: [PATCH 4/5] Breakout the vertex caching (just code cleanup)

---
 GPU/Vulkan/DrawEngineVulkan.cpp | 295 ++++++++++++++++----------------
 GPU/Vulkan/DrawEngineVulkan.h   |   2 +
 2 files changed, 152 insertions(+), 145 deletions(-)

diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp
index 254e6932ed..86a6dd1d8e 100644
--- a/GPU/Vulkan/DrawEngineVulkan.cpp
+++ b/GPU/Vulkan/DrawEngineVulkan.cpp
@@ -552,6 +552,152 @@ void DrawEngineVulkan::Invalidate(InvalidationCallbackFlags flags) {
 	}
 }
 
+bool DrawEngineVulkan::VertexCacheLookup(int &vertexCount, GEPrimitiveType &prim, VkBuffer &vbuf, uint32_t &vbOffset, VkBuffer &ibuf, uint32_t &ibOffset, bool &useElements, bool forceIndexed) {
+	// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
+	u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
+
+	PROFILE_THIS_SCOPE("vcache");
+	VertexArrayInfoVulkan *vai = vai_.Get(dcid);
+	if (!vai) {
+		vai = new VertexArrayInfoVulkan();
+		vai_.Insert(dcid, vai);
+	}
+
+	switch (vai->status) {
+	case VertexArrayInfoVulkan::VAI_NEW:
+	{
+		// Haven't seen this one before. We don't actually upload the vertex data yet.
+		uint64_t dataHash = ComputeHash();
+		vai->hash = dataHash;
+		vai->minihash = ComputeMiniHash();
+		vai->status = VertexArrayInfoVulkan::VAI_HASHING;
+		vai->drawsUntilNextFullHash = 0;
+		DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);  // writes to indexGen
+		vai->numVerts = indexGen.VertexCount();
+		vai->prim = indexGen.Prim();
+		vai->maxIndex = indexGen.MaxIndex();
+		vai->flags = gstate_c.vertexFullAlpha ? VAIVULKAN_FLAG_VERTEXFULLALPHA : 0;
+		return true;
+	}
+
+	// Hashing - still gaining confidence about the buffer.
+	// But if we get this far it's likely to be worth uploading the data.
+	case VertexArrayInfoVulkan::VAI_HASHING:
+	{
+		PROFILE_THIS_SCOPE("vcachehash");
+		vai->numDraws++;
+		if (vai->lastFrame != gpuStats.numFlips) {
+			vai->numFrames++;
+		}
+		if (vai->drawsUntilNextFullHash == 0) {
+			// Let's try to skip a full hash if mini would fail.
+			const u32 newMiniHash = ComputeMiniHash();
+			uint64_t newHash = vai->hash;
+			if (newMiniHash == vai->minihash) {
+				newHash = ComputeHash();
+			}
+			if (newMiniHash != vai->minihash || newHash != vai->hash) {
+				MarkUnreliable(vai);
+				DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);
+				return true;
+			}
+			if (vai->numVerts > 64) {
+				// exponential backoff up to 16 draws, then every 24
+				vai->drawsUntilNextFullHash = std::min(24, vai->numFrames);
+			} else {
+				// Lower numbers seem much more likely to change.
+				vai->drawsUntilNextFullHash = 0;
+			}
+			// TODO: tweak
+			//if (vai->numFrames > 1000) {
+			//	vai->status = VertexArrayInfo::VAI_RELIABLE;
+			//}
+		} else {
+			vai->drawsUntilNextFullHash--;
+			u32 newMiniHash = ComputeMiniHash();
+			if (newMiniHash != vai->minihash) {
+				MarkUnreliable(vai);
+				DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);
+				return true;
+			}
+		}
+
+		if (!vai->vb) {
+			// Directly push to the vertex cache.
+			DecodeVertsToPushBuffer(vertexCache_, &vai->vbOffset, &vai->vb);
+			_dbg_assert_msg_(gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, "Should not have checked UVs when caching.");
+			vai->numVerts = indexGen.VertexCount();
+			vai->maxIndex = indexGen.MaxIndex();
+			vai->flags = gstate_c.vertexFullAlpha ? VAIVULKAN_FLAG_VERTEXFULLALPHA : 0;
+			if (forceIndexed) {
+				vai->prim = indexGen.GeneralPrim();
+				useElements = true;
+			} else {
+				vai->prim = indexGen.Prim();
+				useElements = !indexGen.SeenOnlyPurePrims();
+				if (!useElements && indexGen.PureCount()) {
+					vai->numVerts = indexGen.PureCount();
+				}
+			}
+
+			if (useElements) {
+				u32 size = sizeof(uint16_t) * indexGen.VertexCount();
+				void *dest = vertexCache_->Allocate(size, 4, &vai->ib, &vai->ibOffset);
+				memcpy(dest, decIndex_, size);
+			} else {
+				vai->ib = VK_NULL_HANDLE;
+				vai->ibOffset = 0;
+			}
+		} else {
+			gpuStats.numCachedDrawCalls++;
+			useElements = vai->ib ? true : false;
+			gpuStats.numCachedVertsDrawn += vai->numVerts;
+			gstate_c.vertexFullAlpha = vai->flags & VAIVULKAN_FLAG_VERTEXFULLALPHA;
+		}
+		vbuf = vai->vb;
+		ibuf = vai->ib;
+		vbOffset = vai->vbOffset;
+		ibOffset = vai->ibOffset;
+		vertexCount = vai->numVerts;
+		prim = static_cast<GEPrimitiveType>(vai->prim);
+		break;
+	}
+
+	// Reliable - we don't even bother hashing anymore. Right now we don't go here until after a very long time.
+	case VertexArrayInfoVulkan::VAI_RELIABLE:
+	{
+		vai->numDraws++;
+		if (vai->lastFrame != gpuStats.numFlips) {
+			vai->numFrames++;
+		}
+		gpuStats.numCachedDrawCalls++;
+		gpuStats.numCachedVertsDrawn += vai->numVerts;
+		vbuf = vai->vb;
+		ibuf = vai->ib;
+		vbOffset = vai->vbOffset;
+		ibOffset = vai->ibOffset;
+		vertexCount = vai->numVerts;
+		prim = static_cast<GEPrimitiveType>(vai->prim);
+
+		gstate_c.vertexFullAlpha = vai->flags & VAIVULKAN_FLAG_VERTEXFULLALPHA;
+		break;
+	}
+
+	case VertexArrayInfoVulkan::VAI_UNRELIABLE:
+	{
+		vai->numDraws++;
+		if (vai->lastFrame != gpuStats.numFlips) {
+			vai->numFrames++;
+		}
+		DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);
+		return true;
+	}
+	default:
+		break;
+	}
+	return false;
+}
+
 // The inline wrapper in the header checks for numDrawCalls_ == 0
 void DrawEngineVulkan::DoFlush() {
 	VulkanRenderManager *renderManager = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
@@ -595,150 +741,9 @@ void DrawEngineVulkan::DoFlush() {
 		if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
 			useCache = false;
 		}
-
+		bool useIndexGen = true;
 		if (useCache) {
-			// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
-			u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
-
-			PROFILE_THIS_SCOPE("vcache");
-			VertexArrayInfoVulkan *vai = vai_.Get(dcid);
-			if (!vai) {
-				vai = new VertexArrayInfoVulkan();
-				vai_.Insert(dcid, vai);
-			}
-
-			switch (vai->status) {
-			case VertexArrayInfoVulkan::VAI_NEW:
-			{
-				// Haven't seen this one before. We don't actually upload the vertex data yet.
-				uint64_t dataHash = ComputeHash();
-				vai->hash = dataHash;
-				vai->minihash = ComputeMiniHash();
-				vai->status = VertexArrayInfoVulkan::VAI_HASHING;
-				vai->drawsUntilNextFullHash = 0;
-				DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);  // writes to indexGen
-				vai->numVerts = indexGen.VertexCount();
-				vai->prim = indexGen.Prim();
-				vai->maxIndex = indexGen.MaxIndex();
-				vai->flags = gstate_c.vertexFullAlpha ? VAIVULKAN_FLAG_VERTEXFULLALPHA : 0;
-				goto rotateVBO;
-			}
-
-			// Hashing - still gaining confidence about the buffer.
-			// But if we get this far it's likely to be worth uploading the data.
-			case VertexArrayInfoVulkan::VAI_HASHING:
-			{
-				PROFILE_THIS_SCOPE("vcachehash");
-				vai->numDraws++;
-				if (vai->lastFrame != gpuStats.numFlips) {
-					vai->numFrames++;
-				}
-				if (vai->drawsUntilNextFullHash == 0) {
-					// Let's try to skip a full hash if mini would fail.
-					const u32 newMiniHash = ComputeMiniHash();
-					uint64_t newHash = vai->hash;
-					if (newMiniHash == vai->minihash) {
-						newHash = ComputeHash();
-					}
-					if (newMiniHash != vai->minihash || newHash != vai->hash) {
-						MarkUnreliable(vai);
-						DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);
-						goto rotateVBO;
-					}
-					if (vai->numVerts > 64) {
-						// exponential backoff up to 16 draws, then every 24
-						vai->drawsUntilNextFullHash = std::min(24, vai->numFrames);
-					} else {
-						// Lower numbers seem much more likely to change.
-						vai->drawsUntilNextFullHash = 0;
-					}
-					// TODO: tweak
-					//if (vai->numFrames > 1000) {
-					//	vai->status = VertexArrayInfo::VAI_RELIABLE;
-					//}
-				} else {
-					vai->drawsUntilNextFullHash--;
-					u32 newMiniHash = ComputeMiniHash();
-					if (newMiniHash != vai->minihash) {
-						MarkUnreliable(vai);
-						DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);
-						goto rotateVBO;
-					}
-				}
-
-				if (!vai->vb) {
-					// Directly push to the vertex cache.
-					DecodeVertsToPushBuffer(vertexCache_, &vai->vbOffset, &vai->vb);
-					_dbg_assert_msg_(gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, "Should not have checked UVs when caching.");
-					vai->numVerts = indexGen.VertexCount();
-					vai->maxIndex = indexGen.MaxIndex();
-					vai->flags = gstate_c.vertexFullAlpha ? VAIVULKAN_FLAG_VERTEXFULLALPHA : 0;
-					if (forceIndexed) {
-						vai->prim = indexGen.GeneralPrim();
-						useElements = true;
-					} else {
-						vai->prim = indexGen.Prim();
-						useElements = !indexGen.SeenOnlyPurePrims();
-						if (!useElements && indexGen.PureCount()) {
-							vai->numVerts = indexGen.PureCount();
-						}
-					}
-
-					if (useElements) {
-						u32 size = sizeof(uint16_t) * indexGen.VertexCount();
-						void *dest = vertexCache_->Allocate(size, 4, &vai->ib, &vai->ibOffset);
-						memcpy(dest, decIndex_, size);
-					} else {
-						vai->ib = VK_NULL_HANDLE;
-						vai->ibOffset = 0;
-					}
-				} else {
-					gpuStats.numCachedDrawCalls++;
-					useElements = vai->ib ? true : false;
-					gpuStats.numCachedVertsDrawn += vai->numVerts;
-					gstate_c.vertexFullAlpha = vai->flags & VAIVULKAN_FLAG_VERTEXFULLALPHA;
-				}
-				vbuf = vai->vb;
-				ibuf = vai->ib;
-				vbOffset = vai->vbOffset;
-				ibOffset = vai->ibOffset;
-				vertexCount = vai->numVerts;
-				prim = static_cast<GEPrimitiveType>(vai->prim);
-				break;
-			}
-
-			// Reliable - we don't even bother hashing anymore. Right now we don't go here until after a very long time.
-			case VertexArrayInfoVulkan::VAI_RELIABLE:
-			{
-				vai->numDraws++;
-				if (vai->lastFrame != gpuStats.numFlips) {
-					vai->numFrames++;
-				}
-				gpuStats.numCachedDrawCalls++;
-				gpuStats.numCachedVertsDrawn += vai->numVerts;
-				vbuf = vai->vb;
-				ibuf = vai->ib;
-				vbOffset = vai->vbOffset;
-				ibOffset = vai->ibOffset;
-				vertexCount = vai->numVerts;
-				prim = static_cast<GEPrimitiveType>(vai->prim);
-
-				gstate_c.vertexFullAlpha = vai->flags & VAIVULKAN_FLAG_VERTEXFULLALPHA;
-				break;
-			}
-
-			case VertexArrayInfoVulkan::VAI_UNRELIABLE:
-			{
-				vai->numDraws++;
-				if (vai->lastFrame != gpuStats.numFlips) {
-					vai->numFrames++;
-				}
-				DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);
-				goto rotateVBO;
-			}
-			default:
-				break;
-			}
+			useIndexGen = VertexCacheLookup(vertexCount, prim, vbuf, vbOffset, ibuf, ibOffset, useElements, forceIndexed);
 		} else {
 			if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
 				// If software skinning, we've already predecoded into "decoded". So push that content.
@@ -749,10 +754,10 @@ void DrawEngineVulkan::DoFlush() {
 				// Decode directly into the pushbuffer
 				DecodeVertsToPushPool(pushVertex_, &vbOffset, &vbuf);
 			}
-
-	rotateVBO:
 			gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
+		}
 
+		if (useIndexGen) {
 			vertexCount = indexGen.VertexCount();
 			if (forceIndexed) {
 				useElements = true;
diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h
index 4f169a65ca..541263ab8a 100644
--- a/GPU/Vulkan/DrawEngineVulkan.h
+++ b/GPU/Vulkan/DrawEngineVulkan.h
@@ -222,6 +222,8 @@ private:
 
 	void DestroyDeviceObjects();
 
+	bool VertexCacheLookup(int &vertexCount, GEPrimitiveType &prim, VkBuffer &vbuf, uint32_t &vbOffset, VkBuffer &ibuf, uint32_t &ibOffset, bool &useElements, bool forceIndexed);
+
 	void DecodeVertsToPushPool(VulkanPushPool *push, uint32_t *bindOffset, VkBuffer *vkbuf);
 	void DecodeVertsToPushBuffer(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf);
 

From f5516d3248ed62ae96081c90519a70c683560b9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 12 Jun 2023 14:24:20 +0200
Subject: [PATCH 5/5] Actually switch away from XXH to a custom hash, to
 de-risk

---
 GPU/Common/DrawEngineCommon.cpp | 33 +++++++++++++++++++++++----------
 GPU/Common/DrawEngineCommon.h   |  2 ++
 GPU/Vulkan/DrawEngineVulkan.cpp |  3 ++-
 3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp
index eaf7b2ce52..6421108401 100644
--- a/GPU/Common/DrawEngineCommon.cpp
+++ b/GPU/Common/DrawEngineCommon.cpp
@@ -783,16 +783,6 @@ uint64_t DrawEngineCommon::ComputeHash() {
 	return fullhash;
 }
 
-// Cheap bit scrambler from https://nullprogram.com/blog/2018/07/31/
-inline uint32_t lowbias32_r(uint32_t x) {
-	x ^= x >> 16;
-	x *= 0x43021123U;
-	x ^= x >> 15 ^ x >> 30;
-	x *= 0x1d69e2a5U;
-	x ^= x >> 16;
-	return x;
-}
-
 // vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
 void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
 	if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls_ >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
@@ -865,6 +855,29 @@ bool DrawEngineCommon::CanUseHardwareTessellation(GEPatchPrimType prim) {
 	return false;
 }
 
+// Cheap bit scrambler from https://nullprogram.com/blog/2018/07/31/
+inline uint32_t lowbias32_r(uint32_t x) {
+	x ^= x >> 16;
+	x *= 0x43021123U;
+	x ^= x >> 15 ^ x >> 30;
+	x *= 0x1d69e2a5U;
+	x ^= x >> 16;
+	return x;
+}
+
+uint32_t DrawEngineCommon::ComputeDrawcallsHash() const {
+	uint32_t dcid = 0;
+	for (int i = 0; i < numDrawCalls_; i++) {
+		u32 dhash = dcid;
+		dhash = __rotl(dhash ^ (u32)(uintptr_t)drawCalls_[i].verts, 13);
+		dhash = __rotl(dhash ^ (u32)(uintptr_t)drawCalls_[i].inds, 19);
+		dhash = __rotl(dhash ^ (u32)drawCalls_[i].indexType, 7);
+		dhash = __rotl(dhash ^ (u32)drawCalls_[i].vertexCount, 11);
+		dcid = lowbias32_r(dhash ^ (u32)drawCalls_[i].prim);
+	}
+	return dcid;
+}
+
 void TessellationDataTransfer::CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType) {
 	bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0;
 	bool hasTexCoord = (vertType & GE_VTYPE_TC_MASK) != 0;
diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h
index 0798c454b5..f20c411081 100644
--- a/GPU/Common/DrawEngineCommon.h
+++ b/GPU/Common/DrawEngineCommon.h
@@ -175,6 +175,8 @@ protected:
 		}
 	}
 
+	uint32_t ComputeDrawcallsHash() const;
+
 	bool useHWTransform_ = false;
 	bool useHWTessellation_ = false;
 	// Used to prevent unnecessary flushing in softgpu.
diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp
index 86a6dd1d8e..88a722cbcd 100644
--- a/GPU/Vulkan/DrawEngineVulkan.cpp
+++ b/GPU/Vulkan/DrawEngineVulkan.cpp
@@ -554,7 +554,8 @@ void DrawEngineVulkan::Invalidate(InvalidationCallbackFlags flags) {
 
 bool DrawEngineVulkan::VertexCacheLookup(int &vertexCount, GEPrimitiveType &prim, VkBuffer &vbuf, uint32_t &vbOffset, VkBuffer &ibuf, uint32_t &ibOffset, bool &useElements, bool forceIndexed) {
 	// getUVGenMode can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263
-	u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
+	// u32 dcid = (u32)XXH3_64bits(&drawCalls_, sizeof(DeferredDrawCall) * numDrawCalls_) ^ gstate.getUVGenMode();
+	u32 dcid = ComputeDrawcallsHash() ^ gstate.getUVGenMode();
 
 	PROFILE_THIS_SCOPE("vcache");
 	VertexArrayInfoVulkan *vai = vai_.Get(dcid);