From 23980065ba7eb14a5b2c13d4a11d0aec93d00eba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 26 Feb 2018 11:02:52 +0100
Subject: [PATCH 01/11] Unify all the DrawEngine::SubmitPrim

---
 GPU/Common/DrawEngineCommon.cpp | 66 +++++++++++++++++++++++++++++++++
 GPU/Common/DrawEngineCommon.h   |  2 +
 GPU/D3D11/DrawEngineD3D11.cpp   | 66 ---------------------------------
 GPU/D3D11/DrawEngineD3D11.h     |  2 -
 GPU/Directx9/DrawEngineDX9.cpp  | 64 --------------------------------
 GPU/Directx9/DrawEngineDX9.h    |  2 -
 GPU/GLES/DrawEngineGLES.cpp     | 64 --------------------------------
 GPU/GLES/DrawEngineGLES.h       |  2 -
 GPU/Vulkan/DrawEngineVulkan.cpp | 66 ---------------------------------
 GPU/Vulkan/DrawEngineVulkan.h   |  2 -
 10 files changed, 68 insertions(+), 268 deletions(-)

diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp
index 4ea75b7d16..1f6a44eda2 100644
--- a/GPU/Common/DrawEngineCommon.cpp
+++ b/GPU/Common/DrawEngineCommon.cpp
@@ -668,3 +668,69 @@ ReliableHashType DrawEngineCommon::ComputeHash() {
 	fullhash += DoReliableHash(&uvScale[0], sizeof(uvScale[0]) * numDrawCalls, 0x0123e658);
 	return fullhash;
 }
+
+void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {  // Queues one prim as a deferred draw call.
+	if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
+		DispatchFlush();  // Flush what is queued first: prim is not compatible with prevPrim_, or a draw-call / vertex-count limit is hit.
+	}
+
+	// TODO: Is this the right thing to do? KEEP_PREVIOUS reuses prevPrim_, or falls back to points when prevPrim_ is invalid.
+	if (prim == GE_PRIM_KEEP_PREVIOUS) {
+		prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS;
+	} else {
+		prevPrim_ = prim;
+	}
+
+	SetupVertexDecoder(vertType);
+
+	*bytesRead = vertexCount * dec_->VertexSize();  // Written before the early return below, so it is reported even for discarded prims.
+	if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES))
+		return;  // Too few vertices for this prim value; nothing is queued.
+
+	DeferredDrawCall &dc = drawCalls[numDrawCalls];
+	dc.verts = verts;
+	dc.inds = inds;
+	dc.vertType = vertType;
+	dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;  // Index-format bits taken from vertType.
+	dc.prim = prim;
+	dc.vertexCount = vertexCount;
+
+	if (g_Config.bVertexCache) {  // Only when the vertex cache is enabled: mix this call's parameters into the running dcid_.
+		u32 dhash = dcid_;
+		dhash ^= (u32)(uintptr_t)verts;
+		dhash = __rotl(dhash, 13);
+		dhash ^= (u32)(uintptr_t)inds;
+		dhash = __rotl(dhash, 13);
+		dhash ^= (u32)vertType;
+		dhash = __rotl(dhash, 13);
+		dhash ^= (u32)vertexCount;
+		dhash = __rotl(dhash, 13);
+		dhash ^= (u32)prim;
+		dcid_ = dhash;
+	}
+
+	if (inds) {
+		GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
+	} else {  // No index buffer: the bounds are simply 0 .. vertexCount - 1.
+		dc.indexLowerBound = 0;
+		dc.indexUpperBound = vertexCount - 1;
+	}
+
+	uvScale[numDrawCalls] = gstate_c.uv;  // Stored per draw call, at the same index as dc.
+
+	numDrawCalls++;
+	vertexCountInDrawCalls_ += vertexCount;
+
+	if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {  // Weighted vertices with software skinning: run a decode step at submit time.
+		DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
+		decodeCounter_++;
+	}
+
+	if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
+		// Rendertarget == texture? Then dirty the texture params and flush now, unless slow framebuffer effects are disabled.
+		if (!g_Config.bDisableSlowFramebufEffects) {
+			gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
+			DispatchFlush();
+		}
+	}
+}
diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h
index 24599a094b..25eb441b06 100644
--- a/GPU/Common/DrawEngineCommon.h
+++ b/GPU/Common/DrawEngineCommon.h
@@ -60,6 +60,8 @@ public:
 	virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) = 0;
 
 	bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType, int *bytesRead);
+
+	void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead);
 	void SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
 	void SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
 
diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp
index 9924d9c888..2b49ae2d6e 100644
--- a/GPU/D3D11/DrawEngineD3D11.cpp
+++ b/GPU/D3D11/DrawEngineD3D11.cpp
@@ -257,72 +257,6 @@ ID3D11InputLayout *DrawEngineD3D11::SetupDecFmtForDraw(D3D11VertexShader *vshade
 	}
 }
 
-void DrawEngineD3D11::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
-	if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX)
-		Flush();
-
-	// TODO: Is this the right thing to do?
-	if (prim == GE_PRIM_KEEP_PREVIOUS) {
-		prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS;
-	} else {
-		prevPrim_ = prim;
-	}
-
-	SetupVertexDecoder(vertType);
-
-	*bytesRead = vertexCount * dec_->VertexSize();
-
-	if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES))
-		return;
-
-	DeferredDrawCall &dc = drawCalls[numDrawCalls];
-	dc.verts = verts;
-	dc.inds = inds;
-	dc.vertType = vertType;
-	dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
-	dc.prim = prim;
-	dc.vertexCount = vertexCount;
-
-	if (g_Config.bVertexCache) {
-		u32 dhash = dcid_;
-		dhash ^= (u32)(uintptr_t)verts;
-		dhash = __rotl(dhash, 13);
-		dhash ^= (u32)(uintptr_t)inds;
-		dhash = __rotl(dhash, 13);
-		dhash ^= (u32)vertType;
-		dhash = __rotl(dhash, 13);
-		dhash ^= (u32)vertexCount;
-		dhash = __rotl(dhash, 13);
-		dhash ^= (u32)prim;
-		dcid_ = dhash;
-	}
-
-	if (inds) {
-		GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
-	} else {
-		dc.indexLowerBound = 0;
-		dc.indexUpperBound = vertexCount - 1;
-	}
-
-	uvScale[numDrawCalls] = gstate_c.uv;
-
-	numDrawCalls++;
-	vertexCountInDrawCalls_ += vertexCount;
-
-	if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
-		DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
-		decodeCounter_++;
-	}
-
-	if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
-		// Rendertarget == texture?
-		if (!g_Config.bDisableSlowFramebufEffects) {
-			gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
-			Flush();
-		}
-	}
-}
-
 void DrawEngineD3D11::MarkUnreliable(VertexArrayInfoD3D11 *vai) {
 	vai->status = VertexArrayInfoD3D11::VAI_UNRELIABLE;
 	if (vai->vbo) {
diff --git a/GPU/D3D11/DrawEngineD3D11.h b/GPU/D3D11/DrawEngineD3D11.h
index 6928bca67b..3a88d7ae97 100644
--- a/GPU/D3D11/DrawEngineD3D11.h
+++ b/GPU/D3D11/DrawEngineD3D11.h
@@ -105,8 +105,6 @@ public:
 	DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device, ID3D11DeviceContext *context);
 	virtual ~DrawEngineD3D11();
 
-	void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead);
-
 	void SetShaderManager(ShaderManagerD3D11 *shaderManager) {
 		shaderManager_ = shaderManager;
 	}
diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp
index 1acd41d7a3..c72d256727 100644
--- a/GPU/Directx9/DrawEngineDX9.cpp
+++ b/GPU/Directx9/DrawEngineDX9.cpp
@@ -232,70 +232,6 @@ IDirect3DVertexDeclaration9 *DrawEngineDX9::SetupDecFmtForDraw(VSShader *vshader
 	}
 }
 
-void DrawEngineDX9::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
-	if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX)
-		Flush();
-
-	// TODO: Is this the right thing to do?
-	if (prim == GE_PRIM_KEEP_PREVIOUS) {
-		prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS;
-	} else {
-		prevPrim_ = prim;
-	}
-
-	SetupVertexDecoder(vertType);
-
-	*bytesRead = vertexCount * dec_->VertexSize();
-
-	if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES))
-		return;
-
-	DeferredDrawCall &dc = drawCalls[numDrawCalls];
-	dc.verts = verts;
-	dc.inds = inds;
-	dc.vertType = vertType;
-	dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
-	dc.prim = prim;
-	dc.vertexCount = vertexCount;
-
-	u32 dhash = dcid_;
-	dhash ^= (u32)(uintptr_t)verts;
-	dhash = __rotl(dhash, 13);
-	dhash ^= (u32)(uintptr_t)inds;
-	dhash = __rotl(dhash, 13);
-	dhash ^= (u32)vertType;
-	dhash = __rotl(dhash, 13);
-	dhash ^= (u32)vertexCount;
-	dhash = __rotl(dhash, 13);
-	dhash ^= (u32)prim;
-	dcid_ = dhash;
-
-	if (inds) {
-		GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
-	} else {
-		dc.indexLowerBound = 0;
-		dc.indexUpperBound = vertexCount - 1;
-	}
-
-	uvScale[numDrawCalls] = gstate_c.uv;
-
-	numDrawCalls++;
-	vertexCountInDrawCalls_ += vertexCount;
-
-	if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
-		DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
-		decodeCounter_++;
-	}
-
-	if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
-		// Rendertarget == texture?
-		if (!g_Config.bDisableSlowFramebufEffects) {
-			gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
-			Flush();
-		}
-	}
-}
-
 void DrawEngineDX9::MarkUnreliable(VertexArrayInfoDX9 *vai) {
 	vai->status = VertexArrayInfoDX9::VAI_UNRELIABLE;
 	if (vai->vbo) {
diff --git a/GPU/Directx9/DrawEngineDX9.h b/GPU/Directx9/DrawEngineDX9.h
index 35cc5fd18e..171675f3e0 100644
--- a/GPU/Directx9/DrawEngineDX9.h
+++ b/GPU/Directx9/DrawEngineDX9.h
@@ -103,8 +103,6 @@ public:
 	DrawEngineDX9(Draw::DrawContext *draw);
 	virtual ~DrawEngineDX9();
 
-	void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead);
-
 	void SetShaderManager(ShaderManagerDX9 *shaderManager) {
 		shaderManager_ = shaderManager;
 	}
diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp
index bb0676719c..83b56e5cab 100644
--- a/GPU/GLES/DrawEngineGLES.cpp
+++ b/GPU/GLES/DrawEngineGLES.cpp
@@ -276,70 +276,6 @@ GLRInputLayout *DrawEngineGLES::SetupDecFmtForDraw(LinkedShader *program, const
 	return inputLayout;
 }
 
-void DrawEngineGLES::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
-	if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX)
-		Flush();
-
-	// TODO: Is this the right thing to do?
-	if (prim == GE_PRIM_KEEP_PREVIOUS) {
-		prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS;
-	} else {
-		prevPrim_ = prim;
-	}
-
-	SetupVertexDecoder(vertType);
-
-	*bytesRead = vertexCount * dec_->VertexSize();
-
-	if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES))
-		return;
-
-	DeferredDrawCall &dc = drawCalls[numDrawCalls];
-	dc.verts = verts;
-	dc.inds = inds;
-	dc.vertType = vertType;
-	dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
-	dc.prim = prim;
-	dc.vertexCount = vertexCount;
-
-	u32 dhash = dcid_;
-	dhash ^= (u32)(uintptr_t)verts;
-	dhash = __rotl(dhash, 13);
-	dhash ^= (u32)(uintptr_t)inds;
-	dhash = __rotl(dhash, 13);
-	dhash ^= (u32)vertType;
-	dhash = __rotl(dhash, 13);
-	dhash ^= (u32)vertexCount;
-	dhash = __rotl(dhash, 13);
-	dhash ^= (u32)prim;
-	dcid_ = dhash;
-
-	if (inds) {
-		GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
-	} else {
-		dc.indexLowerBound = 0;
-		dc.indexUpperBound = vertexCount - 1;
-	}
-
-	uvScale[numDrawCalls] = gstate_c.uv;
-
-	numDrawCalls++;
-	vertexCountInDrawCalls_ += vertexCount;
-
-	if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
-		DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
-		decodeCounter_++;
-	}
-
-	if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
-		// Rendertarget == texture?
-		if (!g_Config.bDisableSlowFramebufEffects) {
-			gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
-			Flush();
-		}
-	}
-}
-
 void DrawEngineGLES::DecodeVertsToPushBuffer(GLPushBuffer *push, uint32_t *bindOffset, GLRBuffer **buf) {
 	u8 *dest = decoded;
 
diff --git a/GPU/GLES/DrawEngineGLES.h b/GPU/GLES/DrawEngineGLES.h
index 59246fb5cd..62340af75c 100644
--- a/GPU/GLES/DrawEngineGLES.h
+++ b/GPU/GLES/DrawEngineGLES.h
@@ -105,8 +105,6 @@ public:
 	DrawEngineGLES(Draw::DrawContext *draw);
 	virtual ~DrawEngineGLES();
 
-	void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead);
-
 	void SetShaderManager(ShaderManagerGLES *shaderManager) {
 		shaderManager_ = shaderManager;
 	}
diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp
index 122b50ddaa..faba817b1c 100644
--- a/GPU/Vulkan/DrawEngineVulkan.cpp
+++ b/GPU/Vulkan/DrawEngineVulkan.cpp
@@ -337,72 +337,6 @@ void DrawEngineVulkan::EndFrame() {
 	vertexCache_->End();
 }
 
-void DrawEngineVulkan::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
-	if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
-		Flush();
-	}
-
-	// TODO: Is this the right thing to do?
-	if (prim == GE_PRIM_KEEP_PREVIOUS) {
-		prim = prevPrim_ != GE_PRIM_INVALID ? prevPrim_ : GE_PRIM_POINTS;
-	} else {
-		prevPrim_ = prim;
-	}
-
-	SetupVertexDecoder(vertType);
-
-	*bytesRead = vertexCount * dec_->VertexSize();
-	if ((vertexCount < 2 && prim > 0) || (vertexCount < 3 && prim > 2 && prim != GE_PRIM_RECTANGLES))
-		return;
-
-	DeferredDrawCall &dc = drawCalls[numDrawCalls];
-	dc.verts = verts;
-	dc.inds = inds;
-	dc.vertType = vertType;
-	dc.indexType = (vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
-	dc.prim = prim;
-	dc.vertexCount = vertexCount;
-
-	if (g_Config.bVertexCache) {
-		u32 dhash = dcid_;
-		dhash ^= (u32)(uintptr_t)verts;
-		dhash = __rotl(dhash, 13);
-		dhash ^= (u32)(uintptr_t)inds;
-		dhash = __rotl(dhash, 13);
-		dhash ^= (u32)vertType;
-		dhash = __rotl(dhash, 13);
-		dhash ^= (u32)vertexCount;
-		dhash = __rotl(dhash, 13);
-		dhash ^= (u32)prim;
-		dcid_ = dhash;
-	}
-
-	if (inds) {
-		GetIndexBounds(inds, vertexCount, vertType, &dc.indexLowerBound, &dc.indexUpperBound);
-	} else {
-		dc.indexLowerBound = 0;
-		dc.indexUpperBound = vertexCount - 1;
-	}
-
-	uvScale[numDrawCalls] = gstate_c.uv;
-
-	numDrawCalls++;
-	vertexCountInDrawCalls_ += vertexCount;
-
-	if (g_Config.bSoftwareSkinning && (vertType & GE_VTYPE_WEIGHT_MASK)) {
-		DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
-		decodeCounter_++;
-	}
-
-	if (prim == GE_PRIM_RECTANGLES && (gstate.getTextureAddress(0) & 0x3FFFFFFF) == (gstate.getFrameBufAddress() & 0x3FFFFFFF)) {
-		// Rendertarget == texture?
-		if (!g_Config.bDisableSlowFramebufEffects) {
-			gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
-			Flush();
-		}
-	}
-}
-
 void DrawEngineVulkan::DecodeVertsToPushBuffer(VulkanPushBuffer *push, uint32_t *bindOffset, VkBuffer *vkbuf) {
 	u8 *dest = decoded;
 
diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h
index 15890cefde..a80f05a5d9 100644
--- a/GPU/Vulkan/DrawEngineVulkan.h
+++ b/GPU/Vulkan/DrawEngineVulkan.h
@@ -122,8 +122,6 @@ public:
 	DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *draw);
 	virtual ~DrawEngineVulkan();
 
-	void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead);
-
 	void SetShaderManager(ShaderManagerVulkan *shaderManager) {
 		shaderManager_ = shaderManager;
 	}

From c7f8f4c5cac059e44c7fe873842d2f6d7c82c3f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 26 Feb 2018 11:18:52 +0100
Subject: [PATCH 02/11] Unify Execute_Prim

---
 GPU/D3D11/GPU_D3D11.cpp   | 85 +------------------------------------
 GPU/D3D11/GPU_D3D11.h     |  1 -
 GPU/Directx9/GPU_DX9.cpp  | 88 ---------------------------------------
 GPU/Directx9/GPU_DX9.h    |  2 -
 GPU/GLES/GPU_GLES.cpp     | 82 +-----------------------------------
 GPU/GPUCommon.cpp         | 84 ++++++++++++++++++++++++++++++++++++-
 GPU/GPUCommon.h           |  3 ++
 GPU/Vulkan/GPU_Vulkan.cpp | 86 +-------------------------------------
 GPU/Vulkan/GPU_Vulkan.h   |  3 --
 9 files changed, 89 insertions(+), 345 deletions(-)

diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp
index 24dcfdd734..e11a6dec2d 100644
--- a/GPU/D3D11/GPU_D3D11.cpp
+++ b/GPU/D3D11/GPU_D3D11.cpp
@@ -75,16 +75,6 @@ struct D3D11CommandTableEntry {
 
 // This table gets crunched into a faster form by init.
 static const D3D11CommandTableEntry commandTable[] = {
-	// Changes that dirty the current texture.
-	{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 },
-
-	// Changing the vertex type requires us to flush.
-	{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
-
-	{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPU_D3D11::Execute_Prim },
-	{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
-	{ GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
-
 	// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
 	{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_D3D11::Execute_LoadClut },
 };
@@ -435,82 +425,9 @@ void GPU_D3D11::ExecuteOp(u32 op, u32 diff) {
 	}
 }
 
-void GPU_D3D11::Execute_Prim(u32 op, u32 diff) {
-	// This drives all drawing. All other state we just buffer up, then we apply it only
-	// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
-
-	u32 data = op & 0xFFFFFF;
-	u32 count = data & 0xFFFF;
-	if (count == 0)
-		return;
-
-	// Upper bits are ignored.
-	GEPrimitiveType prim = static_cast<GEPrimitiveType>((data >> 16) & 7);
-	SetDrawType(DRAW_PRIM, prim);
-
-	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
-
-	if (gstate.isAntiAliasEnabled()) {
-		// Discard AA lines in DOA
-		if (prim == GE_PRIM_LINE_STRIP)
-			return;
-		// Discard AA lines in Summon Night 5
-		if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
-			return;
-	}
-
-	// This also make skipping drawing very effective.
-	framebufferManagerD3D11_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
-	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
-		drawEngine_.SetupVertexDecoder(gstate.vertType);
-		// Rough estimate, not sure what's correct.
-		cyclesExecuted += EstimatePerVertexCost() * count;
-		return;
-	}
-
-	u32 vertexAddr = gstate_c.vertexAddr;
-	if (!Memory::IsValidAddress(vertexAddr)) {
-		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", vertexAddr);
-		return;
-	}
-
-	void *verts = Memory::GetPointerUnchecked(vertexAddr);
-	void *inds = 0;
-	u32 vertexType = gstate.vertType;
-	if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
-		u32 indexAddr = gstate_c.indexAddr;
-		if (!Memory::IsValidAddress(indexAddr)) {
-			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr);
-			return;
-		}
-		inds = Memory::GetPointerUnchecked(indexAddr);
-	}
-
-#ifndef MOBILE_DEVICE
-	if (prim > GE_PRIM_RECTANGLES) {
-		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
-	}
-#endif
-
-	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
-		vertexCost_ = EstimatePerVertexCost();
-	}
-	gpuStats.vertexGPUCycles += vertexCost_ * count;
-	cyclesExecuted += vertexCost_* count;
-
-	int bytesRead = 0;
-	UpdateUVScaleOffset();
-	drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);
-
-	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
-	// Some games rely on this, they don't bother reloading VADDR and IADDR.
-	// The VADDR/IADDR registers are NOT updated.
-	AdvanceVerts(vertexType, count, bytesRead);
-}
-
 void GPU_D3D11::Execute_LoadClut(u32 op, u32 diff) {
 	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
-	textureCacheD3D11_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
+	textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
 	// This could be used to "dirty" textures with clut.
 }
 
diff --git a/GPU/D3D11/GPU_D3D11.h b/GPU/D3D11/GPU_D3D11.h
index f2dea31c61..0719144feb 100644
--- a/GPU/D3D11/GPU_D3D11.h
+++ b/GPU/D3D11/GPU_D3D11.h
@@ -64,7 +64,6 @@ public:
 		GPU_D3D11::CmdFunc func;
 	};
 
-	void Execute_Prim(u32 op, u32 diff);
 	void Execute_LoadClut(u32 op, u32 diff);
 
 	// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp
index 158442285c..04b2f0ddda 100644
--- a/GPU/Directx9/GPU_DX9.cpp
+++ b/GPU/Directx9/GPU_DX9.cpp
@@ -59,16 +59,6 @@ struct D3D9CommandTableEntry {
 
 // This table gets crunched into a faster form by init.
 static const D3D9CommandTableEntry commandTable[] = {
-	// Changes that dirty the current texture.
-	{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 },
-
-	// Changing the vertex type requires us to flush.
-	{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
-
-	{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPU_DX9::Execute_Prim },
-	{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
-	{ GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
-
 	// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
 	{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_DX9::Execute_LoadClut },
 };
@@ -410,84 +400,6 @@ void GPU_DX9::ExecuteOp(u32 op, u32 diff) {
 	}
 }
 
-void GPU_DX9::Execute_Prim(u32 op, u32 diff) {
-	// This drives all drawing. All other state we just buffer up, then we apply it only
-	// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
-
-	u32 data = op & 0xFFFFFF;
-	u32 count = data & 0xFFFF;
-	if (count == 0)
-		return;
-	// Upper bits are ignored.
-	GEPrimitiveType prim = static_cast<GEPrimitiveType>((data >> 16) & 7);
-	SetDrawType(DRAW_PRIM, prim);
-
-	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
-
-	if (gstate.isAntiAliasEnabled()) {
-		// Discard AA lines in DOA
-		if (prim == GE_PRIM_LINE_STRIP)
-			return;
-		// Discard AA lines in Summon Night 5
-		if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
-			return;
-	}
-
-	// This also make skipping drawing very effective.
-	framebufferManagerDX9_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
-	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
-		drawEngine_.SetupVertexDecoder(gstate.vertType);
-		// Rough estimate, not sure what's correct.
-		cyclesExecuted += EstimatePerVertexCost() * count;
-		return;
-	}
-
-	u32 vertexAddr = gstate_c.vertexAddr;
-	if (!Memory::IsValidAddress(vertexAddr)) {
-		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", vertexAddr);
-		return;
-	}
-
-	void *verts = Memory::GetPointerUnchecked(vertexAddr);
-	void *inds = 0;
-	u32 vertexType = gstate.vertType;
-	if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
-		u32 indexAddr = gstate_c.indexAddr;
-		if (!Memory::IsValidAddress(indexAddr)) {
-			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr);
-			return;
-		}
-		inds = Memory::GetPointerUnchecked(indexAddr);
-	}
-
-#ifndef MOBILE_DEVICE
-	if (prim > GE_PRIM_RECTANGLES) {
-		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
-	}
-#endif
-
-	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
-		vertexCost_ = EstimatePerVertexCost();
-	}
-	gpuStats.vertexGPUCycles += vertexCost_ * count;
-	cyclesExecuted += vertexCost_* count;
-
-	int bytesRead = 0;
-	UpdateUVScaleOffset();
-	drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);
-
-	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
-	// Some games rely on this, they don't bother reloading VADDR and IADDR.
-	// The VADDR/IADDR registers are NOT updated.
-	AdvanceVerts(vertexType, count, bytesRead);
-}
-
-void GPU_DX9::Execute_LoadClut(u32 op, u32 diff) {
-	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
-	textureCacheDX9_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
-	// This could be used to "dirty" textures with clut.
-}
-
 void GPU_DX9::GetStats(char *buffer, size_t bufsize) {
 	float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
 	snprintf(buffer, bufsize - 1,
diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h
index 1a0a7ee037..30f661696f 100644
--- a/GPU/Directx9/GPU_DX9.h
+++ b/GPU/Directx9/GPU_DX9.h
@@ -65,8 +65,6 @@ public:
 		GPU_DX9::CmdFunc func;
 	};
 
-	void Execute_Prim(u32 op, u32 diff);
-	void Execute_TexSize0(u32 op, u32 diff);
 	void Execute_LoadClut(u32 op, u32 diff);
 
 	// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp
index 4ca8419772..8ec5f19103 100644
--- a/GPU/GLES/GPU_GLES.cpp
+++ b/GPU/GLES/GPU_GLES.cpp
@@ -61,16 +61,6 @@ struct GLESCommandTableEntry {
 // This table gets crunched into a faster form by init.
 // TODO: Share this table between the backends. Will have to make another indirection for the function pointers though..
 static const GLESCommandTableEntry commandTable[] = {
-	// Changes that dirty the current texture.
-	{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 },
-
-	// Changing the vertex type requires us to flush.
-	{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
-
-	{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPU_GLES::Execute_Prim },
-	{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
-	{ GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
-
 	// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
 	{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_GLES::Execute_LoadClut },
 };
@@ -625,79 +615,9 @@ void GPU_GLES::ExecuteOp(u32 op, u32 diff) {
 	}
 }
 
-void GPU_GLES::Execute_Prim(u32 op, u32 diff) {
-	// This drives all drawing. All other state we just buffer up, then we apply it only
-	// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
-
-	u32 data = op & 0xFFFFFF;
-	u32 count = data & 0xFFFF;
-	if (count == 0)
-		return;
-
-	// Upper bits are ignored.
-	GEPrimitiveType prim = static_cast<GEPrimitiveType>((data >> 16) & 7);
-	SetDrawType(DRAW_PRIM, prim);
-
-	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
-
-	if (gstate.isAntiAliasEnabled()) {
-		// Discard AA lines in DOA
-		if (prim == GE_PRIM_LINE_STRIP)
-			return;
-		// Discard AA lines in Summon Night 5
-		if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
-			return;
-	}
-
-	// This also makes skipping drawing very effective. This function can change the framebuffer.
-	framebufferManagerGL_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
-	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB))	{
-		drawEngine_.SetupVertexDecoder(gstate.vertType);
-		// Rough estimate, not sure what's correct.
-		cyclesExecuted += EstimatePerVertexCost() * count;
-		return;
-	}
-
-	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
-		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
-		return;
-	}
-
-	void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
-	void *inds = 0;
-	if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
-		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
-			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", gstate_c.indexAddr);
-			return;
-		}
-		inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
-	}
-
-#ifndef MOBILE_DEVICE
-	if (prim > GE_PRIM_RECTANGLES) {
-		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
-	}
-#endif
-
-	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
-		vertexCost_ = EstimatePerVertexCost();
-	}
-	gpuStats.vertexGPUCycles += vertexCost_ * count;
-	cyclesExecuted += vertexCost_* count;
-
-	int bytesRead = 0;
-	UpdateUVScaleOffset();
-	drawEngine_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
-
-	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
-	// Some games rely on this, they don't bother reloading VADDR and IADDR.
-	// The VADDR/IADDR registers are NOT updated.
-	AdvanceVerts(gstate.vertType, count, bytesRead);
-}
-
 void GPU_GLES::Execute_LoadClut(u32 op, u32 diff) {
 	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
-	textureCacheGL_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
+	textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
 }
 
 void GPU_GLES::GetStats(char *buffer, size_t bufsize) {
diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp
index af99023ced..16ecf8c23a 100644
--- a/GPU/GPUCommon.cpp
+++ b/GPU/GPUCommon.cpp
@@ -42,6 +42,13 @@ const CommonCommandTableEntry commonCommandTable[] = {
 	{ GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_BJump },  // EXECUTE
 	{ GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPUCommon::Execute_BoundingBox }, // + FLUSHBEFORE when we implement... or not, do we need to?
 
+	{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPUCommon::Execute_Prim },
+	{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
+	{ GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
+
+	// Changing the vertex type requires us to flush.
+	{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
+
 	// These two are actually processed in CMD_END. Not sure if FLAG_FLUSHBEFORE matters.
 	{ GE_CMD_SIGNAL, FLAG_FLUSHBEFORE },
 	{ GE_CMD_FINISH, FLAG_FLUSHBEFORE },
@@ -121,7 +128,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
 	{ GE_CMD_TEXOFFSETU },
 	{ GE_CMD_TEXOFFSETV },
 
-	// TEXSIZE0 is handled by each backend.
+	{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 },
 	{ GE_CMD_TEXSIZE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
 	{ GE_CMD_TEXSIZE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
 	{ GE_CMD_TEXSIZE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
@@ -1368,6 +1375,81 @@ void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) {
 		gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE);
 }
 
+
+void GPUCommon::Execute_Prim(u32 op, u32 diff) {
+	// This drives all drawing. All other state we just buffer up, then we apply it only
+	// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
+
+	PROFILE_THIS_SCOPE("execprim");
+
+	u32 data = op & 0xFFFFFF;
+	u32 count = data & 0xFFFF;
+	if (count == 0)
+		return;
+
+	// Upper bits are ignored.
+	GEPrimitiveType prim = static_cast<GEPrimitiveType>((data >> 16) & 7);
+	SetDrawType(DRAW_PRIM, prim);
+
+	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
+	if (gstate.isAntiAliasEnabled()) {
+		// Discard AA lines in DOA
+		if (prim == GE_PRIM_LINE_STRIP)
+			return;
+		// Discard AA lines in Summon Night 5
+		if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
+			return;
+	}
+
+	// This also makes skipping drawing very effective.
+	framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
+
+	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
+		drawEngineCommon_->SetupVertexDecoder(gstate.vertType);  // Do we still need to do this?
+		// Rough estimate, not sure what's correct.
+		cyclesExecuted += EstimatePerVertexCost() * count;
+		return;
+	}
+
+	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
+		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
+		return;
+	}
+
+	void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
+	void *inds = 0;
+	u32 vertexType = gstate.vertType;
+	if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
+		u32 indexAddr = gstate_c.indexAddr;
+		if (!Memory::IsValidAddress(indexAddr)) {
+			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr);
+			return;
+		}
+		inds = Memory::GetPointerUnchecked(indexAddr);
+	}
+
+#ifndef MOBILE_DEVICE
+	if (prim > GE_PRIM_RECTANGLES) {
+		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
+	}
+#endif
+
+	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
+		vertexCost_ = EstimatePerVertexCost();
+	}
+	gpuStats.vertexGPUCycles += vertexCost_ * count;
+	cyclesExecuted += vertexCost_* count;
+
+	int bytesRead = 0;
+	UpdateUVScaleOffset();
+	drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);
+
+	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
+	// Some games rely on this, they don't bother reloading VADDR and IADDR.
+	// The VADDR/IADDR registers are NOT updated.
+	AdvanceVerts(vertexType, count, bytesRead);
+}
+
 void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
 	drawEngineCommon_->DispatchFlush();
 
diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h
index 1890166ce8..38d28d8e41 100644
--- a/GPU/GPUCommon.h
+++ b/GPU/GPUCommon.h
@@ -129,6 +129,7 @@ public:
 	void Execute_VertexType(u32 op, u32 diff);
 	void Execute_VertexTypeSkinning(u32 op, u32 diff);
 
+	void Execute_Prim(u32 op, u32 diff);
 	void Execute_Bezier(u32 op, u32 diff);
 	void Execute_Spline(u32 op, u32 diff);
 	void Execute_BoundingBox(u32 op, u32 diff);
@@ -316,6 +317,8 @@ protected:
 	DrawType lastDraw_;
 	GEPrimitiveType lastPrim_;
 
+	int vertexCost_ = 0;
+
 	// No idea how big this buffer needs to be.
 	enum {
 		MAX_IMMBUFFER_SIZE = 32,
diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp
index 5bdd2fc086..48314824da 100644
--- a/GPU/Vulkan/GPU_Vulkan.cpp
+++ b/GPU/Vulkan/GPU_Vulkan.cpp
@@ -57,16 +57,6 @@ GPU_Vulkan::CommandInfo GPU_Vulkan::cmdInfo_[256];
 
 // This table gets crunched into a faster form by init.
 static const VulkanCommandTableEntry commandTable[] = {
-	// Changes that dirty the current texture.
-	{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_TexSize0 },
-
-	// Changing the vertex type requires us to flush.
-	{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
-
-	{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_Prim },
-	{ GE_CMD_BEZIER, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
-	{ GE_CMD_SPLINE, FLAG_FLUSHBEFORE | FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
-
 	// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
 	{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_LoadClut },
 };
@@ -520,83 +510,9 @@ void GPU_Vulkan::ExecuteOp(u32 op, u32 diff) {
 	}
 }
 
-void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) {
-	// This drives all drawing. All other state we just buffer up, then we apply it only
-	// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.
-
-	PROFILE_THIS_SCOPE("execprim");
-
-	u32 data = op & 0xFFFFFF;
-	u32 count = data & 0xFFFF;
-	if (count == 0)
-		return;
-
-	// Upper bits are ignored.
-	GEPrimitiveType prim = static_cast<GEPrimitiveType>((data >> 16) & 7);
-	SetDrawType(DRAW_PRIM, prim);
-
-	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
-	if (gstate.isAntiAliasEnabled()) {
-		// Discard AA lines in DOA
-		if (prim == GE_PRIM_LINE_STRIP)
-			return;
-		// Discard AA lines in Summon Night 5
-		if ((prim == GE_PRIM_LINES) && gstate.isSkinningEnabled())
-			return;
-	}
-
-	// This also makes skipping drawing very effective.
-	framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
-
-	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
-		drawEngine_.SetupVertexDecoder(gstate.vertType);  // Do we still need to do this?
-		// Rough estimate, not sure what's correct.
-		cyclesExecuted += EstimatePerVertexCost() * count;
-		return;
-	}
-
-	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
-		ERROR_LOG_REPORT(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
-		return;
-	}
-
-	void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
-	void *inds = 0;
-	u32 vertexType = gstate.vertType;
-	if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
-		u32 indexAddr = gstate_c.indexAddr;
-		if (!Memory::IsValidAddress(indexAddr)) {
-			ERROR_LOG_REPORT(G3D, "Bad index address %08x!", indexAddr);
-			return;
-		}
-		inds = Memory::GetPointerUnchecked(indexAddr);
-	}
-
-#ifndef MOBILE_DEVICE
-	if (prim > GE_PRIM_RECTANGLES) {
-		ERROR_LOG_REPORT_ONCE(reportPrim, G3D, "Unexpected prim type: %d", prim);
-	}
-#endif
-
-	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
-		vertexCost_ = EstimatePerVertexCost();
-	}
-	gpuStats.vertexGPUCycles += vertexCost_ * count;
-	cyclesExecuted += vertexCost_* count;
-
-	int bytesRead = 0;
-	UpdateUVScaleOffset();
-	drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);
-
-	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
-	// Some games rely on this, they don't bother reloading VADDR and IADDR.
-	// The VADDR/IADDR registers are NOT updated.
-	AdvanceVerts(vertexType, count, bytesRead);
-}
-
 void GPU_Vulkan::Execute_LoadClut(u32 op, u32 diff) {
 	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
-	textureCacheVulkan_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
+	textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
 }
 
 void GPU_Vulkan::InitDeviceObjects() {
diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h
index 63813331fb..9c846dd15d 100644
--- a/GPU/Vulkan/GPU_Vulkan.h
+++ b/GPU/Vulkan/GPU_Vulkan.h
@@ -68,7 +68,6 @@ public:
 		GPU_Vulkan::CmdFunc func;
 	};
 	
-	void Execute_Prim(u32 op, u32 diff);
 	void Execute_LoadClut(u32 op, u32 diff);
 
 	// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
@@ -112,8 +111,6 @@ private:
 	// Manages state and pipeline objects
 	PipelineManagerVulkan *pipelineManager_;
 
-	int vertexCost_ = 0;
-
 	std::string reportingPrimaryInfo_;
 	std::string reportingFullInfo_;
 

From 967018b7f8132c252796294187a214ae4e19caab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 26 Feb 2018 11:26:52 +0100
Subject: [PATCH 03/11] Unify Execute_LoadClut

---
 GPU/Common/DrawEngineCommon.h |  4 +++-
 GPU/D3D11/GPU_D3D11.cpp       | 33 ---------------------------------
 GPU/D3D11/GPU_D3D11.h         |  2 --
 GPU/Directx9/GPU_DX9.cpp      | 26 --------------------------
 GPU/Directx9/GPU_DX9.h        |  2 --
 GPU/GLES/GPU_GLES.cpp         | 32 --------------------------------
 GPU/GLES/GPU_GLES.h           |  3 ---
 GPU/GPUCommon.cpp             |  7 +++++++
 GPU/GPUCommon.h               |  2 ++
 GPU/Vulkan/GPU_Vulkan.cpp     | 32 --------------------------------
 GPU/Vulkan/GPU_Vulkan.h       |  2 --
 11 files changed, 12 insertions(+), 133 deletions(-)

diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h
index 25eb441b06..d2150e4f0c 100644
--- a/GPU/Common/DrawEngineCommon.h
+++ b/GPU/Common/DrawEngineCommon.h
@@ -56,7 +56,9 @@ public:
 	// Flush is normally non-virtual but here's a virtual way to call it, used by the shared spline code, which is expensive anyway.
 	// Not really sure if these wrappers are worth it...
 	virtual void DispatchFlush() = 0;
-	// Same for SubmitPrim
+
+	// This would seem to be unnecessary now, but is still required for splines/beziers to work in the software backend since SubmitPrim
+	// is different. Should probably refactor that.
 	virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) = 0;
 
 	bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType, int *bytesRead);
diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp
index e11a6dec2d..4c8a3d591e 100644
--- a/GPU/D3D11/GPU_D3D11.cpp
+++ b/GPU/D3D11/GPU_D3D11.cpp
@@ -66,19 +66,6 @@
 #include "Core/HLE/sceKernelInterrupt.h"
 #include "Core/HLE/sceGe.h"
 
-struct D3D11CommandTableEntry {
-	uint8_t cmd;
-	uint8_t flags;
-	uint64_t dirty;
-	GPU_D3D11::CmdFunc func;
-};
-
-// This table gets crunched into a faster form by init.
-static const D3D11CommandTableEntry commandTable[] = {
-	// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
-	{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_D3D11::Execute_LoadClut },
-};
-
 GPU_D3D11::CommandInfo GPU_D3D11::cmdInfo_[256]{};
 
 GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
@@ -134,20 +121,6 @@ GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 		}
 	}
 
-	for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) {
-		const u8 cmd = commandTable[i].cmd;
-		if (dupeCheck.find(cmd) != dupeCheck.end()) {
-			ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
-		} else {
-			dupeCheck.insert(cmd);
-		}
-		cmdInfo_[cmd].flags |= (uint64_t)commandTable[i].flags | (commandTable[i].dirty << 8);
-		cmdInfo_[cmd].func = commandTable[i].func;
-		if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
-			Crash();
-		}
-	}
-
 	// Find commands missing from the table.
 	for (int i = 0; i < 0xEF; i++) {
 		if (dupeCheck.find((u8)i) == dupeCheck.end()) {
@@ -425,12 +398,6 @@ void GPU_D3D11::ExecuteOp(u32 op, u32 diff) {
 	}
 }
 
-void GPU_D3D11::Execute_LoadClut(u32 op, u32 diff) {
-	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
-	textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
-	// This could be used to "dirty" textures with clut.
-}
-
 void GPU_D3D11::GetStats(char *buffer, size_t bufsize) {
 	float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
 	snprintf(buffer, bufsize - 1,
diff --git a/GPU/D3D11/GPU_D3D11.h b/GPU/D3D11/GPU_D3D11.h
index 0719144feb..470837af44 100644
--- a/GPU/D3D11/GPU_D3D11.h
+++ b/GPU/D3D11/GPU_D3D11.h
@@ -64,8 +64,6 @@ public:
 		GPU_D3D11::CmdFunc func;
 	};
 
-	void Execute_LoadClut(u32 op, u32 diff);
-
 	// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
 	std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
 	std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp
index 04b2f0ddda..3704269b25 100644
--- a/GPU/Directx9/GPU_DX9.cpp
+++ b/GPU/Directx9/GPU_DX9.cpp
@@ -50,19 +50,6 @@
 
 namespace DX9 {
 
-struct D3D9CommandTableEntry {
-	uint8_t cmd;
-	uint8_t flags;
-	uint64_t dirty;
-	GPU_DX9::CmdFunc func;
-};
-
-// This table gets crunched into a faster form by init.
-static const D3D9CommandTableEntry commandTable[] = {
-	// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
-	{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_DX9::Execute_LoadClut },
-};
-
 GPU_DX9::CommandInfo GPU_DX9::cmdInfo_[256];
 
 GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
@@ -116,19 +103,6 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 		}
 	}
 
-	for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) {
-		const u8 cmd = commandTable[i].cmd;
-		if (dupeCheck.find(cmd) != dupeCheck.end()) {
-			ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
-		} else {
-			dupeCheck.insert(cmd);
-		}
-		cmdInfo_[cmd].flags |= (uint64_t)commandTable[i].flags | (commandTable[i].dirty << 8);
-		cmdInfo_[cmd].func = commandTable[i].func;
-		if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
-			Crash();
-		}
-	}
 	// Find commands missing from the table.
 	for (int i = 0; i < 0xEF; i++) {
 		if (dupeCheck.find((u8)i) == dupeCheck.end()) {
diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h
index 30f661696f..5edae1e5a9 100644
--- a/GPU/Directx9/GPU_DX9.h
+++ b/GPU/Directx9/GPU_DX9.h
@@ -65,8 +65,6 @@ public:
 		GPU_DX9::CmdFunc func;
 	};
 
-	void Execute_LoadClut(u32 op, u32 diff);
-
 	// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
 	std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
 	std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp
index 8ec5f19103..ea6c6a17ec 100644
--- a/GPU/GLES/GPU_GLES.cpp
+++ b/GPU/GLES/GPU_GLES.cpp
@@ -51,20 +51,6 @@
 #include "Windows/GPU/WindowsGLContext.h"
 #endif
 
-struct GLESCommandTableEntry {
-	uint8_t cmd;
-	uint8_t flags;
-	uint64_t dirty;
-	GPU_GLES::CmdFunc func;
-};
-
-// This table gets crunched into a faster form by init.
-// TODO: Share this table between the backends. Will have to make another indirection for the function pointers though..
-static const GLESCommandTableEntry commandTable[] = {
-	// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
-	{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_GLES::Execute_LoadClut },
-};
-
 GPU_GLES::CommandInfo GPU_GLES::cmdInfo_[256];
 
 GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
@@ -120,19 +106,6 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 		}
 	}
 
-	for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) {
-		const u8 cmd = commandTable[i].cmd;
-		if (dupeCheck.find(cmd) != dupeCheck.end()) {
-			ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
-		} else {
-			dupeCheck.insert(cmd);
-		}
-		cmdInfo_[cmd].flags |= (uint64_t)commandTable[i].flags | (commandTable[i].dirty << 8);
-		cmdInfo_[cmd].func = commandTable[i].func;
-		if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
-			Crash();
-		}
-	}
 	// Find commands missing from the table.
 	for (int i = 0; i < 0xEF; i++) {
 		if (dupeCheck.find((u8)i) == dupeCheck.end()) {
@@ -615,11 +588,6 @@ void GPU_GLES::ExecuteOp(u32 op, u32 diff) {
 	}
 }
 
-void GPU_GLES::Execute_LoadClut(u32 op, u32 diff) {
-	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
-	textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
-}
-
 void GPU_GLES::GetStats(char *buffer, size_t bufsize) {
 	float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
 	snprintf(buffer, bufsize - 1,
diff --git a/GPU/GLES/GPU_GLES.h b/GPU/GLES/GPU_GLES.h
index f23c5ddf9d..3cedc50e8d 100644
--- a/GPU/GLES/GPU_GLES.h
+++ b/GPU/GLES/GPU_GLES.h
@@ -69,9 +69,6 @@ public:
 		GPU_GLES::CmdFunc func;
 	};
 
-	void Execute_Prim(u32 op, u32 diff);
-	void Execute_LoadClut(u32 op, u32 diff);
-
 	// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
 	std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
 	std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp
index 16ecf8c23a..f92920ffd3 100644
--- a/GPU/GPUCommon.cpp
+++ b/GPU/GPUCommon.cpp
@@ -49,6 +49,8 @@ const CommonCommandTableEntry commonCommandTable[] = {
 	// Changing the vertex type requires us to flush.
 	{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
 
+	{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_LoadClut },
+
 	// These two are actually processed in CMD_END. Not sure if FLAG_FLUSHBEFORE matters.
 	{ GE_CMD_SIGNAL, FLAG_FLUSHBEFORE },
 	{ GE_CMD_FINISH, FLAG_FLUSHBEFORE },
@@ -1354,6 +1356,11 @@ void GPUCommon::Execute_VertexType(u32 op, u32 diff) {
 	}
 }
 
+void GPUCommon::Execute_LoadClut(u32 op, u32 diff) {
+	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
+	textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
+}
+
 void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) {
 	// Don't flush when weight count changes, unless morph is enabled.
 	if ((diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) || (op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h
index 38d28d8e41..f60dec18cd 100644
--- a/GPU/GPUCommon.h
+++ b/GPU/GPUCommon.h
@@ -135,6 +135,8 @@ public:
 	void Execute_BoundingBox(u32 op, u32 diff);
 	void Execute_BlockTransferStart(u32 op, u32 diff);
 
+	void Execute_LoadClut(u32 op, u32 diff);
+
 	void Execute_TexSize0(u32 op, u32 diff);
 	void Execute_TexLevel(u32 op, u32 diff);
 
diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp
index 48314824da..4e05af6bc6 100644
--- a/GPU/Vulkan/GPU_Vulkan.cpp
+++ b/GPU/Vulkan/GPU_Vulkan.cpp
@@ -46,21 +46,8 @@
 #include "Core/HLE/sceKernelInterrupt.h"
 #include "Core/HLE/sceGe.h"
 
-struct VulkanCommandTableEntry {
-	uint8_t cmd;
-	uint8_t flags;
-	uint64_t dirty;
-	GPU_Vulkan::CmdFunc func;
-};
-
 GPU_Vulkan::CommandInfo GPU_Vulkan::cmdInfo_[256];
 
-// This table gets crunched into a faster form by init.
-static const VulkanCommandTableEntry commandTable[] = {
-	// Changes that trigger data copies. Only flushing on change for LOADCLUT must be a bit of a hack...
-	{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_LoadClut },
-};
-
 GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 	: GPUCommon(gfxCtx, draw),
 		vulkan_((VulkanContext *)gfxCtx->GetAPIContext()),
@@ -118,20 +105,6 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 			Crash();
 		}
 	}
-
-	for (size_t i = 0; i < ARRAY_SIZE(commandTable); i++) {
-		const u8 cmd = commandTable[i].cmd;
-		if (dupeCheck.find(cmd) != dupeCheck.end()) {
-			ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
-		} else {
-			dupeCheck.insert(cmd);
-		}
-		cmdInfo_[cmd].flags |= (uint64_t)commandTable[i].flags | (commandTable[i].dirty << 8);
-		cmdInfo_[cmd].func = commandTable[i].func;
-		if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
-			Crash();
-		}
-	}
 	// Find commands missing from the table.
 	for (int i = 0; i < 0xEF; i++) {
 		if (dupeCheck.find((u8)i) == dupeCheck.end()) {
@@ -510,11 +483,6 @@ void GPU_Vulkan::ExecuteOp(u32 op, u32 diff) {
 	}
 }
 
-void GPU_Vulkan::Execute_LoadClut(u32 op, u32 diff) {
-	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
-	textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
-}
-
 void GPU_Vulkan::InitDeviceObjects() {
 	ILOG("GPU_Vulkan::InitDeviceObjects");
 	// Initialize framedata
diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h
index 9c846dd15d..1a081b0514 100644
--- a/GPU/Vulkan/GPU_Vulkan.h
+++ b/GPU/Vulkan/GPU_Vulkan.h
@@ -67,8 +67,6 @@ public:
 		uint64_t flags;
 		GPU_Vulkan::CmdFunc func;
 	};
-	
-	void Execute_LoadClut(u32 op, u32 diff);
 
 	// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
 	std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;

From feb4694accdb7fd3f783c850e66d78e213efa25c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 26 Feb 2018 11:33:17 +0100
Subject: [PATCH 04/11] Unify DispatchSubmitPrim as much as possible

---
 GPU/Common/DrawEngineCommon.h | 4 +++-
 GPU/D3D11/DrawEngineD3D11.h   | 3 ---
 GPU/Directx9/DrawEngineDX9.h  | 3 ---
 GPU/GLES/DrawEngineGLES.h     | 3 ---
 GPU/Vulkan/DrawEngineVulkan.h | 3 ---
 5 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h
index d2150e4f0c..10af8ca780 100644
--- a/GPU/Common/DrawEngineCommon.h
+++ b/GPU/Common/DrawEngineCommon.h
@@ -59,7 +59,9 @@ public:
 
 	// This would seem to be unnecessary now, but is still required for splines/beziers to work in the software backend since SubmitPrim
 	// is different. Should probably refactor that.
-	virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) = 0;
+	virtual void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) {
+		SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead);
+	}
 
 	bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType, int *bytesRead);
 
diff --git a/GPU/D3D11/DrawEngineD3D11.h b/GPU/D3D11/DrawEngineD3D11.h
index 3a88d7ae97..9d4589baae 100644
--- a/GPU/D3D11/DrawEngineD3D11.h
+++ b/GPU/D3D11/DrawEngineD3D11.h
@@ -133,9 +133,6 @@ public:
 	}
 
 	void DispatchFlush() override { Flush(); }
-	void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override {
-		SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead);
-	}
 
 	void ClearTrackedVertexArrays() override;
 
diff --git a/GPU/Directx9/DrawEngineDX9.h b/GPU/Directx9/DrawEngineDX9.h
index 171675f3e0..ef015b02b5 100644
--- a/GPU/Directx9/DrawEngineDX9.h
+++ b/GPU/Directx9/DrawEngineDX9.h
@@ -132,9 +132,6 @@ public:
 	}
 
 	void DispatchFlush() override { Flush(); }
-	void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override {
-		SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead);
-	}
 
 private:
 	void DoFlush();
diff --git a/GPU/GLES/DrawEngineGLES.h b/GPU/GLES/DrawEngineGLES.h
index 62340af75c..42e4ce5133 100644
--- a/GPU/GLES/DrawEngineGLES.h
+++ b/GPU/GLES/DrawEngineGLES.h
@@ -144,9 +144,6 @@ public:
 	bool IsCodePtrVertexDecoder(const u8 *ptr) const;
 
 	void DispatchFlush() override { Flush(); }
-	void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override {
-		SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead);
-	}
 
 	GLPushBuffer *GetPushVertexBuffer() {
 		return frameData_[render_->GetCurFrame()].pushVertex;
diff --git a/GPU/Vulkan/DrawEngineVulkan.h b/GPU/Vulkan/DrawEngineVulkan.h
index a80f05a5d9..d39186837c 100644
--- a/GPU/Vulkan/DrawEngineVulkan.h
+++ b/GPU/Vulkan/DrawEngineVulkan.h
@@ -155,9 +155,6 @@ public:
 	}
 
 	void DispatchFlush() override { Flush(); }
-	void DispatchSubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertType, int *bytesRead) override {
-		SubmitPrim(verts, inds, prim, vertexCount, vertType, bytesRead);
-	}
 
 	VkPipelineLayout GetPipelineLayout() const {
 		return pipelineLayout_;

From a8a34fef9c427c2be88580f968177df2086df17e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 26 Feb 2018 11:35:37 +0100
Subject: [PATCH 05/11] Virtualize CheckGPUFeatures

---
 GPU/D3D11/GPU_D3D11.h   | 2 +-
 GPU/Directx9/GPU_DX9.h  | 2 +-
 GPU/GLES/GPU_GLES.h     | 2 +-
 GPU/GPUCommon.h         | 2 ++
 GPU/Null/NullGpu.h      | 2 ++
 GPU/Software/SoftGpu.h  | 2 ++
 GPU/Vulkan/GPU_Vulkan.h | 2 +-
 7 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/GPU/D3D11/GPU_D3D11.h b/GPU/D3D11/GPU_D3D11.h
index 470837af44..28eeccb522 100644
--- a/GPU/D3D11/GPU_D3D11.h
+++ b/GPU/D3D11/GPU_D3D11.h
@@ -36,7 +36,7 @@ public:
 	GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
 	~GPU_D3D11();
 
-	void CheckGPUFeatures();
+	void CheckGPUFeatures() override;
 	void PreExecuteOp(u32 op, u32 diff) override;
 	void ExecuteOp(u32 op, u32 diff) override;
 
diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h
index 5edae1e5a9..9ea2f95c1e 100644
--- a/GPU/Directx9/GPU_DX9.h
+++ b/GPU/Directx9/GPU_DX9.h
@@ -37,7 +37,7 @@ public:
 	GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
 	~GPU_DX9();
 
-	void CheckGPUFeatures();
+	void CheckGPUFeatures() override;
 	void PreExecuteOp(u32 op, u32 diff) override;
 	void ExecuteOp(u32 op, u32 diff) override;
 
diff --git a/GPU/GLES/GPU_GLES.h b/GPU/GLES/GPU_GLES.h
index 3cedc50e8d..d6ed19083d 100644
--- a/GPU/GLES/GPU_GLES.h
+++ b/GPU/GLES/GPU_GLES.h
@@ -36,7 +36,7 @@ public:
 	~GPU_GLES();
 
 	// This gets called on startup and when we get back from settings.
-	void CheckGPUFeatures();
+	void CheckGPUFeatures() override;
 
 	bool IsReady() override;
 
diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h
index f60dec18cd..b19077e3c0 100644
--- a/GPU/GPUCommon.h
+++ b/GPU/GPUCommon.h
@@ -70,6 +70,8 @@ public:
 	Draw::DrawContext *GetDrawContext() override {
 		return draw_;
 	}
+	virtual void CheckGPUFeatures() = 0;
+
 	bool IsReady() override {
 		return true;
 	}
diff --git a/GPU/Null/NullGpu.h b/GPU/Null/NullGpu.h
index 59b920723c..12deca5d41 100644
--- a/GPU/Null/NullGpu.h
+++ b/GPU/Null/NullGpu.h
@@ -26,6 +26,8 @@ class NullGPU : public GPUCommon {
 public:
 	NullGPU();
 	~NullGPU();
+
+	void CheckGPUFeatures() override {}
 	void InitClear() override {}
 	void ExecuteOp(u32 op, u32 diff) override;
 
diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h
index 7d5750b043..92762a487f 100644
--- a/GPU/Software/SoftGpu.h
+++ b/GPU/Software/SoftGpu.h
@@ -52,6 +52,8 @@ class SoftGPU : public GPUCommon {
 public:
 	SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *_thin3D);
 	~SoftGPU();
+
+	void CheckGPUFeatures() override {}
 	void InitClear() override {}
 	void ExecuteOp(u32 op, u32 diff) override;
 
diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h
index 1a081b0514..81f213bb18 100644
--- a/GPU/Vulkan/GPU_Vulkan.h
+++ b/GPU/Vulkan/GPU_Vulkan.h
@@ -36,7 +36,7 @@ public:
 	~GPU_Vulkan();
 
 	// This gets called on startup and when we get back from settings.
-	void CheckGPUFeatures();
+	void CheckGPUFeatures() override;
 
 	// These are where we can reset command buffers etc.
 	void BeginHostFrame() override;

From 8cef1f0f8dab4cb3f8f629c271a1cb27ff8a0fc7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 26 Feb 2018 11:44:02 +0100
Subject: [PATCH 06/11] GPU: Unify command table (cmdInfo_)

---
 GPU/D3D11/GPU_D3D11.cpp   | 27 ---------------------------
 GPU/D3D11/GPU_D3D11.h     |  8 --------
 GPU/Directx9/GPU_DX9.cpp  | 27 ---------------------------
 GPU/Directx9/GPU_DX9.h    |  8 --------
 GPU/GLES/GPU_GLES.cpp     | 27 ---------------------------
 GPU/GLES/GPU_GLES.h       |  8 --------
 GPU/GPUCommon.cpp         | 27 +++++++++++++++++++++++++++
 GPU/GPUCommon.h           |  8 ++++++++
 GPU/Vulkan/GPU_Vulkan.cpp | 26 --------------------------
 GPU/Vulkan/GPU_Vulkan.h   |  8 --------
 10 files changed, 35 insertions(+), 139 deletions(-)

diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp
index 4c8a3d591e..eabd5d3507 100644
--- a/GPU/D3D11/GPU_D3D11.cpp
+++ b/GPU/D3D11/GPU_D3D11.cpp
@@ -66,8 +66,6 @@
 #include "Core/HLE/sceKernelInterrupt.h"
 #include "Core/HLE/sceGe.h"
 
-GPU_D3D11::CommandInfo GPU_D3D11::cmdInfo_[256]{};
-
 GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 	: GPUCommon(gfxCtx, draw), drawEngine_(draw,
 	(ID3D11Device *)draw->GetNativeObject(Draw::NativeObject::DEVICE),
@@ -103,31 +101,6 @@ GPU_D3D11::GPU_D3D11(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 		ERROR_LOG(G3D, "gstate has drifted out of sync!");
 	}
 
-	memset(cmdInfo_, 0, sizeof(cmdInfo_));
-
-	// Import both the global and local command tables, and check for dupes
-	std::set<u8> dupeCheck;
-	for (size_t i = 0; i < commonCommandTableSize; i++) {
-		const u8 cmd = commonCommandTable[i].cmd;
-		if (dupeCheck.find(cmd) != dupeCheck.end()) {
-			ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
-		} else {
-			dupeCheck.insert(cmd);
-		}
-		cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8);
-		cmdInfo_[cmd].func = commonCommandTable[i].func;
-		if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
-			Crash();
-		}
-	}
-
-	// Find commands missing from the table.
-	for (int i = 0; i < 0xEF; i++) {
-		if (dupeCheck.find((u8)i) == dupeCheck.end()) {
-			ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i);
-		}
-	}
-
 	// No need to flush before the tex scale/offset commands if we are baking
 	// the tex scale/offset into the vertices anyway.
 	UpdateCmdInfo();
diff --git a/GPU/D3D11/GPU_D3D11.h b/GPU/D3D11/GPU_D3D11.h
index 28eeccb522..c9cb6bad2f 100644
--- a/GPU/D3D11/GPU_D3D11.h
+++ b/GPU/D3D11/GPU_D3D11.h
@@ -58,12 +58,6 @@ public:
 		fullInfo = reportingFullInfo_;
 	}
 
-	typedef void (GPU_D3D11::*CmdFunc)(u32 op, u32 diff);
-	struct CommandInfo {
-		uint64_t flags;
-		GPU_D3D11::CmdFunc func;
-	};
-
 	// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
 	std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
 	std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
@@ -98,8 +92,6 @@ private:
 	DrawEngineD3D11 drawEngine_;
 	ShaderManagerD3D11 *shaderManagerD3D11_;
 
-	static CommandInfo cmdInfo_[256];
-
 	int lastVsync_;
 	int vertexCost_ = 0;
 
diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp
index 3704269b25..504ab226c1 100644
--- a/GPU/Directx9/GPU_DX9.cpp
+++ b/GPU/Directx9/GPU_DX9.cpp
@@ -50,8 +50,6 @@
 
 namespace DX9 {
 
-GPU_DX9::CommandInfo GPU_DX9::cmdInfo_[256];
-
 GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 	: GPUCommon(gfxCtx, draw),
 		depalShaderCache_(draw),
@@ -85,31 +83,6 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 		ERROR_LOG(G3D, "gstate has drifted out of sync!");
 	}
 
-	memset(cmdInfo_, 0, sizeof(cmdInfo_));
-
-	// Import both the global and local command tables, and check for dupes
-	std::set<u8> dupeCheck;
-	for (size_t i = 0; i < commonCommandTableSize; i++) {
-		const u8 cmd = commonCommandTable[i].cmd;
-		if (dupeCheck.find(cmd) != dupeCheck.end()) {
-			ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
-		} else {
-			dupeCheck.insert(cmd);
-		}
-		cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8);
-		cmdInfo_[cmd].func = commonCommandTable[i].func;
-		if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
-			Crash();
-		}
-	}
-
-	// Find commands missing from the table.
-	for (int i = 0; i < 0xEF; i++) {
-		if (dupeCheck.find((u8)i) == dupeCheck.end()) {
-			ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i);
-		}
-	}
-
 	// No need to flush before the tex scale/offset commands if we are baking
 	// the tex scale/offset into the vertices anyway.
 	UpdateCmdInfo();
diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h
index 9ea2f95c1e..9a168628f3 100644
--- a/GPU/Directx9/GPU_DX9.h
+++ b/GPU/Directx9/GPU_DX9.h
@@ -59,12 +59,6 @@ public:
 		fullInfo = reportingFullInfo_;
 	}
 
-	typedef void (GPU_DX9::*CmdFunc)(u32 op, u32 diff);
-	struct CommandInfo {
-		uint64_t flags;
-		GPU_DX9::CmdFunc func;
-	};
-
 	// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
 	std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
 	std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
@@ -98,8 +92,6 @@ private:
 	DrawEngineDX9 drawEngine_;
 	ShaderManagerDX9 *shaderManagerDX9_;
 
-	static CommandInfo cmdInfo_[256];
-
 	int lastVsync_;
 	int vertexCost_ = 0;
 
diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp
index ea6c6a17ec..b73caca731 100644
--- a/GPU/GLES/GPU_GLES.cpp
+++ b/GPU/GLES/GPU_GLES.cpp
@@ -51,8 +51,6 @@
 #include "Windows/GPU/WindowsGLContext.h"
 #endif
 
-GPU_GLES::CommandInfo GPU_GLES::cmdInfo_[256];
-
 GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 : GPUCommon(gfxCtx, draw), drawEngine_(draw), fragmentTestCache_(draw), depalShaderCache_(draw) {
 	UpdateVsyncInterval(true);
@@ -88,31 +86,6 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 		ERROR_LOG(G3D, "gstate has drifted out of sync!");
 	}
 
-	memset(cmdInfo_, 0, sizeof(cmdInfo_));
-
-	// Import both the global and local command tables, and check for dupes
-	std::set<u8> dupeCheck;
-	for (size_t i = 0; i < commonCommandTableSize; i++) {
-		const u8 cmd = commonCommandTable[i].cmd;
-		if (dupeCheck.find(cmd) != dupeCheck.end()) {
-			ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
-		} else {
-			dupeCheck.insert(cmd);
-		}
-		cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8);
-		cmdInfo_[cmd].func = commonCommandTable[i].func;
-		if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
-			Crash();
-		}
-	}
-
-	// Find commands missing from the table.
-	for (int i = 0; i < 0xEF; i++) {
-		if (dupeCheck.find((u8)i) == dupeCheck.end()) {
-			ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i);
-		}
-	}
-
 	// No need to flush before the tex scale/offset commands if we are baking
 	// the tex scale/offset into the vertices anyway.
 
diff --git a/GPU/GLES/GPU_GLES.h b/GPU/GLES/GPU_GLES.h
index d6ed19083d..0184ab604d 100644
--- a/GPU/GLES/GPU_GLES.h
+++ b/GPU/GLES/GPU_GLES.h
@@ -63,12 +63,6 @@ public:
 		fullInfo = reportingFullInfo_;
 	}
 
-	typedef void (GPU_GLES::*CmdFunc)(u32 op, u32 diff);
-	struct CommandInfo {
-		uint64_t flags;
-		GPU_GLES::CmdFunc func;
-	};
-
 	// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
 	std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
 	std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
@@ -95,8 +89,6 @@ private:
 	inline void UpdateVsyncInterval(bool force);
 	void UpdateCmdInfo();
 
-	static CommandInfo cmdInfo_[256];
-
 	FramebufferManagerGLES *framebufferManagerGL_;
 	TextureCacheGLES *textureCacheGL_;
 	DepalShaderCacheGLES depalShaderCache_;
diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp
index f92920ffd3..e63411c5b0 100644
--- a/GPU/GPUCommon.cpp
+++ b/GPU/GPUCommon.cpp
@@ -352,6 +352,9 @@ const CommonCommandTableEntry commonCommandTable[] = {
 };
 size_t commonCommandTableSize = ARRAY_SIZE(commonCommandTable);
 
+// TODO: Make this a non-static (per-instance) member?
+GPUCommon::CommandInfo GPUCommon::cmdInfo_[256];
+
 void GPUCommon::Flush() {
 	drawEngineCommon_->DispatchFlush();
 }
@@ -375,6 +378,30 @@ GPUCommon::GPUCommon(GraphicsContext *gfxCtx, Draw::DrawContext *draw) :
 	gstate.Reset();
 	gstate_c.Reset();
 	gpuStats.Reset();
+
+	memset(cmdInfo_, 0, sizeof(cmdInfo_));
+
+	// Import the common command table, and check for dupes
+	std::set<u8> dupeCheck;
+	for (size_t i = 0; i < commonCommandTableSize; i++) {
+		const u8 cmd = commonCommandTable[i].cmd;
+		if (dupeCheck.find(cmd) != dupeCheck.end()) {
+			ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
+		} else {
+			dupeCheck.insert(cmd);
+		}
+		cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8);
+		cmdInfo_[cmd].func = commonCommandTable[i].func;
+		if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
+			Crash();
+		}
+	}
+	// Find commands missing from the table.
+	for (int i = 0; i < 0xEF; i++) {
+		if (dupeCheck.find((u8)i) == dupeCheck.end()) {
+			ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i);
+		}
+	}
 }
 
 GPUCommon::~GPUCommon() {
diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h
index b19077e3c0..f203660216 100644
--- a/GPU/GPUCommon.h
+++ b/GPU/GPUCommon.h
@@ -295,6 +295,14 @@ protected:
 	GraphicsContext *gfxCtx_;
 	Draw::DrawContext *draw_;
 
+	typedef void (GPUCommon::*CmdFunc)(u32 op, u32 diff);
+	struct CommandInfo {
+		uint64_t flags;
+		GPUCommon::CmdFunc func;
+	};
+
+	static CommandInfo cmdInfo_[256];
+
 	typedef std::list<int> DisplayListQueue;
 
 	int nextListID;
diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp
index 4e05af6bc6..8f7dd68c1c 100644
--- a/GPU/Vulkan/GPU_Vulkan.cpp
+++ b/GPU/Vulkan/GPU_Vulkan.cpp
@@ -46,8 +46,6 @@
 #include "Core/HLE/sceKernelInterrupt.h"
 #include "Core/HLE/sceGe.h"
 
-GPU_Vulkan::CommandInfo GPU_Vulkan::cmdInfo_[256];
-
 GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 	: GPUCommon(gfxCtx, draw),
 		vulkan_((VulkanContext *)gfxCtx->GetAPIContext()),
@@ -88,30 +86,6 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 		ERROR_LOG(G3D, "gstate has drifted out of sync!");
 	}
 
-	memset(cmdInfo_, 0, sizeof(cmdInfo_));
-
-	// Import both the global and local command tables, and check for dupes
-	std::set<u8> dupeCheck;
-	for (size_t i = 0; i < commonCommandTableSize; i++) {
-		const u8 cmd = commonCommandTable[i].cmd;
-		if (dupeCheck.find(cmd) != dupeCheck.end()) {
-			ERROR_LOG(G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
-		} else {
-			dupeCheck.insert(cmd);
-		}
-		cmdInfo_[cmd].flags |= (uint64_t)commonCommandTable[i].flags | (commonCommandTable[i].dirty << 8);
-		cmdInfo_[cmd].func = commonCommandTable[i].func;
-		if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
-			Crash();
-		}
-	}
-	// Find commands missing from the table.
-	for (int i = 0; i < 0xEF; i++) {
-		if (dupeCheck.find((u8)i) == dupeCheck.end()) {
-			ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i);
-		}
-	}
-
 	UpdateCmdInfo();
 
 	BuildReportingInfo();
diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h
index 81f213bb18..162e9a0992 100644
--- a/GPU/Vulkan/GPU_Vulkan.h
+++ b/GPU/Vulkan/GPU_Vulkan.h
@@ -62,12 +62,6 @@ public:
 		fullInfo = reportingFullInfo_;
 	}
 
-	typedef void (GPU_Vulkan::*CmdFunc)(u32 op, u32 diff);
-	struct CommandInfo {
-		uint64_t flags;
-		GPU_Vulkan::CmdFunc func;
-	};
-
 	// Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend.
 	std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override;
 	std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override;
@@ -95,8 +89,6 @@ private:
 	void InitDeviceObjects();
 	void DestroyDeviceObjects();
 
-	static CommandInfo cmdInfo_[256];
-
 	VulkanContext *vulkan_;
 	FramebufferManagerVulkan *framebufferManagerVulkan_;
 	TextureCacheVulkan *textureCacheVulkan_;

From 6a2f45c2e123d616feb6e2636baebb7898735f68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 26 Feb 2018 11:48:56 +0100
Subject: [PATCH 07/11] Unify UpdateCmdInfo

---
 GPU/D3D11/GPU_D3D11.cpp   | 12 ------------
 GPU/D3D11/GPU_D3D11.h     |  2 --
 GPU/Directx9/GPU_DX9.cpp  | 12 ------------
 GPU/Directx9/GPU_DX9.h    |  2 --
 GPU/GLES/GPU_GLES.cpp     | 10 ----------
 GPU/GLES/GPU_GLES.h       |  1 -
 GPU/GPUCommon.cpp         | 12 ++++++++++++
 GPU/GPUCommon.h           |  2 ++
 GPU/Vulkan/GPU_Vulkan.cpp | 12 ------------
 GPU/Vulkan/GPU_Vulkan.h   |  1 -
 10 files changed, 14 insertions(+), 52 deletions(-)

diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp
index eabd5d3507..e14b1b1499 100644
--- a/GPU/D3D11/GPU_D3D11.cpp
+++ b/GPU/D3D11/GPU_D3D11.cpp
@@ -123,18 +123,6 @@ GPU_D3D11::~GPU_D3D11() {
 	stockD3D11.Destroy();
 }
 
-void GPU_D3D11::UpdateCmdInfo() {
-	if (g_Config.bSoftwareSkinning) {
-		cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
-		cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning;
-	} else {
-		cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
-		cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType;
-	}
-
-	CheckGPUFeatures();
-}
-
 void GPU_D3D11::CheckGPUFeatures() {
 	u32 features = 0;
 
diff --git a/GPU/D3D11/GPU_D3D11.h b/GPU/D3D11/GPU_D3D11.h
index c9cb6bad2f..23d5582759 100644
--- a/GPU/D3D11/GPU_D3D11.h
+++ b/GPU/D3D11/GPU_D3D11.h
@@ -70,8 +70,6 @@ protected:
 	void FinishDeferred() override;
 
 private:
-	void UpdateCmdInfo();
-
 	void Flush() {
 		drawEngine_.Flush();
 	}
diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp
index 504ab226c1..173f7dc004 100644
--- a/GPU/Directx9/GPU_DX9.cpp
+++ b/GPU/Directx9/GPU_DX9.cpp
@@ -103,18 +103,6 @@ GPU_DX9::GPU_DX9(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 	}
 }
 
-void GPU_DX9::UpdateCmdInfo() {
-	if (g_Config.bSoftwareSkinning) {
-		cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
-		cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning;
-	} else {
-		cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
-		cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType;
-	}
-
-	CheckGPUFeatures();
-}
-
 void GPU_DX9::CheckGPUFeatures() {
 	u32 features = 0;
 
diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h
index 9a168628f3..45cc632c63 100644
--- a/GPU/Directx9/GPU_DX9.h
+++ b/GPU/Directx9/GPU_DX9.h
@@ -70,8 +70,6 @@ protected:
 	void FinishDeferred() override;
 
 private:
-	void UpdateCmdInfo();
-
 	void Flush() {
 		drawEngine_.Flush();
 	}
diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp
index b73caca731..d0290c5e5f 100644
--- a/GPU/GLES/GPU_GLES.cpp
+++ b/GPU/GLES/GPU_GLES.cpp
@@ -407,16 +407,6 @@ inline void GPU_GLES::UpdateVsyncInterval(bool force) {
 #endif
 }
 
-void GPU_GLES::UpdateCmdInfo() {
-	if (g_Config.bSoftwareSkinning) {
-		cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
-		cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning;
-	} else {
-		cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
-		cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType;
-	}
-}
-
 void GPU_GLES::ReapplyGfxState() {
 	GPUCommon::ReapplyGfxState();
 }
diff --git a/GPU/GLES/GPU_GLES.h b/GPU/GLES/GPU_GLES.h
index 0184ab604d..9c3a4ada43 100644
--- a/GPU/GLES/GPU_GLES.h
+++ b/GPU/GLES/GPU_GLES.h
@@ -87,7 +87,6 @@ private:
 	void Reinitialize() override;
 
 	inline void UpdateVsyncInterval(bool force);
-	void UpdateCmdInfo();
 
 	FramebufferManagerGLES *framebufferManagerGL_;
 	TextureCacheGLES *textureCacheGL_;
diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp
index e63411c5b0..cbf22a9c28 100644
--- a/GPU/GPUCommon.cpp
+++ b/GPU/GPUCommon.cpp
@@ -402,11 +402,23 @@ GPUCommon::GPUCommon(GraphicsContext *gfxCtx, Draw::DrawContext *draw) :
 			ERROR_LOG(G3D, "Command missing from table: %02x (%i)", i, i);
 		}
 	}
+
+	UpdateCmdInfo();
 }
 
 GPUCommon::~GPUCommon() {
 }
 
+void GPUCommon::UpdateCmdInfo() {
+	if (g_Config.bSoftwareSkinning) {
+		cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
+		cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning;
+	} else {
+		cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
+		cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType;
+	}
+}
+
 void GPUCommon::BeginHostFrame() {
 	ReapplyGfxState();
 
diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h
index f203660216..2c99e2aaaf 100644
--- a/GPU/GPUCommon.h
+++ b/GPU/GPUCommon.h
@@ -72,6 +72,8 @@ public:
 	}
 	virtual void CheckGPUFeatures() = 0;
 
+	void UpdateCmdInfo();
+
 	bool IsReady() override {
 		return true;
 	}
diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp
index 8f7dd68c1c..1f8fa0bc57 100644
--- a/GPU/Vulkan/GPU_Vulkan.cpp
+++ b/GPU/Vulkan/GPU_Vulkan.cpp
@@ -86,8 +86,6 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
 		ERROR_LOG(G3D, "gstate has drifted out of sync!");
 	}
 
-	UpdateCmdInfo();
-
 	BuildReportingInfo();
 	// Update again after init to be sure of any silly driver problems.
 	UpdateVsyncInterval(true);
@@ -342,16 +340,6 @@ void GPU_Vulkan::UpdateVsyncInterval(bool force) {
 	// TODO
 }
 
-void GPU_Vulkan::UpdateCmdInfo() {
-	if (g_Config.bSoftwareSkinning) {
-		cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
-		cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning;
-	} else {
-		cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
-		cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType;
-	}
-}
-
 void GPU_Vulkan::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
 	host->GPUNotifyDisplay(framebuf, stride, format);
 	framebufferManager_->SetDisplayFramebuffer(framebuf, stride, format);
diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h
index 162e9a0992..2bb8b3cf26 100644
--- a/GPU/Vulkan/GPU_Vulkan.h
+++ b/GPU/Vulkan/GPU_Vulkan.h
@@ -84,7 +84,6 @@ private:
 	void CopyDisplayToOutput() override;
 	void Reinitialize() override;
 	inline void UpdateVsyncInterval(bool force);
-	void UpdateCmdInfo();
 
 	void InitDeviceObjects();
 	void DestroyDeviceObjects();

From 78467d6092e0caf89bafa52674d390b41701d281 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 26 Feb 2018 11:52:16 +0100
Subject: [PATCH 08/11] Unify FastRunLoop for the hardware backends.

---
 GPU/D3D11/GPU_D3D11.cpp   | 38 --------------------------------------
 GPU/D3D11/GPU_D3D11.h     |  1 -
 GPU/Directx9/GPU_DX9.cpp  | 38 --------------------------------------
 GPU/Directx9/GPU_DX9.h    |  1 -
 GPU/GLES/GPU_GLES.cpp     | 38 --------------------------------------
 GPU/GLES/GPU_GLES.h       |  1 -
 GPU/GPUCommon.cpp         | 38 ++++++++++++++++++++++++++++++++++++++
 GPU/GPUCommon.h           |  4 ++--
 GPU/Vulkan/GPU_Vulkan.cpp | 38 --------------------------------------
 GPU/Vulkan/GPU_Vulkan.h   |  1 -
 10 files changed, 40 insertions(+), 158 deletions(-)

diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp
index e14b1b1499..5dbe449087 100644
--- a/GPU/D3D11/GPU_D3D11.cpp
+++ b/GPU/D3D11/GPU_D3D11.cpp
@@ -289,44 +289,6 @@ void GPU_D3D11::CopyDisplayToOutput() {
 	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
 }
 
-// Maybe should write this in ASM...
-void GPU_D3D11::FastRunLoop(DisplayList &list) {
-	PROFILE_THIS_SCOPE("gpuloop");
-	const CommandInfo *cmdInfo = cmdInfo_;
-	int dc = downcount;
-	for (; dc > 0; --dc) {
-		// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
-		const u32 op = *(const u32 *)(Memory::base + list.pc);
-		const u32 cmd = op >> 24;
-		const CommandInfo &info = cmdInfo[cmd];
-		const u32 diff = op ^ gstate.cmdmem[cmd];
-		if (diff == 0) {
-			if (info.flags & FLAG_EXECUTE) {
-				downcount = dc;
-				(this->*info.func)(op, diff);
-				dc = downcount;
-			}
-		} else {
-			uint64_t flags = info.flags;
-			if (flags & FLAG_FLUSHBEFOREONCHANGE) {
-				drawEngine_.Flush();
-			}
-			gstate.cmdmem[cmd] = op;
-			if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {
-				downcount = dc;
-				(this->*info.func)(op, diff);
-				dc = downcount;
-			} else {
-				uint64_t dirty = flags >> 8;
-				if (dirty)
-					gstate_c.Dirty(dirty);
-			}
-		}
-		list.pc += 4;
-	}
-	downcount = 0;
-}
-
 void GPU_D3D11::FinishDeferred() {
 	// This finishes reading any vertex data that is pending.
 	drawEngine_.FinishDeferred();
diff --git a/GPU/D3D11/GPU_D3D11.h b/GPU/D3D11/GPU_D3D11.h
index 23d5582759..aadaef4556 100644
--- a/GPU/D3D11/GPU_D3D11.h
+++ b/GPU/D3D11/GPU_D3D11.h
@@ -66,7 +66,6 @@ public:
 	void EndHostFrame() override;
 
 protected:
-	void FastRunLoop(DisplayList &list) override;
 	void FinishDeferred() override;
 
 private:
diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp
index 173f7dc004..32c03fd030 100644
--- a/GPU/Directx9/GPU_DX9.cpp
+++ b/GPU/Directx9/GPU_DX9.cpp
@@ -265,44 +265,6 @@ void GPU_DX9::CopyDisplayToOutput() {
 	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
 }
 
-// Maybe should write this in ASM...
-void GPU_DX9::FastRunLoop(DisplayList &list) {
-	PROFILE_THIS_SCOPE("gpuloop");
-	const CommandInfo *cmdInfo = cmdInfo_;
-	int dc = downcount;
-	for (; dc > 0; --dc) {
-		// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
-		const u32 op = *(const u32 *)(Memory::base + list.pc);
-		const u32 cmd = op >> 24;
-		const CommandInfo &info = cmdInfo[cmd];
-		const u32 diff = op ^ gstate.cmdmem[cmd];
-		if (diff == 0) {
-			if (info.flags & FLAG_EXECUTE) {
-				downcount = dc;
-				(this->*info.func)(op, diff);
-				dc = downcount;
-			}
-		} else {
-			uint64_t flags = info.flags;
-			if (flags & FLAG_FLUSHBEFOREONCHANGE) {
-				drawEngine_.Flush();
-			}
-			gstate.cmdmem[cmd] = op;
-			if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {
-				downcount = dc;
-				(this->*info.func)(op, diff);
-				dc = downcount;
-			} else {
-				uint64_t dirty = flags >> 8;
-				if (dirty)
-					gstate_c.Dirty(dirty);
-			}
-		}
-		list.pc += 4;
-	}
-	downcount = 0;
-}
-
 void GPU_DX9::FinishDeferred() {
 	// This finishes reading any vertex data that is pending.
 	drawEngine_.FinishDeferred();
diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h
index 45cc632c63..2681bd4c79 100644
--- a/GPU/Directx9/GPU_DX9.h
+++ b/GPU/Directx9/GPU_DX9.h
@@ -66,7 +66,6 @@ public:
 	void BeginHostFrame() override;
 
 protected:
-	void FastRunLoop(DisplayList &list) override;
 	void FinishDeferred() override;
 
 private:
diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp
index d0290c5e5f..44962d935a 100644
--- a/GPU/GLES/GPU_GLES.cpp
+++ b/GPU/GLES/GPU_GLES.cpp
@@ -481,44 +481,6 @@ void GPU_GLES::CopyDisplayToOutput() {
 #endif
 }
 
-// Maybe should write this in ASM...
-void GPU_GLES::FastRunLoop(DisplayList &list) {
-	PROFILE_THIS_SCOPE("gpuloop");
-	const CommandInfo *cmdInfo = cmdInfo_;
-	int dc = downcount;
-	for (; dc > 0; --dc) {
-		// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
-		const u32 op = *(const u32 *)(Memory::base + list.pc);
-		const u32 cmd = op >> 24;
-		const CommandInfo &info = cmdInfo[cmd];
-		const u32 diff = op ^ gstate.cmdmem[cmd];
-		if (diff == 0) {
-			if (info.flags & FLAG_EXECUTE) {
-				downcount = dc;
-				(this->*info.func)(op, diff);
-				dc = downcount;
-			}
-		} else {
-			uint64_t flags = info.flags;
-			if (flags & FLAG_FLUSHBEFOREONCHANGE) {
-				drawEngine_.Flush();
-			}
-			gstate.cmdmem[cmd] = op;
-			if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {
-				downcount = dc;
-				(this->*info.func)(op, diff);
-				dc = downcount;
-			} else {
-				uint64_t dirty = flags >> 8;
-				if (dirty)
-					gstate_c.Dirty(dirty);
-			}
-		}
-		list.pc += 4;
-	}
-	downcount = 0;
-}
-
 void GPU_GLES::FinishDeferred() {
 	// This finishes reading any vertex data that is pending.
 	drawEngine_.FinishDeferred();
diff --git a/GPU/GLES/GPU_GLES.h b/GPU/GLES/GPU_GLES.h
index 9c3a4ada43..b81f349dd5 100644
--- a/GPU/GLES/GPU_GLES.h
+++ b/GPU/GLES/GPU_GLES.h
@@ -71,7 +71,6 @@ public:
 	void EndHostFrame() override;
 
 protected:
-	void FastRunLoop(DisplayList &list) override;
 	void FinishDeferred() override;
 
 private:
diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp
index cbf22a9c28..fc9bf6b62a 100644
--- a/GPU/GPUCommon.cpp
+++ b/GPU/GPUCommon.cpp
@@ -965,6 +965,44 @@ bool GPUCommon::InterpretList(DisplayList &list) {
 	return gpuState == GPUSTATE_DONE || gpuState == GPUSTATE_ERROR;
 }
 
+// Maybe should write this in ASM...
+void GPUCommon::FastRunLoop(DisplayList &list) {
+	PROFILE_THIS_SCOPE("gpuloop");
+	const CommandInfo *cmdInfo = cmdInfo_;
+	int dc = downcount;
+	for (; dc > 0; --dc) {
+		// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
+		const u32 op = *(const u32 *)(Memory::base + list.pc);
+		const u32 cmd = op >> 24;
+		const CommandInfo &info = cmdInfo[cmd];
+		const u32 diff = op ^ gstate.cmdmem[cmd];
+		if (diff == 0) {
+			if (info.flags & FLAG_EXECUTE) {
+				downcount = dc;
+				(this->*info.func)(op, diff);
+				dc = downcount;
+			}
+		} else {
+			uint64_t flags = info.flags;
+			if (flags & FLAG_FLUSHBEFOREONCHANGE) {
+				drawEngineCommon_->DispatchFlush();
+			}
+			gstate.cmdmem[cmd] = op;
+			if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {
+				downcount = dc;
+				(this->*info.func)(op, diff);
+				dc = downcount;
+			} else {
+				uint64_t dirty = flags >> 8;
+				if (dirty)
+					gstate_c.Dirty(dirty);
+			}
+		}
+		list.pc += 4;
+	}
+	downcount = 0;
+}
+
 void GPUCommon::BeginFrame() {
 	immCount_ = 0;
 	if (dumpNextFrame_) {
diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h
index 2c99e2aaaf..e83c499d79 100644
--- a/GPU/GPUCommon.h
+++ b/GPU/GPUCommon.h
@@ -265,8 +265,8 @@ protected:
 
 	void BeginFrame() override;
 
-	// To avoid virtual calls to PreExecuteOp().
-	virtual void FastRunLoop(DisplayList &list) = 0;
+	virtual void FastRunLoop(DisplayList &list);
+
 	void SlowRunLoop(DisplayList &list);
 	void UpdatePC(u32 currentPC, u32 newPC);
 	void UpdateState(GPURunState state);
diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp
index 1f8fa0bc57..a95e93d911 100644
--- a/GPU/Vulkan/GPU_Vulkan.cpp
+++ b/GPU/Vulkan/GPU_Vulkan.cpp
@@ -376,44 +376,6 @@ void GPU_Vulkan::CopyDisplayToOutput() {
 	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
 }
 
-// Maybe should write this in ASM...
-void GPU_Vulkan::FastRunLoop(DisplayList &list) {
-	PROFILE_THIS_SCOPE("gpuloop");
-	const CommandInfo *cmdInfo = cmdInfo_;
-	int dc = downcount;
-	for (; dc > 0; --dc) {
-		// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
-		const u32 op = *(const u32 *)(Memory::base + list.pc);
-		const u32 cmd = op >> 24;
-		const CommandInfo &info = cmdInfo[cmd];
-		const u32 diff = op ^ gstate.cmdmem[cmd];
-		if (diff == 0) {
-			if (info.flags & FLAG_EXECUTE) {
-				downcount = dc;
-				(this->*info.func)(op, diff);
-				dc = downcount;
-			}
-		} else {
-			uint64_t flags = info.flags;
-			if (flags & FLAG_FLUSHBEFOREONCHANGE) {
-				drawEngine_.Flush();
-			}
-			gstate.cmdmem[cmd] = op;
-			if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {
-				downcount = dc;
-				(this->*info.func)(op, diff);
-				dc = downcount;
-			} else {
-				uint64_t dirty = flags >> 8;
-				if (dirty)
-					gstate_c.Dirty(dirty);
-			}
-		}
-		list.pc += 4;
-	}
-	downcount = 0;
-}
-
 void GPU_Vulkan::FinishDeferred() {
 	drawEngine_.FinishDeferred();
 }
diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h
index 2bb8b3cf26..670ff9487b 100644
--- a/GPU/Vulkan/GPU_Vulkan.h
+++ b/GPU/Vulkan/GPU_Vulkan.h
@@ -71,7 +71,6 @@ public:
 	}
 
 protected:
-	void FastRunLoop(DisplayList &list) override;
 	void FinishDeferred() override;
 
 private:

From 64ec46e7050fb751d54029329421eef0cf5a5323 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 26 Feb 2018 11:58:17 +0100
Subject: [PATCH 09/11] Unify FramebufferDirty()/FramebufferReallyDirty() for
 the hw backends

---
 GPU/D3D11/GPU_D3D11.cpp   | 20 --------------------
 GPU/D3D11/GPU_D3D11.h     |  2 --
 GPU/Directx9/GPU_DX9.cpp  | 20 --------------------
 GPU/Directx9/GPU_DX9.h    |  2 --
 GPU/GLES/GPU_GLES.cpp     | 20 --------------------
 GPU/GLES/GPU_GLES.h       |  2 --
 GPU/GPUCommon.cpp         | 20 ++++++++++++++++++++
 GPU/GPUCommon.h           |  3 +++
 GPU/Vulkan/GPU_Vulkan.cpp | 20 --------------------
 GPU/Vulkan/GPU_Vulkan.h   |  2 --
 10 files changed, 23 insertions(+), 88 deletions(-)

diff --git a/GPU/D3D11/GPU_D3D11.cpp b/GPU/D3D11/GPU_D3D11.cpp
index 5dbe449087..ec866cc815 100644
--- a/GPU/D3D11/GPU_D3D11.cpp
+++ b/GPU/D3D11/GPU_D3D11.cpp
@@ -254,26 +254,6 @@ void GPU_D3D11::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat f
 	framebufferManagerD3D11_->SetDisplayFramebuffer(framebuf, stride, format);
 }
 
-bool GPU_D3D11::FramebufferDirty() {
-	VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
-	if (vfb) {
-		bool dirty = vfb->dirtyAfterDisplay;
-		vfb->dirtyAfterDisplay = false;
-		return dirty;
-	}
-	return true;
-}
-
-bool GPU_D3D11::FramebufferReallyDirty() {
-	VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
-	if (vfb) {
-		bool dirty = vfb->reallyDirtyAfterDisplay;
-		vfb->reallyDirtyAfterDisplay = false;
-		return dirty;
-	}
-	return true;
-}
-
 void GPU_D3D11::CopyDisplayToOutput() {
 	float blendColor[4]{};
 	context_->OMSetBlendState(stockD3D11.blendStateDisabledWithColorMask[0xF], blendColor, 0xFFFFFFFF);
diff --git a/GPU/D3D11/GPU_D3D11.h b/GPU/D3D11/GPU_D3D11.h
index aadaef4556..e716906f83 100644
--- a/GPU/D3D11/GPU_D3D11.h
+++ b/GPU/D3D11/GPU_D3D11.h
@@ -50,8 +50,6 @@ public:
 	void DoState(PointerWrap &p) override;
 
 	void ClearShaderCache() override;
-	bool FramebufferDirty() override;
-	bool FramebufferReallyDirty() override;
 
 	void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) override {
 		primaryInfo = reportingPrimaryInfo_;
diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp
index 32c03fd030..590c7ac135 100644
--- a/GPU/Directx9/GPU_DX9.cpp
+++ b/GPU/Directx9/GPU_DX9.cpp
@@ -230,26 +230,6 @@ void GPU_DX9::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat for
 	framebufferManagerDX9_->SetDisplayFramebuffer(framebuf, stride, format);
 }
 
-bool GPU_DX9::FramebufferDirty() {
-	VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
-	if (vfb) {
-		bool dirty = vfb->dirtyAfterDisplay;
-		vfb->dirtyAfterDisplay = false;
-		return dirty;
-	}
-	return true;
-}
-
-bool GPU_DX9::FramebufferReallyDirty() {
-	VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
-	if (vfb) {
-		bool dirty = vfb->reallyDirtyAfterDisplay;
-		vfb->reallyDirtyAfterDisplay = false;
-		return dirty;
-	}
-	return true;
-}
-
 void GPU_DX9::CopyDisplayToOutput() {
 	dxstate.depthWrite.set(true);
 	dxstate.colorMask.set(true, true, true, true);
diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h
index 2681bd4c79..d72e2c4321 100644
--- a/GPU/Directx9/GPU_DX9.h
+++ b/GPU/Directx9/GPU_DX9.h
@@ -51,8 +51,6 @@ public:
 	void DoState(PointerWrap &p) override;
 
 	void ClearShaderCache() override;
-	bool FramebufferDirty() override;
-	bool FramebufferReallyDirty() override;
 
 	void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) override {
 		primaryInfo = reportingPrimaryInfo_;
diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp
index 44962d935a..07a15b7960 100644
--- a/GPU/GLES/GPU_GLES.cpp
+++ b/GPU/GLES/GPU_GLES.cpp
@@ -440,26 +440,6 @@ void GPU_GLES::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat fo
 	framebufferManagerGL_->SetDisplayFramebuffer(framebuf, stride, format);
 }
 
-bool GPU_GLES::FramebufferDirty() {
-	VirtualFramebuffer *vfb = framebufferManagerGL_->GetDisplayVFB();
-	if (vfb) {
-		bool dirty = vfb->dirtyAfterDisplay;
-		vfb->dirtyAfterDisplay = false;
-		return dirty;
-	}
-	return true;
-}
-
-bool GPU_GLES::FramebufferReallyDirty() {
-	VirtualFramebuffer *vfb = framebufferManagerGL_->GetDisplayVFB();
-	if (vfb) {
-		bool dirty = vfb->reallyDirtyAfterDisplay;
-		vfb->reallyDirtyAfterDisplay = false;
-		return dirty;
-	}
-	return true;
-}
-
 void GPU_GLES::CopyDisplayToOutput() {
 	// Flush anything left over.
 	framebufferManagerGL_->RebindFramebuffer();
diff --git a/GPU/GLES/GPU_GLES.h b/GPU/GLES/GPU_GLES.h
index b81f349dd5..4ea7a62064 100644
--- a/GPU/GLES/GPU_GLES.h
+++ b/GPU/GLES/GPU_GLES.h
@@ -55,8 +55,6 @@ public:
 
 	void ClearShaderCache() override;
 	void CleanupBeforeUI() override;
-	bool FramebufferDirty() override;
-	bool FramebufferReallyDirty() override;
 
 	void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) override {
 		primaryInfo = reportingPrimaryInfo_;
diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp
index fc9bf6b62a..782b2a324d 100644
--- a/GPU/GPUCommon.cpp
+++ b/GPU/GPUCommon.cpp
@@ -2598,3 +2598,23 @@ bool GPUCommon::DescribeCodePtr(const u8 *ptr, std::string &name) {
 	}
 	return false;
 }
+
+bool GPUCommon::FramebufferDirty() {
+	VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
+	if (vfb) {
+		bool dirty = vfb->dirtyAfterDisplay;
+		vfb->dirtyAfterDisplay = false;
+		return dirty;
+	}
+	return true;
+}
+
+bool GPUCommon::FramebufferReallyDirty() {
+	VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
+	if (vfb) {
+		bool dirty = vfb->reallyDirtyAfterDisplay;
+		vfb->reallyDirtyAfterDisplay = false;
+		return dirty;
+	}
+	return true;
+}
diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h
index e83c499d79..2f49863b7f 100644
--- a/GPU/GPUCommon.h
+++ b/GPU/GPUCommon.h
@@ -245,6 +245,9 @@ public:
 		return -1;
 	}
 
+	bool FramebufferDirty() override;
+	bool FramebufferReallyDirty() override;
+
 	typedef void (GPUCommon::*CmdFunc)(u32 op, u32 diff);
 
 protected:
diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp
index a95e93d911..0aeefe8495 100644
--- a/GPU/Vulkan/GPU_Vulkan.cpp
+++ b/GPU/Vulkan/GPU_Vulkan.cpp
@@ -345,26 +345,6 @@ void GPU_Vulkan::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat
 	framebufferManager_->SetDisplayFramebuffer(framebuf, stride, format);
 }
 
-bool GPU_Vulkan::FramebufferDirty() {
-	VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
-	if (vfb) {
-		bool dirty = vfb->dirtyAfterDisplay;
-		vfb->dirtyAfterDisplay = false;
-		return dirty;
-	}
-	return true;
-}
-
-bool GPU_Vulkan::FramebufferReallyDirty() {
-	VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
-	if (vfb) {
-		bool dirty = vfb->reallyDirtyAfterDisplay;
-		vfb->reallyDirtyAfterDisplay = false;
-		return dirty;
-	}
-	return true;
-}
-
 void GPU_Vulkan::CopyDisplayToOutput() {
 	// Flush anything left over.
 	drawEngine_.Flush();
diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h
index 670ff9487b..266fe01485 100644
--- a/GPU/Vulkan/GPU_Vulkan.h
+++ b/GPU/Vulkan/GPU_Vulkan.h
@@ -54,8 +54,6 @@ public:
 	void DoState(PointerWrap &p) override;
 
 	void ClearShaderCache() override;
-	bool FramebufferDirty() override;
-	bool FramebufferReallyDirty() override;
 
 	void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) override {
 		primaryInfo = reportingPrimaryInfo_;

From 0b4b2bfe9c8144b2c186aa3d318b5ead3000ef63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 26 Feb 2018 13:51:14 +0100
Subject: [PATCH 10/11] Avoid the most common virtual call when possible.

---
 GPU/Common/DrawEngineCommon.h | 3 +++
 GPU/GPUCommon.cpp             | 4 +++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h
index 10af8ca780..b99bc12cea 100644
--- a/GPU/Common/DrawEngineCommon.h
+++ b/GPU/Common/DrawEngineCommon.h
@@ -79,6 +79,9 @@ public:
 	bool IsCodePtrVertexDecoder(const u8 *ptr) const {
 		return decJitCache_->IsInSpace(ptr);
 	}
+	// Number of deferred draw calls currently queued (the value of numDrawCalls).
+	// Lets callers test for pending work before calling DispatchFlush().
+	int GetNumDrawCalls() const { return numDrawCalls; }
 
 protected:
 	virtual void ClearTrackedVertexArrays() {}
diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp
index 782b2a324d..d30ba2cfdf 100644
--- a/GPU/GPUCommon.cpp
+++ b/GPU/GPUCommon.cpp
@@ -985,7 +985,9 @@ void GPUCommon::FastRunLoop(DisplayList &list) {
 		} else {
 			uint64_t flags = info.flags;
 			if (flags & FLAG_FLUSHBEFOREONCHANGE) {
-				drawEngineCommon_->DispatchFlush();
+				if (drawEngineCommon_->GetNumDrawCalls()) {
+					drawEngineCommon_->DispatchFlush();
+				}
 			}
 			gstate.cmdmem[cmd] = op;
 			if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {

From a78365e73d68c45a7b9a3707b98952cb2d8dc6ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Mon, 26 Feb 2018 15:19:11 +0100
Subject: [PATCH 11/11] Remove duplicate declaration of GPUCommon::CmdFunc

---
 GPU/GPU.cpp     | 2 +-
 GPU/GPUCommon.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/GPU/GPU.cpp b/GPU/GPU.cpp
index 847e25661f..abccdf6da9 100644
--- a/GPU/GPU.cpp
+++ b/GPU/GPU.cpp
@@ -110,6 +110,6 @@ bool GPU_Init(GraphicsContext *ctx, Draw::DrawContext *draw) {
 
 void GPU_Shutdown() {
 	delete gpu;
-	gpu = 0;
+	gpu = nullptr;
 	gpuDebug = 0;
 }
diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h
index 2f49863b7f..e520b38e15 100644
--- a/GPU/GPUCommon.h
+++ b/GPU/GPUCommon.h
@@ -300,7 +300,6 @@ protected:
 	GraphicsContext *gfxCtx_;
 	Draw::DrawContext *draw_;
 
-	typedef void (GPUCommon::*CmdFunc)(u32 op, u32 diff);
 	struct CommandInfo {
 		uint64_t flags;
 		GPUCommon::CmdFunc func;