From fdf5f2ab54066a20fc83d79e07027d3b3a0568be Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets" <checkins@unknownbrackets.org>
Date: Sun, 21 May 2017 16:19:40 -0700
Subject: [PATCH] SoftGPU: Support separate mip CLUTs with linear.

---
 GPU/Software/SamplerX86.cpp | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/GPU/Software/SamplerX86.cpp b/GPU/Software/SamplerX86.cpp
index fb93987471..38acfd72b1 100644
--- a/GPU/Software/SamplerX86.cpp
+++ b/GPU/Software/SamplerX86.cpp
@@ -613,7 +613,15 @@ bool SamplerJitCache::Jit_ReadClutColor(const SamplerID &id) {
 	if (!id.useSharedClut) {
 		// TODO: Need to load from RAM, always.
 		if (id.linear) {
-			return false;
+#ifdef _WIN32
+			const int argOffset = 24 + 48 + 8 + 32;
+			// Extra 8 to account for the return address pushed by CALL.
+			MOV(32, R(tempReg2), MDisp(RSP, argOffset + 12 + 8));
+#else
+			// Extra 8 to account for the return address pushed by CALL.
+			MOV(32, R(tempReg2), MDisp(RSP, 24 + 48 + 8 + 8));
+#endif
+			LEA(32, tempReg2, MScaled(tempReg2, SCALE_4, 0));
 		} else {
 #ifdef _WIN32
 			// The argument was saved on the stack.
@@ -623,8 +631,10 @@ bool SamplerJitCache::Jit_ReadClutColor(const SamplerID &id) {
 			// We need to multiply by 16 and add, LEA allows us to copy too.
 			LEA(32, tempReg2, MScaled(levelReg, SCALE_4, 0));
 #endif
-			LEA(64, resultReg, MComplex(resultReg, tempReg2, SCALE_4, 0));
 		}
+
+		// Second step of the multiply by 16 (we have only multiplied by 4 so far).
+		LEA(64, resultReg, MComplex(resultReg, tempReg2, SCALE_4, 0));
 	}
 
 	MOV(PTRBITS, R(tempReg1), ImmPtr(clut));