From 72a73045bbbd4f7f2fc12af94b069eea4aa0595a Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets" <checkins@unknownbrackets.org>
Date: Sun, 19 Jul 2015 15:59:10 -0700
Subject: [PATCH] Add uv range to arm64 and non-jit decoders.

---
 GPU/Common/VertexDecoderArm64.cpp  | 22 +++++++++++++++++++++-
 GPU/Common/VertexDecoderCommon.cpp | 16 ++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp
index fa2e05677c..30b1a1d147 100644
--- a/GPU/Common/VertexDecoderArm64.cpp
+++ b/GPU/Common/VertexDecoderArm64.cpp
@@ -554,7 +554,27 @@ void VertexDecoderJitCache::Jit_TcU16() {
 }
 
 void VertexDecoderJitCache::Jit_TcU16Through() {
-	LDUR(tempReg1, srcReg, dec_->tcoff);
+	LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff);  // First 16-bit half of the source UV pair.
+	LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2);  // Second 16-bit half, 2 bytes later.

+	// TODO: Cleanup.  Tracks the min/max of both halves in gstate_c while decoding; base is &vertMinU.
+	MOVP2R(scratchReg64, &gstate_c.vertMinU);
+
+	auto updateSide = [&](ARM64Reg r, CCFlags cc, u32 off) {  // Emits: if (r <cc> slot[off]) slot[off] = r.
+		LDRH(INDEX_UNSIGNED, tempReg3, scratchReg64, off);  // Currently stored bound at base + off.
+		CMP(r, tempReg3);
+		FixupBranch skip = B(InvertCond(cc));  // Jump over the store when cc does not hold.
+		STRH(INDEX_UNSIGNED, r, scratchReg64, off);  // r becomes the new stored bound.
+		SetJumpTarget(skip);
+	};

+	// TODO: Can this actually be fast?  Hmm, floats aren't better.
+	updateSide(tempReg1, CC_LT, 0);  // Offset 0 is vertMinU itself (the base address).
+	updateSide(tempReg1, CC_GT, 2);  // Presumably vertMaxU - assumes four consecutive 16-bit fields; TODO confirm.
+	updateSide(tempReg2, CC_LT, 4);  // Presumably vertMinV - same layout assumption.
+	updateSide(tempReg2, CC_GT, 6);  // Presumably vertMaxV - same layout assumption.

+	ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 16));  // Repack: first | (second << 16).
 	STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
 }
 
diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp
index 6e36c8830b..b0d869c6a5 100644
--- a/GPU/Common/VertexDecoderCommon.cpp
+++ b/GPU/Common/VertexDecoderCommon.cpp
@@ -15,6 +15,7 @@
 // Official git repository and contact information can be found at
 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
 
+#include <algorithm>
 #include <stdio.h>
 
 #include "base/basictypes.h"
@@ -294,6 +295,11 @@ void VertexDecoder::Step_TcU16Through() const
 	const u16 *uvdata = (const u16_le*)(ptr_ + tcoff);
 	uv[0] = uvdata[0];
 	uv[1] = uvdata[1];
+
+	gstate_c.vertMinU = std::min(gstate_c.vertMinU, uvdata[0]);
+	gstate_c.vertMaxU = std::max(gstate_c.vertMaxU, uvdata[0]);
+	gstate_c.vertMinV = std::min(gstate_c.vertMinV, uvdata[1]);
+	gstate_c.vertMaxV = std::max(gstate_c.vertMaxV, uvdata[1]);
 }
 
 void VertexDecoder::Step_TcU16ThroughDouble() const
@@ -318,6 +324,11 @@ void VertexDecoder::Step_TcU16ThroughToFloat() const
 	const u16 *uvdata = (const u16_le*)(ptr_ + tcoff);
 	uv[0] = uvdata[0];
 	uv[1] = uvdata[1];
+
+	gstate_c.vertMinU = std::min(gstate_c.vertMinU, uvdata[0]);
+	gstate_c.vertMaxU = std::max(gstate_c.vertMaxU, uvdata[0]);
+	gstate_c.vertMinV = std::min(gstate_c.vertMinV, uvdata[1]);
+	gstate_c.vertMaxV = std::max(gstate_c.vertMaxV, uvdata[1]);
 }
 
 void VertexDecoder::Step_TcU16ThroughDoubleToFloat() const
@@ -342,6 +353,11 @@ void VertexDecoder::Step_TcFloatThrough() const
 	const float *uvdata = (const float*)(ptr_ + tcoff);
 	uv[0] = uvdata[0];
 	uv[1] = uvdata[1];
+
+	gstate_c.vertMinU = std::min(gstate_c.vertMinU, (u16)uvdata[0]);
+	gstate_c.vertMaxU = std::max(gstate_c.vertMaxU, (u16)uvdata[0]);
+	gstate_c.vertMinV = std::min(gstate_c.vertMinV, (u16)uvdata[1]);
+	gstate_c.vertMaxV = std::max(gstate_c.vertMaxV, (u16)uvdata[1]);
 }
 
 void VertexDecoder::Step_TcU8Prescale() const {