From ebce8d275378c7f75f2f53d3ac7c20a56f47b075 Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets" <checkins@unknownbrackets.org>
Date: Sat, 16 Apr 2016 19:00:34 -0700
Subject: [PATCH] Don't convert to float with prescale off.

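With prescale off and expandAllUVtoFloat not set, morphed u8/u16
texcoords are now kept in their integer format instead of being
converted to float, since it seems we assume elsewhere that such UVs
still need to be normalized.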
---
 GPU/Common/VertexDecoderCommon.cpp | 74 +++++++++++++++++++++++++++---
 GPU/Common/VertexDecoderCommon.h   |  7 ++-
 GPU/Common/VertexDecoderX86.cpp    |  8 ++--
 3 files changed, 76 insertions(+), 13 deletions(-)

diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp
index 6f6a0dc9d2..0d58cde4ad 100644
--- a/GPU/Common/VertexDecoderCommon.cpp
+++ b/GPU/Common/VertexDecoderCommon.cpp
@@ -405,6 +405,51 @@ void VertexDecoder::Step_TcFloatPrescale() const {
 }
 
 void VertexDecoder::Step_TcU8Morph() const {
+	float uv[2] = { 0, 0 };  // Running weighted sum of U and V over all morph frames.
+	for (int n = 0; n < morphcount; n++) {
+		float w = gstate_c.morphWeights[n];
+		const u8 *uvdata = (const u8 *)(ptr_ + onesize_*n + tcoff);  // Frame n's texcoords; frames are onesize_ bytes apart.
+
+		uv[0] += (float)uvdata[0] * w;
+		uv[1] += (float)uvdata[1] * w;
+	}
+
+	u8 *out = decoded_ + decFmt.uvoff;  // Output stays u8; no conversion to float in this variant.
+	out[0] = (int)uv[0];  // The float-to-int cast drops the fractional part.
+	out[1] = (int)uv[1];
+}
+
+void VertexDecoder::Step_TcU16Morph() const {
+	float uv[2] = { 0, 0 };  // Running weighted sum of U and V over all morph frames.
+	for (int n = 0; n < morphcount; n++) {
+		float w = gstate_c.morphWeights[n];
+		const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff);  // Frame n's texcoords; frames are onesize_ bytes apart.
+
+		uv[0] += (float)uvdata[0] * w;
+		uv[1] += (float)uvdata[1] * w;
+	}
+
+	u16_le *out = (u16_le *)(decoded_ + decFmt.uvoff);  // Output stays 16-bit; no conversion to float in this variant.
+	out[0] = (int)uv[0];  // The float-to-int cast drops the fractional part.
+	out[1] = (int)uv[1];
+}
+
+void VertexDecoder::Step_TcU16DoubleMorph() const {
+	float uv[2] = { 0, 0 };  // Running weighted sum of U and V over all morph frames.
+	for (int n = 0; n < morphcount; n++) {
+		float w = gstate_c.morphWeights[n];
+		const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff);  // Frame n's texcoords; frames are onesize_ bytes apart.
+
+		uv[0] += (float)uvdata[0] * w;
+		uv[1] += (float)uvdata[1] * w;
+	}
+
+	u16_le *out = (u16_le *)(decoded_ + decFmt.uvoff);  // Output stays 16-bit; no conversion to float in this variant.
+	out[0] = (int)(uv[0] * 2.0f);  // Blended value is doubled, then the fraction is dropped. NOTE(review): sums above 0x7FFF would not fit in 16 bits - confirm.
+	out[1] = (int)(uv[1] * 2.0f);
+}
+
+void VertexDecoder::Step_TcU8MorphToFloat() const {
 	float uv[2] = { 0, 0 };
 	for (int n = 0; n < morphcount; n++) {
 		float w = gstate_c.morphWeights[n];
@@ -419,7 +464,7 @@ void VertexDecoder::Step_TcU8Morph() const {
 	out[1] = uv[1];
 }
 
-void VertexDecoder::Step_TcU16Morph() const {
+void VertexDecoder::Step_TcU16MorphToFloat() const {
 	float uv[2] = { 0, 0 };
 	for (int n = 0; n < morphcount; n++) {
 		float w = gstate_c.morphWeights[n];
@@ -434,7 +479,7 @@ void VertexDecoder::Step_TcU16Morph() const {
 	out[1] = uv[1];
 }
 
-void VertexDecoder::Step_TcU16DoubleMorph() const {
+void VertexDecoder::Step_TcU16DoubleMorphToFloat() const {
 	float uv[2] = { 0, 0 };
 	for (int n = 0; n < morphcount; n++) {
 		float w = gstate_c.morphWeights[n];
@@ -914,6 +959,20 @@ static const StepFunction tcstep_morph_remaster[4] = {
 	&VertexDecoder::Step_TcFloatMorph,
 };
 
+static const StepFunction tcstep_morphToFloat[4] = {  // Indexed by tc; morph steps picked when options.expandAllUVtoFloat is set.
+	0,  // No step for index 0 (presumably "no texcoords" - confirm against the tc encoding).
+	&VertexDecoder::Step_TcU8MorphToFloat,
+	&VertexDecoder::Step_TcU16MorphToFloat,
+	&VertexDecoder::Step_TcFloatMorph,
+};
+
+static const StepFunction tcstep_morph_remasterToFloat[4] = {  // Indexed by tc; used instead of tcstep_morphToFloat when g_DoubleTextureCoordinates is set.
+	0,  // No step for index 0 (presumably "no texcoords" - confirm against the tc encoding).
+	&VertexDecoder::Step_TcU8MorphToFloat,
+	&VertexDecoder::Step_TcU16DoubleMorphToFloat,  // Only the 16-bit entry differs from tcstep_morphToFloat.
+	&VertexDecoder::Step_TcFloatMorph,
+};
+
 static const StepFunction tcstep_through[4] = {
 	0,
 	&VertexDecoder::Step_TcU8,
@@ -1120,18 +1179,19 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
 			else
 				steps_[numSteps_++] = morphcount == 1 ? tcstep_prescale[tc] : tcstep_prescale_morph[tc];
 			decFmt.uvfmt = DEC_FLOAT_2;
-		} else if (morphcount != 1 && !throughmode) {
-			steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remaster[tc] : tcstep_morph[tc];
-			decFmt.uvfmt = DEC_FLOAT_2;
 		} else {
 			if (options.expandAllUVtoFloat) {
-				if (g_DoubleTextureCoordinates)
+				if (morphcount != 1 && !throughmode)
+					steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remasterToFloat[tc] : tcstep_morphToFloat[tc];
+				else if (g_DoubleTextureCoordinates)
 					steps_[numSteps_++] = throughmode ? tcstep_through_remasterToFloat[tc] : tcstep_remasterToFloat[tc];
 				else
 					steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc];
 				decFmt.uvfmt = DEC_FLOAT_2;
 			} else {
-				if (g_DoubleTextureCoordinates)
+				if (morphcount != 1 && !throughmode)
+					steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remaster[tc] : tcstep_morph[tc];
+				else if (g_DoubleTextureCoordinates)
 					steps_[numSteps_++] = throughmode ? tcstep_through_remaster[tc] : tcstep_remaster[tc];
 				else
 					steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc];
diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h
index 9f9f141d5c..6163951ecb 100644
--- a/GPU/Common/VertexDecoderCommon.h
+++ b/GPU/Common/VertexDecoderCommon.h
@@ -498,6 +498,9 @@ public:
 	void Step_TcU8Morph() const;
 	void Step_TcU16Morph() const;
 	void Step_TcU16DoubleMorph() const;
+	void Step_TcU8MorphToFloat() const;
+	void Step_TcU16MorphToFloat() const;
+	void Step_TcU16DoubleMorphToFloat() const;
 	void Step_TcFloatMorph() const;
 	void Step_TcU8PrescaleMorph() const;
 	void Step_TcU16PrescaleMorph() const;
@@ -638,8 +641,8 @@ public:
 	void Jit_TcFloatPrescale();
 
 	void Jit_TcAnyMorph(int bits);
-	void Jit_TcU8Morph();
-	void Jit_TcU16Morph();
+	void Jit_TcU8MorphToFloat();
+	void Jit_TcU16MorphToFloat();
 	void Jit_TcFloatMorph();
 	void Jit_TcU8PrescaleMorph();
 	void Jit_TcU16PrescaleMorph();
diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp
index ab61a9871e..1f67637ef7 100644
--- a/GPU/Common/VertexDecoderX86.cpp
+++ b/GPU/Common/VertexDecoderX86.cpp
@@ -105,8 +105,8 @@ static const JitLookup jitLookup[] = {
 	{&VertexDecoder::Step_TcU16Prescale, &VertexDecoderJitCache::Jit_TcU16Prescale},
 	{&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale},
 
-	{&VertexDecoder::Step_TcU8Morph, &VertexDecoderJitCache::Jit_TcU8Morph},
-	{&VertexDecoder::Step_TcU16Morph, &VertexDecoderJitCache::Jit_TcU16Morph},
+	{&VertexDecoder::Step_TcU8MorphToFloat, &VertexDecoderJitCache::Jit_TcU8MorphToFloat},
+	{&VertexDecoder::Step_TcU16MorphToFloat, &VertexDecoderJitCache::Jit_TcU16MorphToFloat},
 	{&VertexDecoder::Step_TcFloatMorph, &VertexDecoderJitCache::Jit_TcFloatMorph},
 	{&VertexDecoder::Step_TcU8PrescaleMorph, &VertexDecoderJitCache::Jit_TcU8PrescaleMorph},
 	{&VertexDecoder::Step_TcU16PrescaleMorph, &VertexDecoderJitCache::Jit_TcU16PrescaleMorph},
@@ -810,14 +810,14 @@ void VertexDecoderJitCache::Jit_TcAnyMorph(int bits) {
 	}
 }
 
-void VertexDecoderJitCache::Jit_TcU8Morph() {
+void VertexDecoderJitCache::Jit_TcU8MorphToFloat() {
 	Jit_TcAnyMorph(8);
 	// They were all added (weighted) pre-normalize, we normalize once here.
 	MULPS(fpScratchReg, M(&by128));
 	MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), fpScratchReg);
 }
 
-void VertexDecoderJitCache::Jit_TcU16Morph() {
+void VertexDecoderJitCache::Jit_TcU16MorphToFloat() {
 	Jit_TcAnyMorph(16);
 	// They were all added (weighted) pre-normalize, we normalize once here.
 	MULPS(fpScratchReg, M(&by32768));