Sneak in a minor software transform optimization

2025-04-02 11:01:50 -04:00 · 2022-11-28 11:10:28 +01:00 · 2022-11-28 11:10:28 +01:00 · 9bb0c91a75
commit 9bb0c91a75
parent cd51c05d1c
2 changed files with 2 additions and 6 deletions
--- a/Common/Data/Convert/SmallDataConvert.h
+++ b/Common/Data/Convert/SmallDataConvert.h
@@ -23,7 +23,6 @@ extern const float one_over_255_x4[4];
 // NEON intrinsics: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0491f/BABDCGGF.html

 // LSBs in f[0], etc.
-// Could be SSE optimized.
 inline void Uint8x4ToFloat4(float f[4], uint32_t u) {
 #ifdef _M_SSE
 	__m128i zero = _mm_setzero_si128();
--- a/GPU/Common/VertexDecoderCommon.h
+++ b/GPU/Common/VertexDecoderCommon.h
@@ -23,6 +23,7 @@

 #include "Common/CommonTypes.h"
 #include "Common/Data/Collections/Hashmaps.h"
+#include "Common/Data/Convert/SmallDataConvert.h"
 #include "Common/Log.h"
 #include "Core/Reporting.h"
 #include "GPU/ge_constants.h"
@@ -181,11 +182,7 @@ public:
 	void ReadColor0(float color[4]) const {
 		switch (decFmt_.c0fmt) {
 		case DEC_U8_4:
-			{
-				const u8 *b = (const u8 *)(data_ + decFmt_.c0off);
-				for (int i = 0; i < 4; i++)
-					color[i] = b[i] * (1.f / 255.f);
-			}
+			Uint8x4ToFloat4(color, *(const u32 *)(data_ + decFmt_.c0off));
 			break;
 		case DEC_FLOAT_4:
 			memcpy(color, data_ + decFmt_.c0off, 16);