diff --git a/Common/Data/Convert/SmallDataConvert.h b/Common/Data/Convert/SmallDataConvert.h index 95558a17df..740193e739 100644 --- a/Common/Data/Convert/SmallDataConvert.h +++ b/Common/Data/Convert/SmallDataConvert.h @@ -23,7 +23,6 @@ extern const float one_over_255_x4[4]; // NEON intrinsics: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0491f/BABDCGGF.html // LSBs in f[0], etc. -// Could be SSE optimized. inline void Uint8x4ToFloat4(float f[4], uint32_t u) { #ifdef _M_SSE __m128i zero = _mm_setzero_si128(); diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h index 7c2dcb2cfc..f23db789cc 100644 --- a/GPU/Common/VertexDecoderCommon.h +++ b/GPU/Common/VertexDecoderCommon.h @@ -23,6 +23,7 @@ #include "Common/CommonTypes.h" #include "Common/Data/Collections/Hashmaps.h" +#include "Common/Data/Convert/SmallDataConvert.h" #include "Common/Log.h" #include "Core/Reporting.h" #include "GPU/ge_constants.h" @@ -181,11 +182,7 @@ public: void ReadColor0(float color[4]) const { switch (decFmt_.c0fmt) { case DEC_U8_4: - { - const u8 *b = (const u8 *)(data_ + decFmt_.c0off); - for (int i = 0; i < 4; i++) - color[i] = b[i] * (1.f / 255.f); - } + Uint8x4ToFloat4(color, *(const u32 *)(data_ + decFmt_.c0off)); break; case DEC_FLOAT_4: memcpy(color, data_ + decFmt_.c0off, 16);