Sneak in a minor software transform optimization

This commit is contained in:
Henrik Rydgård 2022-11-28 11:10:28 +01:00
parent cd51c05d1c
commit 9bb0c91a75
2 changed files with 2 additions and 6 deletions

View file

@@ -23,7 +23,6 @@ extern const float one_over_255_x4[4];
// NEON intrinsics: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0491f/BABDCGGF.html
// LSBs in f[0], etc.
// Could be SSE optimized.
inline void Uint8x4ToFloat4(float f[4], uint32_t u) {
#ifdef _M_SSE
__m128i zero = _mm_setzero_si128();

View file

@@ -23,6 +23,7 @@
#include "Common/CommonTypes.h"
#include "Common/Data/Collections/Hashmaps.h"
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/Log.h"
#include "Core/Reporting.h"
#include "GPU/ge_constants.h"
@@ -181,11 +182,7 @@ public:
void ReadColor0(float color[4]) const {
switch (decFmt_.c0fmt) {
case DEC_U8_4:
{
const u8 *b = (const u8 *)(data_ + decFmt_.c0off);
for (int i = 0; i < 4; i++)
color[i] = b[i] * (1.f / 255.f);
}
Uint8x4ToFloat4(color, *(const u32 *)(data_ + decFmt_.c0off));
break;
case DEC_FLOAT_4:
memcpy(color, data_ + decFmt_.c0off, 16);