From 092179c42d6879c423b5ecef4a1f9d08310e6ffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 10 May 2024 18:41:55 +0200 Subject: [PATCH] More IR interpreter tweaks --- Core/MIPS/IR/IRInterpreter.cpp | 24 ++++++------------------ Core/MIPS/MIPSIntVFPU.cpp | 3 ++- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/Core/MIPS/IR/IRInterpreter.cpp b/Core/MIPS/IR/IRInterpreter.cpp index 09805fa7fa..c8d98ad6fd 100644 --- a/Core/MIPS/IR/IRInterpreter.cpp +++ b/Core/MIPS/IR/IRInterpreter.cpp @@ -283,33 +283,20 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) { case IROp::LoadVec4: { u32 base = mips->r[inst->src1] + inst->constant; -#if defined(_M_SSE) - _mm_store_ps(&mips->f[inst->dest], _mm_load_ps((const float *)Memory::GetPointerUnchecked(base))); -#else - for (int i = 0; i < 4; i++) - mips->f[inst->dest + i] = Memory::ReadUnchecked_Float(base + 4 * i); -#endif + // This compiles to a nice SSE load/store on x86, and hopefully similar on ARM. + memcpy(&mips->f[inst->dest], Memory::GetPointerUnchecked(base), 4 * 4); break; } case IROp::StoreVec4: { u32 base = mips->r[inst->src1] + inst->constant; -#if defined(_M_SSE) - _mm_store_ps((float *)Memory::GetPointerUnchecked(base), _mm_load_ps(&mips->f[inst->dest])); -#else - for (int i = 0; i < 4; i++) - Memory::WriteUnchecked_Float(mips->f[inst->dest + i], base + 4 * i); -#endif + memcpy((float *)Memory::GetPointerUnchecked(base), &mips->f[inst->dest], 4 * 4); break; } case IROp::Vec4Init: { -#if defined(_M_SSE) - _mm_store_ps(&mips->f[inst->dest], _mm_load_ps(vec4InitValues[inst->src1])); -#else memcpy(&mips->f[inst->dest], vec4InitValues[inst->src1], 4 * sizeof(float)); -#endif break; } @@ -398,8 +385,9 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) { #if defined(_M_SSE) _mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_set1_ps(mips->f[inst->src2]))); #else + const float factor = mips->f[inst->src2]; for (int i = 0; i < 4; i++) - mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2]; + mips->f[inst->dest + i] = mips->f[inst->src1 + i] * factor; #endif break; } @@ -792,7 +780,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) { mips->f[inst->dest] = mips->f[inst->src1] - mips->f[inst->src2]; break; case IROp::FMul: - if ((my_isinf(mips->f[inst->src1]) && mips->f[inst->src2] == 0.0f) || (my_isinf(mips->f[inst->src2]) && mips->f[inst->src1] == 0.0f)) { + if ((mips->f[inst->src2] == 0.0f && my_isinf(mips->f[inst->src1])) || (mips->f[inst->src1] == 0.0f && my_isinf(mips->f[inst->src2]))) { mips->fi[inst->dest] = 0x7fc00000; } else { mips->f[inst->dest] = mips->f[inst->src1] * mips->f[inst->src2]; diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index 9edb668226..6388d1652a 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -621,8 +621,9 @@ namespace MIPSInt break; default: ApplySwizzleS(s, sz); + break; } - for (int i = 0; i < n; i++) { + for (int i = 0; i < (int)n; i++) { switch (optype) { case 0: d[i] = s[i]; break; //vmov case 1: d[i] = s[i]; break; //vabs (prefix)