From e8527714809fa07427fbdf4560d78077aba35b71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Thu, 5 Oct 2023 18:52:50 +0200 Subject: [PATCH] Integrate the voffset shuffle in ReadVector --- Core/MIPS/MIPS.cpp | 2 +- Core/MIPS/MIPSVFPUUtils.cpp | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Core/MIPS/MIPS.cpp b/Core/MIPS/MIPS.cpp index b68aebddf5..91223bd637 100644 --- a/Core/MIPS/MIPS.cpp +++ b/Core/MIPS/MIPS.cpp @@ -122,7 +122,7 @@ MIPSState::MIPSState() { // * 4x4 Matrices are contiguous in RAM, making them, too, fast-loadable in NEON // Disadvantages: - // * Extra indirection, can be confusing and slower (interpreter only) + // * Extra indirection, can be confusing and slower (interpreter only, however we can often skip the table by rearranging formulas) // * Flushing and reloading row registers is now slower int i = 0; diff --git a/Core/MIPS/MIPSVFPUUtils.cpp b/Core/MIPS/MIPSVFPUUtils.cpp index 87d9875399..ebb91d863b 100644 --- a/Core/MIPS/MIPSVFPUUtils.cpp +++ b/Core/MIPS/MIPSVFPUUtils.cpp @@ -175,16 +175,17 @@ void ReadVector(float *rd, VectorSize size, int reg) { default: length = 0; break; } int transpose = (reg >> 5) & 1; - const int mtx = reg & (7 << 2); + const int mtx = ((reg << 2) & 0x70); const int col = reg & 3; + // NOTE: We now skip the voffset lookups. if (transpose) { - const int base = mtx + col * 32; - for (int i = 0; i < length; i++) - rd[i] = V(base + ((row+i)&3)); - } else { const int base = mtx + col; for (int i = 0; i < length; i++) - rd[i] = V(base + ((row+i)&3)*32); + rd[i] = currentMIPS->v[base + ((row+i)&3) * 4]; + } else { + const int base = mtx + col * 4; + for (int i = 0; i < length; i++) + rd[i] = currentMIPS->v[base + ((row+i)&3)]; } }