Integrate the voffset shuffle in ReadVector

This commit is contained in:
Henrik Rydgård 2023-10-05 18:52:50 +02:00
parent 5b14cb61a7
commit e852771480
2 changed files with 8 additions and 7 deletions

View file

@ -122,7 +122,7 @@ MIPSState::MIPSState() {
// * 4x4 Matrices are contiguous in RAM, making them, too, fast-loadable in NEON // * 4x4 Matrices are contiguous in RAM, making them, too, fast-loadable in NEON
// Disadvantages: // Disadvantages:
// * Extra indirection, can be confusing and slower (interpreter only) // * Extra indirection, can be confusing and slower (interpreter only, however we can often skip the table by rearranging formulas)
// * Flushing and reloading row registers is now slower // * Flushing and reloading row registers is now slower
int i = 0; int i = 0;

View file

@ -175,16 +175,17 @@ void ReadVector(float *rd, VectorSize size, int reg) {
default: length = 0; break; default: length = 0; break;
} }
int transpose = (reg >> 5) & 1; int transpose = (reg >> 5) & 1;
const int mtx = reg & (7 << 2); const int mtx = ((reg << 2) & 0x70);
const int col = reg & 3; const int col = reg & 3;
// NOTE: We now skip the voffset lookups.
if (transpose) { if (transpose) {
const int base = mtx + col * 32;
for (int i = 0; i < length; i++)
rd[i] = V(base + ((row+i)&3));
} else {
const int base = mtx + col; const int base = mtx + col;
for (int i = 0; i < length; i++) for (int i = 0; i < length; i++)
rd[i] = V(base + ((row+i)&3)*32); rd[i] = currentMIPS->v[base + ((row+i)&3) * 4];
} else {
const int base = mtx + col * 4;
for (int i = 0; i < length; i++)
rd[i] = currentMIPS->v[base + ((row+i)&3)];
} }
} }