mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Merge pull request #7117 from unknownbrackets/jit-simd
x86jit: Implement vmmov using SIMD
This commit is contained in:
commit
7deb8055ee
4 changed files with 88 additions and 2 deletions
|
@ -2389,6 +2389,48 @@ void Jit::Comp_Vmmov(MIPSOpcode op) {
|
|||
MatrixSize sz = GetMtxSize(op);
|
||||
int n = GetMatrixSide(sz);
|
||||
|
||||
if (jo.enableVFPUSIMD) {
|
||||
VectorSize vsz = GetVectorSize(sz);
|
||||
u8 dest[4][4];
|
||||
MatrixOverlapType overlap = GetMatrixOverlap(_VD, _VS, sz);
|
||||
|
||||
u8 vecs[4];
|
||||
if (overlap == OVERLAP_NONE) {
|
||||
GetMatrixColumns(_VD, sz, vecs);
|
||||
for (int i = 0; i < n; ++i) {
|
||||
GetVectorRegs(dest[i], vsz, vecs[i]);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < n; ++i) {
|
||||
fpr.GetTempVS(dest[i], vsz);
|
||||
}
|
||||
}
|
||||
|
||||
GetMatrixColumns(_VS, sz, vecs);
|
||||
for (int i = 0; i < n; i++) {
|
||||
u8 vec[4];
|
||||
GetVectorRegs(vec, vsz, vecs[i]);
|
||||
fpr.MapRegsVS(vec, vsz, 0);
|
||||
fpr.MapRegsVS(dest[i], vsz, MAP_NOINIT);
|
||||
MOVAPS(fpr.VSX(dest[i]), fpr.VS(vec));
|
||||
}
|
||||
|
||||
if (overlap != OVERLAP_NONE) {
|
||||
// Okay, move from the temps to VD now.
|
||||
GetMatrixColumns(_VD, sz, vecs);
|
||||
for (int i = 0; i < n; i++) {
|
||||
u8 vec[4];
|
||||
GetVectorRegs(vec, vsz, vecs[i]);
|
||||
fpr.MapRegsVS(vec, vsz, MAP_NOINIT);
|
||||
fpr.MapRegsVS(dest[i], vsz, 0);
|
||||
MOVAPS(fpr.VSX(vec), fpr.VS(dest[i]));
|
||||
}
|
||||
}
|
||||
|
||||
fpr.ReleaseSpillLocks();
|
||||
return;
|
||||
}
|
||||
|
||||
u8 sregs[16], dregs[16];
|
||||
GetMatrixRegs(sregs, sz, _VS);
|
||||
GetMatrixRegs(dregs, sz, _VD);
|
||||
|
|
|
@ -716,6 +716,50 @@ int FPURegCache::GetTempR() {
|
|||
return -1;
|
||||
}
|
||||
|
||||
// Reserves temp regs for every element of a vector of size vsz.
//
// v:   output array; receives n = GetNumVectorElements(vsz) entries, each being
//      the regs[] index of a temp minus 32.
// vsz: vector size that determines how many temps are needed.
//
// Prefers n free temps in a row; if no such run exists, falls back to the free
// temps that were seen individually while scanning.
//
// Returns 0 on success (every reserved temp is marked tempLocked), or -1 after
// asserting when not enough temps are free.  On failure v may be partially written.
int FPURegCache::GetTempVS(u8 *v, VectorSize vsz) {
	pendingFlush = true;
	const int n = GetNumVectorElements(vsz);

	// Let's collect regs as we go, but try for n free in a row.
	int found = 0;
	// The upper bound keeps regs[r + i] (i < n) inside the temp range.
	for (int r = TEMP0; r <= TEMP0 + NUM_TEMPS - n; ++r) {
		if (regs[r].away || regs[r].tempLocked) {
			continue;
		}

		// How many free siblings does this have?
		int seq = 1;
		for (int i = 1; i < n; ++i) {
			if (regs[r + i].away || regs[r + i].tempLocked) {
				break;
			}
			++seq;
		}

		if (seq == n) {
			// Got 'em.  Exactly as many as we need.
			// This overwrites anything the fallback below collected so far.
			for (int i = 0; i < n; ++i) {
				v[i] = r + i - 32;
			}
			found = n;
			break;
		}

		// Fallback: remember this lone free temp in case no full run turns up.
		if (found < n) {
			v[found++] = r - 32;
		}
	}

	if (found != n) {
		_assert_msg_(JIT, 0, "Regcache ran out of temp regs, might need to DiscardR() some.");
		return -1;
	}

	for (int i = 0; i < n; ++i) {
		regs[v[i] + 32].tempLocked = true;
	}

	// Success.  Without this, control flowed off the end of a non-void
	// function, which is undefined behavior.
	return 0;
}
|
||||
|
||||
void FPURegCache::Flush() {
|
||||
if (!pendingFlush) {
|
||||
return;
|
||||
|
|
|
@ -114,7 +114,7 @@ public:
|
|||
int GetTempV() {
|
||||
return GetTempR() - 32;
|
||||
}
|
||||
// Reserves temp regs for a whole vector of size vsz and stores their indices (minus 32) in v.
|
||||
int GetTempVS(u8 *v, VectorSize vsz);
|
||||
|
||||
void SetEmitter(XEmitter *emitter) {emit = emitter;}
|
||||
void SetOptions(MIPSComp::JitOptions *jo) {jo_ = jo;}
|
||||
|
|
|
@ -173,7 +173,6 @@ bool TestJit() {
|
|||
#else
|
||||
std::vector<std::string> lines = DisassembleX86(block->normalEntry, block->codeSize);
|
||||
#endif
|
||||
printf("Jit was %fx faster than interp.\n\n", jit_speed / interp_speed);
|
||||
// Cut off at 25 due to the repetition above. Might need tweaking for large instructions.
|
||||
const int cutoff = 25;
|
||||
for (int i = 0; i < std::min((int)lines.size(), cutoff); i++) {
|
||||
|
@ -181,6 +180,7 @@ bool TestJit() {
|
|||
}
|
||||
if (lines.size() > cutoff)
|
||||
printf("...\n");
|
||||
printf("Jit was %fx faster than interp.\n\n", jit_speed / interp_speed);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
|
Loading…
Add table
Reference in a new issue