mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Arm VFPU: 2-op and 3-op: map registers before work, improving instruction order a little.
This commit is contained in:
parent
7ae9c26b6a
commit
174b5187e6
1 changed files with 47 additions and 27 deletions
|
@ -699,8 +699,19 @@ namespace MIPSComp
|
|||
}
|
||||
}
|
||||
|
||||
// Map first, then work. This will allow us to use VLDMIA more often
|
||||
// (when we add the appropriate map function) and the instruction ordering
|
||||
// will improve.
|
||||
// Note that mapping like this (instead of first all sregs, first all tregs etc)
|
||||
// reduces the amount of continuous registers a lot :(
|
||||
for (int i = 0; i < n; i++) {
|
||||
fpr.MapDirtyInInV(tempregs[i], sregs[i], tregs[i]);
|
||||
fpr.SpillLockV(tempregs[i]);
|
||||
fpr.SpillLockV(sregs[i]);
|
||||
fpr.SpillLockV(tregs[i]);
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
switch (op >> 26) {
|
||||
case 24: //VFPU0
|
||||
switch ((op >> 23)&7) {
|
||||
|
@ -798,6 +809,31 @@ namespace MIPSComp
|
|||
return;
|
||||
}
|
||||
|
||||
// Catch the disabled operations immediately so we don't map registers unnecessarily later.
|
||||
// Move these down to the big switch below as they are implemented.
|
||||
switch ((op >> 16) & 0x1f) {
|
||||
case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
|
||||
DISABLE;
|
||||
break;
|
||||
case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos
|
||||
DISABLE;
|
||||
break;
|
||||
case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2
|
||||
DISABLE;
|
||||
break;
|
||||
case 21: // d[i] = logf(s[i])/log(2.0f); break; //vlog2
|
||||
DISABLE;
|
||||
break;
|
||||
case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin
|
||||
DISABLE;
|
||||
break;
|
||||
case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2
|
||||
DISABLE;
|
||||
break;
|
||||
default:
|
||||
;
|
||||
}
|
||||
|
||||
VectorSize sz = GetVecSize(op);
|
||||
int n = GetNumVectorElements(sz);
|
||||
|
||||
|
@ -827,21 +863,27 @@ namespace MIPSComp
|
|||
t4 = fpr.V(t[2]);
|
||||
}
|
||||
|
||||
// Pre map the registers to get better instruction ordering.
|
||||
// Note that mapping like this (instead of first all sregs, first all tempregs etc)
|
||||
// reduces the amount of continuous registers a lot :(
|
||||
for (int i = 0; i < n; i++) {
|
||||
fpr.MapDirtyInV(tempregs[i], sregs[i]);
|
||||
fpr.SpillLockV(tempregs[i]);
|
||||
fpr.SpillLockV(sregs[i]);
|
||||
}
|
||||
|
||||
// Warning: sregs[i] and tempxregs[i] may be the same reg.
|
||||
// Helps for vmov, hurts for vrcp, etc.
|
||||
for (int i = 0; i < n; ++i) {
|
||||
for (int i = 0; i < n; i++) {
|
||||
switch ((op >> 16) & 0x1f) {
|
||||
case 0: // d[i] = s[i]; break; //vmov
|
||||
// Probably for swizzle.
|
||||
fpr.MapDirtyInV(tempregs[i], sregs[i]);
|
||||
VMOV(fpr.V(tempregs[i]), fpr.V(sregs[i]));
|
||||
break;
|
||||
case 1: // d[i] = fabsf(s[i]); break; //vabs
|
||||
fpr.MapDirtyInV(tempregs[i], sregs[i]);
|
||||
VABS(fpr.V(tempregs[i]), fpr.V(sregs[i]));
|
||||
break;
|
||||
case 2: // d[i] = -s[i]; break; //vneg
|
||||
fpr.MapDirtyInV(tempregs[i], sregs[i]);
|
||||
VNEG(fpr.V(tempregs[i]), fpr.V(sregs[i]));
|
||||
break;
|
||||
|
||||
|
@ -866,30 +908,15 @@ namespace MIPSComp
|
|||
*/
|
||||
|
||||
case 16: // d[i] = 1.0f / s[i]; break; //vrcp
|
||||
fpr.MapDirtyInV(tempregs[i], sregs[i]);
|
||||
MOVI2F(S0, 1.0f, R0);
|
||||
VDIV(fpr.V(tempregs[i]), S0, fpr.V(sregs[i]));
|
||||
break;
|
||||
case 17: // d[i] = 1.0f / sqrtf(s[i]); break; //vrsq
|
||||
fpr.MapDirtyInV(tempregs[i], sregs[i]);
|
||||
MOVI2F(S0, 1.0f, R0);
|
||||
VSQRT(S1, fpr.V(sregs[i]));
|
||||
VDIV(fpr.V(tempregs[i]), S0, S1);
|
||||
break;
|
||||
case 18: // d[i] = sinf((float)M_PI_2 * s[i]); break; //vsin
|
||||
DISABLE;
|
||||
break;
|
||||
case 19: // d[i] = cosf((float)M_PI_2 * s[i]); break; //vcos
|
||||
DISABLE;
|
||||
break;
|
||||
case 20: // d[i] = powf(2.0f, s[i]); break; //vexp2
|
||||
DISABLE;
|
||||
break;
|
||||
case 21: // d[i] = logf(s[i])/log(2.0f); break; //vlog2
|
||||
DISABLE;
|
||||
break;
|
||||
case 22: // d[i] = sqrtf(s[i]); break; //vsqrt
|
||||
fpr.MapDirtyInV(tempregs[i], sregs[i]);
|
||||
VSQRT(fpr.V(tempregs[i]), fpr.V(sregs[i]));
|
||||
VABS(fpr.V(tempregs[i]), fpr.V(tempregs[i]));
|
||||
break;
|
||||
|
@ -897,7 +924,6 @@ namespace MIPSComp
|
|||
// Seems to work well enough but can disable if it becomes a problem.
|
||||
// Should be easy enough to translate to NEON. There we can load all the constants
|
||||
// in one go of course.
|
||||
fpr.MapDirtyInV(tempregs[i], sregs[i]);
|
||||
MOVI2F(S0, 0.0f, R0);
|
||||
VCMP(fpr.V(sregs[i]), S0); // flags = sign(sregs[i])
|
||||
VMRS_APSR();
|
||||
|
@ -924,17 +950,11 @@ namespace MIPSComp
|
|||
VMUL(fpr.V(tempregs[i]), fpr.V(tempregs[i]), S1);
|
||||
break;
|
||||
case 24: // d[i] = -1.0f / s[i]; break; // vnrcp
|
||||
fpr.MapDirtyInV(tempregs[i], sregs[i]);
|
||||
MOVI2F(S0, -1.0f, R0);
|
||||
VDIV(fpr.V(tempregs[i]), S0, fpr.V(sregs[i]));
|
||||
break;
|
||||
case 26: // d[i] = -sinf((float)M_PI_2 * s[i]); break; // vnsin
|
||||
DISABLE;
|
||||
break;
|
||||
case 28: // d[i] = 1.0f / expf(s[i] * (float)M_LOG2E); break; // vrexp2
|
||||
DISABLE;
|
||||
break;
|
||||
default:
|
||||
ERROR_LOG(JIT, "case missing in vfpu vv2op");
|
||||
DISABLE;
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue