mirror of
https://github.com/n64dev/cen64.git
synced 2024-06-20 21:17:58 -04:00
rsp_veq_vge_vlt_vne optimization patch from izy.
mm: It might be better to fold these indirect branches back into the RSP vector function table if we're going to ask the compiler to generate a jump table here (as the new `switch (iw & 3)` does).
This commit is contained in:
parent
5e3c85df8d
commit
c0d3be2561
|
@ -11,39 +11,38 @@ static inline __m128i rsp_veq_vge_vlt_vne(uint32_t iw, __m128i vs, __m128i vt,
|
|||
__m128i zero, __m128i *le, __m128i eq, __m128i sign) {
|
||||
__m128i equal = _mm_cmpeq_epi16(vs, vt);
|
||||
|
||||
// VNE & VGE
|
||||
if (iw & 0x2) {
|
||||
// VGE
|
||||
if (iw & 0x1) {
|
||||
__m128i gt = _mm_cmpgt_epi16(vs, vt);
|
||||
__m128i equalsign = _mm_and_si128(eq, sign);
|
||||
|
||||
equal = _mm_andnot_si128(equalsign, equal);
|
||||
*le = _mm_or_si128(gt, equal);
|
||||
}
|
||||
|
||||
// VNE
|
||||
else {
|
||||
__m128i nequal = _mm_cmpeq_epi16(equal, zero);
|
||||
|
||||
*le = _mm_and_si128(eq, equal);
|
||||
*le = _mm_or_si128(*le, nequal);
|
||||
}
|
||||
}
|
||||
|
||||
// VEQ & VLT
|
||||
else {
|
||||
// VEQ
|
||||
if (iw & 0x1)
|
||||
*le = _mm_andnot_si128(eq, equal);
|
||||
|
||||
// VLT
|
||||
else {
|
||||
switch(iw & 3)
|
||||
{
|
||||
case 0: // VLT
|
||||
{
|
||||
__m128i lt = _mm_cmplt_epi16(vs, vt);
|
||||
|
||||
equal = _mm_and_si128(eq, equal);
|
||||
equal = _mm_and_si128(sign, equal);
|
||||
*le = _mm_or_si128(lt, equal);
|
||||
break;
|
||||
}
|
||||
case 1: // VEQ
|
||||
{
|
||||
*le = _mm_andnot_si128(eq, equal);
|
||||
break;
|
||||
}
|
||||
case 2: // VNE
|
||||
{
|
||||
__m128i nequal = _mm_cmpeq_epi16(equal, zero);
|
||||
|
||||
*le = _mm_or_si128(_mm_and_si128(eq, equal), nequal);
|
||||
break;
|
||||
}
|
||||
case 3: // VGE
|
||||
{
|
||||
__m128i gt = _mm_cmpgt_epi16(vs, vt);
|
||||
__m128i equalsign = _mm_and_si128(eq, sign);
|
||||
|
||||
equal = _mm_andnot_si128(equalsign, equal);
|
||||
*le = _mm_or_si128(gt, equal);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue