rsp_veq_vge_vlt_vne optimization patch from izy.

mm: Might be better to fold these indirect branches back
into the RSP vector function table if we're going to ask
the compiler to generate a jump table here.
Tyler J. Stachecki 2016-02-06 14:00:39 -05:00
parent 5e3c85df8d
commit c0d3be2561
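
A rough sketch of mm's note above: instead of switching on iw & 3 inside one handler, each encoding could get its own entry in the RSP vector function table, so the dispatch is resolved once at decode time. The names below (rsp_vector_fn, rsp_vector_cmp_table, the *_fn handlers) are hypothetical, and the eq/sign flag inputs of the real function are dropped to keep the sketch short:

#include <emmintrin.h>

/* Hypothetical handler type; the real rsp_veq_vge_vlt_vne also takes
 * iw, zero, *le, eq and sign, omitted here for brevity. */
typedef __m128i (*rsp_vector_fn)(__m128i vs, __m128i vt);

static __m128i rsp_vlt_fn(__m128i vs, __m128i vt) {
  return _mm_cmplt_epi16(vs, vt);
}

static __m128i rsp_veq_fn(__m128i vs, __m128i vt) {
  return _mm_cmpeq_epi16(vs, vt);
}

static __m128i rsp_vne_fn(__m128i vs, __m128i vt) {
  return _mm_cmpeq_epi16(_mm_cmpeq_epi16(vs, vt), _mm_setzero_si128());
}

static __m128i rsp_vge_fn(__m128i vs, __m128i vt) {
  return _mm_or_si128(_mm_cmpgt_epi16(vs, vt), _mm_cmpeq_epi16(vs, vt));
}

/* Indexed by the two low opcode bits (VLT=0, VEQ=1, VNE=2, VGE=3);
 * this layout is illustrative, not CEN64's actual table. */
static const rsp_vector_fn rsp_vector_cmp_table[4] = {
  rsp_vlt_fn, rsp_veq_fn, rsp_vne_fn, rsp_vge_fn,
};

Either way the cost is one indirect jump per instruction; the switch form below just asks the compiler to build and own the table instead.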

@@ -11,39 +11,38 @@ static inline __m128i rsp_veq_vge_vlt_vne(uint32_t iw, __m128i vs, __m128i vt,
   __m128i zero, __m128i *le, __m128i eq, __m128i sign) {
   __m128i equal = _mm_cmpeq_epi16(vs, vt);
-  // VNE & VGE
-  if (iw & 0x2) {
-    // VGE
-    if (iw & 0x1) {
-      __m128i gt = _mm_cmpgt_epi16(vs, vt);
-      __m128i equalsign = _mm_and_si128(eq, sign);
-      equal = _mm_andnot_si128(equalsign, equal);
-      *le = _mm_or_si128(gt, equal);
-    }
-    // VNE
-    else {
-      __m128i nequal = _mm_cmpeq_epi16(equal, zero);
-      *le = _mm_and_si128(eq, equal);
-      *le = _mm_or_si128(*le, nequal);
-    }
-  }
-  // VEQ & VLT
-  else {
-    // VEQ
-    if (iw & 0x1)
-      *le = _mm_andnot_si128(eq, equal);
-    // VLT
-    else {
+  switch(iw & 3)
+  {
+    case 0: // VLT
+    {
+      __m128i lt = _mm_cmplt_epi16(vs, vt);
+      equal = _mm_and_si128(eq, equal);
+      equal = _mm_and_si128(sign, equal);
+      *le = _mm_or_si128(lt, equal);
+      break;
+    }
+    case 1: // VEQ
+    {
+      *le = _mm_andnot_si128(eq, equal);
+      break;
+    }
+    case 2: // VNE
+    {
+      __m128i nequal = _mm_cmpeq_epi16(equal, zero);
+      *le = _mm_or_si128(_mm_and_si128(eq, equal), nequal);
+      break;
+    }
+    case 3: // VGE
+    {
+      __m128i gt = _mm_cmpgt_epi16(vs, vt);
+      __m128i equalsign = _mm_and_si128(eq, sign);
+      equal = _mm_andnot_si128(equalsign, equal);
+      *le = _mm_or_si128(gt, equal);
+      break;
+    }
+  }
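
A side note on the VNE case above: SSE comparisons produce canonical per-lane masks (0xFFFF or 0x0000), so _mm_cmpeq_epi16(equal, zero) is an idiom for inverting the equality mask. A minimal standalone check of that identity, with made-up test lanes:

#include <emmintrin.h>
#include <stdio.h>

int main(void) {
  __m128i zero = _mm_setzero_si128();
  __m128i vs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
  __m128i vt = _mm_setr_epi16(1, 0, 3, 0, 5, 0, 7, 0);

  /* Each lane of 'equal' is 0xFFFF where vs == vt, 0x0000 otherwise. */
  __m128i equal = _mm_cmpeq_epi16(vs, vt);

  /* Comparing the mask against zero flips every lane of it. */
  __m128i nequal = _mm_cmpeq_epi16(equal, zero);

  short lanes[8];
  _mm_storeu_si128((__m128i *) lanes, nequal);

  for (int i = 0; i < 8; i++)
    printf("lane %d: %s\n", i, lanes[i] ? "vs != vt" : "vs == vt");

  return 0;
}

The output alternates "vs == vt" and "vs != vt" across the eight lanes, matching how the patch derives nequal from equal without a second vector compare of vs against vt.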