mirror of
https://github.com/n64dev/cen64.git
synced 2025-04-02 10:31:54 -04:00
46 lines
1.2 KiB
C
46 lines
1.2 KiB
C
//
|
|
// arch/x86_64/rsp/vcr.h
|
|
//
|
|
// This file is subject to the terms and conditions defined in
|
|
// 'LICENSE', which is part of this source code package.
|
|
//
|
|
|
|
#include "common.h"
|
|
|
|
static inline __m128i rsp_vcr(__m128i vs, __m128i vt,
|
|
__m128i zero, __m128i *ge, __m128i *le) {
|
|
__m128i diff_sel_mask, diff_gez, diff_lez;
|
|
__m128i sign, sign_notvt;
|
|
|
|
// sign = (vs ^ vt) < 0
|
|
sign = _mm_xor_si128(vs, vt);
|
|
sign = _mm_srai_epi16(sign, 15);
|
|
|
|
// Compute le
|
|
diff_lez = _mm_and_si128(vs, sign);
|
|
diff_lez = _mm_add_epi16(diff_lez, vt);
|
|
*le = _mm_srai_epi16(diff_lez, 15);
|
|
|
|
// Compute ge
|
|
diff_gez = _mm_or_si128(vs, sign);
|
|
diff_gez = _mm_min_epi16(diff_gez, vt);
|
|
*ge = _mm_cmpeq_epi16(diff_gez, vt);
|
|
|
|
// sign_notvt = sn ? ~vt : vt
|
|
sign_notvt = _mm_xor_si128(vt, sign);
|
|
|
|
// Compute result:
|
|
#ifdef __SSE4_1__
|
|
diff_sel_mask = _mm_blendv_epi8(*ge, *le, sign);
|
|
return _mm_blendv_epi8(vs, sign_notvt, diff_sel_mask);
|
|
#else
|
|
diff_sel_mask = _mm_sub_epi16(*le, *ge);
|
|
diff_sel_mask = _mm_and_si128(diff_sel_mask, sign);
|
|
diff_sel_mask = _mm_add_epi16(diff_sel_mask, *ge);
|
|
|
|
zero = _mm_sub_epi16(sign_notvt, vs);
|
|
zero = _mm_and_si128(zero, diff_sel_mask);
|
|
return _mm_add_epi16(zero, vs);
|
|
#endif
|
|
}
|
|
|