// // arch/x86_64/rsp/vrsq.c // // This file is subject to the terms and conditions defined in // 'LICENSE', which is part of this source code package. // #include "common.h" #include "common/reciprocal.h" #include "rsp/cpu.h" #include "rsp/rsp.h" #include // Mask table for vrsq(LH) functions. cen64_align(static const uint16_t vrsq_mask_table[8][8], 16) = { {~0, 0, 0, 0, 0, 0, 0, 0}, {0, ~0, 0, 0, 0, 0, 0, 0}, {0, 0, ~0, 0, 0, 0, 0, 0}, {0, 0, 0, ~0, 0, 0, 0, 0}, {0, 0, 0, 0, ~0, 0, 0, 0}, {0, 0, 0, 0, 0, ~0, 0, 0}, {0, 0, 0, 0, 0, 0, ~0, 0}, {0, 0, 0, 0, 0, 0, 0, ~0} }; __m128i rsp_vrsq(struct rsp *rsp, int dp, unsigned src, unsigned e, unsigned dest, unsigned de) { uint32_t dp_input, sp_input; int32_t input, result; int16_t vt; int32_t input_mask, data; unsigned shift, idx; // Get the element from VT. vt = rsp->cp2.regs[src].e[e & 0x7]; dp_input = ((uint32_t) rsp->cp2.div_in << 16) | (uint16_t) vt; sp_input = vt; input = (dp) ? dp_input : sp_input; input_mask = input >> 31; data = input ^ input_mask; if (input > -32768) data -= input_mask; // Handle edge cases. if (data == 0) result = 0x7fffFFFFU; else if (input == -32768) result = 0xffff0000U; // Main case: compute the reciprocal. else { //TODO: Clean this up. #ifdef _MSC_VER unsigned long bsf_index; _BitScanReverse(&bsf_index, data); shift = 31 - bsf_index; #else shift = __builtin_clz(data); #endif idx = (((unsigned long long) data << shift) & 0x7FC00000U) >> 22; idx = ((idx | 0x200) & 0x3FE) | (shift % 2); result = rsp_reciprocal_rom[idx]; result = ((0x10000 | result) << 14) >> ((31 - shift) >> 1); result = result ^ input_mask; } // Write out the results. rsp->cp2.div_out = result >> 16; rsp->cp2.regs[dest].e[de & 0x7] = result; return rsp_vect_load_unshuffled_operand(rsp->cp2.regs[dest].e); } __m128i rsp_vrsqh(struct rsp *rsp, unsigned src, unsigned e, unsigned dest, unsigned de) { __m128i vd, vd_mask, b_result; int16_t elements[8]; // Get the element from VT. memcpy(elements, rsp->cp2.regs + src, sizeof(elements)); rsp->cp2.div_in = elements[e]; // Write out the upper part of the result. vd_mask = _mm_load_si128((__m128i *) vrsq_mask_table[de]); vd = _mm_load_si128((__m128i *) (rsp->cp2.regs + dest)); vd = _mm_andnot_si128(vd_mask, vd); b_result = _mm_set1_epi16(rsp->cp2.div_out); b_result = _mm_and_si128(vd_mask, b_result); return _mm_or_si128(b_result, vd); }