cen64/arch/x86_64/fpu/round_i64_f32.h
Tyler Stachecki 10a5983c0c Add support for SSE4 FPU acceleration.
0d4a5de2f6 is wrong; we can take
advantage of SSE4 rounding intrinsics.
2014-11-16 14:06:34 -05:00

23 lines
541 B
C

//
// arch/x86_64/fpu/round_i64_f32.h
//
// This file is subject to the terms and conditions defined in
// 'LICENSE', which is part of this source code package.
//
#include "common.h"
#include <smmintrin.h>
#include <string.h>
//
// Rounds a single-precision value to an integral value using the
// round-to-nearest mode, converts it to a 64-bit integer, and stores it.
//
//   fs: points to the raw 32-bit pattern of the source float.
//   fd: receives the converted 64-bit integer result.
//
static inline void fpu_round_i64_f32(const uint32_t *fs, uint64_t *fd) {
  float input;

  // Copy the bits out rather than casting the pointer, so that the
  // uint32_t storage is never accessed through a float lvalue.
  memcpy(&input, fs, sizeof(input));

  // Put the value in the low lane and round that lane to an integral value.
  const __m128 loaded = _mm_set_ss(input);
  const __m128 rounded = _mm_round_ss(loaded, loaded,
    _MM_FROUND_TO_NEAREST_INT);

  // Convert the (now integral) low lane to a 64-bit integer.
  *fd = _mm_cvtss_si64(rounded);
}