From 47728528d7b8865e7cae980661dfd82f9f18a4cc Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets"
Date: Sun, 16 Mar 2014 17:56:34 -0700
Subject: [PATCH] softgpu: Use SSE in Vec?::Length().

Minor perf boost but if I do everything in Vec things get slower.
---
Review note: the SSE paths now write the result with _mm_store_ss instead of
_mm_store_ps. ret is a single float, while _mm_store_ps writes four floats and
requires a 16-byte-aligned destination. The post-image blob id on the index
line below is from the original submission and was not regenerated.

 GPU/Math3D.cpp | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/GPU/Math3D.cpp b/GPU/Math3D.cpp
index a521628588..48dff42961 100644
--- a/GPU/Math3D.cpp
+++ b/GPU/Math3D.cpp
@@ -22,7 +22,18 @@ namespace Math3D {
 template<>
 float Vec2::Length() const
 {
+#if defined(_M_SSE)
+	// Square every lane, then add lane 1 onto lane 0 to get the squared length.
+	float ret;
+	const __m128 sq = _mm_mul_ps(vec, vec);
+	const __m128 r2 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 1));
+	const __m128 res = _mm_add_ss(sq, r2);
+	// Scalar store: ret is one float, so only lane 0 may be written.
+	_mm_store_ss(&ret, _mm_sqrt_ss(res));
+	return ret;
+#else
 	return sqrtf(Length2());
+#endif
 }
 
 template<>
@@ -88,7 +99,19 @@ unsigned int Vec3::ToRGB() const
 template<>
 float Vec3::Length() const
 {
+#if defined(_M_SSE)
+	// Square every lane, then add lanes 1 and 2 onto lane 0; lane 3 is ignored.
+	float ret;
+	const __m128 sq = _mm_mul_ps(vec, vec);
+	const __m128 r2 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 1));
+	const __m128 r3 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 2));
+	const __m128 res = _mm_add_ss(sq, _mm_add_ss(r2, r3));
+	// Scalar store: ret is one float, so only lane 0 may be written.
+	_mm_store_ss(&ret, _mm_sqrt_ss(res));
+	return ret;
+#else
 	return sqrtf(Length2());
+#endif
 }
 
 template<>
@@ -156,7 +179,18 @@ unsigned int Vec4::ToRGBA() const
 template<>
 float Vec4::Length() const
 {
+#if defined(_M_SSE)
+	// Pairwise reduction: fold lanes 2/3 onto lanes 0/1, then lane 1 onto lane 0.
+	float ret;
+	const __m128 sq = _mm_mul_ps(vec, vec);
+	const __m128 r2 = _mm_add_ps(sq, _mm_movehl_ps(sq, sq));
+	const __m128 res = _mm_add_ss(r2, _mm_shuffle_ps(r2, r2, _MM_SHUFFLE(0, 0, 0, 1)));
+	// Scalar store: ret is one float, so only lane 0 may be written.
+	_mm_store_ss(&ret, _mm_sqrt_ss(res));
+	return ret;
+#else
 	return sqrtf(Length2());
+#endif
 }
 
 template<>