mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Don't use aligned loads in non-inlined funcs.
I want these values to stay in registers, but that's not realistic for arguments to non-inlined functions, so use unaligned loads there. Force inline the others. May help #5699.
This commit is contained in:
parent
a26e6ce4b6
commit
56b83af1f0
2 changed files with 13 additions and 10 deletions
|
@ -24,10 +24,11 @@ float Vec2<float>::Length() const
|
||||||
{
|
{
|
||||||
#if defined(_M_SSE)
|
#if defined(_M_SSE)
|
||||||
float ret;
|
float ret;
|
||||||
__m128 sq = _mm_mul_ps(vec, vec);
|
__m128 xy = _mm_loadu_ps(&x);
|
||||||
|
__m128 sq = _mm_mul_ps(xy, xy);
|
||||||
const __m128 r2 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 1));
|
const __m128 r2 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 1));
|
||||||
const __m128 res = _mm_add_ss(sq, r2);
|
const __m128 res = _mm_add_ss(sq, r2);
|
||||||
_mm_store_ps(&ret, _mm_sqrt_ss(res));
|
_mm_store_ss(&ret, _mm_sqrt_ss(res));
|
||||||
return ret;
|
return ret;
|
||||||
#else
|
#else
|
||||||
return sqrtf(Length2());
|
return sqrtf(Length2());
|
||||||
|
@ -71,11 +72,12 @@ float Vec3<float>::Length() const
|
||||||
{
|
{
|
||||||
#if defined(_M_SSE)
|
#if defined(_M_SSE)
|
||||||
float ret;
|
float ret;
|
||||||
__m128 sq = _mm_mul_ps(vec, vec);
|
__m128 xyz = _mm_loadu_ps(&x);
|
||||||
|
__m128 sq = _mm_mul_ps(xyz, xyz);
|
||||||
const __m128 r2 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 1));
|
const __m128 r2 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 1));
|
||||||
const __m128 r3 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 2));
|
const __m128 r3 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 2));
|
||||||
const __m128 res = _mm_add_ss(sq, _mm_add_ss(r2, r3));
|
const __m128 res = _mm_add_ss(sq, _mm_add_ss(r2, r3));
|
||||||
_mm_store_ps(&ret, _mm_sqrt_ss(res));
|
_mm_store_ss(&ret, _mm_sqrt_ss(res));
|
||||||
return ret;
|
return ret;
|
||||||
#else
|
#else
|
||||||
return sqrtf(Length2());
|
return sqrtf(Length2());
|
||||||
|
@ -185,10 +187,11 @@ float Vec4<float>::Length() const
|
||||||
{
|
{
|
||||||
#if defined(_M_SSE)
|
#if defined(_M_SSE)
|
||||||
float ret;
|
float ret;
|
||||||
__m128 sq = _mm_mul_ps(vec, vec);
|
__m128 xyzw = _mm_loadu_ps(&x);
|
||||||
|
__m128 sq = _mm_mul_ps(xyzw, xyzw);
|
||||||
const __m128 r2 = _mm_add_ps(sq, _mm_movehl_ps(sq, sq));
|
const __m128 r2 = _mm_add_ps(sq, _mm_movehl_ps(sq, sq));
|
||||||
const __m128 res = _mm_add_ss(r2, _mm_shuffle_ps(r2, r2, _MM_SHUFFLE(0, 0, 0, 1)));
|
const __m128 res = _mm_add_ss(r2, _mm_shuffle_ps(r2, r2, _MM_SHUFFLE(0, 0, 0, 1)));
|
||||||
_mm_store_ps(&ret, _mm_sqrt_ss(res));
|
_mm_store_ss(&ret, _mm_sqrt_ss(res));
|
||||||
return ret;
|
return ret;
|
||||||
#else
|
#else
|
||||||
return sqrtf(Length2());
|
return sqrtf(Length2());
|
||||||
|
|
|
@ -919,7 +919,7 @@ inline Vec3<int> Vec3<int>::FromRGB(unsigned int rgb)
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
inline unsigned int Vec3<float>::ToRGB() const
|
__forceinline unsigned int Vec3<float>::ToRGB() const
|
||||||
{
|
{
|
||||||
#if defined(_M_SSE)
|
#if defined(_M_SSE)
|
||||||
__m128i c = _mm_cvtps_epi32(_mm_mul_ps(vec, _mm_set_ps1(255.0f)));
|
__m128i c = _mm_cvtps_epi32(_mm_mul_ps(vec, _mm_set_ps1(255.0f)));
|
||||||
|
@ -933,7 +933,7 @@ inline unsigned int Vec3<float>::ToRGB() const
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
inline unsigned int Vec3<int>::ToRGB() const
|
__forceinline unsigned int Vec3<int>::ToRGB() const
|
||||||
{
|
{
|
||||||
#if defined(_M_SSE)
|
#if defined(_M_SSE)
|
||||||
__m128i c16 = _mm_packs_epi32(ivec, ivec);
|
__m128i c16 = _mm_packs_epi32(ivec, ivec);
|
||||||
|
@ -973,7 +973,7 @@ inline Vec4<int> Vec4<int>::FromRGBA(unsigned int rgba)
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
inline unsigned int Vec4<float>::ToRGBA() const
|
__forceinline unsigned int Vec4<float>::ToRGBA() const
|
||||||
{
|
{
|
||||||
#if defined(_M_SSE)
|
#if defined(_M_SSE)
|
||||||
__m128i c = _mm_cvtps_epi32(_mm_mul_ps(vec, _mm_set_ps1(255.0f)));
|
__m128i c = _mm_cvtps_epi32(_mm_mul_ps(vec, _mm_set_ps1(255.0f)));
|
||||||
|
@ -988,7 +988,7 @@ inline unsigned int Vec4<float>::ToRGBA() const
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
inline unsigned int Vec4<int>::ToRGBA() const
|
__forceinline unsigned int Vec4<int>::ToRGBA() const
|
||||||
{
|
{
|
||||||
#if defined(_M_SSE)
|
#if defined(_M_SSE)
|
||||||
__m128i c16 = _mm_packs_epi32(ivec, ivec);
|
__m128i c16 = _mm_packs_epi32(ivec, ivec);
|
||||||
|
|
Loading…
Add table
Reference in a new issue