mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Don't use aligned loads in non-inlined funcs.
I want these values to stay in registers, but that's not realistic for function arguments, so the non-inlined functions now use unaligned loads. Force-inline the other functions. May help #5699.
This commit is contained in:
parent
a26e6ce4b6
commit
56b83af1f0
2 changed files with 13 additions and 10 deletions
|
@ -24,10 +24,11 @@ float Vec2<float>::Length() const
|
|||
{
|
||||
#if defined(_M_SSE)
|
||||
float ret;
|
||||
__m128 sq = _mm_mul_ps(vec, vec);
|
||||
__m128 xy = _mm_loadu_ps(&x);
|
||||
__m128 sq = _mm_mul_ps(xy, xy);
|
||||
const __m128 r2 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 1));
|
||||
const __m128 res = _mm_add_ss(sq, r2);
|
||||
_mm_store_ps(&ret, _mm_sqrt_ss(res));
|
||||
_mm_store_ss(&ret, _mm_sqrt_ss(res));
|
||||
return ret;
|
||||
#else
|
||||
return sqrtf(Length2());
|
||||
|
@ -71,11 +72,12 @@ float Vec3<float>::Length() const
|
|||
{
|
||||
#if defined(_M_SSE)
|
||||
float ret;
|
||||
__m128 sq = _mm_mul_ps(vec, vec);
|
||||
__m128 xyz = _mm_loadu_ps(&x);
|
||||
__m128 sq = _mm_mul_ps(xyz, xyz);
|
||||
const __m128 r2 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 1));
|
||||
const __m128 r3 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 2));
|
||||
const __m128 res = _mm_add_ss(sq, _mm_add_ss(r2, r3));
|
||||
_mm_store_ps(&ret, _mm_sqrt_ss(res));
|
||||
_mm_store_ss(&ret, _mm_sqrt_ss(res));
|
||||
return ret;
|
||||
#else
|
||||
return sqrtf(Length2());
|
||||
|
@ -185,10 +187,11 @@ float Vec4<float>::Length() const
|
|||
{
|
||||
#if defined(_M_SSE)
|
||||
float ret;
|
||||
__m128 sq = _mm_mul_ps(vec, vec);
|
||||
__m128 xyzw = _mm_loadu_ps(&x);
|
||||
__m128 sq = _mm_mul_ps(xyzw, xyzw);
|
||||
const __m128 r2 = _mm_add_ps(sq, _mm_movehl_ps(sq, sq));
|
||||
const __m128 res = _mm_add_ss(r2, _mm_shuffle_ps(r2, r2, _MM_SHUFFLE(0, 0, 0, 1)));
|
||||
_mm_store_ps(&ret, _mm_sqrt_ss(res));
|
||||
_mm_store_ss(&ret, _mm_sqrt_ss(res));
|
||||
return ret;
|
||||
#else
|
||||
return sqrtf(Length2());
|
||||
|
|
|
@ -919,7 +919,7 @@ inline Vec3<int> Vec3<int>::FromRGB(unsigned int rgb)
|
|||
}
|
||||
|
||||
template<>
|
||||
inline unsigned int Vec3<float>::ToRGB() const
|
||||
__forceinline unsigned int Vec3<float>::ToRGB() const
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
__m128i c = _mm_cvtps_epi32(_mm_mul_ps(vec, _mm_set_ps1(255.0f)));
|
||||
|
@ -933,7 +933,7 @@ inline unsigned int Vec3<float>::ToRGB() const
|
|||
}
|
||||
|
||||
template<>
|
||||
inline unsigned int Vec3<int>::ToRGB() const
|
||||
__forceinline unsigned int Vec3<int>::ToRGB() const
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
__m128i c16 = _mm_packs_epi32(ivec, ivec);
|
||||
|
@ -973,7 +973,7 @@ inline Vec4<int> Vec4<int>::FromRGBA(unsigned int rgba)
|
|||
}
|
||||
|
||||
template<>
|
||||
inline unsigned int Vec4<float>::ToRGBA() const
|
||||
__forceinline unsigned int Vec4<float>::ToRGBA() const
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
__m128i c = _mm_cvtps_epi32(_mm_mul_ps(vec, _mm_set_ps1(255.0f)));
|
||||
|
@ -988,7 +988,7 @@ inline unsigned int Vec4<float>::ToRGBA() const
|
|||
}
|
||||
|
||||
template<>
|
||||
inline unsigned int Vec4<int>::ToRGBA() const
|
||||
__forceinline unsigned int Vec4<int>::ToRGBA() const
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
__m128i c16 = _mm_packs_epi32(ivec, ivec);
|
||||
|
|
Loading…
Add table
Reference in a new issue