From 3bd2441f4a400049f2a7197ed5d16a1ea88c9358 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 22 Dec 2024 09:46:47 +0100 Subject: [PATCH] CrossSIMD: Add SwapLowerElements to Vec4F32 --- Common/Math/CrossSIMD.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Common/Math/CrossSIMD.h b/Common/Math/CrossSIMD.h index 0824ee0e10..4f0c48cbe2 100644 --- a/Common/Math/CrossSIMD.h +++ b/Common/Math/CrossSIMD.h @@ -111,6 +111,13 @@ struct Vec4F32 { return Vec4F32{ _mm_and_ps(v, _mm_load_ps((float *)mask)) }; } + // Swaps the two lower elements (lanes 0 and 1); the two upper ones are left in place. Useful for reversing triangles. + Vec4F32 SwapLowerElements() { + return Vec4F32{ + _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 2, 0, 1)) + }; + } + inline Vec4F32 AsVec3ByMatrix44(const Mat4F32 &m) { return Vec4F32{ _mm_add_ps( _mm_add_ps( @@ -210,8 +217,8 @@ struct Vec4S32 { // Swaps the two lower elements, but NOT the two upper ones. Useful for reversing triangles.. // This is quite awkward on ARM64 :/ Maybe there's a better solution? Vec4S32 SwapLowerElements() { - float32x2_t upper = vget_high_s32(v); - float32x2_t lowerSwapped = vrev64_s32(vget_low_s32(v)); + int32x2_t upper = vget_high_s32(v); + int32x2_t lowerSwapped = vrev64_s32(vget_low_s32(v)); return Vec4S32{ vcombine_s32(lowerSwapped, upper) }; }; @@ -281,6 +288,13 @@ struct Vec4F32 { return Vec4F32{ vsetq_lane_f32(0.0f, v, 3) }; } + // Swaps the two lower elements, but NOT the two upper ones. Useful for reversing triangles.. + // This is quite awkward on ARM64 :/ Maybe there's a better solution? + Vec4F32 SwapLowerElements() { + float32x2_t lowerSwapped = vrev64_f32(vget_low_f32(v)); + return Vec4F32{ vcombine_f32(lowerSwapped, vget_high_f32(v)) }; + }; + // One of many possible solutions. Sometimes we could also use vld4q_f32 probably.. static void Transpose(Vec4F32 &col0, Vec4F32 &col1, Vec4F32 &col2, Vec4F32 &col3) { #if PPSSPP_ARCH(ARM64_NEON)