From 5df88fc1aab1feb92e60a6b14eec01f3b56e85b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sat, 21 Dec 2024 11:33:37 +0100 Subject: [PATCH] Convert the rect implementation to CrossSIMD --- Common/Math/CrossSIMD.h | 20 ++++++++++++++++++++ GPU/Common/DepthRaster.cpp | 33 ++------------------------------- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/Common/Math/CrossSIMD.h b/Common/Math/CrossSIMD.h index 982c859439..83ac18add4 100644 --- a/Common/Math/CrossSIMD.h +++ b/Common/Math/CrossSIMD.h @@ -153,6 +153,16 @@ struct Vec4U16 { Vec4U16 CompareLT(Vec4U16 other) { return Vec4U16{ _mm_cmplt_epu16(v, other.v) }; } }; +struct Vec8U16 { + __m128i v; + + static Vec8U16 Zero() { return Vec8U16{ _mm_setzero_si128() }; } + static Vec8U16 Splat(uint16_t value) { return Vec8U16{ _mm_set1_epi16((int16_t)value) }; } + + static Vec8U16 Load(const uint16_t *mem) { return Vec8U16{ _mm_loadu_si128((__m128i *)mem) }; } + void Store(uint16_t *mem) { _mm_storeu_si128((__m128i *)mem, v); } +}; + Vec4U16 SignBits32ToMaskU16(Vec4S32 v) { __m128i temp = _mm_srai_epi32(v.v, 31); return Vec4U16 { @@ -342,6 +352,16 @@ Vec4U16 AndNot(Vec4U16 a, Vec4U16 inverted) { return Vec4U16{ vand_u16(a.v, vmvn_u16(inverted.v)) }; } +struct Vec8U16 { + uint16x8_t v; + + static Vec8U16 Zero() { return Vec8U16{ vdupq_n_u16(0) }; } + static Vec8U16 Splat(uint16_t value) { return Vec8U16{ vdupq_n_u16(value) }; } + + static Vec8U16 Load(const uint16_t *mem) { return Vec8U16{ vld1q_u16(mem) }; } + void Store(uint16_t *mem) { vst1q_u16(mem, v); } +}; + #else struct Vec4S32 { diff --git a/GPU/Common/DepthRaster.cpp b/GPU/Common/DepthRaster.cpp index 7075ebd9f7..4443974059 100644 --- a/GPU/Common/DepthRaster.cpp +++ b/GPU/Common/DepthRaster.cpp @@ -28,43 +28,17 @@ void DepthRasterRect(uint16_t *dest, int stride, int x1, int y1, int x2, int y2, return; } -#if PPSSPP_ARCH(SSE2) - __m128i valueX8 = _mm_set1_epi16(depthValue); - for (int y = y1; y < y2; y++) { - __m128i *ptr = (__m128i *)(dest + stride * y + x1); - int w = x2 - x1; - switch (compareMode) { - case ZCompareMode::Always: - if (depthValue == 0) { - memset(ptr, 0, w * 2); - } else { - while (w >= 8) { - _mm_storeu_si128(ptr, valueX8); - ptr++; - w -= 8; - } - } - break; - // TODO: Trailer - default: - // TODO - break; - } - } - -#elif PPSSPP_ARCH(ARM64_NEON) - uint16x8_t valueX8 = vdupq_n_u16(depthValue); + Vec8U16 valueX8 = Vec8U16::Splat(depthValue); for (int y = y1; y < y2; y++) { uint16_t *ptr = (uint16_t *)(dest + stride * y + x1); int w = x2 - x1; - switch (compareMode) { case ZCompareMode::Always: if (depthValue == 0) { memset(ptr, 0, w * 2); } else { while (w >= 8) { - vst1q_u16(ptr, valueX8); + valueX8.Store(ptr); ptr += 8; w -= 8; } @@ -76,9 +50,6 @@ void DepthRasterRect(uint16_t *dest, int stride, int x1, int y1, int x2, int y2, break; } } -#else - // Do nothing for now -#endif } alignas(16) static const int zero123[4] = {0, 1, 2, 3};