From 0eb3702ecbab5655d5fe7ecc25d8149c7663993c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 13 Jun 2023 11:47:31 +0200 Subject: [PATCH] Then add the early-outs for NEON too. --- GPU/Common/IndexGenerator.cpp | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/GPU/Common/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp index 436ddd6b5a..7eb4c80079 100644 --- a/GPU/Common/IndexGenerator.cpp +++ b/GPU/Common/IndexGenerator.cpp @@ -163,19 +163,23 @@ void IndexGenerator::AddStrip(int numVerts, bool clockwise) { u16 *dst = inds_; uint16x8_t offsets0 = vaddq_u16(ibase8, vld1q_u16(offsets)); vst1q_u16(dst, offsets0); - uint16x8_t offsets1 = vaddq_u16(ibase8, vld1q_u16(offsets + 8)); - vst1q_u16(dst + 8, offsets1); - uint16x8_t offsets2 = vaddq_u16(ibase8, vld1q_u16(offsets + 16)); - vst1q_u16(dst + 16, offsets2); - uint16x8_t increment = vdupq_n_u16(8); - for (int i = 1; i < numChunks; i++) { - dst += 3 * 8; - offsets0 = vaddq_u16(offsets0, increment); - offsets1 = vaddq_u16(offsets1, increment); - offsets2 = vaddq_u16(offsets2, increment); - vst1q_u16(dst, offsets0); + if (numTris > 2) { + uint16x8_t offsets1 = vaddq_u16(ibase8, vld1q_u16(offsets + 8)); vst1q_u16(dst + 8, offsets1); - vst1q_u16(dst + 16, offsets2); + if (numTris > 5) { + uint16x8_t offsets2 = vaddq_u16(ibase8, vld1q_u16(offsets + 16)); + vst1q_u16(dst + 16, offsets2); + uint16x8_t increment = vdupq_n_u16(8); + for (int i = 1; i < numChunks; i++) { + dst += 3 * 8; + offsets0 = vaddq_u16(offsets0, increment); + offsets1 = vaddq_u16(offsets1, increment); + offsets2 = vaddq_u16(offsets2, increment); + vst1q_u16(dst, offsets0); + vst1q_u16(dst + 8, offsets1); + vst1q_u16(dst + 16, offsets2); + } + } } inds_ += numTris * 3; #else