Then add the early-outs for NEON too.

This commit is contained in:
Henrik Rydgård 2023-06-13 11:47:31 +02:00
parent 9647872a09
commit 0eb3702ecb

View file

@@ -163,19 +163,23 @@ void IndexGenerator::AddStrip(int numVerts, bool clockwise) {
u16 *dst = inds_;
uint16x8_t offsets0 = vaddq_u16(ibase8, vld1q_u16(offsets));
vst1q_u16(dst, offsets0);
uint16x8_t offsets1 = vaddq_u16(ibase8, vld1q_u16(offsets + 8));
vst1q_u16(dst + 8, offsets1);
uint16x8_t offsets2 = vaddq_u16(ibase8, vld1q_u16(offsets + 16));
vst1q_u16(dst + 16, offsets2);
uint16x8_t increment = vdupq_n_u16(8);
for (int i = 1; i < numChunks; i++) {
dst += 3 * 8;
offsets0 = vaddq_u16(offsets0, increment);
offsets1 = vaddq_u16(offsets1, increment);
offsets2 = vaddq_u16(offsets2, increment);
vst1q_u16(dst, offsets0);
if (numTris > 2) {
uint16x8_t offsets1 = vaddq_u16(ibase8, vld1q_u16(offsets + 8));
vst1q_u16(dst + 8, offsets1);
vst1q_u16(dst + 16, offsets2);
if (numTris > 5) {
uint16x8_t offsets2 = vaddq_u16(ibase8, vld1q_u16(offsets + 16));
vst1q_u16(dst + 16, offsets2);
uint16x8_t increment = vdupq_n_u16(8);
for (int i = 1; i < numChunks; i++) {
dst += 3 * 8;
offsets0 = vaddq_u16(offsets0, increment);
offsets1 = vaddq_u16(offsets1, increment);
offsets2 = vaddq_u16(offsets2, increment);
vst1q_u16(dst, offsets0);
vst1q_u16(dst + 8, offsets1);
vst1q_u16(dst + 16, offsets2);
}
}
}
inds_ += numTris * 3;
#else