Convert the rect implementation to CrossSIMD

This commit is contained in:
Henrik Rydgård 2024-12-21 11:33:37 +01:00
parent 73ae6da757
commit 5df88fc1aa
2 changed files with 22 additions and 31 deletions

View file

@ -153,6 +153,16 @@ struct Vec4U16 {
Vec4U16 CompareLT(Vec4U16 other) { return Vec4U16{ _mm_cmplt_epu16(v, other.v) }; }
};
// Eight unsigned 16-bit lanes held in one 128-bit SSE2 register.
struct Vec8U16 {
	__m128i v;

	// Returns a vector with all eight lanes set to zero.
	static Vec8U16 Zero() { return Vec8U16{ _mm_setzero_si128() }; }

	// Returns a vector with every lane set to value. The intrinsic takes a signed
	// 16-bit argument, so the cast only reinterprets the bit pattern.
	static Vec8U16 Splat(uint16_t value) { return Vec8U16{ _mm_set1_epi16((int16_t)value) }; }

	// Reads eight consecutive values starting at mem, using the unaligned load.
	// The cast keeps the const qualifier, matching the intrinsic's parameter type.
	static Vec8U16 Load(const uint16_t *mem) { return Vec8U16{ _mm_loadu_si128((const __m128i *)mem) }; }

	// Writes all eight lanes to mem, using the unaligned store.
	// Const-qualified since the vector itself is not modified.
	void Store(uint16_t *mem) const { _mm_storeu_si128((__m128i *)mem, v); }
};
Vec4U16 SignBits32ToMaskU16(Vec4S32 v) {
__m128i temp = _mm_srai_epi32(v.v, 31);
return Vec4U16 {
@ -342,6 +352,16 @@ Vec4U16 AndNot(Vec4U16 a, Vec4U16 inverted) {
return Vec4U16{ vand_u16(a.v, vmvn_u16(inverted.v)) };
}
// Eight unsigned 16-bit lanes held in one 128-bit NEON register.
struct Vec8U16 {
	uint16x8_t v;

	// Returns a vector with all eight lanes set to zero.
	static Vec8U16 Zero() { return Vec8U16{ vdupq_n_u16(0) }; }

	// Returns a vector with every lane set to value.
	static Vec8U16 Splat(uint16_t value) { return Vec8U16{ vdupq_n_u16(value) }; }

	// Reads eight consecutive values starting at mem.
	static Vec8U16 Load(const uint16_t *mem) { return Vec8U16{ vld1q_u16(mem) }; }

	// Writes all eight lanes to mem.
	// Const-qualified since the vector itself is not modified.
	void Store(uint16_t *mem) const { vst1q_u16(mem, v); }
};
#else
struct Vec4S32 {

View file

@ -28,43 +28,17 @@ void DepthRasterRect(uint16_t *dest, int stride, int x1, int y1, int x2, int y2,
return;
}
#if PPSSPP_ARCH(SSE2)
__m128i valueX8 = _mm_set1_epi16(depthValue);
for (int y = y1; y < y2; y++) {
__m128i *ptr = (__m128i *)(dest + stride * y + x1);
int w = x2 - x1;
switch (compareMode) {
case ZCompareMode::Always:
if (depthValue == 0) {
memset(ptr, 0, w * 2);
} else {
while (w >= 8) {
_mm_storeu_si128(ptr, valueX8);
ptr++;
w -= 8;
}
}
break;
// TODO: Trailer
default:
// TODO
break;
}
}
#elif PPSSPP_ARCH(ARM64_NEON)
uint16x8_t valueX8 = vdupq_n_u16(depthValue);
Vec8U16 valueX8 = Vec8U16::Splat(depthValue);
for (int y = y1; y < y2; y++) {
uint16_t *ptr = (uint16_t *)(dest + stride * y + x1);
int w = x2 - x1;
switch (compareMode) {
case ZCompareMode::Always:
if (depthValue == 0) {
memset(ptr, 0, w * 2);
} else {
while (w >= 8) {
vst1q_u16(ptr, valueX8);
valueX8.Store(ptr);
ptr += 8;
w -= 8;
}
@ -76,9 +50,6 @@ void DepthRasterRect(uint16_t *dest, int stride, int x1, int y1, int x2, int y2,
break;
}
}
#else
// Do nothing for now
#endif
}
// 16-byte-aligned table holding the four integers 0, 1, 2, 3.
// NOTE(review): presumably loaded as a 4-lane SIMD vector of per-lane offsets;
// the usage is outside this view, so confirm before relying on that.
alignas(16) static const int zero123[4] = {0, 1, 2, 3};