mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Convert the rect implementation to CrossSIMD
This commit is contained in:
parent
73ae6da757
commit
5df88fc1aa
2 changed files with 22 additions and 31 deletions
|
@ -153,6 +153,16 @@ struct Vec4U16 {
|
|||
Vec4U16 CompareLT(Vec4U16 other) { return Vec4U16{ _mm_cmplt_epu16(v, other.v) }; }
|
||||
};
|
||||
|
||||
// Eight unsigned 16-bit lanes stored in one SSE2 128-bit integer register.
struct Vec8U16 {
	__m128i v;

	// Returns a vector with every lane set to zero.
	static Vec8U16 Zero() { return Vec8U16{ _mm_setzero_si128() }; }
	// Returns a vector with every lane set to value.
	// The cast only adapts to the signed parameter type of the intrinsic.
	static Vec8U16 Splat(uint16_t value) { return Vec8U16{ _mm_set1_epi16((int16_t)value) }; }

	// Reads 8 consecutive uint16_t values starting at mem using an unaligned load.
	// The pointer cast keeps the const qualifier, since the source is only read.
	static Vec8U16 Load(const uint16_t *mem) { return Vec8U16{ _mm_loadu_si128((const __m128i *)mem) }; }
	// Writes the 8 lanes to mem using an unaligned store.
	// Declared const because the vector itself is not modified.
	void Store(uint16_t *mem) const { _mm_storeu_si128((__m128i *)mem, v); }
};
|
||||
|
||||
Vec4U16 SignBits32ToMaskU16(Vec4S32 v) {
|
||||
__m128i temp = _mm_srai_epi32(v.v, 31);
|
||||
return Vec4U16 {
|
||||
|
@ -342,6 +352,16 @@ Vec4U16 AndNot(Vec4U16 a, Vec4U16 inverted) {
|
|||
return Vec4U16{ vand_u16(a.v, vmvn_u16(inverted.v)) };
|
||||
}
|
||||
|
||||
// Eight unsigned 16-bit lanes stored in one NEON 128-bit register.
struct Vec8U16 {
	uint16x8_t v;

	// Returns a vector with every lane set to zero.
	static Vec8U16 Zero() { return Vec8U16{ vdupq_n_u16(0) }; }
	// Returns a vector with every lane set to value.
	static Vec8U16 Splat(uint16_t value) { return Vec8U16{ vdupq_n_u16(value) }; }

	// Reads 8 consecutive uint16_t values starting at mem.
	static Vec8U16 Load(const uint16_t *mem) { return Vec8U16{ vld1q_u16(mem) }; }
	// Writes the 8 lanes to mem.
	// Declared const because the vector itself is not modified.
	void Store(uint16_t *mem) const { vst1q_u16(mem, v); }
};
|
||||
|
||||
#else
|
||||
|
||||
struct Vec4S32 {
|
||||
|
|
|
@ -28,43 +28,17 @@ void DepthRasterRect(uint16_t *dest, int stride, int x1, int y1, int x2, int y2,
|
|||
return;
|
||||
}
|
||||
|
||||
#if PPSSPP_ARCH(SSE2)
|
||||
__m128i valueX8 = _mm_set1_epi16(depthValue);
|
||||
for (int y = y1; y < y2; y++) {
|
||||
__m128i *ptr = (__m128i *)(dest + stride * y + x1);
|
||||
int w = x2 - x1;
|
||||
switch (compareMode) {
|
||||
case ZCompareMode::Always:
|
||||
if (depthValue == 0) {
|
||||
memset(ptr, 0, w * 2);
|
||||
} else {
|
||||
while (w >= 8) {
|
||||
_mm_storeu_si128(ptr, valueX8);
|
||||
ptr++;
|
||||
w -= 8;
|
||||
}
|
||||
}
|
||||
break;
|
||||
// TODO: Trailer
|
||||
default:
|
||||
// TODO
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
uint16x8_t valueX8 = vdupq_n_u16(depthValue);
|
||||
Vec8U16 valueX8 = Vec8U16::Splat(depthValue);
|
||||
for (int y = y1; y < y2; y++) {
|
||||
uint16_t *ptr = (uint16_t *)(dest + stride * y + x1);
|
||||
int w = x2 - x1;
|
||||
|
||||
switch (compareMode) {
|
||||
case ZCompareMode::Always:
|
||||
if (depthValue == 0) {
|
||||
memset(ptr, 0, w * 2);
|
||||
} else {
|
||||
while (w >= 8) {
|
||||
vst1q_u16(ptr, valueX8);
|
||||
valueX8.Store(ptr);
|
||||
ptr += 8;
|
||||
w -= 8;
|
||||
}
|
||||
|
@ -76,9 +50,6 @@ void DepthRasterRect(uint16_t *dest, int stride, int x1, int y1, int x2, int y2,
|
|||
break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
// Do nothing for now
|
||||
#endif
|
||||
}
|
||||
|
||||
// 16-byte aligned table holding the values 0, 1, 2, 3.
// NOTE(review): presumably loaded as a 4-lane vector of per-lane indices; its users are not visible here, confirm.
alignas(16) static const int zero123[4] = {0, 1, 2, 3};
|
||||
|
|
Loading…
Add table
Reference in a new issue