mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-04-02 11:01:50 -04:00
Use vmaxvq_s32 to implement AnyZeroSignBit more efficiently on ARM64
This commit is contained in:
parent
a40cf12593
commit
a5116e1590
1 changed file with 10 additions and 0 deletions
|
@ -655,17 +655,27 @@ inline void TranslateAndScaleInplace(Mat4F32 &m, Vec4F32 scale, Vec4F32 translat
|
|||
}
|
||||
|
||||
// Returns true if at least one of the four 32-bit integer lanes of `value`
// has its sign bit clear (i.e. the lane is non-negative).
inline bool AnyZeroSignBit(Vec4S32 value) {
#if PPSSPP_ARCH(ARM64_NEON)
	// ARM64 has an across-vector signed maximum. The largest lane is
	// non-negative exactly when some lane has a zero sign bit.
	const int32_t largestLane = vmaxvq_s32(value.v);
	return largestLane >= 0;
#else
	// No across-vector reduction here: AND the high half into the low half,
	// then AND the two remaining lanes. The sign bit of the result is set
	// only if it was set in all four lanes.
	const int32x2_t foldedHalves = vand_s32(vget_low_s32(value.v), vget_high_s32(value.v));
	const int allLanesAnded = vget_lane_s32(foldedHalves, 0) & vget_lane_s32(foldedHalves, 1);
	// A non-negative 32-bit value is one whose top (sign) bit is zero.
	return allLanesAnded >= 0;
#endif
}
|
||||
|
||||
// Returns true if at least one of the four float lanes of `value` has its
// sign bit clear. The test is done purely on the bit pattern: the lanes are
// reinterpreted as 32-bit signed integers, so a clear float sign bit is the
// same as a non-negative integer lane.
inline bool AnyZeroSignBit(Vec4F32 value) {
	// Bitwise reinterpretation (no numeric conversion) of the float lanes.
	int32x4_t ival = vreinterpretq_s32_f32(value.v);
#if PPSSPP_ARCH(ARM64_NEON)
	// Shortcut on arm64: the across-vector signed maximum is non-negative
	// exactly when some lane has a zero sign bit.
	// Must operate on the reinterpreted integer lanes: vmaxvq_s32 takes an
	// int32x4_t, so handing it the float vector directly is a type mismatch.
	return vmaxvq_s32(ival) >= 0;
#else
	// AND the high half into the low half, then AND the two remaining lanes;
	// the sign bit survives only if it was set in all four lanes.
	int32x2_t prod = vand_s32(vget_low_s32(ival), vget_high_s32(ival));
	int mask = vget_lane_s32(prod, 0) & vget_lane_s32(prod, 1);
	return (mask & 0x80000000) == 0;
#endif
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue