diff --git a/Common/Math/CrossSIMD.h b/Common/Math/CrossSIMD.h index ce42d9dcd4..c2851b5775 100644 --- a/Common/Math/CrossSIMD.h +++ b/Common/Math/CrossSIMD.h @@ -38,7 +38,7 @@ inline float32x4_t vmulq_laneq_f32(float32x4_t a, float32x4_t b, int lane) { case 0: return vmulq_lane_f32(a, vget_low_f32(b), 0); case 1: return vmulq_lane_f32(a, vget_low_f32(b), 1); case 2: return vmulq_lane_f32(a, vget_high_f32(b), 0); - case 3: return vmulq_lane_f32(a, vget_high_f32(b), 1); + default: return vmulq_lane_f32(a, vget_high_f32(b), 1); } } @@ -47,8 +47,12 @@ inline float32x4_t vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c, case 0: return vmlaq_lane_f32(a, b, vget_low_f32(c), 0); case 1: return vmlaq_lane_f32(a, b, vget_low_f32(c), 1); case 2: return vmlaq_lane_f32(a, b, vget_high_f32(c), 0); - case 3: return vmlaq_lane_f32(a, b, vget_high_f32(c), 1); + default: return vmlaq_lane_f32(a, b, vget_high_f32(c), 1); } } +inline uint32x4_t vcgezq_f32(float32x4_t v) { + return vcgeq_f32(v, vdupq_n_f32(0.0f)); +} + #endif diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 52b48defb3..71ead60a02 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -444,11 +444,10 @@ bool DrawEngineCommon::TestBoundingBoxFast(const void *vdata, int vertexCount, u _mm_storeu_ps(verts + i * 3, pos); // TODO: use stride 4 to avoid clashing writes? } #elif PPSSPP_ARCH(ARM_NEON) - float32x4_t scaleFactor = vdupq_n_f32(1.0f / 32768.0f); for (int i = 0; i < vertexCount; i++) { const s16 *dataPtr = ((const s16 *)((const s8 *)vdata + i * stride + offset)); int32x4_t data = vmovl_s16(vld1_s16(dataPtr)); - float32x4_t pos = vmulq_f32(vcvtq_f32_s32(data), scaleFactor); + float32x4_t pos = vcvtq_n_f32_s32(data, 15); // >> 15 = division by 32768.0f vst1q_f32(verts + i * 3, pos); } #else