diff --git a/GPU/Math3D.h b/GPU/Math3D.h index ff05280c43..b8ede12cb8 100644 --- a/GPU/Math3D.h +++ b/GPU/Math3D.h @@ -64,6 +64,9 @@ public: #if defined(_M_SSE) __m128i ivec; __m128 vec; +#elif PPSSPP_ARCH(ARM64) + int32x4_t ivec; + float32x4_t vec; #endif }; @@ -76,6 +79,11 @@ public: #if defined(_M_SSE) Vec2(const __m128 &_vec) : vec(_vec) {} Vec2(const __m128i &_ivec) : ivec(_ivec) {} +#elif PPSSPP_ARCH(ARM64) + Vec2(const float32x4_t &_vec) : vec(_vec) {} +#if !defined(_MSC_VER) + Vec2(const int32x4_t &_ivec) : ivec(_ivec) {} +#endif #endif template @@ -204,6 +212,9 @@ public: #if defined(_M_SSE) __m128i ivec; __m128 vec; +#elif PPSSPP_ARCH(ARM64) + int32x4_t ivec; + float32x4_t vec; #endif }; @@ -220,6 +231,14 @@ public: Vec3(const Vec3Packed &_xyz) { vec = _mm_loadu_ps(_xyz.AsArray()); } +#elif PPSSPP_ARCH(ARM64) + Vec3(const float32x4_t &_vec) : vec(_vec) {} +#if !defined(_MSC_VER) + Vec3(const int32x4_t &_ivec) : ivec(_ivec) {} +#endif + Vec3(const Vec3Packed &_xyz) { + vec = vld1q_f32(_xyz.AsArray()); + } #else Vec3(const Vec3Packed &_xyz) : x(_xyz.x), y(_xyz.y), z(_xyz.z) {} #endif @@ -552,6 +571,9 @@ public: #if defined(_M_SSE) __m128i ivec; __m128 vec; +#elif PPSSPP_ARCH(ARM64) + int32x4_t ivec; + float32x4_t vec; #endif }; @@ -566,6 +588,11 @@ public: #if defined(_M_SSE) Vec4(const __m128 &_vec) : vec(_vec) {} Vec4(const __m128i &_ivec) : ivec(_ivec) {} +#elif PPSSPP_ARCH(ARM64) + Vec4(const float32x4_t &_vec) : vec(_vec) {} +#if !defined(_MSC_VER) + Vec4(const int32x4_t &_ivec) : ivec(_ivec) {} +#endif #endif template diff --git a/GPU/Software/DrawPixel.cpp b/GPU/Software/DrawPixel.cpp index f49ff94209..e3c7534523 100644 --- a/GPU/Software/DrawPixel.cpp +++ b/GPU/Software/DrawPixel.cpp @@ -379,13 +379,7 @@ static inline u32 ApplyLogicOp(GELogicOp op, u32 old_color, u32 new_color) { template void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg color_in, const PixelFuncID &pixelID) { -#if PPSSPP_ARCH(ARM64) - Vec4 prim_color; - vst1q_s32(prim_color.AsArray(), color_in); - prim_color = prim_color.Clamp(0, 255); -#else Vec4 prim_color = Vec4(color_in).Clamp(0, 255); -#endif // Depth range test - applied in clear mode, if not through mode. if (pixelID.applyDepthRange) if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax())