diff --git a/Common/Common.h b/Common/Common.h index c24342f6a2..e1049f2dda 100644 --- a/Common/Common.h +++ b/Common/Common.h @@ -87,17 +87,3 @@ #define __forceinline inline __attribute__((always_inline)) #endif - -#if defined __SSE4_2__ -# define _M_SSE 0x402 -#elif defined __SSE4_1__ -# define _M_SSE 0x401 -#elif defined __SSSE3__ -# define _M_SSE 0x301 -#elif defined __SSE3__ -# define _M_SSE 0x300 -#elif defined __SSE2__ -# define _M_SSE 0x200 -#elif !defined(__GNUC__) && (defined(_M_X64) || defined(_M_IX86)) -# define _M_SSE 0x402 -#endif diff --git a/Common/Math/SIMDHeaders.h b/Common/Math/SIMDHeaders.h index 61e8b6fd17..8e812a7819 100644 --- a/Common/Math/SIMDHeaders.h +++ b/Common/Math/SIMDHeaders.h @@ -67,6 +67,20 @@ static inline uint32x4_t vcgezq_f32(float32x4_t v) { #if PPSSPP_ARCH(SSE2) +#if defined __SSE4_2__ +# define _M_SSE 0x402 +#elif defined __SSE4_1__ +# define _M_SSE 0x401 +#elif defined __SSSE3__ +# define _M_SSE 0x301 +#elif defined __SSE3__ +# define _M_SSE 0x300 +#elif defined __SSE2__ +# define _M_SSE 0x200 +#elif !defined(__GNUC__) && (defined(_M_X64) || defined(_M_IX86)) +# define _M_SSE 0x402 +#endif + // These are SSE2 versions of SSE4.1 instructions, for compatibility and ease of // writing code. // May later figure out how to use the appropriate ones depending on compile flags. diff --git a/Common/Math/fast/fast_matrix.c b/Common/Math/fast/fast_matrix.c index 13d202e5e7..0402f36629 100644 --- a/Common/Math/fast/fast_matrix.c +++ b/Common/Math/fast/fast_matrix.c @@ -4,9 +4,7 @@ #include "fast_matrix.h" -#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) - -#include +#if PPSSPP_ARCH(SSE2) #include "fast_matrix.h" @@ -28,12 +26,6 @@ void fast_matrix_mul_4x4_sse(float *dest, const float *a, const float *b) { #elif PPSSPP_ARCH(ARM_NEON) -#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64) -#include -#else -#include -#endif - #if PPSSPP_ARCH(ARM) static inline float32x4_t vfmaq_laneq_f32(float32x4_t _s, float32x4_t _a, float32x4_t _b, int lane) { if (lane == 0) return vmlaq_lane_f32(_s, _a, vget_low_f32(_b), 0); diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp index cbb678e2b4..1fe06707e4 100644 --- a/Core/MIPS/x86/CompVFPU.cpp +++ b/Core/MIPS/x86/CompVFPU.cpp @@ -23,11 +23,10 @@ #if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) #include #include -#include - #include "Common/Math/math_util.h" #include "Common/CPUDetect.h" +#include "Common/Math/SIMDHeaders.h" #include "Common/Log.h" #include "Core/Compatibility.h" #include "Core/Config.h" diff --git a/Core/MIPS/x86/RegCacheFPU.cpp b/Core/MIPS/x86/RegCacheFPU.cpp index de2ef1e7ff..44c0e27ca6 100644 --- a/Core/MIPS/x86/RegCacheFPU.cpp +++ b/Core/MIPS/x86/RegCacheFPU.cpp @@ -19,8 +19,7 @@ #if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) #include -#include - +#include "Common/Math/SIMDHeaders.h" #include "Common/Log.h" #include "Common/x64Emitter.h" #include "Core/MIPS/MIPSAnalyst.h" diff --git a/GPU/Common/IndexGenerator.cpp b/GPU/Common/IndexGenerator.cpp index 0488ab5cc9..2c7ce5975c 100644 --- a/GPU/Common/IndexGenerator.cpp +++ b/GPU/Common/IndexGenerator.cpp @@ -19,7 +19,6 @@ #include "ppsspp_config.h" -#include "Common/Common.h" #include "Common/Math/SIMDHeaders.h" #include "GPU/Common/IndexGenerator.h" diff --git a/GPU/Common/TextureScalerCommon.cpp b/GPU/Common/TextureScalerCommon.cpp index 4603638af9..43363c688f 100644 --- a/GPU/Common/TextureScalerCommon.cpp +++ b/GPU/Common/TextureScalerCommon.cpp @@ -25,12 +25,10 @@ #include "Core/Config.h" #include "Common/Common.h" #include "Common/Log.h" +#include "Common/Math/SIMDHeaders.h" #include "Common/Thread/ParallelLoop.h" #include "ext/xbrz/xbrz.h" -#include "Common/Math/SIMDHeaders.h" - - // Report the time and throughput for each larger scaling operation in the log //#define SCALING_MEASURE_TIME #include "Common/TimeUtil.h" diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp index 1df3be8388..5c29bbcab2 100644 --- a/GPU/Common/VertexDecoderX86.cpp +++ b/GPU/Common/VertexDecoderX86.cpp @@ -16,12 +16,12 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #include "ppsspp_config.h" -#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) -#include +#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) #include "Common/CPUDetect.h" #include "Common/Data/Convert/ColorConv.h" +#include "Common/Math/SIMDHeaders.h" #include "Core/Config.h" #include "GPU/GPUState.h" #include "GPU/Common/VertexDecoderCommon.h" diff --git a/GPU/GPUState.cpp b/GPU/GPUState.cpp index 03b7722730..1049b8be96 100644 --- a/GPU/GPUState.cpp +++ b/GPU/GPUState.cpp @@ -17,13 +17,13 @@ #include "ppsspp_config.h" #include "Common/Common.h" +#include "Common/Math/SIMDHeaders.h" #include "Common/Serialize/Serializer.h" #include "Common/Serialize/SerializeFuncs.h" #include "Core/MemMap.h" #include "GPU/ge_constants.h" #include "GPU/GPUCommon.h" #include "GPU/GPUState.h" -#include "Common/Math/SIMDHeaders.h" // This must be aligned so that the matrices within are aligned. alignas(16) GPUgstate gstate; diff --git a/GPU/Math3D.cpp b/GPU/Math3D.cpp index c380a72022..2fa2caba40 100644 --- a/GPU/Math3D.cpp +++ b/GPU/Math3D.cpp @@ -15,8 +15,9 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. -#include "Common/Common.h" #include "GPU/Math3D.h" +#include "Common/Common.h" +#include "Common/Math/SIMDHeaders.h" #if PPSSPP_ARCH(SSE2) // For the SSE4 stuff. diff --git a/GPU/Software/DrawPixelX86.cpp b/GPU/Software/DrawPixelX86.cpp index 2c887f44f1..187c3b2a0e 100644 --- a/GPU/Software/DrawPixelX86.cpp +++ b/GPU/Software/DrawPixelX86.cpp @@ -16,12 +16,13 @@ // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. #include "ppsspp_config.h" + #if PPSSPP_ARCH(AMD64) -#include #include "Common/x64Emitter.h" #include "Common/CPUDetect.h" #include "Common/LogReporting.h" +#include "Common/Math/SIMDHeaders.h" #include "GPU/GPUState.h" #include "GPU/Software/DrawPixel.h" #include "GPU/Software/SoftGpu.h" diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp index 8613a92d6b..30ce034924 100644 --- a/GPU/Software/Rasterizer.cpp +++ b/GPU/Software/Rasterizer.cpp @@ -38,6 +38,11 @@ #include "Common/Math/SIMDHeaders.h" +// For the SSE4 stuff +#if PPSSPP_ARCH(SSE2) +#include +#endif + namespace Rasterizer { // Only OK on x64 where our stack is aligned diff --git a/GPU/Software/RasterizerRegCache.h b/GPU/Software/RasterizerRegCache.h index 4b8ada5007..20c9263687 100644 --- a/GPU/Software/RasterizerRegCache.h +++ b/GPU/Software/RasterizerRegCache.h @@ -25,9 +25,8 @@ #include #include "Common/Common.h" -#if defined(_M_SSE) -#include -#endif +#include "Common/Math/SIMDHeaders.h" + #if PPSSPP_ARCH(ARM64_NEON) #if defined(_MSC_VER) && PPSSPP_ARCH(ARM64) #include diff --git a/GPU/Software/SamplerX86.cpp b/GPU/Software/SamplerX86.cpp index 3b3d432210..31a607ef64 100644 --- a/GPU/Software/SamplerX86.cpp +++ b/GPU/Software/SamplerX86.cpp @@ -18,7 +18,7 @@ #include "ppsspp_config.h" #if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) -#include +#include "Common/Math/SIMDHeaders.h" #include "Common/x64Emitter.h" #include "Common/BitScan.h" #include "Common/CPUDetect.h" diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index d6dde859e1..63b4ae54d4 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -15,7 +15,10 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. +#include "ppsspp_config.h" + #include + #include "Common/Common.h" #include "Common/CPUDetect.h" #include "Common/Math/math_util.h" @@ -25,12 +28,18 @@ #include "GPU/Common/DrawEngineCommon.h" #include "GPU/Common/VertexDecoderCommon.h" #include "GPU/Common/SoftwareTransformCommon.h" +#include "Common/Math/SIMDHeaders.h" #include "GPU/Software/BinManager.h" #include "GPU/Software/Clipper.h" #include "GPU/Software/Lighting.h" #include "GPU/Software/RasterizerRectangle.h" #include "GPU/Software/TransformUnit.h" +// For the SSE4 stuff +#if PPSSPP_ARCH(SSE2) +#include +#endif + #define TRANSFORM_BUF_SIZE (65536 * 48) TransformUnit::TransformUnit() { diff --git a/ext/at3_standalone/atrac3plusdsp.cpp b/ext/at3_standalone/atrac3plusdsp.cpp index a070148e0e..63d4d7c57f 100644 --- a/ext/at3_standalone/atrac3plusdsp.cpp +++ b/ext/at3_standalone/atrac3plusdsp.cpp @@ -659,7 +659,7 @@ void ff_atrac3p_ipqf(FFTContext *dct_ctx, Atrac3pIPQFChannelCtx *hist, const float *coeffs2 = ipqf_coeffs2[t]; float *outp = out + s * 16; -#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) +#if PPSSPP_ARCH(SSE2) auto _mm_reverse = [](__m128 x) -> __m128 { return _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3)); };