Commit e5c1632a authored by James Willis
Browse files

Removed all compiler macros for vectorisation and replaced them with HAVE_ macros....

Removed all compiler macros for vectorisation and replaced them with HAVE_ macros. Added a WITH_VECTORIZATION #ifdef for turning vectorisation on and off.
parent 654ec8ad
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#include "../config.h" #include "../config.h"
#ifdef WITH_VECTORIZATION
/* Need to check whether compiler supports this (IBM does not) /* Need to check whether compiler supports this (IBM does not)
This will prevent the macros from being defined and switch off This will prevent the macros from being defined and switch off
explicit vectorization if the compiler does not support it */ explicit vectorization if the compiler does not support it */
...@@ -39,7 +41,6 @@ ...@@ -39,7 +41,6 @@
/* So what will the vector size be? */ /* So what will the vector size be? */
#ifdef __MIC__ #ifdef __MIC__
#define VECTORIZE
#define VEC_HAVE_GATHER #define VEC_HAVE_GATHER
#define VEC_SIZE 16 #define VEC_SIZE 16
#define VEC_FLOAT __m512 #define VEC_FLOAT __m512
...@@ -85,8 +86,7 @@ ...@@ -85,8 +86,7 @@
.f[6] = a, .f[7] = a, .f[8] = a, .f[9] = a, .f[10] = a, .f[11] = a, \ .f[6] = a, .f[7] = a, .f[8] = a, .f[9] = a, .f[10] = a, .f[11] = a, \
.f[12] = a, .f[13] = a, .f[14] = a, .f[15] = a \ .f[12] = a, .f[13] = a, .f[14] = a, .f[15] = a \
} }
#elif defined(NO__AVX__) #elif defined(HAVE_AVX)
#define VECTORIZE
#define VEC_SIZE 8 #define VEC_SIZE 8
#define VEC_FLOAT __m256 #define VEC_FLOAT __m256
#define VEC_DBL __m256d #define VEC_DBL __m256d
...@@ -118,12 +118,11 @@ ...@@ -118,12 +118,11 @@
.f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a, .f[4] = a, .f[5] = a, \ .f[0] = a, .f[1] = a, .f[2] = a, .f[3] = a, .f[4] = a, .f[5] = a, \
.f[6] = a, .f[7] = a \ .f[6] = a, .f[7] = a \
} }
#ifdef __AVX2__ #ifdef HAVE_AVX2
#define VEC_HAVE_GATHER #define VEC_HAVE_GATHER
#define vec_gather(base, offsets) _mm256_i32gather_ps(base, offsets.m, 1) #define vec_gather(base, offsets) _mm256_i32gather_ps(base, offsets.m, 1)
#endif #endif
#elif defined(NO__SSE2__) #elif defined(HAVE_SSE2)
#define VECTORIZE
#define VEC_SIZE 4 #define VEC_SIZE 4
#define VEC_FLOAT __m128 #define VEC_FLOAT __m128
#define VEC_DBL __m128d #define VEC_DBL __m128d
...@@ -157,7 +156,6 @@ ...@@ -157,7 +156,6 @@
#endif #endif
/* Define the composite types for element access. */ /* Define the composite types for element access. */
#ifdef VECTORIZE
typedef union { typedef union {
VEC_FLOAT v; VEC_FLOAT v;
VEC_DBL vd; VEC_DBL vd;
...@@ -166,8 +164,12 @@ typedef union { ...@@ -166,8 +164,12 @@ typedef union {
double d[VEC_SIZE / 2]; double d[VEC_SIZE / 2];
int i[VEC_SIZE]; int i[VEC_SIZE];
} vector; } vector;
#endif
#endif #else
/* Needed for cache alignment. */
#define VEC_SIZE 16
#endif /* WITH_VECTORIZATION */
#endif /* VEC_MACRO */
#endif /* SWIFT_VECTOR_H */ #endif /* SWIFT_VECTOR_H */
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment