Commit 54c41342 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Merge branch 'macro-vectorisation' into 'master'

Macro vectorisation

Created a macro called `WITH_VECTORIZATION` in `config.h` that enables vectorisation throughout the code. Replaced all occurrences of `VECTORIZE` with `WITH_VECTORIZATION` throughout the code. Replaced compiler macros, i.e. `__AVX__`, `__MIC__`, with platform-defined macros (`HAVE_AVX`, `HAVE_AVX512_F`, ...) in `vector.h`.

Added placeholders for vectorised versions of the force and density interactions for the Gadget2 and Minimal SPH schemes. An error will be displayed at runtime if the code is compiled with vectorisation enabled and uses either the Gadget2 or the Minimal SPH scheme.

See merge request !200
parents 9e88fc31 7daa5b4c
...@@ -52,6 +52,13 @@ tests/testSPHStep ...@@ -52,6 +52,13 @@ tests/testSPHStep
tests/testKernel tests/testKernel
tests/testParser tests/testParser
tests/parser_output.yml tests/parser_output.yml
tests/test27cells.sh
tests/test27cellsPerturbed.sh
tests/testPair.sh
tests/testPairPerturbed.sh
tests/testParser.sh
tests/testReading.sh
theory/latex/swift.pdf theory/latex/swift.pdf
theory/kernel/kernels.pdf theory/kernel/kernels.pdf
......
...@@ -39,7 +39,7 @@ SWIFT has been successfully built and tested with the following compilers: ...@@ -39,7 +39,7 @@ SWIFT has been successfully built and tested with the following compilers:
- clang 3.4.x - clang 3.4.x
More recent versions and slightly older ones should also be able to More recent versions and slightly older ones should also be able to
built the software. build the software.
By default an attempt to choose suitable set of optimizing compiler flags By default an attempt to choose suitable set of optimizing compiler flags
will be made, targeted for the host machine of the build. If this doesn't will be made, targeted for the host machine of the build. If this doesn't
...@@ -67,6 +67,14 @@ for instance. GCC address sanitizer flags can be included using the ...@@ -67,6 +67,14 @@ for instance. GCC address sanitizer flags can be included using the
option. Note this requires a GCC compiler version of at least 4.8. option. Note this requires a GCC compiler version of at least 4.8.
By default vectorization is switched on. The highest instruction set
available on the platform will be automatically used. However, not all
implementations of SPH available in the code have vectorized
routines. Vectorization will have to be switched off for these. It can
also be switched off for benchmarking purposes. To do so, you can use:
./configure --disable-vec
Dependencies Dependencies
============ ============
......
...@@ -240,7 +240,12 @@ if test "$enable_vec" = "no"; then ...@@ -240,7 +240,12 @@ if test "$enable_vec" = "no"; then
else else
AC_MSG_WARN([Do not know how to disable vectorization for this compiler]) AC_MSG_WARN([Do not know how to disable vectorization for this compiler])
fi fi
HAVEVECTORIZATION=0
else
AC_DEFINE([WITH_VECTORIZATION],1,[Enable vectorization])
HAVEVECTORIZATION=1
fi fi
# HAVEVECTORIZATION is always set above (to 0 or 1), so a non-empty check
# would always succeed; compare against 1 so that --disable-vec really turns
# the conditional off.
AM_CONDITIONAL([HAVEVECTORIZATION],[test "$HAVEVECTORIZATION" = "1"])
# Autoconf stuff. # Autoconf stuff.
AC_PROG_INSTALL AC_PROG_INSTALL
......
...@@ -266,6 +266,24 @@ int main(int argc, char *argv[]) { ...@@ -266,6 +266,24 @@ int main(int argc, char *argv[]) {
message("sizeof(struct cell) is %4zi bytes.", sizeof(struct cell)); message("sizeof(struct cell) is %4zi bytes.", sizeof(struct cell));
} }
/* Temporary abort to handle absence of vectorized functions */
#ifdef WITH_VECTORIZATION
#ifdef GADGET2_SPH
error(
"Vectorized version of Gadget SPH routines not implemented yet. "
"Reconfigure with --disable-vec and recompile or use DEFAULT_SPH.");
#endif
#ifdef MINIMAL_SPH
error(
"Vectorized version of Minimal SPH routines not implemented yet. "
"Reconfigure with --disable-vec and recompile or use DEFAULT_SPH.");
#endif
#endif
/* End temporary fix */
/* How vocal are we ? */ /* How vocal are we ? */
const int talking = (verbose == 1 && myrank == 0) || (verbose == 2); const int talking = (verbose == 1 && myrank == 0) || (verbose == 2);
......
...@@ -60,7 +60,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav( ...@@ -60,7 +60,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav(
__attribute__((always_inline)) INLINE static void runner_iact_vec_grav( __attribute__((always_inline)) INLINE static void runner_iact_vec_grav(
float *R2, float *Dx, struct gpart **pi, struct gpart **pj) { float *R2, float *Dx, struct gpart **pi, struct gpart **pj) {
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
vector ir, r, r2, dx[3]; vector ir, r, r2, dx[3];
vector w, acc, ai, aj; vector w, acc, ai, aj;
......
...@@ -101,7 +101,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density( ...@@ -101,7 +101,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) { struct part **pj) {
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
vector r, ri, r2, xi, xj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx; vector r, ri, r2, xi, xj, hi, hj, hi_inv, hj_inv, wi, wj, wi_dx, wj_dx;
vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh; vector rhoi, rhoj, rhoi_dh, rhoj_dh, wcounti, wcountj, wcounti_dh, wcountj_dh;
...@@ -263,7 +263,7 @@ __attribute__((always_inline)) INLINE static void ...@@ -263,7 +263,7 @@ __attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
struct part **pi, struct part **pj) { struct part **pi, struct part **pj) {
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
vector r, ri, r2, xi, hi, hi_inv, wi, wi_dx; vector r, ri, r2, xi, hi, hi_inv, wi, wi_dx;
vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi; vector rhoi, rhoi_dh, wcounti, wcounti_dh, div_vi;
...@@ -450,7 +450,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( ...@@ -450,7 +450,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) { struct part **pj) {
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
vector r, r2, ri; vector r, r2, ri;
vector xi, xj; vector xi, xj;
...@@ -648,8 +648,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( ...@@ -648,8 +648,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
pi[k]->force.v_sig = vi_sig.f[k]; pi[k]->force.v_sig = vi_sig.f[k];
pj[k]->force.v_sig = vj_sig.f[k]; pj[k]->force.v_sig = vj_sig.f[k];
for (j = 0; j < 3; j++) { for (j = 0; j < 3; j++) {
pi[k]->a[j] -= pia[j].f[k]; pi[k]->a_hydro[j] -= pia[j].f[k];
pj[k]->a[j] += pja[j].f[k]; pj[k]->a_hydro[j] += pja[j].f[k];
} }
} }
...@@ -758,7 +758,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( ...@@ -758,7 +758,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
float *R2, float *Dx, float *Hi, float *Hj, struct part **pi, float *R2, float *Dx, float *Hi, float *Hj, struct part **pi,
struct part **pj) { struct part **pj) {
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
vector r, r2, ri; vector r, r2, ri;
vector xi, xj; vector xi, xj;
...@@ -945,7 +945,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( ...@@ -945,7 +945,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
pi[k]->h_dt -= pih_dt.f[k]; pi[k]->h_dt -= pih_dt.f[k];
pi[k]->force.v_sig = vi_sig.f[k]; pi[k]->force.v_sig = vi_sig.f[k];
pj[k]->force.v_sig = vj_sig.f[k]; pj[k]->force.v_sig = vj_sig.f[k];
for (j = 0; j < 3; j++) pi[k]->a[j] -= pia[j].f[k]; for (j = 0; j < 3; j++) pi[k]->a_hydro[j] -= pia[j].f[k];
} }
#else #else
......
...@@ -103,6 +103,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( ...@@ -103,6 +103,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
pj->density.rot_v[2] += facj * curlvr[2]; pj->density.rot_v[2] += facj * curlvr[2];
} }
/**
 * @brief Density loop (Vectorized version) -- placeholder for Gadget2.
 *
 * No vectorised Gadget2 density interaction has been written yet, so this
 * stub ignores all of its arguments and only calls error().
 */
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
    float *R2, float *Dx, float *Hi, float *Hj,
    struct part **pi, struct part **pj) {

  error(
      "A vectorised version of the Gadget2 density interaction function "
      "does not exist yet!");
}
/** /**
* @brief Density loop (non-symmetric version) * @brief Density loop (non-symmetric version)
*/ */
...@@ -151,6 +162,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( ...@@ -151,6 +162,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
pi->density.rot_v[2] += fac * curlvr[2]; pi->density.rot_v[2] += fac * curlvr[2];
} }
/**
 * @brief Density loop (non-symmetric vectorized version) -- placeholder for
 * Gadget2.
 *
 * No vectorised non-symmetric Gadget2 density interaction has been written
 * yet, so this stub ignores all of its arguments and only calls error().
 */
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
                               struct part **pi, struct part **pj) {

  error(
      "A vectorised version of the Gadget2 non-symmetric density "
      "interaction function does not exist yet!");
}
/** /**
* @brief Force loop * @brief Force loop
*/ */
...@@ -248,6 +270,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( ...@@ -248,6 +270,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
pj->entropy_dt -= 0.5f * mi * visc_term * dvdr; pj->entropy_dt -= 0.5f * mi * visc_term * dvdr;
} }
/**
 * @brief Force loop (Vectorized version) -- placeholder for Gadget2.
 *
 * No vectorised Gadget2 force interaction has been written yet, so this stub
 * ignores all of its arguments and only calls error().
 */
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
    float *R2, float *Dx, float *Hi, float *Hj,
    struct part **pi, struct part **pj) {

  error(
      "A vectorised version of the Gadget2 force interaction function "
      "does not exist yet!");
}
/** /**
* @brief Force loop (non-symmetric version) * @brief Force loop (non-symmetric version)
*/ */
...@@ -338,4 +371,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( ...@@ -338,4 +371,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
pi->entropy_dt += 0.5f * mj * visc_term * dvdr; pi->entropy_dt += 0.5f * mj * visc_term * dvdr;
} }
/**
 * @brief Force loop (Vectorized non-symmetric version) -- placeholder for
 * Gadget2.
 *
 * No vectorised non-symmetric Gadget2 force interaction has been written yet,
 * so this stub ignores all of its arguments and only calls error().
 */
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
    float *R2, float *Dx, float *Hi, float *Hj,
    struct part **pi, struct part **pj) {

  error(
      "A vectorised version of the Gadget2 non-symmetric force "
      "interaction function does not exist yet!");
}
#endif /* SWIFT_RUNNER_IACT_LEGACY_H */ #endif /* SWIFT_RUNNER_IACT_LEGACY_H */
...@@ -61,6 +61,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( ...@@ -61,6 +61,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
pj->density.wcount_dh -= xj * wj_dx; pj->density.wcount_dh -= xj * wj_dx;
} }
/**
 * @brief Density loop (Vectorized version) -- placeholder for Minimal SPH.
 *
 * No vectorised Minimal density interaction has been written yet, so this
 * stub ignores all of its arguments and only calls error().
 */
__attribute__((always_inline)) INLINE static void runner_iact_vec_density(
    float *R2, float *Dx, float *Hi, float *Hj,
    struct part **pi, struct part **pj) {

  error(
      "A vectorised version of the Minimal density interaction function "
      "does not exist yet!");
}
/** /**
* @brief Density loop (non-symmetric version) * @brief Density loop (non-symmetric version)
*/ */
...@@ -85,6 +96,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( ...@@ -85,6 +96,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
pi->density.wcount_dh -= xi * wi_dx; pi->density.wcount_dh -= xi * wi_dx;
} }
/**
 * @brief Density loop (non-symmetric vectorized version) -- placeholder for
 * Minimal SPH.
 *
 * No vectorised non-symmetric Minimal density interaction has been written
 * yet, so this stub ignores all of its arguments and only calls error().
 */
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
                               struct part **pi, struct part **pj) {

  error(
      "A vectorised version of the Minimal non-symmetric density "
      "interaction function does not exist yet!");
}
/** /**
* @brief Force loop * @brief Force loop
*/ */
...@@ -157,6 +179,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( ...@@ -157,6 +179,17 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig); pj->force.v_sig = fmaxf(pj->force.v_sig, v_sig);
} }
/**
 * @brief Force loop (Vectorized version) -- placeholder for Minimal SPH.
 *
 * No vectorised Minimal force interaction has been written yet, so this stub
 * ignores all of its arguments and only calls error().
 */
__attribute__((always_inline)) INLINE static void runner_iact_vec_force(
    float *R2, float *Dx, float *Hi, float *Hj,
    struct part **pi, struct part **pj) {

  error(
      "A vectorised version of the Minimal force interaction function "
      "does not exist yet!");
}
/** /**
* @brief Force loop (non-symmetric version) * @brief Force loop (non-symmetric version)
*/ */
...@@ -222,4 +255,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( ...@@ -222,4 +255,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig);
} }
/**
 * @brief Force loop (Vectorized non-symmetric version) -- placeholder for
 * Minimal SPH.
 *
 * No vectorised non-symmetric Minimal force interaction has been written yet,
 * so this stub ignores all of its arguments and only calls error().
 */
__attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
    float *R2, float *Dx, float *Hi, float *Hj,
    struct part **pi, struct part **pj) {

  error(
      "A vectorised version of the Minimal non-symmetric force "
      "interaction function does not exist yet!");
}
#endif /* SWIFT_RUNNER_IACT_MINIMAL_H */ #endif /* SWIFT_RUNNER_IACT_MINIMAL_H */
...@@ -79,7 +79,7 @@ __attribute__((always_inline)) INLINE static void kernel_grav_eval(float x, ...@@ -79,7 +79,7 @@ __attribute__((always_inline)) INLINE static void kernel_grav_eval(float x,
*W = w; *W = w;
} }
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
/** /**
* @brief Computes the gravity cubic spline for a given distance x (Vectorized * @brief Computes the gravity cubic spline for a given distance x (Vectorized
...@@ -155,7 +155,7 @@ __attribute__((always_inline)) INLINE static void blender_deval(float x, ...@@ -155,7 +155,7 @@ __attribute__((always_inline)) INLINE static void blender_deval(float x,
*dW_dx = dw_dx; *dW_dx = dw_dx;
} }
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
/** /**
* @brief Computes the cubic spline blender and its derivative for a given * @brief Computes the cubic spline blender and its derivative for a given
......
...@@ -225,7 +225,7 @@ __attribute__((always_inline)) INLINE static void kernel_eval( ...@@ -225,7 +225,7 @@ __attribute__((always_inline)) INLINE static void kernel_eval(
*W = w * kernel_constant * kernel_igamma3; *W = w * kernel_constant * kernel_igamma3;
} }
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
static const vector kernel_igamma_vec = FILL_VEC((float)kernel_igamma); static const vector kernel_igamma_vec = FILL_VEC((float)kernel_igamma);
......
...@@ -113,7 +113,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, ...@@ -113,7 +113,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
error("Don't use in actual runs ! Slow code !"); error("Don't use in actual runs ! Slow code !");
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
int icount = 0; int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16))); float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16)));
...@@ -169,7 +169,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, ...@@ -169,7 +169,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */ /* Hit or miss? */
if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) { if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {
#ifndef VECTORIZE #ifndef WITH_VECTORIZATION
IACT(r2, dx, hi, pj->h, pi, pj); IACT(r2, dx, hi, pj->h, pi, pj);
...@@ -199,7 +199,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, ...@@ -199,7 +199,7 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
} /* loop over the parts in ci. */ } /* loop over the parts in ci. */
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
/* Pick up any leftovers. */ /* Pick up any leftovers. */
if (icount > 0) if (icount > 0)
for (int k = 0; k < icount; k++) for (int k = 0; k < icount; k++)
...@@ -215,7 +215,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { ...@@ -215,7 +215,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
error("Don't use in actual runs ! Slow code !"); error("Don't use in actual runs ! Slow code !");
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
int icount = 0; int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16))); float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16)));
...@@ -258,7 +258,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { ...@@ -258,7 +258,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
/* Hit or miss? */ /* Hit or miss? */
if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) { if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {
#ifndef VECTORIZE #ifndef WITH_VECTORIZATION
IACT(r2, dx, hi, pj->h, pi, pj); IACT(r2, dx, hi, pj->h, pi, pj);
...@@ -288,7 +288,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { ...@@ -288,7 +288,7 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {
} /* loop over the parts in ci. */ } /* loop over the parts in ci. */
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
/* Pick up any leftovers. */ /* Pick up any leftovers. */
if (icount > 0) if (icount > 0)
for (int k = 0; k < icount; k++) for (int k = 0; k < icount; k++)
...@@ -317,7 +317,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, ...@@ -317,7 +317,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
error("Don't use in actual runs ! Slow code !"); error("Don't use in actual runs ! Slow code !");
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
int icount = 0; int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16))); float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16)));
...@@ -367,7 +367,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, ...@@ -367,7 +367,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */ /* Hit or miss? */
if (r2 < hig2) { if (r2 < hig2) {
#ifndef VECTORIZE #ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
...@@ -397,7 +397,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, ...@@ -397,7 +397,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
} /* loop over the parts in ci. */ } /* loop over the parts in ci. */
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
/* Pick up any leftovers. */ /* Pick up any leftovers. */
if (icount > 0) if (icount > 0)
for (int k = 0; k < icount; k++) for (int k = 0; k < icount; k++)
...@@ -424,7 +424,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, ...@@ -424,7 +424,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
struct engine *e = r->e; struct engine *e = r->e;
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
int icount = 0; int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16))); float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16)));
...@@ -499,7 +499,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, ...@@ -499,7 +499,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */ /* Hit or miss? */
if (r2 < hig2) { if (r2 < hig2) {
#ifndef VECTORIZE #ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
...@@ -564,7 +564,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, ...@@ -564,7 +564,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */ /* Hit or miss? */
if (r2 < hig2) { if (r2 < hig2) {
#ifndef VECTORIZE #ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
...@@ -595,7 +595,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, ...@@ -595,7 +595,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
} /* loop over the parts in ci. */ } /* loop over the parts in ci. */
} }
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
/* Pick up any leftovers. */ /* Pick up any leftovers. */
if (icount > 0) if (icount > 0)
for (int k = 0; k < icount; k++) for (int k = 0; k < icount; k++)
...@@ -618,7 +618,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, ...@@ -618,7 +618,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
struct part *restrict parts, int *restrict ind, int count) { struct part *restrict parts, int *restrict ind, int count) {
#ifdef VECTORIZE #ifdef WITH_VECTORIZATION
int icount = 0; int icount = 0;
float r2q[VEC_SIZE] __attribute__((aligned(16))); float r2q[VEC_SIZE] __attribute__((aligned(16)));
float hiq[VEC_SIZE] __attribute__((aligned(16))); float hiq[VEC_SIZE] __attribute__((aligned(16)));
...@@ -658,7 +658,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, ...@@ -658,7 +658,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
/* Hit or miss? */ /* Hit or miss? */
if (r2 > 0.0f && r2 < hig2) { if (r2 > 0.0f && r2 < hig2) {
#ifndef VECTORIZE #ifndef WITH_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
...@@ -688,7 +688,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, ...@@ -688,7 +688,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
} /* loop over the parts in ci. */