diff --git a/src/cache.h b/src/cache.h index 3eb1e194dd4232319ac1d4a4323ca8099f044063..c939da28589c1421e0e4241dca124a8320d5c87b 100644 --- a/src/cache.h +++ b/src/cache.h @@ -349,10 +349,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]); z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]); h[i] = parts_i[idx].h; - m[i] = parts_i[idx].mass; vx[i] = parts_i[idx].v[0]; vy[i] = parts_i[idx].v[1]; vz[i] = parts_i[idx].v[2]; +#ifdef GADGET2_SPH + m[i] = parts_i[idx].mass; +#endif } #ifdef SWIFT_DEBUG_CHECKS @@ -431,10 +433,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]); zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]); hj[i] = parts_j[idx].h; - mj[i] = parts_j[idx].mass; vxj[i] = parts_j[idx].v[0]; vyj[i] = parts_j[idx].v[1]; vzj[i] = parts_j[idx].v[2]; +#ifdef GADGET2_SPH + mj[i] = parts_j[idx].mass; +#endif } #ifdef SWIFT_DEBUG_CHECKS @@ -572,15 +576,17 @@ cache_read_two_partial_cells_sorted_force( y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]); z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]); h[i] = parts_i[idx].h; - m[i] = parts_i[idx].mass; vx[i] = parts_i[idx].v[0]; vy[i] = parts_i[idx].v[1]; vz[i] = parts_i[idx].v[2]; +#ifdef GADGET2_SPH + m[i] = parts_i[idx].mass; rho[i] = parts_i[idx].rho; grad_h[i] = parts_i[idx].force.f; pOrho2[i] = parts_i[idx].force.P_over_rho2; balsara[i] = parts_i[idx].force.balsara; soundspeed[i] = parts_i[idx].force.soundspeed; +#endif } /* Pad cache with fake particles that exist outside the cell so will not @@ -635,15 +641,17 @@ cache_read_two_partial_cells_sorted_force( yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]); zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]); hj[i] = parts_j[idx].h; - mj[i] = parts_j[idx].mass; vxj[i] = parts_j[idx].v[0]; vyj[i] = parts_j[idx].v[1]; vzj[i] = parts_j[idx].v[2]; +#ifdef GADGET2_SPH + mj[i] = parts_j[idx].mass; rhoj[i] = parts_j[idx].rho; grad_hj[i] = parts_j[idx].force.f; pOrho2j[i] = parts_j[idx].force.P_over_rho2; balsaraj[i] = parts_j[idx].force.balsara; soundspeedj[i] = parts_j[idx].force.soundspeed; +#endif } /* Pad cache with fake particles that exist outside the cell so will not diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index 5c29f1d6feb4c2bc542397ef2f0d1b9ae2b49a65..ce175878fa4641ab0becd7132acbe856eb3fcd20 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -26,7 +26,8 @@ /* Local headers. */ #include "active.h" -#ifdef WITH_VECTORIZATION +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2); /** @@ -515,7 +516,7 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj, *init_pj = last_pj; } -#endif /* WITH_VECTORIZATION */ +#endif /* WITH_VECTORIZATION && GADGET2_SPH */ /** * @brief Compute the cell self-interaction (non-symmetric) using vector @@ -527,7 +528,8 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj, __attribute__((always_inline)) INLINE void runner_doself1_density_vec( struct runner *r, struct cell *restrict c) { -#ifdef WITH_VECTORIZATION +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + /* Get some local variables */ const struct engine *e = r->e; const timebin_t max_active_bin = e->max_active_bin; @@ -723,6 +725,11 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( } /* loop over all particles. */ TIMER_TOC(timer_doself_density); + +#else + + error("Incorrectly calling vectorized Gadget-2 functions!"); + #endif /* WITH_VECTORIZATION */ } @@ -740,10 +747,11 @@ __attribute__((always_inline)) INLINE void runner_doself_subset_density_vec( struct runner *r, struct cell *restrict c, struct part *restrict parts, int *restrict ind, int pi_count) { -#ifdef WITH_VECTORIZATION +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + const int count = c->count; - TIMER_TIC + TIMER_TIC; /* Get the particle cache from the runner and re-allocate * the cache if it is not big enough for the cell. */ @@ -925,6 +933,11 @@ __attribute__((always_inline)) INLINE void runner_doself_subset_density_vec( } /* loop over all particles. */ TIMER_TOC(timer_doself_subset); + +#else + + error("Incorrectly calling vectorized Gadget-2 functions!"); + #endif /* WITH_VECTORIZATION */ } @@ -938,7 +951,8 @@ __attribute__((always_inline)) INLINE void runner_doself_subset_density_vec( __attribute__((always_inline)) INLINE void runner_doself2_force_vec( struct runner *r, struct cell *restrict c) { -#ifdef WITH_VECTORIZATION +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + const struct engine *e = r->e; const timebin_t max_active_bin = e->max_active_bin; struct part *restrict parts = c->parts; @@ -1097,6 +1111,11 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec( } /* loop over all particles. */ TIMER_TOC(timer_doself_force); + +#else + + error("Incorrectly calling vectorized Gadget-2 functions!"); + #endif /* WITH_VECTORIZATION */ } @@ -1114,7 +1133,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const double *shift) { -#ifdef WITH_VECTORIZATION +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + const struct engine *restrict e = r->e; const timebin_t max_active_bin = e->max_active_bin; @@ -1442,6 +1462,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, TIMER_TOC(timer_dopair_density); +#else + + error("Incorrectly calling vectorized Gadget-2 functions!"); + #endif /* WITH_VECTORIZATION */ } @@ -1459,7 +1483,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const double *shift) { -#ifdef WITH_VECTORIZATION +#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) + const struct engine *restrict e = r->e; const timebin_t max_active_bin = e->max_active_bin; @@ -1816,5 +1841,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, TIMER_TOC(timer_dopair_density); } +#else + + error("Incorrectly calling vectorized Gadget-2 functions!"); + #endif /* WITH_VECTORIZATION */ }