Commit 7dd6e919 authored by James Willis's avatar James Willis
Browse files

Formatting.

parent 71dde066
......@@ -64,7 +64,7 @@ struct cache {
/* Maximum index into neighbouring cell for particles that are in range. */
int *restrict max_index SWIFT_CACHE_ALIGN;
/* Particle density. */
float *restrict rho SWIFT_CACHE_ALIGN;
......@@ -178,10 +178,14 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
error += posix_memalign((void **)&c->max_index, SWIFT_CACHE_ALIGNMENT,
sizeIntBytes);
error += posix_memalign((void **)&c->rho, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error += posix_memalign((void **)&c->grad_h, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error += posix_memalign((void **)&c->pOrho2, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error += posix_memalign((void **)&c->balsara, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error += posix_memalign((void **)&c->soundspeed, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error +=
posix_memalign((void **)&c->grad_h, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error +=
posix_memalign((void **)&c->pOrho2, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error +=
posix_memalign((void **)&c->balsara, SWIFT_CACHE_ALIGNMENT, sizeBytes);
error +=
posix_memalign((void **)&c->soundspeed, SWIFT_CACHE_ALIGNMENT, sizeBytes);
if (error != 0)
error("Couldn't allocate cache, no. of particles: %d", (int)count);
......@@ -235,7 +239,8 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
}
/**
* @brief Populate cache for force interactions by reading in the particles in unsorted order.
* @brief Populate cache for force interactions by reading in the particles in
* unsorted order.
*
* @param ci The #cell.
* @param ci_cache The cache.
......@@ -257,10 +262,14 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, rho, ci_cache->rho, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, grad_h, ci_cache->grad_h, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, pOrho2, ci_cache->pOrho2, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, balsara, ci_cache->balsara, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, soundspeed, ci_cache->soundspeed, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, grad_h, ci_cache->grad_h,
SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, pOrho2, ci_cache->pOrho2,
SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, balsara, ci_cache->balsara,
SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(float, soundspeed, ci_cache->soundspeed,
SWIFT_CACHE_ALIGNMENT);
const struct part *restrict parts = ci->parts;
double loc[3];
......@@ -280,7 +289,7 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
vx[i] = parts[i].v[0];
vy[i] = parts[i].v[1];
vz[i] = parts[i].v[2];
rho[i] = parts[i].rho;
grad_h[i] = parts[i].force.f;
pOrho2[i] = parts[i].force.P_over_rho2;
......
......@@ -1168,7 +1168,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
}
#ifdef WITH_VECTORIZATION
static const vector const_viscosity_alpha_fac = FILL_VEC(-0.25f * const_viscosity_alpha);
static const vector const_viscosity_alpha_fac =
FILL_VEC(-0.25f * const_viscosity_alpha);
/**
* @brief Force interaction computed using 1 vector
......@@ -1177,12 +1178,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_1_vec_force(
vector *r2, vector *dx, vector *dy, vector *dz, vector vix, vector viy,
vector viz, vector pirho, vector grad_hi, vector piPOrho2,
vector balsara_i, vector ci, float *Vjx, float *Vjy, float *Vjz,
float *Pjrho, float *Grad_hj, float *PjPOrho2, float *Balsara_j, float *Cj,
float *Mj, vector hi_inv, float *Hj_inv, vector *a_hydro_xSum,
vector *a_hydro_ySum, vector *a_hydro_zSum, vector *h_dtSum,
vector *v_sigSum, vector *entropy_dtSum, mask_t mask) {
vector viz, vector pirho, vector grad_hi, vector piPOrho2, vector balsara_i,
vector ci, float *Vjx, float *Vjy, float *Vjz, float *Pjrho, float *Grad_hj,
float *PjPOrho2, float *Balsara_j, float *Cj, float *Mj, vector hi_inv,
float *Hj_inv, vector *a_hydro_xSum, vector *a_hydro_ySum,
vector *a_hydro_zSum, vector *h_dtSum, vector *v_sigSum,
vector *entropy_dtSum, mask_t mask) {
#ifdef WITH_VECTORIZATION
......@@ -1246,21 +1247,28 @@ runner_iact_nonsym_1_vec_force(
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij.v = vec_fmin(dvdr.v, vec_setzero());
mu_ij.v = vec_mul(fac_mu.v, vec_mul(ri.v, omega_ij.v)); /* This is 0 or negative */
mu_ij.v =
vec_mul(fac_mu.v, vec_mul(ri.v, omega_ij.v)); /* This is 0 or negative */
/* Compute signal velocity */
v_sig.v = vec_fnma(vec_set1(3.f), mu_ij.v, vec_add(ci.v, cj.v));
/* Now construct the full viscosity term */
rho_ij.v = vec_mul(vec_set1(0.5f), vec_add(pirho.v, pjrho.v));
visc.v = vec_div(vec_mul(const_viscosity_alpha_fac.v, vec_mul(v_sig.v, vec_mul(mu_ij.v, balsara.v))), rho_ij.v);
visc.v = vec_div(vec_mul(const_viscosity_alpha_fac.v,
vec_mul(v_sig.v, vec_mul(mu_ij.v, balsara.v))),
rho_ij.v);
/* Now, convolve with the kernel */
visc_term.v = vec_mul(vec_set1(0.5f), vec_mul(visc.v, vec_mul(vec_add(wi_dr.v, wj_dr.v), ri.v)));
visc_term.v =
vec_mul(vec_set1(0.5f),
vec_mul(visc.v, vec_mul(vec_add(wi_dr.v, wj_dr.v), ri.v)));
sph_term.v =
vec_mul(vec_fma(vec_mul(grad_hi.v, piPOrho2.v), wi_dr.v, vec_mul(grad_hj.v, vec_mul(pjPOrho2.v, wj_dr.v))), ri.v);
vec_mul(vec_fma(vec_mul(grad_hi.v, piPOrho2.v), wi_dr.v,
vec_mul(grad_hj.v, vec_mul(pjPOrho2.v, wj_dr.v))),
ri.v);
/* Eventually get the acceleration */
acc.v = vec_add(visc_term.v, sph_term.v);
......@@ -1270,7 +1278,8 @@ runner_iact_nonsym_1_vec_force(
piaz.v = vec_mul(mj.v, vec_mul(dz->v, acc.v));
/* Get the time derivative for h. */
pih_dt.v = vec_div(vec_mul(mj.v, vec_mul(dvdr.v, vec_mul(ri.v, wi_dr.v))), pjrho.v);
pih_dt.v =
vec_div(vec_mul(mj.v, vec_mul(dvdr.v, vec_mul(ri.v, wi_dr.v))), pjrho.v);
/* Change in entropy */
entropy_dt.v = vec_mul(mj.v, vec_mul(visc_term.v, dvdr.v));
......@@ -1299,13 +1308,12 @@ runner_iact_nonsym_1_vec_force(
__attribute__((always_inline)) INLINE static void
runner_iact_nonsym_2_vec_force(
float *R2, float *Dx, float *Dy, float *Dz, vector vix, vector viy,
vector viz, vector pirho, vector grad_hi, vector piPOrho2,
vector balsara_i, vector ci, float *Vjx, float *Vjy, float *Vjz,
float *Pjrho, float *Grad_hj, float *PjPOrho2, float *Balsara_j, float *Cj,
float *Mj, vector hi_inv, float *Hj_inv, vector *a_hydro_xSum,
vector *a_hydro_ySum, vector *a_hydro_zSum, vector *h_dtSum,
vector *v_sigSum, vector *entropy_dtSum, mask_t mask, mask_t mask_2,
short mask_cond) {
vector viz, vector pirho, vector grad_hi, vector piPOrho2, vector balsara_i,
vector ci, float *Vjx, float *Vjy, float *Vjz, float *Pjrho, float *Grad_hj,
float *PjPOrho2, float *Balsara_j, float *Cj, float *Mj, vector hi_inv,
float *Hj_inv, vector *a_hydro_xSum, vector *a_hydro_ySum,
vector *a_hydro_zSum, vector *h_dtSum, vector *v_sigSum,
vector *entropy_dtSum, mask_t mask, mask_t mask_2, short mask_cond) {
#ifdef WITH_VECTORIZATION
......@@ -1408,18 +1416,20 @@ runner_iact_nonsym_2_vec_force(
dvy_2.v = vec_sub(viy.v, vjy_2.v);
dvz.v = vec_sub(viz.v, vjz.v);
dvz_2.v = vec_sub(viz.v, vjz_2.v);
/* Compute dv dot r. */
dvdr.v = vec_fma(dvx.v, dx.v, vec_fma(dvy.v, dy.v, vec_mul(dvz.v, dz.v)));
dvdr_2.v =
vec_fma(dvx_2.v, dx_2.v, vec_fma(dvy_2.v, dy_2.v, vec_mul(dvz_2.v, dz_2.v)));
dvdr_2.v = vec_fma(dvx_2.v, dx_2.v,
vec_fma(dvy_2.v, dy_2.v, vec_mul(dvz_2.v, dz_2.v)));
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij.v = vec_fmin(dvdr.v, vec_setzero());
omega_ij_2.v = vec_fmin(dvdr_2.v, vec_setzero());
mu_ij.v = vec_mul(fac_mu.v, vec_mul(ri.v, omega_ij.v)); /* This is 0 or negative */
mu_ij_2.v = vec_mul(fac_mu.v, vec_mul(ri_2.v, omega_ij_2.v)); /* This is 0 or negative */
mu_ij.v =
vec_mul(fac_mu.v, vec_mul(ri.v, omega_ij.v)); /* This is 0 or negative */
mu_ij_2.v = vec_mul(
fac_mu.v, vec_mul(ri_2.v, omega_ij_2.v)); /* This is 0 or negative */
/* Compute signal velocity */
v_sig.v = vec_fnma(vec_set1(3.f), mu_ij.v, vec_add(ci.v, cj.v));
......@@ -1429,19 +1439,33 @@ runner_iact_nonsym_2_vec_force(
rho_ij.v = vec_mul(vec_set1(0.5f), vec_add(pirho.v, pjrho.v));
rho_ij_2.v = vec_mul(vec_set1(0.5f), vec_add(pirho.v, pjrho_2.v));
visc.v = vec_div(vec_mul(const_viscosity_alpha_fac.v, vec_mul(v_sig.v, vec_mul(mu_ij.v, balsara.v))), rho_ij.v);
visc_2.v = vec_div(vec_mul(const_viscosity_alpha_fac.v, vec_mul(v_sig_2.v, vec_mul(mu_ij_2.v, balsara_2.v))), rho_ij_2.v);
visc.v = vec_div(vec_mul(const_viscosity_alpha_fac.v,
vec_mul(v_sig.v, vec_mul(mu_ij.v, balsara.v))),
rho_ij.v);
visc_2.v =
vec_div(vec_mul(const_viscosity_alpha_fac.v,
vec_mul(v_sig_2.v, vec_mul(mu_ij_2.v, balsara_2.v))),
rho_ij_2.v);
/* Now, convolve with the kernel */
visc_term.v = vec_mul(vec_set1(0.5f), vec_mul(visc.v, vec_mul(vec_add(wi_dr.v, wj_dr.v), ri.v)));
visc_term_2.v = vec_mul(vec_set1(0.5f), vec_mul(visc_2.v, vec_mul(vec_add(wi_dr_2.v, wj_dr_2.v), ri_2.v)));
visc_term.v =
vec_mul(vec_set1(0.5f),
vec_mul(visc.v, vec_mul(vec_add(wi_dr.v, wj_dr.v), ri.v)));
visc_term_2.v = vec_mul(
vec_set1(0.5f),
vec_mul(visc_2.v, vec_mul(vec_add(wi_dr_2.v, wj_dr_2.v), ri_2.v)));
vector grad_hi_mul_piPOrho2;
grad_hi_mul_piPOrho2.v = vec_mul(grad_hi.v, piPOrho2.v);
sph_term.v =
vec_mul(vec_fma(grad_hi_mul_piPOrho2.v, wi_dr.v, vec_mul(grad_hj.v, vec_mul(pjPOrho2.v, wj_dr.v))), ri.v);
sph_term_2.v = vec_mul(vec_fma(grad_hi_mul_piPOrho2.v, wi_dr_2.v, vec_mul(grad_hj_2.v, vec_mul(pjPOrho2_2.v, wj_dr_2.v))), ri_2.v);
vec_mul(vec_fma(grad_hi_mul_piPOrho2.v, wi_dr.v,
vec_mul(grad_hj.v, vec_mul(pjPOrho2.v, wj_dr.v))),
ri.v);
sph_term_2.v =
vec_mul(vec_fma(grad_hi_mul_piPOrho2.v, wi_dr_2.v,
vec_mul(grad_hj_2.v, vec_mul(pjPOrho2_2.v, wj_dr_2.v))),
ri_2.v);
/* Eventually get the acceleration */
acc.v = vec_add(visc_term.v, sph_term.v);
......@@ -1456,8 +1480,11 @@ runner_iact_nonsym_2_vec_force(
piaz_2.v = vec_mul(mj_2.v, vec_mul(dz_2.v, acc_2.v));
/* Get the time derivative for h. */
pih_dt.v = vec_div(vec_mul(mj.v, vec_mul(dvdr.v, vec_mul(ri.v, wi_dr.v))), pjrho.v);
pih_dt_2.v = vec_div(vec_mul(mj_2.v, vec_mul(dvdr_2.v, vec_mul(ri_2.v, wi_dr_2.v))), pjrho_2.v);
pih_dt.v =
vec_div(vec_mul(mj.v, vec_mul(dvdr.v, vec_mul(ri.v, wi_dr.v))), pjrho.v);
pih_dt_2.v =
vec_div(vec_mul(mj_2.v, vec_mul(dvdr_2.v, vec_mul(ri_2.v, wi_dr_2.v))),
pjrho_2.v);
/* Change in entropy */
entropy_dt.v = vec_mul(mj.v, vec_mul(visc_term.v, dvdr.v));
......
......@@ -1039,7 +1039,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
const float hj = pj->h;
const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift;
if (dj - rshift > di_max) continue;
double pjx[3];
for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
const float hjg2 = hj * hj * kernel_gamma2;
......@@ -1451,7 +1451,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
const float hj = pj->h;
const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift;
if (dj - rshift > di_max) continue;
double pjx[3];
for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
const float hjg2 = hj * hj * kernel_gamma2;
......
......@@ -226,20 +226,27 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
}
/**
* @brief Compute the vector remainder force interactions from the secondary cache.
* @brief Compute the vector remainder force interactions from the secondary
* cache.
*
* @param int_cache (return) secondary #cache of interactions between two
* particles.
* @param icount Interaction count.
* @param a_hydro_xSum (return) #vector holding the cumulative sum of the x acceleration
* @param a_hydro_xSum (return) #vector holding the cumulative sum of the x
* acceleration
* update on pi.
* @param a_hydro_ySum (return) #vector holding the cumulative sum of the y acceleration
* @param a_hydro_ySum (return) #vector holding the cumulative sum of the y
* acceleration
* update on pi.
* @param a_hydro_zSum (return) #vector holding the cumulative sum of the z acceleration
* @param a_hydro_zSum (return) #vector holding the cumulative sum of the z
* acceleration
* update on pi.
* @param h_dtSum (return) #vector holding the cumulative sum of the time derivative of the smoothing length update on pi.
* @param v_sigSum (return) #vector holding the maximum of the signal velocity update on pi.
* @param entropy_dtSum (return) #vector holding the cumulative sum of the time derivative of the entropy
* @param h_dtSum (return) #vector holding the cumulative sum of the time
* derivative of the smoothing length update on pi.
* @param v_sigSum (return) #vector holding the maximum of the signal velocity
* update on pi.
* @param entropy_dtSum (return) #vector holding the cumulative sum of the time
* derivative of the entropy
* update on pi.
* @param v_hi_inv #vector of 1/h for pi.
* @param v_vix #vector of x velocity of pi.
......@@ -251,7 +258,8 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
* @param v_balsara_i #vector of balsara switch of pi.
* @param v_ci #vector of sound speed of pi.
* @param icount_align (return) Interaction count after the remainder
* @param num_vec_proc #int of the number of vectors to use to perform interaction.
* @param num_vec_proc #int of the number of vectors to use to perform
* interaction.
* interactions have been performed, should be a multiple of the vector length.
*/
__attribute__((always_inline)) INLINE static void calcRemForceInteractions(
......@@ -333,15 +341,21 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
* @param int_cache (return) secondary #cache of interactions between two
* particles.
* @param icount Interaction count.
* @param a_hydro_xSum (return) #vector holding the cumulative sum of the x acceleration
* @param a_hydro_xSum (return) #vector holding the cumulative sum of the x
* acceleration
* update on pi.
* @param a_hydro_ySum (return) #vector holding the cumulative sum of the y
* acceleration
* update on pi.
* @param a_hydro_ySum (return) #vector holding the cumulative sum of the y acceleration
* @param a_hydro_zSum (return) #vector holding the cumulative sum of the z
* acceleration
* update on pi.
* @param a_hydro_zSum (return) #vector holding the cumulative sum of the z acceleration
* @param h_dtSum (return) #vector holding the cumulative sum of the time
* derivative of the smoothing length update on pi.
* @param v_sigSum (return) #vector holding the maximum of the signal velocity
* update on pi.
* @param h_dtSum (return) #vector holding the cumulative sum of the time derivative of the smoothing length update on pi.
* @param v_sigSum (return) #vector holding the maximum of the signal velocity update on pi.
* @param entropy_dtSum (return) #vector holding the cumulative sum of the time derivative of the entropy
* @param entropy_dtSum (return) #vector holding the cumulative sum of the time
* derivative of the entropy
* update on pi.
* @param v_hi_inv #vector of 1/h for pi.
* @param v_vix #vector of x velocity of pi.
......@@ -352,7 +366,8 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
* @param v_pOrhoi2 #vector of pressure over density squared of pi.
* @param v_balsara_i #vector of balsara switch of pi.
* @param v_ci #vector of sound speed of pi.
* @param num_vec_proc #int of the number of vectors to use to perform interaction.
* @param num_vec_proc #int of the number of vectors to use to perform
* interaction.
* interactions have been performed, should be a multiple of the vector length.
*/
__attribute__((always_inline)) INLINE static void storeForceInteractions(
......@@ -993,17 +1008,17 @@ for (int pid = 0; pid < count; pid++) {
storeForceInteractions(
doi_mask, pjd, &v_r2, &v_dx_tmp, &v_dy_tmp, &v_dz_tmp, cell_cache,
&int_cache, &icount, &a_hydro_xSum, &a_hydro_ySum, &a_hydro_zSum,
&h_dtSum, &v_sigSum, &entropy_dtSum, v_hi_inv, v_vix, v_viy,
v_viz, v_rhoi, v_grad_hi, v_pOrhoi2, v_balsara_i, v_ci, 2);
&h_dtSum, &v_sigSum, &entropy_dtSum, v_hi_inv, v_vix, v_viy, v_viz,
v_rhoi, v_grad_hi, v_pOrhoi2, v_balsara_i, v_ci, 2);
}
} /* Loop over all other particles. */
/* Perform padded vector remainder interactions if any are present. */
calcRemForceInteractions(
&int_cache, icount, &a_hydro_xSum, &a_hydro_ySum, &a_hydro_zSum, &h_dtSum,
&v_sigSum, &entropy_dtSum, v_hi_inv, v_vix, v_viy, v_viz, v_rhoi,
v_grad_hi, v_pOrhoi2, v_balsara_i, v_ci, &icount_align, 2);
calcRemForceInteractions(&int_cache, icount, &a_hydro_xSum, &a_hydro_ySum,
&a_hydro_zSum, &h_dtSum, &v_sigSum, &entropy_dtSum,
v_hi_inv, v_vix, v_viy, v_viz, v_rhoi, v_grad_hi,
v_pOrhoi2, v_balsara_i, v_ci, &icount_align, 2);
/* Initialise masks to true in case remainder interactions have been
* performed. */
......@@ -1015,14 +1030,13 @@ for (int pid = 0; pid < count; pid++) {
for (int pjd = 0; pjd < icount_align; pjd += (2 * VEC_SIZE)) {
runner_iact_nonsym_2_vec_force(
&int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd],
&int_cache.dzq[pjd], v_vix, v_viy, v_viz, v_rhoi, v_grad_hi,
v_pOrhoi2, v_balsara_i, v_ci, &int_cache.vxq[pjd],
&int_cache.vyq[pjd], &int_cache.vzq[pjd], &int_cache.rhoq[pjd],
&int_cache.grad_hq[pjd], &int_cache.pOrho2q[pjd],
&int_cache.balsaraq[pjd], &int_cache.soundspeedq[pjd],
&int_cache.mq[pjd], v_hi_inv, &int_cache.h_invq[pjd], &a_hydro_xSum,
&a_hydro_ySum, &a_hydro_zSum, &h_dtSum, &v_sigSum, &entropy_dtSum,
int_mask, int_mask2, 0);
&int_cache.dzq[pjd], v_vix, v_viy, v_viz, v_rhoi, v_grad_hi, v_pOrhoi2,
v_balsara_i, v_ci, &int_cache.vxq[pjd], &int_cache.vyq[pjd],
&int_cache.vzq[pjd], &int_cache.rhoq[pjd], &int_cache.grad_hq[pjd],
&int_cache.pOrho2q[pjd], &int_cache.balsaraq[pjd],
&int_cache.soundspeedq[pjd], &int_cache.mq[pjd], v_hi_inv,
&int_cache.h_invq[pjd], &a_hydro_xSum, &a_hydro_ySum, &a_hydro_zSum,
&h_dtSum, &v_sigSum, &entropy_dtSum, int_mask, int_mask2, 0);
}
VEC_HADD(a_hydro_xSum, pi->a_hydro[0]);
......@@ -1258,8 +1272,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
vector v_dx, v_dy, v_dz, v_r2;
#ifdef SWIFT_DEBUG_CHECKS
if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 || cj_cache_idx + (VEC_SIZE - 1) > (last_pj_align + 1 + VEC_SIZE)) {
error("Unaligned read!!! cj_cache_idx=%d, last_pj_align=%d", cj_cache_idx, last_pj_align);
if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 ||
cj_cache_idx + (VEC_SIZE - 1) > (last_pj_align + 1 + VEC_SIZE)) {
error("Unaligned read!!! cj_cache_idx=%d, last_pj_align=%d",
cj_cache_idx, last_pj_align);
}
#endif
......@@ -1384,8 +1400,13 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
ci_cache_idx < ci_cache_count; ci_cache_idx += VEC_SIZE) {
#ifdef SWIFT_DEBUG_CHECKS
if (ci_cache_idx % VEC_SIZE != 0 || ci_cache_idx < 0 || ci_cache_idx + (VEC_SIZE - 1) > (count_i - first_pi_align + VEC_SIZE)) {
error("Unaligned read!!! ci_cache_idx=%d, first_pi_align=%d, count_i=%d", ci_cache_idx, first_pi_align, count_i);
if (ci_cache_idx % VEC_SIZE != 0 || ci_cache_idx < 0 ||
ci_cache_idx + (VEC_SIZE - 1) >
(count_i - first_pi_align + VEC_SIZE)) {
error(
"Unaligned read!!! ci_cache_idx=%d, first_pi_align=%d, "
"count_i=%d",
ci_cache_idx, first_pi_align, count_i);
}
#endif
......
......@@ -126,37 +126,29 @@ void dump_indv_particle_fields(char *fileName, struct part *p) {
FILE *file = fopen(fileName, "a");
fprintf(file,
"%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f "
"%8.5f "
"%8.5f %8.5f %13e %13e %13e %13e %13e %8.5f %8.5f\n",
p->id, p->x[0],
p->x[1], p->x[2],
p->v[0], p->v[1],
p->v[2], p->h,
hydro_get_density(p),
"%6llu %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f %8.5f "
"%8.5f "
"%8.5f %8.5f %13e %13e %13e %13e %13e %8.5f %8.5f\n",
p->id, p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->h,
hydro_get_density(p),
#if defined(MINIMAL_SPH) || defined(SHADOWFAX_SPH)
0.f,
0.f,
#else
p->density.div_v,
p->density.div_v,
#endif
hydro_get_entropy(p),
hydro_get_internal_energy(p),
hydro_get_pressure(p),
hydro_get_soundspeed(p),
p->a_hydro[0], p->a_hydro[1],
p->a_hydro[2], p->force.h_dt,
hydro_get_entropy(p), hydro_get_internal_energy(p),
hydro_get_pressure(p), hydro_get_soundspeed(p), p->a_hydro[0],
p->a_hydro[1], p->a_hydro[2], p->force.h_dt,
#if defined(GADGET2_SPH)
p->force.v_sig, p->entropy_dt,
0.f
p->force.v_sig, p->entropy_dt, 0.f
#elif defined(DEFAULT_SPH)
p->force.v_sig, 0.f,
p->force.u_dt
p->force.v_sig, 0.f, p->force.u_dt
#elif defined(MINIMAL_SPH)
p->force.v_sig, 0.f, p->u_dt
p->force.v_sig, 0.f, p->u_dt
#else
0.f, 0.f, 0.f
0.f, 0.f, 0.f
#endif
);
);
fclose(file);
}
......@@ -174,7 +166,7 @@ void write_header(char *fileName) {
"ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "h", "rho",
"div_v", "S", "u", "P", "c", "a_x", "a_y", "a_z", "h_dt", "v_sig",
"dS/dt", "du/dt");
fclose(file);
}
......@@ -415,8 +407,9 @@ void test_interactions(struct part test_part, struct part *parts, size_t count,
* @param runs No. of times to call interactions
*
*/
void test_force_interactions(struct part test_part, struct part *parts, size_t count,
char *filePrefix, int runs, int num_vec_proc) {
void test_force_interactions(struct part test_part, struct part *parts,
size_t count, char *filePrefix, int runs,
int num_vec_proc) {
ticks serial_time = 0;
ticks vec_time = 0;
......@@ -449,7 +442,7 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
float dxq[count] __attribute__((aligned(array_align)));
float dyq[count] __attribute__((aligned(array_align)));
float dzq[count] __attribute__((aligned(array_align)));
float hiq[count] __attribute__((aligned(array_align)));
float vixq[count] __attribute__((aligned(array_align)));
float viyq[count] __attribute__((aligned(array_align)));
......@@ -459,7 +452,7 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
float pOrhoi2q[count] __attribute__((aligned(array_align)));
float balsaraiq[count] __attribute__((aligned(array_align)));
float ciq[count] __attribute__((aligned(array_align)));
float hj_invq[count] __attribute__((aligned(array_align)));
float mjq[count] __attribute__((aligned(array_align)));
float vjxq[count] __attribute__((aligned(array_align)));
......@@ -502,8 +495,8 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
#pragma novector
#endif
for (size_t i = 0; i < count; i++) {
runner_iact_nonsym_force(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h, &pi_serial,
&pj_serial[i]);
runner_iact_nonsym_force(r2[i], &(dx[3 * i]), pi_serial.h, pj_serial[i].h,
&pi_serial, &pj_serial[i]);
}
serial_time += getticks() - tic;
}
......@@ -540,7 +533,7 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
dxq[i] = dx[0];
dyq[i] = dx[1];
dzq[i] = dx[2];
hiq[i] = pi_vec.h;
vixq[i] = pi_vec.v[0];
viyq[i] = pi_vec.v[1];
......@@ -550,7 +543,7 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
pOrhoi2q[i] = pi_vec.force.P_over_rho2;
balsaraiq[i] = pi_vec.force.balsara;
ciq[i] = pi_vec.force.soundspeed;
hj_invq[i] = 1.f / pj_vec[i].h;
mjq[i] = pj_vec[i].mass;
vjxq[i] = pj_vec[i].v[0];
......@@ -561,7 +554,6 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
pOrhoj2q[i] = pj_vec[i].force.P_over_rho2;
balsarajq[i] = pj_vec[i].force.balsara;
cjq[i] = pj_vec[i].force.soundspeed;
}
/* Only dump data on first run. */
......@@ -572,9 +564,11 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
dump_indv_particle_fields(vec_filename, pjq[i]);
}
/* Perform vector interaction. */
vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec, rhoi_vec, grad_hi_vec, pOrhoi2_vec, balsara_i_vec, ci_vec;
vector a_hydro_xSum, a_hydro_ySum, a_hydro_zSum, h_dtSum, v_sigSum, entropy_dtSum;
/* Perform vector interaction. */
vector hi_vec, hi_inv_vec, vix_vec, viy_vec, viz_vec, rhoi_vec, grad_hi_vec,
pOrhoi2_vec, balsara_i_vec, ci_vec;
vector a_hydro_xSum, a_hydro_ySum, a_hydro_zSum, h_dtSum, v_sigSum,
entropy_dtSum;
a_hydro_xSum.v = vec_setzero();
a_hydro_ySum.v = vec_setzero();
......@@ -594,20 +588,23 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
ci_vec.v = vec_load(&ciq[0]);
hi_inv_vec = vec_reciprocal(hi_vec);
mask_t mask, mask2;
vec_init_mask_true(mask);
vec_init_mask_true(mask2);
const ticks vec_tic = getticks();
for (size_t i = 0; i < count; i += num_vec_proc * VEC_SIZE) {
if (num_vec_proc == 2) {
runner_iact_nonsym_2_vec_force(&(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]),
(vix_vec), (viy_vec), (viz_vec), rhoi_vec, grad_hi_vec, pOrhoi2_vec, balsara_i_vec, ci_vec, &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(rhojq[i]), &(grad_hjq[i]), &(pOrhoj2q[i]), &(balsarajq[i]), &(cjq[i]), &(mjq[i]), hi_inv_vec, &(hj_invq[i]), &a_hydro_xSum, &a_hydro_ySum, &a_hydro_zSum,
&h_dtSum, &v_sigSum, &entropy_dtSum,
mask, mask2, 0);
runner_iact_nonsym_2_vec_force(
&(r2q[i]), &(dxq[i]), &(dyq[i]), &(dzq[i]), (vix_vec), (viy_vec),
(viz_vec), rhoi_vec, grad_hi_vec, pOrhoi2_vec, balsara_i_vec,
ci_vec, &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(rhojq[i]),
&(grad_hjq[i]), &(pOrhoj2q[i]), &(balsarajq[i]), &(cjq[i]),
&(mjq[i]), hi_inv_vec, &(hj_invq[i]), &a_hydro_xSum, &a_hydro_ySum,
&a_hydro_zSum, &h_dtSum, &v_sigSum, &entropy_dtSum, mask, mask2, 0);
} else { /* Only use one vector for interaction. */
vector r2, dx, dy, dz;
......@@ -617,11 +614,13 @@ void test_force_interactions(struct part test_part, struct part *parts, size_t c
dz.v = vec_load(&(dzq[i]));
runner_iact_nonsym_1_vec_force(
&r2, &dx, &dy, &dz, vix_vec, viy_vec, viz_vec, rhoi_vec, grad_hi_vec, pOrhoi2_vec, balsara_i_vec, ci_vec, &(vjxq[i]), &(vjyq[i]), &(vjzq[i]), &(rhojq[i]), &(grad_hjq[i]), &(pOrhoj2q[i]), &(balsarajq[i]), &(cjq[i]), &(mjq[i]), hi_inv_vec, &(hj_invq[i]), &a_hydro_xSum, &a_hydro_ySum, &a_hydro_zSum,
&h_dtSum, &v_sigSum, &entropy_dtSum,
mask);
&r2