Commit e828d7bc authored by James Willis
Browse files

Bug inside debug checks.

parent 3d0eb1c0
...@@ -273,7 +273,8 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache( ...@@ -273,7 +273,8 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache(
while (first_pi > 0 && sort_i[first_pi - 1].d + dx_max + hi_max > dj_min) { while (first_pi > 0 && sort_i[first_pi - 1].d + dx_max + hi_max > dj_min) {
first_pi--; first_pi--;
/* Store the index of the particle if it is active. */ /* Store the index of the particle if it is active. */
if (part_is_active_no_debug(&parts_i[sort_i[first_pi].i], max_active_bin)) active_id = first_pi; if (part_is_active_no_debug(&parts_i[sort_i[first_pi].i], max_active_bin))
active_id = first_pi;
} }
/* Set the first active pi in range of any particle in cell j. */ /* Set the first active pi in range of any particle in cell j. */
...@@ -320,7 +321,8 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache( ...@@ -320,7 +321,8 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache(
sort_j[last_pj + 1].d - hj_max - dx_max < di_max) { sort_j[last_pj + 1].d - hj_max - dx_max < di_max) {
last_pj++; last_pj++;
/* Store the index of the particle if it is active. */ /* Store the index of the particle if it is active. */
if (part_is_active_no_debug(&parts_j[sort_j[last_pj].i], max_active_bin)) active_id = last_pj; if (part_is_active_no_debug(&parts_j[sort_j[last_pj].i], max_active_bin))
active_id = last_pj;
} }
/* Set the last active pj in range of any particle in cell i. */ /* Set the last active pj in range of any particle in cell i. */
...@@ -603,14 +605,14 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec( ...@@ -603,14 +605,14 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
const int num_vec_proc = 1; const int num_vec_proc = 1;
const timebin_t max_active_bin = e->max_active_bin; const timebin_t max_active_bin = e->max_active_bin;
struct part *restrict parts = c->parts; struct part *restrict parts = c->parts;
const int count = c->count; const int count = c->count;
vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2; vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2;
vector v_rhoi, v_grad_hi, v_pOrhoi2, v_balsara_i, v_ci; vector v_rhoi, v_grad_hi, v_pOrhoi2, v_balsara_i, v_ci;
TIMER_TIC TIMER_TIC;
if (!cell_is_active(c, e)) return; if (!cell_is_active(c, e)) return;
...@@ -634,145 +636,146 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec( ...@@ -634,145 +636,146 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
if (pi->ti_drift != e->ti_current) if (pi->ti_drift != e->ti_current)
error("Particle pi not drifted to current time"); error("Particle pi not drifted to current time");
} }
}
#endif #endif
/* Loop over the particles in the cell. */ /* Loop over the particles in the cell. */
for (int pid = 0; pid < count; pid++) { for (int pid = 0; pid < count; pid++) {
/* Get a pointer to the ith particle. */ /* Get a pointer to the ith particle. */
pi = &parts[pid]; pi = &parts[pid];
/* Is the ith particle active? */ /* Is the ith particle active? */
if (!part_is_active_no_debug(pi, max_active_bin)) continue; if (!part_is_active_no_debug(pi, max_active_bin)) continue;
vector pix, piy, piz; vector pix, piy, piz;
const float hi = cell_cache->h[pid]; const float hi = cell_cache->h[pid];
/* Fill particle pi vectors. */ /* Fill particle pi vectors. */
pix.v = vec_set1(cell_cache->x[pid]); pix.v = vec_set1(cell_cache->x[pid]);
piy.v = vec_set1(cell_cache->y[pid]); piy.v = vec_set1(cell_cache->y[pid]);
piz.v = vec_set1(cell_cache->z[pid]); piz.v = vec_set1(cell_cache->z[pid]);
v_hi.v = vec_set1(hi); v_hi.v = vec_set1(hi);
v_vix.v = vec_set1(cell_cache->vx[pid]); v_vix.v = vec_set1(cell_cache->vx[pid]);
v_viy.v = vec_set1(cell_cache->vy[pid]); v_viy.v = vec_set1(cell_cache->vy[pid]);
v_viz.v = vec_set1(cell_cache->vz[pid]); v_viz.v = vec_set1(cell_cache->vz[pid]);
v_rhoi.v = vec_set1(cell_cache->rho[pid]); v_rhoi.v = vec_set1(cell_cache->rho[pid]);
v_grad_hi.v = vec_set1(cell_cache->grad_h[pid]); v_grad_hi.v = vec_set1(cell_cache->grad_h[pid]);
v_pOrhoi2.v = vec_set1(cell_cache->pOrho2[pid]); v_pOrhoi2.v = vec_set1(cell_cache->pOrho2[pid]);
v_balsara_i.v = vec_set1(cell_cache->balsara[pid]); v_balsara_i.v = vec_set1(cell_cache->balsara[pid]);
v_ci.v = vec_set1(cell_cache->soundspeed[pid]); v_ci.v = vec_set1(cell_cache->soundspeed[pid]);
const float hig2 = hi * hi * kernel_gamma2; const float hig2 = hi * hi * kernel_gamma2;
v_hig2.v = vec_set1(hig2); v_hig2.v = vec_set1(hig2);
/* Reset cumulative sums of update vectors. */ /* Reset cumulative sums of update vectors. */
vector a_hydro_xSum, a_hydro_ySum, a_hydro_zSum, h_dtSum, v_sigSum, vector a_hydro_xSum, a_hydro_ySum, a_hydro_zSum, h_dtSum, v_sigSum,
entropy_dtSum; entropy_dtSum;
/* Get the inverse of hi. */ /* Get the inverse of hi. */
vector v_hi_inv; vector v_hi_inv;
v_hi_inv = vec_reciprocal(v_hi); v_hi_inv = vec_reciprocal(v_hi);
a_hydro_xSum.v = vec_setzero(); a_hydro_xSum.v = vec_setzero();
a_hydro_ySum.v = vec_setzero(); a_hydro_ySum.v = vec_setzero();
a_hydro_zSum.v = vec_setzero(); a_hydro_zSum.v = vec_setzero();
h_dtSum.v = vec_setzero(); h_dtSum.v = vec_setzero();
v_sigSum.v = vec_set1(pi->force.v_sig); v_sigSum.v = vec_set1(pi->force.v_sig);
entropy_dtSum.v = vec_setzero(); entropy_dtSum.v = vec_setzero();
/* Pad cache if there is a serial remainder. */ /* Pad cache if there is a serial remainder. */
count_align = count; count_align = count;
int rem = count % (num_vec_proc * VEC_SIZE); int rem = count % (num_vec_proc * VEC_SIZE);
if (rem != 0) { if (rem != 0) {
int pad = (num_vec_proc * VEC_SIZE) - rem; int pad = (num_vec_proc * VEC_SIZE) - rem;
count_align += pad; count_align += pad;
/* Set positions to the same as particle pi so when the r2 > 0 mask is /* Set positions to the same as particle pi so when the r2 > 0 mask is
* applied these extra contributions are masked out.*/ * applied these extra contributions are masked out.*/
for (int i = count; i < count_align; i++) { for (int i = count; i < count_align; i++) {
cell_cache->x[i] = pix.f[0]; cell_cache->x[i] = pix.f[0];
cell_cache->y[i] = piy.f[0]; cell_cache->y[i] = piy.f[0];
cell_cache->z[i] = piz.f[0]; cell_cache->z[i] = piz.f[0];
cell_cache->h[i] = 1.f; cell_cache->h[i] = 1.f;
}
} }
}
vector pjx, pjy, pjz, hj, hjg2; vector pjx, pjy, pjz, hj, hjg2;
/* Find all of particle pi's interactions and store needed values in the /* Find all of particle pi's interactions and store needed values in the
* secondary cache.*/ * secondary cache.*/
for (int pjd = 0; pjd < count_align; pjd += (num_vec_proc * VEC_SIZE)) { for (int pjd = 0; pjd < count_align; pjd += (num_vec_proc * VEC_SIZE)) {
/* Load 1 set of vectors from the particle cache. */ /* Load 1 set of vectors from the particle cache. */
pjx.v = vec_load(&cell_cache->x[pjd]); pjx.v = vec_load(&cell_cache->x[pjd]);
pjy.v = vec_load(&cell_cache->y[pjd]); pjy.v = vec_load(&cell_cache->y[pjd]);
pjz.v = vec_load(&cell_cache->z[pjd]); pjz.v = vec_load(&cell_cache->z[pjd]);
hj.v = vec_load(&cell_cache->h[pjd]); hj.v = vec_load(&cell_cache->h[pjd]);
hjg2.v = vec_mul(vec_mul(hj.v, hj.v), kernel_gamma2_vec.v); hjg2.v = vec_mul(vec_mul(hj.v, hj.v), kernel_gamma2_vec.v);
/* Compute the pairwise distance. */ /* Compute the pairwise distance. */
vector v_dx, v_dy, v_dz; vector v_dx, v_dy, v_dz;
v_dx.v = vec_sub(pix.v, pjx.v); v_dx.v = vec_sub(pix.v, pjx.v);
v_dy.v = vec_sub(piy.v, pjy.v); v_dy.v = vec_sub(piy.v, pjy.v);
v_dz.v = vec_sub(piz.v, pjz.v); v_dz.v = vec_sub(piz.v, pjz.v);
v_r2.v = vec_mul(v_dx.v, v_dx.v); v_r2.v = vec_mul(v_dx.v, v_dx.v);
v_r2.v = vec_fma(v_dy.v, v_dy.v, v_r2.v); v_r2.v = vec_fma(v_dy.v, v_dy.v, v_r2.v);
v_r2.v = vec_fma(v_dz.v, v_dz.v, v_r2.v); v_r2.v = vec_fma(v_dz.v, v_dz.v, v_r2.v);
/* Form r2 > 0 mask, r2 < hig2 mask and r2 < hjg2 mask. */ /* Form r2 > 0 mask, r2 < hig2 mask and r2 < hjg2 mask. */
mask_t v_doi_mask, v_doi_mask_self_check; mask_t v_doi_mask, v_doi_mask_self_check;
int doi_mask; int doi_mask;
/* Form r2 > 0 mask.*/ /* Form r2 > 0 mask.*/
vec_create_mask(v_doi_mask_self_check, vec_cmp_gt(v_r2.v, vec_setzero())); vec_create_mask(v_doi_mask_self_check, vec_cmp_gt(v_r2.v, vec_setzero()));
/* Form a mask from r2 < hig2 mask and r2 < hjg2 mask. */ /* Form a mask from r2 < hig2 mask and r2 < hjg2 mask. */
vector v_h2; vector v_h2;
v_h2.v = vec_fmax(v_hig2.v, hjg2.v); v_h2.v = vec_fmax(v_hig2.v, hjg2.v);
vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_h2.v)); vec_create_mask(v_doi_mask, vec_cmp_lt(v_r2.v, v_h2.v));
/* Combine all 3 masks and form integer mask. */ /* Combine all 3 masks and form integer mask. */
v_doi_mask.v = vec_and(v_doi_mask.v, v_doi_mask_self_check.v); v_doi_mask.v = vec_and(v_doi_mask.v, v_doi_mask_self_check.v);
doi_mask = vec_form_int_mask(v_doi_mask); doi_mask = vec_form_int_mask(v_doi_mask);
/* If there are any interactions perform them. */ /* If there are any interactions perform them. */
if (doi_mask) { if (doi_mask) {
vector v_hj_inv; vector v_hj_inv;
v_hj_inv = vec_reciprocal(hj); v_hj_inv = vec_reciprocal(hj);
/* To stop floating point exceptions for when particle separations are 0. /* To stop floating point exceptions for when particle separations are
*/ * 0.
v_r2.v = vec_add(v_r2.v, vec_set1(FLT_MIN)); */
v_r2.v = vec_add(v_r2.v, vec_set1(FLT_MIN));
runner_iact_nonsym_1_vec_force(
&v_r2, &v_dx, &v_dy, &v_dz, v_vix, v_viy, v_viz, v_rhoi, v_grad_hi, runner_iact_nonsym_1_vec_force(
v_pOrhoi2, v_balsara_i, v_ci, &cell_cache->vx[pjd], &v_r2, &v_dx, &v_dy, &v_dz, v_vix, v_viy, v_viz, v_rhoi, v_grad_hi,
&cell_cache->vy[pjd], &cell_cache->vz[pjd], &cell_cache->rho[pjd], v_pOrhoi2, v_balsara_i, v_ci, &cell_cache->vx[pjd],
&cell_cache->grad_h[pjd], &cell_cache->pOrho2[pjd], &cell_cache->vy[pjd], &cell_cache->vz[pjd], &cell_cache->rho[pjd],
&cell_cache->balsara[pjd], &cell_cache->soundspeed[pjd], &cell_cache->grad_h[pjd], &cell_cache->pOrho2[pjd],
&cell_cache->m[pjd], v_hi_inv, v_hj_inv, &a_hydro_xSum, &a_hydro_ySum, &cell_cache->balsara[pjd], &cell_cache->soundspeed[pjd],
&a_hydro_zSum, &h_dtSum, &v_sigSum, &entropy_dtSum, v_doi_mask); &cell_cache->m[pjd], v_hi_inv, v_hj_inv, &a_hydro_xSum,
} &a_hydro_ySum, &a_hydro_zSum, &h_dtSum, &v_sigSum, &entropy_dtSum,
v_doi_mask);
}
} /* Loop over all other particles. */ } /* Loop over all other particles. */
VEC_HADD(a_hydro_xSum, pi->a_hydro[0]); VEC_HADD(a_hydro_xSum, pi->a_hydro[0]);
VEC_HADD(a_hydro_ySum, pi->a_hydro[1]); VEC_HADD(a_hydro_ySum, pi->a_hydro[1]);
VEC_HADD(a_hydro_zSum, pi->a_hydro[2]); VEC_HADD(a_hydro_zSum, pi->a_hydro[2]);
VEC_HADD(h_dtSum, pi->force.h_dt); VEC_HADD(h_dtSum, pi->force.h_dt);
VEC_HMAX(v_sigSum, pi->force.v_sig); VEC_HMAX(v_sigSum, pi->force.v_sig);
VEC_HADD(entropy_dtSum, pi->entropy_dt); VEC_HADD(entropy_dtSum, pi->entropy_dt);
} /* loop over all particles. */ } /* loop over all particles. */
TIMER_TOC(timer_doself_force); TIMER_TOC(timer_doself_force);
#endif /* WITH_VECTORIZATION */ #endif /* WITH_VECTORIZATION */
} }
...@@ -894,7 +897,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -894,7 +897,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
max_index_j = r->cj_cache.max_index; max_index_j = r->cj_cache.max_index;
/* Find particles maximum index into cj, max_index_i[] and ci, max_index_j[]. /* Find particles maximum index into cj, max_index_i[] and ci, max_index_j[].
*/ */
/* Also find the first pi that interacts with any particle in cj and the last /* Also find the first pi that interacts with any particle in cj and the last
* pj that interacts with any particle in ci. */ * pj that interacts with any particle in ci. */
populate_max_index_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, hi_max, populate_max_index_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, hi_max,
...@@ -1170,7 +1173,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -1170,7 +1173,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
} /* loop over the parts in ci. */ } /* loop over the parts in ci. */
/* Perform horizontal adds on vector sums and store result in particle pj. /* Perform horizontal adds on vector sums and store result in particle pj.
*/ */
VEC_HADD(rhoSum, pj->rho); VEC_HADD(rhoSum, pj->rho);
VEC_HADD(rho_dhSum, pj->density.rho_dh); VEC_HADD(rho_dhSum, pj->density.rho_dh);
VEC_HADD(wcountSum, pj->density.wcount); VEC_HADD(wcountSum, pj->density.wcount);
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment