Commit d959e046 authored by James Willis
Browse files

Created a branching function for DOPAIR2 to only call the vectorised function...

Created a branching function for DOPAIR2 that calls the vectorised function only when vectorisation is enabled, and calls the serial version when corner cells interact (or when vectorisation is disabled). Moved some debug statements inside the branching functions.
parent 778a21b7
...@@ -1884,13 +1884,8 @@ void *runner_main(void *data) { ...@@ -1884,13 +1884,8 @@ void *runner_main(void *data) {
else if (t->subtype == task_subtype_gradient) else if (t->subtype == task_subtype_gradient)
runner_dopair1_branch_gradient(r, ci, cj); runner_dopair1_branch_gradient(r, ci, cj);
#endif #endif
else if (t->subtype == task_subtype_force) { else if (t->subtype == task_subtype_force)
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) runner_dopair2_branch_force(r, ci, cj);
runner_dopair2_force_vec(r, ci, cj);
#else
runner_dopair2_force(r, ci, cj);
#endif
}
else if (t->subtype == task_subtype_grav) else if (t->subtype == task_subtype_grav)
runner_dopair_grav(r, ci, cj, 1); runner_dopair_grav(r, ci, cj, 1);
else else
......
...@@ -781,37 +781,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, ...@@ -781,37 +781,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
const struct entry *restrict sort_j = cj->sort[sid]; const struct entry *restrict sort_j = cj->sort[sid];
#ifdef SWIFT_DEBUG_CHECKS #ifdef SWIFT_DEBUG_CHECKS
/* Check that the dx_max_sort values in the cell are indeed an upper
bound on particle movement. */
for (int pid = 0; pid < ci->count; pid++) {
const struct part *p = &ci->parts[sort_i[pid].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort >
1.0e-4 * max(fabsf(d), ci->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d "
"cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e "
"ci->dx_max_sort_old=%e",
ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort,
ci->dx_max_sort_old);
}
for (int pjd = 0; pjd < cj->count; pjd++) {
const struct part *p = &cj->parts[sort_j[pjd].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort >
1.0e-4 * max(fabsf(d), cj->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d "
"ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e "
"cj->dx_max_sort_old=%e",
cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort,
cj->dx_max_sort_old);
}
/* Some constants used to checks that the parts are in the right frame */ /* Some constants used to checks that the parts are in the right frame */
const float shift_threshold_x = const float shift_threshold_x =
2. * ci->width[0] + 2. * max(ci->dx_max_part, cj->dx_max_part); 2. * ci->width[0] + 2. * max(ci->dx_max_part, cj->dx_max_part);
...@@ -819,7 +788,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, ...@@ -819,7 +788,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
2. * ci->width[1] + 2. * max(ci->dx_max_part, cj->dx_max_part); 2. * ci->width[1] + 2. * max(ci->dx_max_part, cj->dx_max_part);
const float shift_threshold_z = const float shift_threshold_z =
2. * ci->width[2] + 2. * max(ci->dx_max_part, cj->dx_max_part); 2. * ci->width[2] + 2. * max(ci->dx_max_part, cj->dx_max_part);
#endif /* SWIFT_DEBUG_CHECKS */ #endif /* SWIFT_DEBUG_CHECKS */
/* Get some other useful values. */ /* Get some other useful values. */
...@@ -1028,6 +996,43 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { ...@@ -1028,6 +996,43 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
cj->dx_max_sort_old > space_maxreldx * cj->dmin) cj->dx_max_sort_old > space_maxreldx * cj->dmin)
error("Interacting unsorted cells."); error("Interacting unsorted cells.");
#ifdef SWIFT_DEBUG_CHECKS
/* Pick-out the sorted lists. */
const struct entry *restrict sort_i = ci->sort[sid];
const struct entry *restrict sort_j = cj->sort[sid];
/* Check that the dx_max_sort values in the cell are indeed an upper
bound on particle movement. */
for (int pid = 0; pid < ci->count; pid++) {
const struct part *p = &ci->parts[sort_i[pid].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort >
1.0e-4 * max(fabsf(d), ci->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d "
"cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e "
"ci->dx_max_sort_old=%e",
ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort,
ci->dx_max_sort_old);
}
for (int pjd = 0; pjd < cj->count; pjd++) {
const struct part *p = &cj->parts[sort_j[pjd].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort >
1.0e-4 * max(fabsf(d), cj->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d "
"ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e "
"cj->dx_max_sort_old=%e",
cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort,
cj->dx_max_sort_old);
}
#endif /* SWIFT_DEBUG_CHECKS */
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \ #if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \
(DOPAIR1_BRANCH == runner_dopair1_density_branch) (DOPAIR1_BRANCH == runner_dopair1_density_branch)
if (!sort_is_corner(sid)) if (!sort_is_corner(sid))
...@@ -1046,7 +1051,8 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { ...@@ -1046,7 +1051,8 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
* @param ci The first #cell. * @param ci The first #cell.
* @param cj The second #cell. * @param cj The second #cell.
*/ */
void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
const double *shift) {
struct engine *restrict e = r->e; struct engine *restrict e = r->e;
...@@ -1057,24 +1063,6 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { ...@@ -1057,24 +1063,6 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
TIMER_TIC; TIMER_TIC;
/* Anything to do here? */
if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
error("Interacting undrifted cells.");
/* Get the shift ID. */
double shift[3] = {0.0, 0.0, 0.0};
const int sid = space_getsid(e->s, &ci, &cj, shift);
/* Have the cells been sorted? */
if (!(ci->sorted & (1 << sid)) ||
ci->dx_max_sort_old > space_maxreldx * ci->dmin)
error("Interacting unsorted cells.");
if (!(cj->sorted & (1 << sid)) ||
cj->dx_max_sort_old > space_maxreldx * cj->dmin)
error("Interacting unsorted cells.");
/* Get the cutoff shift. */ /* Get the cutoff shift. */
double rshift = 0.0; double rshift = 0.0;
for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
...@@ -1504,36 +1492,74 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { ...@@ -1504,36 +1492,74 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
*/ */
void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
// const struct engine *restrict e = r->e; const struct engine *restrict e = r->e;
//
// /* Anything to do here? */ /* Anything to do here? */
// if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
//
// /* Check that cells are drifted. */ /* Check that cells are drifted. */
// if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
// error("Interacting undrifted cells."); error("Interacting undrifted cells.");
//
// /* Get the sort ID. */ /* Get the sort ID. */
// double shift[3] = {0.0, 0.0, 0.0}; double shift[3] = {0.0, 0.0, 0.0};
// const int sid = space_getsid(e->s, &ci, &cj, shift); const int sid = space_getsid(e->s, &ci, &cj, shift);
//
// /* Have the cells been sorted? */ /* Have the cells been sorted? */
// if (!(ci->sorted & (1 << sid)) || if (!(ci->sorted & (1 << sid)) ||
// ci->dx_max_sort_old > space_maxreldx * ci->dmin) ci->dx_max_sort_old > space_maxreldx * ci->dmin)
// error("Interacting unsorted cells."); error("Interacting unsorted cells.");
// if (!(cj->sorted & (1 << sid)) || if (!(cj->sorted & (1 << sid)) ||
// cj->dx_max_sort_old > space_maxreldx * cj->dmin) cj->dx_max_sort_old > space_maxreldx * cj->dmin)
// error("Interacting unsorted cells."); error("Interacting unsorted cells.");
//
//#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \ #ifdef SWIFT_DEBUG_CHECKS
// (DOPAIR2_BRANCH == runner_dopair2_force_branch)
// if (!sort_is_corner(sid)) /* Pick-out the sorted lists. */
// runner_dopair2_force_vec(r, ci, cj, sid, shift); const struct entry *restrict sort_i = ci->sort[sid];
// else const struct entry *restrict sort_j = cj->sort[sid];
// DOPAIR2(r, ci, cj, sid, shift);
//#else /* Check that the dx_max_sort values in the cell are indeed an upper
// DOPAIR2(r, ci, cj, sid, shift); bound on particle movement. */
//#endif for (int pid = 0; pid < ci->count; pid++) {
const struct part *p = &ci->parts[sort_i[pid].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort >
1.0e-4 * max(fabsf(d), ci->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d "
"cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e "
"ci->dx_max_sort_old=%e",
ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort,
ci->dx_max_sort_old);
}
for (int pjd = 0; pjd < cj->count; pjd++) {
const struct part *p = &cj->parts[sort_j[pjd].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort >
1.0e-4 * max(fabsf(d), cj->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d "
"ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e "
"cj->dx_max_sort_old=%e",
cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort,
cj->dx_max_sort_old);
}
#endif /* SWIFT_DEBUG_CHECKS */
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \
(DOPAIR2_BRANCH == runner_dopair2_force_branch)
if (!sort_is_corner(sid))
runner_dopair2_force_vec(r, ci, cj, sid, shift);
else
DOPAIR2(r, ci, cj, sid, shift);
#else
DOPAIR2(r, ci, cj, sid, shift);
#endif
} }
/** /**
...@@ -2341,11 +2367,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, ...@@ -2341,11 +2367,7 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid,
error("Interacting unsorted cells."); error("Interacting unsorted cells.");
/* Compute the interactions. */ /* Compute the interactions. */
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) DOPAIR2_BRANCH(r, ci, cj);
runner_dopair2_force_vec(r, ci, cj);
#else
DOPAIR2(r, ci, cj);
#endif
} }
if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR);
......
...@@ -368,7 +368,7 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj, ...@@ -368,7 +368,7 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj,
const double di_max, const double dj_min, const double di_max, const double dj_min,
int *max_index_i, int *max_index_j, int *max_index_i, int *max_index_j,
int *init_pi, int *init_pj, int *init_pi, int *init_pj,
const struct engine *e) { const timebin_t max_active_bin) {
const struct part *restrict parts_i = ci->parts; const struct part *restrict parts_i = ci->parts;
const struct part *restrict parts_j = cj->parts; const struct part *restrict parts_j = cj->parts;
...@@ -383,7 +383,7 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj, ...@@ -383,7 +383,7 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj,
while (first_pi > 0 && sort_i[first_pi - 1].d + dx_max + max(hi_max, hj_max) > dj_min) { while (first_pi > 0 && sort_i[first_pi - 1].d + dx_max + max(hi_max, hj_max) > dj_min) {
first_pi--; first_pi--;
/* Store the index of the particle if it is active. */ /* Store the index of the particle if it is active. */
if (part_is_active(&parts_i[sort_i[first_pi].i], e)) active_id = first_pi; if (part_is_active_no_debug(&parts_i[sort_i[first_pi].i], max_active_bin)) active_id = first_pi;
} }
/* Set the first active pi in range of any particle in cell j. */ /* Set the first active pi in range of any particle in cell j. */
...@@ -428,7 +428,7 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj, ...@@ -428,7 +428,7 @@ populate_max_index_no_cache_force(const struct cell *ci, const struct cell *cj,
sort_j[last_pj + 1].d - max(hj_max, hi_max) - dx_max < di_max) { sort_j[last_pj + 1].d - max(hj_max, hi_max) - dx_max < di_max) {
last_pj++; last_pj++;
/* Store the index of the particle if it is active. */ /* Store the index of the particle if it is active. */
if (part_is_active(&parts_j[sort_j[last_pj].i], e)) active_id = last_pj; if (part_is_active_no_debug(&parts_j[sort_j[last_pj].i], max_active_bin)) active_id = last_pj;
} }
/* Set the last active pj in range of any particle in cell i. */ /* Set the last active pj in range of any particle in cell i. */
...@@ -950,38 +950,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -950,38 +950,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
const struct entry *restrict sort_i = ci->sort[sid]; const struct entry *restrict sort_i = ci->sort[sid];
const struct entry *restrict sort_j = cj->sort[sid]; const struct entry *restrict sort_j = cj->sort[sid];
#ifdef SWIFT_DEBUG_CHECKS
/* Check that the dx_max_sort values in the cell are indeed an upper
bound on particle movement. */
for (int pid = 0; pid < ci->count; pid++) {
const struct part *p = &ci->parts[sort_i[pid].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort >
1.0e-4 * max(fabsf(d), ci->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d "
"cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e "
"ci->dx_max_sort_old=%e",
ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort,
ci->dx_max_sort_old);
}
for (int pjd = 0; pjd < cj->count; pjd++) {
const struct part *p = &cj->parts[sort_j[pjd].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort >
1.0e-4 * max(fabsf(d), cj->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d "
"ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e "
"cj->dx_max_sort_old=%e",
cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort,
cj->dx_max_sort_old);
}
#endif /* SWIFT_DEBUG_CHECKS */
/* Get some other useful values. */ /* Get some other useful values. */
const int count_i = ci->count; const int count_i = ci->count;
...@@ -1366,34 +1334,18 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, ...@@ -1366,34 +1334,18 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
* @param cj The second #cell. * @param cj The second #cell.
*/ */
void runner_dopair2_force_vec(struct runner *r, struct cell *ci, void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
struct cell *cj) { struct cell *cj, const int sid,
const double *shift) {
#ifdef WITH_VECTORIZATION #ifdef WITH_VECTORIZATION
const struct engine *restrict e = r->e; const struct engine *restrict e = r->e;
const timebin_t max_active_bin = e->max_active_bin;
vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2; vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2;
vector v_rhoi, v_grad_hi, v_pOrhoi2, v_balsara_i, v_ci; vector v_rhoi, v_grad_hi, v_pOrhoi2, v_balsara_i, v_ci;
TIMER_TIC; TIMER_TIC;
/* Anything to do here? */
if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
error("Interacting undrifted cells.");
/* Get the sort ID. */
double shift[3] = {0.0, 0.0, 0.0};
const int sid = space_getsid(e->s, &ci, &cj, shift);
/* Have the cells been sorted? */
if (!(ci->sorted & (1 << sid)) ||
ci->dx_max_sort_old > space_maxreldx * ci->dmin)
error("Interacting unsorted cells.");
if (!(cj->sorted & (1 << sid)) ||
cj->dx_max_sort_old > space_maxreldx * cj->dmin)
error("Interacting unsorted cells.");
/* Get the cutoff shift. */ /* Get the cutoff shift. */
double rshift = 0.0; double rshift = 0.0;
for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
...@@ -1402,38 +1354,6 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1402,38 +1354,6 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
const struct entry *restrict sort_i = ci->sort[sid]; const struct entry *restrict sort_i = ci->sort[sid];
const struct entry *restrict sort_j = cj->sort[sid]; const struct entry *restrict sort_j = cj->sort[sid];
#ifdef SWIFT_DEBUG_CHECKS
/* Check that the dx_max_sort values in the cell are indeed an upper
bound on particle movement. */
for (int pid = 0; pid < ci->count; pid++) {
const struct part *p = &ci->parts[sort_i[pid].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort >
1.0e-4 * max(fabsf(d), ci->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d "
"cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e "
"ci->dx_max_sort_old=%e",
ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort,
ci->dx_max_sort_old);
}
for (int pjd = 0; pjd < cj->count; pjd++) {
const struct part *p = &cj->parts[sort_j[pjd].i];
const float d = p->x[0] * runner_shift[sid][0] +
p->x[1] * runner_shift[sid][1] +
p->x[2] * runner_shift[sid][2];
if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort >
1.0e-4 * max(fabsf(d), cj->dx_max_sort_old))
error(
"particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d "
"ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e "
"cj->dx_max_sort_old=%e",
cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort,
cj->dx_max_sort_old);
}
#endif /* SWIFT_DEBUG_CHECKS */
/* Get some other useful values. */ /* Get some other useful values. */
const int count_i = ci->count; const int count_i = ci->count;
...@@ -1447,31 +1367,37 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1447,31 +1367,37 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
const double di_max = sort_i[count_i - 1].d - rshift; const double di_max = sort_i[count_i - 1].d - rshift;
const double dj_min = sort_j[0].d; const double dj_min = sort_j[0].d;
const float dx_max = (ci->dx_max_sort + cj->dx_max_sort); const float dx_max = (ci->dx_max_sort + cj->dx_max_sort);
const int active_ci = cell_is_active(ci, e);
const int active_cj = cell_is_active(cj, e);
/* Check if any particles are active and return if there are not. */ /* Check if any particles are active and return if there are not. */
// int numActive = 0; int numActive = 0;
// for (int pid = count_i - 1;
// pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { const double h_max = max(hi_max, hj_max);
// struct part *restrict pi = &parts_i[sort_i[pid].i];
// if (part_is_active(pi, e)) { if (active_ci) {
// numActive++; for (int pid = count_i - 1;
// break; pid >= 0 && sort_i[pid].d + h_max + dx_max > dj_min; pid--) {
// } struct part *restrict pi = &parts_i[sort_i[pid].i];
//} if (part_is_active(pi, e)) {
numActive++;
// if (!numActive) { break;
// for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < }
// di_max; }
// pjd++) { }
// struct part *restrict pj = &parts_j[sort_j[pjd].i];
// if (part_is_active(pj, e)) { if (!numActive && active_cj) {
// numActive++; for (int pjd = 0; pjd < count_j && sort_j[pjd].d - h_max - dx_max < di_max;
// break; pjd++) {
// } struct part *restrict pj = &parts_j[sort_j[pjd].i];
// } if (part_is_active_no_debug(pj, max_active_bin)) {
//} numActive++;
break;
// if (numActive == 0) return; }
}
}
if (numActive == 0) return;
/* Get both particle caches from the runner and re-allocate /* Get both particle caches from the runner and re-allocate
* them if they are not big enough for the cells. */ * them if they are not big enough for the cells. */
...@@ -1497,7 +1423,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1497,7 +1423,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
* pj that interacts with any particle in ci. */ * pj that interacts with any particle in ci. */
populate_max_index_no_cache_force(ci, cj, sort_i, sort_j, dx_max, rshift, populate_max_index_no_cache_force(ci, cj, sort_i, sort_j, dx_max, rshift,
hi_max_raw, hj_max_raw, hi_max, hj_max, di_max, dj_min, max_index_i, hi_max_raw, hj_max_raw, hi_max, hj_max, di_max, dj_min, max_index_i,
max_index_j, &first_pi, &last_pj, e); max_index_j, &first_pi, &last_pj, max_active_bin);
/* Limits of the outer loops. */ /* Limits of the outer loops. */
int first_pi_loop = first_pi; int first_pi_loop = first_pi;
...@@ -1519,7 +1445,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1519,7 +1445,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
/* Get the number of particles read into the ci cache. */ /* Get the number of particles read into the ci cache. */
int ci_cache_count = count_i - first_pi_align; int ci_cache_count = count_i - first_pi_align;
if (cell_is_active(ci, e)) { if (active_ci) {
/* Loop over the parts in ci until nothing is within range in cj. */ /* Loop over the parts in ci until nothing is within range in cj. */
for (int pid = count_i - 1; pid >= first_pi_loop; pid--) { for (int pid = count_i - 1; pid >= first_pi_loop; pid--) {
...@@ -1673,7 +1599,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci, ...@@ -1673,7 +1599,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
} /* loop over the parts in ci. */ } /* loop over the parts in ci. */
} }
if (cell_is_active(cj, e)) { if (active_cj) {
/* Loop over the parts in cj until nothing is within range in ci. */ /* Loop over the parts in cj until nothing is within range in ci. */
for (int pjd = 0; pjd <= last_pj_loop; pjd++) { for (int pjd = 0; pjd <= last_pj_loop; pjd++) {
......
...@@ -40,6 +40,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *restrict ci, ...@@ -40,6 +40,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *restrict ci,
struct cell *restrict cj, const int sid, struct cell *restrict cj, const int sid,
const double *shift); const double *shift);
void runner_dopair2_force_vec(struct runner *r, struct cell *restrict ci, void runner_dopair2_force_vec(struct runner *r, struct cell *restrict ci,
struct cell *restrict cj); struct cell *restrict cj, const int sid,
const double *shift);
#endif /* SWIFT_RUNNER_VEC_H */ #endif /* SWIFT_RUNNER_VEC_H */
...@@ -33,15 +33,18 @@ ...@@ -33,15 +33,18 @@
#if defined(WITH_VECTORIZATION) #if defined(WITH_VECTORIZATION)
#define DOSELF2 runner_doself2_force_vec #define DOSELF2 runner_doself2_force_vec
#define DOPAIR2 runner_dopair2_force_vec #define DOPAIR2 runner_dopair2_branch_force
#define DOSELF2_NAME "runner_doself2_force_vec" #define DOSELF2_NAME "runner_doself2_force_vec"
#define DOPAIR2_NAME "runner_dopair2_force_vec" #define DOPAIR2_NAME "runner_dopair2_force_vec"
#endif #endif
#ifndef DOSELF2 #ifndef DOSELF2
#define DOSELF2 runner_doself2_force