Commit 6a1b49d8 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'no_unnecessary_loop_in_pair' into 'master'

No unnecessary loop in pair

This implements two tiny improvements:
 
 - In the PAIR task, only loop over the particles in a cell that has active particles.
 - In the ghost task, construct the list of indices of active particles only. No need to loop over the inactive ones.

See merge request !313
parents f9c1d350 7173903b
......@@ -591,7 +591,6 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
struct part *restrict parts = c->parts;
struct xpart *restrict xparts = c->xparts;
int redo, count = c->count;
const struct engine *e = r->e;
const float target_wcount = e->hydro_properties->target_neighbours;
const float max_wcount =
......@@ -599,6 +598,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
const float min_wcount =
target_wcount - e->hydro_properties->delta_neighbours;
const int max_smoothing_iter = e->hydro_properties->max_smoothing_iterations;
int redo = 0, count = 0;
TIMER_TIC;
......@@ -611,11 +611,15 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
if (c->progeny[k] != NULL) runner_do_ghost(r, c->progeny[k], 0);
} else {
/* Init the IDs that have to be updated. */
/* Init the list of active particles that have to be updated. */
int *pid = NULL;
if ((pid = malloc(sizeof(int) * count)) == NULL)
if ((pid = malloc(sizeof(int) * c->count)) == NULL)
error("Can't allocate memory for pid.");
for (int k = 0; k < count; k++) pid[k] = k;
for (int k = 0; k < c->count; k++)
if (part_is_active(&parts[k], e)) {
pid[count] = k;
++count;
}
/* While there are particles that need to be updated... */
for (int num_reruns = 0; count > 0 && num_reruns < max_smoothing_iter;
......@@ -624,18 +628,23 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
/* Reset the redo-count. */
redo = 0;
/* Loop over the parts in this cell. */
/* Loop over the remaining active parts in this cell. */
for (int i = 0; i < count; i++) {
/* Get a direct pointer on the part. */
struct part *restrict p = &parts[pid[i]];
struct xpart *restrict xp = &xparts[pid[i]];
#ifdef SWIFT_DEBUG_CHECKS
/* Is this part within the timestep? */
if (part_is_active(p, e)) {
if (!part_is_active(p, e)) error("Ghost applied to inactive particle");
#endif
/* Finish the density calculation */
hydro_end_density(p);
/* Finish the density calculation */
hydro_end_density(p);
/* Did we get the right number of neighbours? */
if (p->density.wcount > max_wcount || p->density.wcount < min_wcount) {
float h_corr = 0.f;
......@@ -651,37 +660,32 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) {
h_corr = (h_corr > -0.5f * p->h) ? h_corr : -0.5f * p->h;
}
/* Did we get the right number density? */
if (p->density.wcount > max_wcount ||
p->density.wcount < min_wcount) {
/* Ok, correct then */
p->h += h_corr;
/* Ok, correct then */
p->h += h_corr;
/* Flag for another round of fun */
pid[redo] = pid[i];
redo += 1;
/* Flag for another round of fun */
pid[redo] = pid[i];
redo += 1;
/* Re-initialise everything */
hydro_init_part(p);
/* Re-initialise everything */
hydro_init_part(p);
/* Off we go ! */
continue;
}
/* Off we go ! */
continue;
}
/* We now have a particle whose smoothing length has converged */
/* We now have a particle whose smoothing length has converged */
/* As of here, particle force variables will be set. */
/* As of here, particle force variables will be set. */
/* Compute variables required for the force loop */
hydro_prepare_force(p, xp);
/* Compute variables required for the force loop */
hydro_prepare_force(p, xp);
/* The particle force values are now set. Do _NOT_
try to read any particle density variables! */
/* The particle force values are now set. Do _NOT_
try to read any particle density variables! */
/* Prepare the particle for the force loop over neighbours */
hydro_reset_acceleration(p);
}
/* Prepare the particle for the force loop over neighbours */
hydro_reset_acceleration(p);
}
/* We now need to treat the particles whose smoothing length had not
......
......@@ -776,145 +776,153 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {
const double dj_min = sort_j[0].d;
const float dx_max = (ci->dx_max + cj->dx_max);
/* Loop over the parts in ci. */
for (int pid = count_i - 1;
pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
if (cell_is_active(ci, e)) {
/* Get a hold of the ith part in ci. */
struct part *restrict pi = &parts_i[sort_i[pid].i];
if (!part_is_active(pi, e)) continue;
const float hi = pi->h;
const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
if (di < dj_min) continue;
/* Loop over the parts in ci. */
for (int pid = count_i - 1;
pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
double pix[3];
for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
const float hig2 = hi * hi * kernel_gamma2;
/* Get a hold of the ith part in ci. */
struct part *restrict pi = &parts_i[sort_i[pid].i];
if (!part_is_active(pi, e)) continue;
const float hi = pi->h;
const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
if (di < dj_min) continue;
/* Loop over the parts in cj. */
for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
double pix[3];
for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
const float hig2 = hi * hi * kernel_gamma2;
/* Get a pointer to the jth particle. */
struct part *restrict pj = &parts_j[sort_j[pjd].i];
/* Loop over the parts in cj. */
for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
/* Compute the pairwise distance. */
float r2 = 0.0f;
float dx[3];
for (int k = 0; k < 3; k++) {
dx[k] = pix[k] - pj->x[k];
r2 += dx[k] * dx[k];
}
/* Get a pointer to the jth particle. */
struct part *restrict pj = &parts_j[sort_j[pjd].i];
/* Compute the pairwise distance. */
float r2 = 0.0f;
float dx[3];
for (int k = 0; k < 3; k++) {
dx[k] = pix[k] - pj->x[k];
r2 += dx[k] * dx[k];
}
#ifdef SWIFT_DEBUG_CHECKS
/* Check that particles have been drifted to the current time */
if (pi->ti_drift != e->ti_current)
error("Particle pi not drifted to current time");
if (pj->ti_drift != e->ti_current)
error("Particle pj not drifted to current time");
/* Check that particles have been drifted to the current time */
if (pi->ti_drift != e->ti_current)
error("Particle pi not drifted to current time");
if (pj->ti_drift != e->ti_current)
error("Particle pj not drifted to current time");
#endif
/* Hit or miss? */
if (r2 < hig2) {
/* Hit or miss? */
if (r2 < hig2) {
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
#else
/* Add this interaction to the queue. */
r2q[icount] = r2;
dxq[3 * icount + 0] = dx[0];
dxq[3 * icount + 1] = dx[1];
dxq[3 * icount + 2] = dx[2];
hiq[icount] = hi;
hjq[icount] = pj->h;
piq[icount] = pi;
pjq[icount] = pj;
icount += 1;
/* Add this interaction to the queue. */
r2q[icount] = r2;
dxq[3 * icount + 0] = dx[0];
dxq[3 * icount + 1] = dx[1];
dxq[3 * icount + 2] = dx[2];
hiq[icount] = hi;
hjq[icount] = pj->h;
piq[icount] = pi;
pjq[icount] = pj;
icount += 1;
/* Flush? */
if (icount == VEC_SIZE) {
IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
icount = 0;
}
/* Flush? */
if (icount == VEC_SIZE) {
IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
icount = 0;
}
#endif
}
}
} /* loop over the parts in cj. */
} /* loop over the parts in cj. */
} /* loop over the parts in ci. */
} /* loop over the parts in ci. */
/* Loop over the parts in cj. */
for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
pjd++) {
} /* Cell ci is active */
/* Get a hold of the jth part in cj. */
struct part *restrict pj = &parts_j[sort_j[pjd].i];
if (!part_is_active(pj, e)) continue;
const float hj = pj->h;
const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
if (dj > di_max) continue;
if (cell_is_active(cj, e)) {
double pjx[3];
for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
const float hjg2 = hj * hj * kernel_gamma2;
/* Loop over the parts in cj. */
for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
pjd++) {
/* Loop over the parts in ci. */
for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
/* Get a hold of the jth part in cj. */
struct part *restrict pj = &parts_j[sort_j[pjd].i];
if (!part_is_active(pj, e)) continue;
const float hj = pj->h;
const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
if (dj > di_max) continue;
/* Get a pointer to the jth particle. */
struct part *restrict pi = &parts_i[sort_i[pid].i];
double pjx[3];
for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
const float hjg2 = hj * hj * kernel_gamma2;
/* Compute the pairwise distance. */
float r2 = 0.0f;
float dx[3];
for (int k = 0; k < 3; k++) {
dx[k] = pjx[k] - pi->x[k];
r2 += dx[k] * dx[k];
}
/* Loop over the parts in ci. */
for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
/* Get a pointer to the jth particle. */
struct part *restrict pi = &parts_i[sort_i[pid].i];
/* Compute the pairwise distance. */
float r2 = 0.0f;
float dx[3];
for (int k = 0; k < 3; k++) {
dx[k] = pjx[k] - pi->x[k];
r2 += dx[k] * dx[k];
}
#ifdef SWIFT_DEBUG_CHECKS
/* Check that particles have been drifted to the current time */
if (pi->ti_drift != e->ti_current)
error("Particle pi not drifted to current time");
if (pj->ti_drift != e->ti_current)
error("Particle pj not drifted to current time");
/* Check that particles have been drifted to the current time */
if (pi->ti_drift != e->ti_current)
error("Particle pi not drifted to current time");
if (pj->ti_drift != e->ti_current)
error("Particle pj not drifted to current time");
#endif
/* Hit or miss? */
if (r2 < hjg2) {
/* Hit or miss? */
if (r2 < hjg2) {
#ifndef WITH_OLD_VECTORIZATION
IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
#else
/* Add this interaction to the queue. */
r2q[icount] = r2;
dxq[3 * icount + 0] = dx[0];
dxq[3 * icount + 1] = dx[1];
dxq[3 * icount + 2] = dx[2];
hiq[icount] = hj;
hjq[icount] = pi->h;
piq[icount] = pj;
pjq[icount] = pi;
icount += 1;
/* Add this interaction to the queue. */
r2q[icount] = r2;
dxq[3 * icount + 0] = dx[0];
dxq[3 * icount + 1] = dx[1];
dxq[3 * icount + 2] = dx[2];
hiq[icount] = hj;
hjq[icount] = pi->h;
piq[icount] = pj;
pjq[icount] = pi;
icount += 1;
/* Flush? */
if (icount == VEC_SIZE) {
IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
icount = 0;
}
/* Flush? */
if (icount == VEC_SIZE) {
IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
icount = 0;
}
#endif
}
}
} /* loop over the parts in ci. */
} /* loop over the parts in cj. */
} /* loop over the parts in ci. */
} /* Cell cj is active */
#ifdef WITH_OLD_VECTORIZATION
/* Pick up any leftovers. */
......
......@@ -27,91 +27,100 @@ void DOPAIR1_NOSORT(struct runner *r, struct cell *ci, struct cell *cj) {
struct part *restrict parts_i = ci->parts;
struct part *restrict parts_j = cj->parts;
/* Loop over the parts in ci. */
for (int pid = 0; pid < count_i; pid++) {
/* Get a hold of the ith part in ci. */
struct part *restrict pi = &parts_i[pid];
if (!part_is_active(pi, e)) continue;
const float hi = pi->h;
if (cell_is_active(ci, e)) {
double pix[3];
for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
const float hig2 = hi * hi * kernel_gamma2;
/* Loop over the parts in ci. */
for (int pid = 0; pid < count_i; pid++) {
/* Loop over the parts in cj. */
for (int pjd = 0; pjd < count_j; pjd++) {
/* Get a hold of the ith part in ci. */
struct part *restrict pi = &parts_i[pid];
if (!part_is_active(pi, e)) continue;
const float hi = pi->h;
/* Get a pointer to the jth particle. */
struct part *restrict pj = &parts_j[pjd];
double pix[3];
for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
const float hig2 = hi * hi * kernel_gamma2;
/* Compute the pairwise distance. */
float r2 = 0.0f;
float dx[3];
for (int k = 0; k < 3; k++) {
dx[k] = pix[k] - pj->x[k];
r2 += dx[k] * dx[k];
}
/* Loop over the parts in cj. */
for (int pjd = 0; pjd < count_j; pjd++) {
/* Get a pointer to the jth particle. */
struct part *restrict pj = &parts_j[pjd];
/* Compute the pairwise distance. */
float r2 = 0.0f;
float dx[3];
for (int k = 0; k < 3; k++) {
dx[k] = pix[k] - pj->x[k];
r2 += dx[k] * dx[k];
}
#ifdef SWIFT_DEBUG_CHECKS
/* Check that particles have been drifted to the current time */
if (pi->ti_drift != e->ti_current)
error("Particle pi not drifted to current time");
if (pj->ti_drift != e->ti_current)
error("Particle pj not drifted to current time");
/* Check that particles have been drifted to the current time */
if (pi->ti_drift != e->ti_current)
error("Particle pi not drifted to current time");
if (pj->ti_drift != e->ti_current)
error("Particle pj not drifted to current time");
#endif
/* Hit or miss? */
if (r2 < hig2) {
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
}
/* Hit or miss? */
if (r2 < hig2) {
IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
}
} /* loop over the parts in cj. */
} /* loop over the parts in cj. */
} /* loop over the parts in ci. */
} /* loop over the parts in ci. */
/* Loop over the parts in cj. */
for (int pjd = 0; pjd < count_j; pjd++) {
} /* Cell ci is active */
/* Get a hold of the ith part in ci. */
struct part *restrict pj = &parts_j[pjd];
if (!part_is_active(pj, e)) continue;
const float hj = pj->h;
if (cell_is_active(cj, e)) {
double pjx[3];
for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
const float hjg2 = hj * hj * kernel_gamma2;
/* Loop over the parts in cj. */
for (int pjd = 0; pjd < count_j; pjd++) {
/* Loop over the parts in ci. */
for (int pid = 0; pid < count_i; pid++) {
/* Get a hold of the ith part in ci. */
struct part *restrict pj = &parts_j[pjd];
if (!part_is_active(pj, e)) continue;
const float hj = pj->h;
/* Get a pointer to the jth particle. */
struct part *restrict pi = &parts_i[pid];
double pjx[3];
for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
const float hjg2 = hj * hj * kernel_gamma2;
/* Compute the pairwise distance. */
float r2 = 0.0f;
float dx[3];
for (int k = 0; k < 3; k++) {
dx[k] = pjx[k] - pi->x[k];
r2 += dx[k] * dx[k];
}
/* Loop over the parts in ci. */
for (int pid = 0; pid < count_i; pid++) {
/* Get a pointer to the jth particle. */
struct part *restrict pi = &parts_i[pid];
/* Compute the pairwise distance. */
float r2 = 0.0f;
float dx[3];
for (int k = 0; k < 3; k++) {
dx[k] = pjx[k] - pi->x[k];
r2 += dx[k] * dx[k];
}
#ifdef SWIFT_DEBUG_CHECKS
/* Check that particles have been drifted to the current time */
if (pj->ti_drift != e->ti_current)
error("Particle pj not drifted to current time");
if (pi->ti_drift != e->ti_current)
error("Particle pi not drifted to current time");
/* Check that particles have been drifted to the current time */
if (pj->ti_drift != e->ti_current)
error("Particle pj not drifted to current time");
if (pi->ti_drift != e->ti_current)
error("Particle pi not drifted to current time");
#endif
/* Hit or miss? */
if (r2 < hjg2) {
IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
}
/* Hit or miss? */
if (r2 < hjg2) {
IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
}
} /* loop over the parts in ci. */
} /* loop over the parts in ci. */
} /* loop over the parts in cj. */
} /* loop over the parts in cj. */
} /* Cell cj is active */
TIMER_TOC(TIMER_DOPAIR);
}
......@@ -144,93 +153,101 @@ void DOPAIR2_NOSORT(struct runner *r, struct cell *ci, struct cell *cj) {
struct part *restrict parts_i = ci->parts;
struct part *restrict parts_j = cj->parts;
/* Loop over the parts in ci. */
for (int pid = 0; pid < count_i; pid++) {
if (cell_is_active(ci, e)) {
/* Get a hold of the ith part in ci. */
struct part *restrict pi = &parts_i[pid];
if (!part_is_active(pi, e)) continue;
const float hi = pi->h;
/* Loop over the parts in ci. */
for (int pid = 0; pid < count_i; pid++) {
double pix[3];
for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
const float hig2 = hi * hi * kernel_gamma2;
/* Get a hold of the ith part in ci. */
struct part *restrict pi = &parts_i[pid];
if (!part_is_active(pi, e)) continue;
const float hi = pi->h;
/* Loop over the parts in cj. */
for (int pjd = 0; pjd < count_j; pjd++) {
double pix[3];
for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
const float hig2 = hi * hi * kernel_gamma2;
/* Get a pointer to the jth particle. */
struct part *restrict pj = &parts_j[pjd];
const float hjg2 = pj->h * pj->h * kernel_gamma2;
/* Loop over the parts in cj. */
for (int pjd = 0; pjd < count_j; pjd++) {
/* Compute the pairwise distance. */
float r2 = 0.0f;
float dx[3];
for (int k = 0; k < 3; k++) {
dx[k] = pix[k] - pj->x[k];
r2 += dx[k] * dx[k];
}
/* Get a pointer to the jth particle. */
struct part *restrict pj = &parts_j[pjd];
const float hjg2 = pj->h * pj->h * kernel_gamma2;
/* Compute the pairwise distance. */
float r2 = 0.0f;
float dx[3];
for (int k = 0; k < 3; k++) {
dx[k] = pix[k] - pj->x[k];
r2 += dx[k] * dx[k];
}
#ifdef SWIFT_DEBUG_CHECKS
/* Check that particles have been drifted to the current time */
if (pi->ti_drift != e->ti_current)
error("Particle pi not drifted to current time");
if (pj->ti_drift != e->ti_current)
error("Particle pj not drifted to current time");
/* Check that particles have been drifted to the current time */
if (pi->ti_drift != e->ti_current)
error("Particle pi not drifted to current time");
if (pj->ti_drift != e->ti_current)
error("Particle pj not drifted to current time");
#endif
/* Hit or miss? */