diff --git a/examples/main.c b/examples/main.c index a86cdc60a7473e84e8fba8c32417e7e19ebaeb60..c0b191d58ff6ac2acfdf6f6d184739ff17799f0c 100644 --- a/examples/main.c +++ b/examples/main.c @@ -153,12 +153,6 @@ int main(int argc, char *argv[]) { #endif -/* Let's pin the main thread */ -#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE) - if (((ENGINE_POLICY)&engine_policy_setaffinity) == engine_policy_setaffinity) - engine_pin(); -#endif - /* Welcome to SWIFT, you made the right choice */ if (myrank == 0) greetings(); @@ -329,6 +323,12 @@ int main(int argc, char *argv[]) { return 1; } +/* Let's pin the main thread, now we know if affinity will be used. */ +#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE) + if (with_aff && ((ENGINE_POLICY)&engine_policy_setaffinity) == engine_policy_setaffinity) + engine_pin(); +#endif + /* Genesis 1.1: And then, there was time ! */ clocks_set_cpufreq(cpufreq); diff --git a/src/engine.c b/src/engine.c index 65d541bab1c428aa6e0c719a847554563aa0b489..47766a44545812c4d86970c8a09c9cbd970218a9 100644 --- a/src/engine.c +++ b/src/engine.c @@ -4468,8 +4468,8 @@ void engine_init(struct engine *e, struct space *s, /* Avoid (unexpected) interference between engine and runner threads. We can * do this once we've made at least one call to engine_entry_affinity and * maybe numa_node_of_cpu(sched_getcpu()), even if the engine isn't already - * pinned. Also unpin this when asked to not pin at all (!with_aff). */ - engine_unpin(); + * pinned. */ + if (with_aff) engine_unpin(); #endif if (with_aff) { diff --git a/src/hydro/DebugInteractions/hydro.h b/src/hydro/DebugInteractions/hydro.h index dfffab642ae99e3bbb1928984e8daaf44d76c8eb..194c9e52bcc549f6633e34e571aee2a9012e7d0f 100644 --- a/src/hydro/DebugInteractions/hydro.h +++ b/src/hydro/DebugInteractions/hydro.h @@ -24,6 +24,7 @@ * @brief Empty SPH implementation used solely to test the SELF/PAIR routines. */ +#include "equation_of_state.h" #include "hydro_properties.h" #include "hydro_space.h" #include "part.h" diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 5812b9641dbb371eff74c1b9a1c4c68b8f1d3f79..473c0604ee60d755f8aef87a96a37bef30ca6279 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -897,8 +897,8 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; /* Pick-out the sorted lists. */ - const struct entry *restrict sort_i = ci->sort[sid]; - const struct entry *restrict sort_j = cj->sort[sid]; + struct entry *restrict sort_i = ci->sort[sid]; + struct entry *restrict sort_j = cj->sort[sid]; #ifdef SWIFT_DEBUG_CHECKS /* Check that the dx_max_sort values in the cell are indeed an upper @@ -940,117 +940,259 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { const int count_j = cj->count; struct part *restrict parts_i = ci->parts; struct part *restrict parts_j = cj->parts; + + /* Maximal displacement since last rebuild */ + const double dx_max = (ci->dx_max_sort + cj->dx_max_sort); + + /* Position on the axis of the particles closest to the interface */ const double di_max = sort_i[count_i - 1].d; const double dj_min = sort_j[0].d; - const double dx_max = (ci->dx_max_sort + cj->dx_max_sort); - if (cell_is_active(ci, e)) { + /* Shifts to apply to the particles to be in a good frame */ + const double shift_i[3] = {cj->loc[0] + shift[0], cj->loc[1] + shift[1], + cj->loc[2] + shift[2]}; + const double shift_j[3] = {cj->loc[0], cj->loc[1], cj->loc[2]}; + + int count_active_i = 0, count_active_j = 0; + struct entry *restrict sort_active_i = NULL, *restrict sort_active_j = NULL; + + if (cell_is_all_active(ci, e)) { + /* If everybody is active don't bother copying */ + sort_active_i = sort_i; + count_active_i = count_i; + } else if (cell_is_active(ci, e)) { + if (posix_memalign((void *)&sort_active_i, SWIFT_CACHE_ALIGNMENT, + sizeof(struct entry) * count_i) != 0) + error("Failed to allocate active sortlists."); + + /* Collect the active particles in ci */ + for (int k = 0; k < count_i; k++) { + if (part_is_active(&parts_i[sort_i[k].i], e)) { + sort_active_i[count_active_i] = sort_i[k]; + count_active_i++; + } + } + } - /* Loop over the parts in ci until nothing is within range in cj. - * We start from the centre and move outwards. */ - for (int pid = count_i - 1; pid >= 0; pid--) { + if (cell_is_all_active(cj, e)) { + /* If everybody is active don't bother copying */ + sort_active_j = sort_j; + count_active_j = count_j; + } else if (cell_is_active(cj, e)) { + if (posix_memalign((void *)&sort_active_j, SWIFT_CACHE_ALIGNMENT, + sizeof(struct entry) * count_j) != 0) + error("Failed to allocate active sortlists."); + + /* Collect the active particles in cj */ + for (int k = 0; k < count_j; k++) { + if (part_is_active(&parts_j[sort_j[k].i], e)) { + sort_active_j[count_active_j] = sort_j[k]; + count_active_j++; + } + } + } - /* Get a hold of the ith part in ci. */ - struct part *pi = &parts_i[sort_i[pid].i]; - const float hi = pi->h; + /* Loop over *all* the parts in ci starting from the centre until + we are out of range of anything in cj (using the maximal hi). */ + for (int pid = count_i - 1; + pid >= 0 && + sort_i[pid].d + hi_max * kernel_gamma + dx_max - rshift > dj_min; + pid--) { - /* Skip inactive particles */ - if (!part_is_active(pi, e)) continue; + /* Get a hold of the ith part in ci. */ + struct part *pi = &parts_i[sort_i[pid].i]; + const float hi = pi->h; - /* Is there anything we need to interact with ? */ - const double di = - sort_i[pid].d + max(hi, hj_max) * kernel_gamma + dx_max - rshift; - if (di < dj_min) continue; + /* Is there anything we need to interact with (for this specific hi) ? */ + const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; + if (di < dj_min) continue; - /* Where do we have to stop when looping over cell j? */ - int last_pj = count_j - 1; - while (sort_j[last_pj].d > di && last_pj > 0) last_pj--; + /* Get some additional information about pi */ + const float hig2 = hi * hi * kernel_gamma2; + const float pix = pi->x[0] - shift_i[0]; + const float piy = pi->x[1] - shift_i[1]; + const float piz = pi->x[2] - shift_i[2]; - last_pj += 1; - if (last_pj >= count_j) last_pj = count_j - 1; + /* Do we need to only check active parts in cj + (i.e. pi does not need updating) ? */ + if (!part_is_active(pi, e)) { - /* Get some additional information about pi */ - const float hig2 = hi * hi * kernel_gamma2; - const float pix = pi->x[0] - (cj->loc[0] + shift[0]); - const float piy = pi->x[1] - (cj->loc[1] + shift[1]); - const float piz = pi->x[2] - (cj->loc[2] + shift[2]); + /* Loop over the *active* parts in cj within range of pi */ + for (int pjd = 0; pjd < count_active_j && sort_active_j[pjd].d < di; + pjd++) { + + /* Recover pj */ + struct part *pj = &parts_j[sort_active_j[pjd].i]; + const float hj = pj->h; + + /* Get the position of pj in the right frame */ + const float pjx = pj->x[0] - shift_j[0]; + const float pjy = pj->x[1] - shift_j[1]; + const float pjz = pj->x[2] - shift_j[2]; + + /* Compute the pairwise distance. */ + float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? + (note that we will do the other condition in the reverse loop) */ + if (r2 < hig2) { + IACT_NONSYM(r2, dx, hj, hi, pj, pi); + } + } /* loop over the active parts in cj. */ + } - /* Now loop over the relevant particles in cj */ - for (int pjd = 0; pjd <= last_pj; ++pjd) { + else { /* pi is active, we may need to update pi and pj */ + + /* Loop over *all* the parts in cj in range of pi. */ + for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) { /* Recover pj */ struct part *pj = &parts_j[sort_j[pjd].i]; const float hj = pj->h; - const float hjg2 = hj * hj * kernel_gamma2; - const float pjx = pj->x[0] - cj->loc[0]; - const float pjy = pj->x[1] - cj->loc[1]; - const float pjz = pj->x[2] - cj->loc[2]; + + /* Get the position of pj in the right frame */ + const float pjx = pj->x[0] - shift_j[0]; + const float pjy = pj->x[1] - shift_j[1]; + const float pjz = pj->x[2] - shift_j[2]; /* Compute the pairwise distance. */ float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - if (r2 < hig2 || r2 < hjg2) { +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? + (note that we will do the other condition in the reverse loop) */ + if (r2 < hig2) { - IACT_NONSYM(r2, dx, hi, hj, pi, pj); + /* Does pj need to be updated too? */ + if (part_is_active(pj, e)) + IACT(r2, dx, hi, hj, pi, pj); + else + IACT_NONSYM(r2, dx, hi, hj, pi, pj); } - } /* Loop over the parts in cj */ - } /* Loop over the parts in ci */ - } /* Cell ci is active */ - - if (cell_is_active(cj, e)) { - - /* Loop over the parts in cj until nothing is within range in ci. - * We start from the centre and move outwards. */ - for (int pjd = 0; pjd < count_j; pjd++) { + } /* loop over the parts in cj. */ + } /* Is pi active? */ + } /* Loop over all ci */ + + /* Loop over *all* the parts in cj starting from the centre until + we are out of range of anything in ci (using the maximal hj). */ + for (int pjd = 0; + pjd < count_j && + sort_j[pjd].d - hj_max * kernel_gamma - dx_max < di_max - rshift; + pjd++) { + + /* Get a hold of the jth part in cj. */ + struct part *pj = &parts_j[sort_j[pjd].i]; + const float hj = pj->h; + + /* Is there anything we need to interact with (for this specific hj) ? */ + const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max; + if (dj > di_max - rshift) continue; + + /* Get some additional information about pj */ + const float hjg2 = hj * hj * kernel_gamma2; + const float pjx = pj->x[0] - shift_j[0]; + const float pjy = pj->x[1] - shift_j[1]; + const float pjz = pj->x[2] - shift_j[2]; + + /* Do we need to only check active parts in ci + (i.e. pj does not need updating) ? */ + if (!part_is_active(pj, e)) { + + /* Loop over the *active* parts in ci. */ + for (int pid = count_active_i - 1; + pid >= 0 && sort_active_i[pid].d - rshift > dj; pid--) { - /* Get a hold of the jth part in cj. */ - struct part *pj = &parts_j[sort_j[pjd].i]; - const float hj = pj->h; + /* Recover pi */ + struct part *pi = &parts_i[sort_active_i[pid].i]; + const float hi = pi->h; + const float hig2 = hi * hi * kernel_gamma2; - /* Skip inactive particles */ - if (!part_is_active(pj, e)) continue; + /* Get the position of pi in the right frame */ + const float pix = pi->x[0] - shift_i[0]; + const float piy = pi->x[1] - shift_i[1]; + const float piz = pi->x[2] - shift_i[2]; - /* Is there anything we need to interact with ? */ - const double dj = sort_j[pjd].d - max(hj, hi_max) * kernel_gamma - dx_max; - if (dj > di_max - rshift) continue; + /* Compute the pairwise distance. */ + float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - /* Where do we have to stop when looping over cell i? */ - int first_pi = 0; - while (sort_i[first_pi].d - rshift < dj && first_pi < count_i - 1) - first_pi++; +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif - first_pi -= 1; - if (first_pi < 0) first_pi = 0; + /* Hit or miss? + (note that we must avoid the r2 < hig2 cases we already processed) */ + if (r2 < hjg2 && r2 > hig2) { + IACT_NONSYM(r2, dx, hi, hj, pi, pj); + } + } /* loop over the active parts in ci. */ + } - /* Get some additional information about pj */ - const float hjg2 = hj * hj * kernel_gamma2; - const float pjx = pj->x[0] - cj->loc[0]; - const float pjy = pj->x[1] - cj->loc[1]; - const float pjz = pj->x[2] - cj->loc[2]; + else { /* pj is active, we may need to update pj and pi */ - /* Now loop over the relevant particles in ci */ - for (int pid = count_i - 1; pid >= first_pi; --pid) { + /* Loop over *all* the parts in ci. */ + for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d - rshift > dj; + pid--) { /* Recover pi */ struct part *pi = &parts_i[sort_i[pid].i]; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; - const float pix = pi->x[0] - (cj->loc[0] + shift[0]); - const float piy = pi->x[1] - (cj->loc[1] + shift[1]); - const float piz = pi->x[2] - (cj->loc[2] + shift[2]); + + /* Get the position of pi in the right frame */ + const float pix = pi->x[0] - shift_i[0]; + const float piy = pi->x[1] - shift_i[1]; + const float piz = pi->x[2] - shift_i[2]; /* Compute the pairwise distance. */ float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - if (r2 < hjg2 || r2 < hig2) { +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif - IACT_NONSYM(r2, dx, hj, hi, pj, pi); + /* Hit or miss? + (note that we must avoid the r2 < hig2 cases we already processed) */ + if (r2 < hjg2 && r2 > hig2) { + + /* Does pi need to be updated too? */ + if (part_is_active(pi, e)) + IACT(r2, dx, hj, hi, pj, pi); + else + IACT_NONSYM(r2, dx, hj, hi, pj, pi); } - } /* Loop over the parts in ci */ - } /* Loop over the parts in cj */ - } /* Cell cj is active */ + } /* loop over the parts in ci. */ + } /* Is pj active? */ + } /* Loop over all cj */ + + /* Clean-up if necessary */ + if (cell_is_active(ci, e) && !cell_is_all_active(ci, e)) free(sort_active_i); + if (cell_is_active(cj, e) && !cell_is_all_active(cj, e)) free(sort_active_j); TIMER_TOC(TIMER_DOPAIR); } diff --git a/src/statistics.c b/src/statistics.c index 4ec798aefda082b279742f643cce6e23649bc069..d50143b9ebf6fbbfa2715c2382c7e3eacbf13eda 100644 --- a/src/statistics.c +++ b/src/statistics.c @@ -301,9 +301,11 @@ void stats_collect(const struct space *s, struct statistics *stats) { */ void stats_finalize(struct statistics *stats) { - stats->centre_of_mass[0] /= stats->mass; - stats->centre_of_mass[1] /= stats->mass; - stats->centre_of_mass[2] /= stats->mass; + if (stats->mass > 0.) { + stats->centre_of_mass[0] /= stats->mass; + stats->centre_of_mass[1] /= stats->mass; + stats->centre_of_mass[2] /= stats->mass; + } } /**