diff --git a/src/Makefile.am b/src/Makefile.am index 5982be729020fff7ef732a78b5155ad0f5a430eb..5434241c0eb203f069901dd9be99640b91dd8e90 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -59,7 +59,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ # Include files for distribution, not installation. nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h \ - units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \ + runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \ dimension.h equation_of_state.h \ gravity.h gravity_io.h \ gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \ diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 7b40343954e628f670df0b02f65f31b55de47348..6fa04018088a05ed0319489e88677c3ebcabd0f2 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -32,9 +32,18 @@ #define _DOPAIR2(f) PASTE(runner_dopair2, f) #define DOPAIR2 _DOPAIR2(FUNCTION) +#define _DOPAIR1_NOSORT(f) PASTE(runner_dopair1_nosort, f) +#define DOPAIR1_NOSORT _DOPAIR1_NOSORT(FUNCTION) + +#define _DOPAIR2_NOSORT(f) PASTE(runner_dopair2_nosort, f) +#define DOPAIR2_NOSORT _DOPAIR2_NOSORT(FUNCTION) + #define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f) #define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION) +#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f) +#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION) + #define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f) #define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION) @@ -98,6 +107,8 @@ #define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f) #define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION) +#include "runner_doiact_nosort.h" + /** * @brief Compute the interactions between a cell pair. * @@ -422,6 +433,13 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, struct engine *e = r->e; +#ifdef WITH_MPI + if (ci->nodeID != cj->nodeID) { + DOPAIR_SUBSET_NOSORT(r, ci, parts_i, ind, count, cj); + return; + } +#endif + #ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); @@ -707,6 +725,13 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { const struct engine *restrict e = r->e; +#ifdef WITH_MPI + if (ci->nodeID != cj->nodeID) { + DOPAIR1_NOSORT(r, ci, cj); + return; + } +#endif + #ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); @@ -912,6 +937,13 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { struct engine *restrict e = r->e; +#ifdef WITH_MPI + if (ci->nodeID != cj->nodeID) { + DOPAIR2_NOSORT(r, ci, cj); + return; + } +#endif + #ifdef WITH_OLD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); diff --git a/src/runner_doiact_nosort.h b/src/runner_doiact_nosort.h new file mode 100644 index 0000000000000000000000000000000000000000..057dd756b4a8d636253eccd8808417ac452e193d --- /dev/null +++ b/src/runner_doiact_nosort.h @@ -0,0 +1,305 @@ + +/** + * @brief Compute the interactions between a cell pair. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + */ +void DOPAIR1_NOSORT(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *e = r->e; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + + if (!cell_is_drifted(ci, e)) cell_drift(ci, e); + if (!cell_is_drifted(cj, e)) cell_drift(cj, e); + + const int count_i = ci->count; + const int count_j = cj->count; + struct part *restrict parts_i = ci->parts; + struct part *restrict parts_j = cj->parts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + space_getsid(e->s, &ci, &cj, shift); + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pi = &parts_i[pid]; + if (!part_is_active(pi, e)) continue; + const float hi = pi->h; + + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hig2 = hi * hi * kernel_gamma2; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hig2) { + IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); + } + + } /* loop over the parts in cj. */ + + } /* loop over the parts in ci. */ + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pj = &parts_j[pjd]; + if (!part_is_active(pj, e)) continue; + const float hj = pj->h; + + double pjx[3]; + for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; + const float hjg2 = hj * hj * kernel_gamma2; + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pi = &parts_i[pid]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pjx[k] - pi->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hjg2) { + IACT_NONSYM(r2, dx, hj, pi->h, pj, pi); + } + + } /* loop over the parts in ci. */ + + } /* loop over the parts in cj. */ + + TIMER_TOC(TIMER_DOPAIR); +} + +/** + * @brief Compute the interactions between a cell pair. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + */ +void DOPAIR2_NOSORT(struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *e = r->e; + + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + + if (!cell_is_drifted(ci, e)) cell_drift(ci, e); + if (!cell_is_drifted(cj, e)) cell_drift(cj, e); + + const int count_i = ci->count; + const int count_j = cj->count; + struct part *restrict parts_i = ci->parts; + struct part *restrict parts_j = cj->parts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + space_getsid(e->s, &ci, &cj, shift); + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pi = &parts_i[pid]; + if (!part_is_active(pi, e)) continue; + const float hi = pi->h; + + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hig2 = hi * hi * kernel_gamma2; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + const float hjg2 = pj->h * pj->h * kernel_gamma2; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hig2 || r2 < hjg2) { + IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); + } + + } /* loop over the parts in cj. */ + + } /* loop over the parts in ci. */ + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pj = &parts_j[pjd]; + if (!part_is_active(pj, e)) continue; + const float hj = pj->h; + + double pjx[3]; + for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; + const float hjg2 = hj * hj * kernel_gamma2; + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pi = &parts_i[pid]; + const float hig2 = pi->h * pi->h * kernel_gamma2; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pjx[k] - pi->x[k]; + r2 += dx[k] * dx[k]; + } + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); +#endif + + /* Hit or miss? */ + if (r2 < hjg2 || r2 < hig2) { + IACT_NONSYM(r2, dx, hj, pi->h, pj, pi); + } + + } /* loop over the parts in ci. */ + + } /* loop over the parts in cj. */ + + TIMER_TOC(TIMER_DOPAIR); +} + +/** + * @brief Compute the interactions between a cell pair, but only for the + * given indices in ci. + * + * @param r The #runner. + * @param ci The first #cell. + * @param parts_i The #part to interact with @c cj. + * @param ind The list of indices of particles in @c ci to interact with. + * @param count The number of particles in @c ind. + * @param cj The second #cell. + */ +void DOPAIR_SUBSET_NOSORT(struct runner *r, struct cell *restrict ci, + struct part *restrict parts_i, int *restrict ind, + int count, struct cell *restrict cj) { + + struct engine *e = r->e; + + TIMER_TIC; + + const int count_j = cj->count; + struct part *restrict parts_j = cj->parts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + /* Loop over the parts_i. */ + for (int pid = 0; pid < count; pid++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pi = &parts_i[ind[pid]]; + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hi = pi->h; + const float hig2 = hi * hi * kernel_gamma2; + + if (!part_is_active(pi, e)) + error("Trying to correct smoothing length of inactive particle !"); + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + + /* Hit or miss? */ + if (r2 < hig2) { + + IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); + } + } /* loop over the parts in cj. */ + } /* loop over the parts in ci. */ + + TIMER_TOC(timer_dopair_subset); +} diff --git a/src/space.c b/src/space.c index 8d5dc34dc7f0060953ca1a34cd5b60fc7f186cbd..fa707f41d241fd6e49a7408afcf501942179d552 100644 --- a/src/space.c +++ b/src/space.c @@ -472,7 +472,7 @@ void space_rebuild(struct space *s, int verbose) { /* Be verbose about this. */ #ifdef SWIFT_DEBUG_CHECKS - message("re)building space"); + if (s->e->nodeID == 0 || verbose) message("re)building space"); fflush(stdout); #endif @@ -844,6 +844,13 @@ void space_parts_get_cell_index_mapper(void *map_data, int nr_parts, cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z); ind[k] = index; +#ifdef SWIFT_DEBUG_CHECKS + if (pos_x > dim_x || pos_y > dim_y || pos_z > pos_z || pos_x < 0. || + pos_y < 0. || pos_z < 0.) + error("Particle outside of simulation box. p->x=[%e %e %e]", pos_x, pos_y, + pos_z); +#endif + /* Update the position */ p->x[0] = pos_x; p->x[1] = pos_y; @@ -1007,7 +1014,7 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, if (ind[i - 1] > ind[i]) error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, ind[i - 1], i, ind[i], min, max); - message("Sorting succeeded."); + if (s->e->nodeID == 0 || verbose) message("Sorting succeeded."); #endif /* Clean up. */ @@ -1185,7 +1192,7 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, if (ind[i - 1] > ind[i]) error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1, ind[i - 1], i, ind[i], min, max); - message("Sorting succeeded."); + if (s->e->nodeID == 0 || verbose) message("Sorting succeeded."); #endif /* Clean up. */