diff --git a/src/cell.c b/src/cell.c index 58893f15840c3af549d78dd5e3a55defd1f92e7c..c2e7ceada1ca86cbab1c80549b14482113c8477d 100644 --- a/src/cell.c +++ b/src/cell.c @@ -1241,6 +1241,9 @@ void cell_check_part_drift_point(struct cell *c, void *data) { /* Only check local cells */ if (c->nodeID != engine_rank) return; + /* Only check cells with content */ + if (c->hydro.count == 0) return; + if (c->hydro.ti_old_part != ti_drift) error("Cell in an incorrect time-zone! c->hydro.ti_old=%lld ti_drift=%lld", c->hydro.ti_old_part, ti_drift); @@ -1273,6 +1276,9 @@ void cell_check_gpart_drift_point(struct cell *c, void *data) { /* Only check local cells */ if (c->nodeID != engine_rank) return; + /* Only check cells with content */ + if (c->grav.count == 0) return; + if (c->grav.ti_old_part != ti_drift) error( "Cell in an incorrect time-zone! c->grav.ti_old_part=%lld " @@ -1309,12 +1315,18 @@ void cell_check_multipole_drift_point(struct cell *c, void *data) { const integertime_t ti_drift = *(integertime_t *)data; + /* Only check local cells */ + if (c->nodeID != engine_rank) return; + + /* Only check cells with content */ + if (c->grav.count == 0) return; + if (c->grav.ti_old_multipole != ti_drift) error( "Cell multipole in an incorrect time-zone! " "c->grav.ti_old_multipole=%lld " - "ti_drift=%lld (depth=%d)", - c->grav.ti_old_multipole, ti_drift, c->depth); + "ti_drift=%lld (depth=%d, node=%d)", + c->grav.ti_old_multipole, ti_drift, c->depth, c->nodeID); #else error("Calling debugging code without debugging flag activated."); diff --git a/src/engine.c b/src/engine.c index ca9156d5d8d4a442c5863135f6a2e6cd670049e1..b17b7b9e460f13c2de3b15dc64267b8c4fd63d8c 100644 --- a/src/engine.c +++ b/src/engine.c @@ -4520,9 +4520,6 @@ void engine_rebuild(struct engine *e, int repartitioned, /* Re-build the space. */ space_rebuild(e->s, repartitioned, e->verbose); - /* Construct the list of purely local cells */ - space_list_local_cells(e->s); - /* Update the global counters of particles */ long long num_particles[3] = {e->s->nr_parts, e->s->nr_gparts, e->s->nr_sparts}; @@ -4582,7 +4579,7 @@ void engine_rebuild(struct engine *e, int repartitioned, engine_maketasks(e); /* Make the list of top-level cells that have tasks */ - space_list_cells_with_tasks(e->s); + space_list_useful_top_level_cells(e->s); #ifdef SWIFT_DEBUG_CHECKS /* Check that all cells have been drifted to the current time. @@ -5926,12 +5923,16 @@ void engine_unskip(struct engine *e) { void engine_do_drift_all_mapper(void *map_data, int num_elements, void *extra_data) { - struct engine *e = (struct engine *)extra_data; - struct cell *cells = (struct cell *)map_data; + const struct engine *e = (const struct engine *)extra_data; + struct space *s = e->s; + struct cell *cells_top = s->cells_top; + int *local_cells = (int *)map_data; for (int ind = 0; ind < num_elements; ind++) { - struct cell *c = &cells[ind]; - if (c != NULL && c->nodeID == e->nodeID) { + struct cell *c = &cells_top[local_cells[ind]]; + + if (c->nodeID == e->nodeID) { + /* Drift all the particles */ cell_drift_part(c, e, 1); @@ -5967,8 +5968,9 @@ void engine_drift_all(struct engine *e) { } #endif - threadpool_map(&e->threadpool, engine_do_drift_all_mapper, e->s->cells_top, - e->s->nr_cells, sizeof(struct cell), 0, e); + threadpool_map(&e->threadpool, engine_do_drift_all_mapper, + e->s->local_cells_with_tasks_top, + e->s->nr_local_cells_with_tasks, sizeof(int), 0, e); /* Synchronize particle positions */ space_synchronize_particle_positions(e->s); diff --git a/src/part.c b/src/part.c index db221dbd4bf9ff2b829b02fbae673aa1c65f339e..f45223d7cb8cec3ccec690f046cf2e0181bb5fa9 100644 --- a/src/part.c +++ b/src/part.c @@ -133,6 +133,8 @@ void part_verify_links(struct part *parts, struct gpart *gparts, struct spart *sparts, size_t nr_parts, size_t nr_gparts, size_t nr_sparts, int verbose) { + ticks tic = getticks(); + for (size_t k = 0; k < nr_gparts; ++k) { /* We have a DM particle */ @@ -246,6 +248,9 @@ void part_verify_links(struct part *parts, struct gpart *gparts, } if (verbose) message("All links OK"); + if (verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } #ifdef WITH_MPI diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index ebd08a662cee9595f8a82cbd5e4091d8325e1380..1e15df821e7068458203b55a147694deee8510a9 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -1718,7 +1718,8 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, /* Recover the list of top-level cells */ struct cell *cells = e->s->cells_top; - const int nr_cells = e->s->nr_cells; + int *cells_with_particles = e->s->cells_with_particles_top; + const int nr_cells_with_particles = e->s->nr_cells_with_particles; /* Anything to do here? */ if (!cell_is_active_gravity(ci, e)) return; @@ -1744,10 +1745,10 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, /* Loop over all the top-level cells and go for a M-M interaction if * well-separated */ - for (int n = 0; n < nr_cells; ++n) { + for (int n = 0; n < nr_cells_with_particles; ++n) { /* Handle on the top-level cell and it's gravity business*/ - struct cell *cj = &cells[n]; + const struct cell *cj = &cells[cells_with_particles[n]]; const struct gravity_tensors *const multi_j = cj->grav.multipole; /* Avoid self contributions */ diff --git a/src/space.c b/src/space.c index 716040c6e99b4adbbb52783740a07c8547b49fc0..91d5c6ca0e842e9157e8b1c2df300a7d080ea778 100644 --- a/src/space.c +++ b/src/space.c @@ -283,26 +283,33 @@ void space_regrid(struct space *s, int verbose) { // tic = getticks(); float h_max = s->cell_min / kernel_gamma / space_stretch; if (nr_parts > 0) { - if (s->local_cells_top != NULL) { - for (int k = 0; k < s->nr_local_cells; ++k) { - const struct cell *c = &s->cells_top[s->local_cells_top[k]]; + + /* Can we use the list of local non-empty top-level cells? */ + if (s->local_cells_with_particles_top != NULL) { + for (int k = 0; k < s->nr_local_cells_with_particles; ++k) { + const struct cell *c = + &s->cells_top[s->local_cells_with_particles_top[k]]; if (c->hydro.h_max > h_max) { - h_max = s->cells_top[k].hydro.h_max; + h_max = c->hydro.h_max; } if (c->stars.h_max > h_max) { - h_max = s->cells_top[k].stars.h_max; + h_max = c->stars.h_max; } } + + /* Can we instead use all the top-level cells? */ } else if (s->cells_top != NULL) { for (int k = 0; k < s->nr_cells; k++) { const struct cell *c = &s->cells_top[k]; if (c->nodeID == engine_rank && c->hydro.h_max > h_max) { - h_max = s->cells_top[k].hydro.h_max; + h_max = c->hydro.h_max; } if (c->nodeID == engine_rank && c->stars.h_max > h_max) { - h_max = s->cells_top[k].stars.h_max; + h_max = c->stars.h_max; } } + + /* Last option: run through the particles */ } else { for (size_t k = 0; k < nr_parts; k++) { if (s->parts[k].h > h_max) h_max = s->parts[k].h; @@ -401,6 +408,8 @@ void space_regrid(struct space *s, int verbose) { space_free_cells(s); free(s->local_cells_with_tasks_top); free(s->local_cells_top); + free(s->cells_with_particles_top); + free(s->local_cells_with_particles_top); free(s->cells_top); free(s->multipoles_top); } @@ -441,9 +450,23 @@ void space_regrid(struct space *s, int verbose) { /* Allocate the indices of local cells with tasks */ if (posix_memalign((void **)&s->local_cells_with_tasks_top, SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0) - error("Failed to allocate indices of local top-level cells."); + error("Failed to allocate indices of local top-level cells with tasks."); bzero(s->local_cells_with_tasks_top, s->nr_cells * sizeof(int)); + /* Allocate the indices of cells with particles */ + if (posix_memalign((void **)&s->cells_with_particles_top, + SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0) + error("Failed to allocate indices of top-level cells with particles."); + bzero(s->cells_with_particles_top, s->nr_cells * sizeof(int)); + + /* Allocate the indices of local cells with particles */ + if (posix_memalign((void **)&s->local_cells_with_particles_top, + SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0) + error( + "Failed to allocate indices of local top-level cells with " + "particles."); + bzero(s->local_cells_with_particles_top, s->nr_cells * sizeof(int)); + /* Set the cells' locks */ for (int k = 0; k < s->nr_cells; k++) { if (lock_init(&s->cells_top[k].hydro.lock) != 0) @@ -1062,12 +1085,15 @@ void space_rebuild(struct space *s, int repartitioned, int verbose) { nr_sparts, verbose); #endif - /* Hook the cells up to the parts. */ - // tic = getticks(); + /* Hook the cells up to the parts. Make list of local and non-empty cells */ + ticks tic2 = getticks(); struct part *finger = s->parts; struct xpart *xfinger = s->xparts; struct gpart *gfinger = s->gparts; struct spart *sfinger = s->sparts; + s->nr_cells_with_particles = 0; + s->nr_local_cells_with_particles = 0; + s->nr_local_cells = 0; for (int k = 0; k < s->nr_cells; k++) { struct cell *restrict c = &cells_top[k]; c->hydro.ti_old_part = ti_current; @@ -1079,7 +1105,11 @@ void space_rebuild(struct space *s, int repartitioned, int verbose) { last_cell_id++; #endif - if (c->nodeID == engine_rank) { + const int is_local = (c->nodeID == engine_rank); + const int has_particles = + (c->hydro.count > 0) || (c->grav.count > 0) || (c->stars.count > 0); + + if (is_local) { c->hydro.parts = finger; c->hydro.xparts = xfinger; c->grav.parts = gfinger; @@ -1088,14 +1118,31 @@ void space_rebuild(struct space *s, int repartitioned, int verbose) { xfinger = &xfinger[c->hydro.count]; gfinger = &gfinger[c->grav.count]; sfinger = &sfinger[c->stars.count]; + + /* Add this cell to the list of local cells */ + s->local_cells_top[s->nr_local_cells] = k; + s->nr_local_cells++; } + + if (is_local && has_particles) { + + /* Add this cell to the list of non-empty cells */ + s->local_cells_with_particles_top[s->nr_local_cells_with_particles] = k; + s->nr_local_cells_with_particles++; + } + } + if (verbose) { + message("Have %d local top-level cells with particles (total=%d)", + s->nr_local_cells_with_particles, s->nr_cells); + message("Have %d local top-level cells (total=%d)", s->nr_local_cells, + s->nr_cells); + message("hooking up cells took %.3f %s.", + clocks_from_ticks(getticks() - tic2), clocks_getunit()); } - // message( "hooking up cells took %.3f %s." , - // clocks_from_ticks(getticks() - tic), clocks_getunit()); /* At this point, we have the upper-level cells, old or new. Now make sure that the parts in each cell are ok. */ - space_split(s, cells_top, s->nr_cells, verbose); + space_split(s, verbose); #ifdef SWIFT_DEBUG_CHECKS /* Check that the multipole construction went OK */ @@ -1113,22 +1160,21 @@ void space_rebuild(struct space *s, int repartitioned, int verbose) { } /** - * @brief Split particles between cells of a hierarchy + * @brief Split particles between cells of a hierarchy. * * This is done in parallel using threads in the #threadpool. + * Only do this for the local non-empty top-level cells. * * @param s The #space. - * @param cells The cell hierarchy. - * @param nr_cells The number of cells. * @param verbose Are we talkative ? */ -void space_split(struct space *s, struct cell *cells, int nr_cells, - int verbose) { +void space_split(struct space *s, int verbose) { const ticks tic = getticks(); - threadpool_map(&s->e->threadpool, space_split_mapper, cells, nr_cells, - sizeof(struct cell), 0, s); + threadpool_map(&s->e->threadpool, space_split_mapper, + s->local_cells_with_particles_top, + s->nr_local_cells_with_particles, sizeof(int), 0, s); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -1597,8 +1643,9 @@ void space_sparts_get_cell_index(struct space *s, int *sind, int *cell_counts, * @param num_bins Total number of bins (length of count). * @param parts_offset Offset of the #part array from the global #part array. */ -void space_parts_sort(struct part *parts, struct xpart *xparts, int *ind, - int *counts, int num_bins, ptrdiff_t parts_offset) { +void space_parts_sort(struct part *parts, struct xpart *xparts, + int *restrict ind, int *restrict counts, int num_bins, + ptrdiff_t parts_offset) { /* Create the offsets array. */ size_t *offsets = NULL; if (posix_memalign((void **)&offsets, SWIFT_STRUCT_ALIGNMENT, @@ -1659,8 +1706,9 @@ void space_parts_sort(struct part *parts, struct xpart *xparts, int *ind, * @param sparts_offset Offset of the #spart array from the global #spart. * array. */ -void space_sparts_sort(struct spart *sparts, int *ind, int *counts, - int num_bins, ptrdiff_t sparts_offset) { +void space_sparts_sort(struct spart *sparts, int *restrict ind, + int *restrict counts, int num_bins, + ptrdiff_t sparts_offset) { /* Create the offsets array. */ size_t *offsets = NULL; if (posix_memalign((void **)&offsets, SWIFT_STRUCT_ALIGNMENT, @@ -1719,8 +1767,8 @@ void space_sparts_sort(struct spart *sparts, int *ind, int *counts, * @param num_bins Total number of bins (length of counts). */ void space_gparts_sort(struct gpart *gparts, struct part *parts, - struct spart *sparts, int *ind, int *counts, - int num_bins) { + struct spart *sparts, int *restrict ind, + int *restrict counts, int num_bins) { /* Create the offsets array. */ size_t *offsets = NULL; if (posix_memalign((void **)&offsets, SWIFT_STRUCT_ALIGNMENT, @@ -2362,10 +2410,12 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) { /* Unpack the inputs. */ struct space *s = (struct space *)extra_data; - struct cell *restrict cells_top = (struct cell *)map_data; + struct cell *cells_top = s->cells_top; + int *local_cells_with_particles = (int *)map_data; + /* Loop over the non-empty cells */ for (int ind = 0; ind < num_cells; ind++) { - struct cell *c = &cells_top[ind]; + struct cell *c = &cells_top[local_cells_with_particles[ind]]; space_split_recursive(s, c, NULL, NULL, NULL); } @@ -2373,8 +2423,8 @@ void space_split_mapper(void *map_data, int num_cells, void *extra_data) { /* All cells and particles should have consistent h_max values. */ for (int ind = 0; ind < num_cells; ind++) { int depth = 0; - if (!checkCellhdxmax(&cells_top[ind], &depth)) - message(" at cell depth %d", depth); + const struct cell *c = &cells_top[local_cells_with_particles[ind]]; + if (!checkCellhdxmax(c, &depth)) message(" at cell depth %d", depth); } #endif } @@ -2559,46 +2609,47 @@ void space_free_buff_sort_indices(struct space *s) { /** * @brief Construct the list of top-level cells that have any tasks in - * their hierarchy on this MPI rank. + * their hierarchy on this MPI rank. Also construct the list of top-level + * cells on any rank that have > 0 particles (of any kind). * * This assumes the list has been pre-allocated at a regrid. * * @param s The #space. */ -void space_list_cells_with_tasks(struct space *s) { +void space_list_useful_top_level_cells(struct space *s) { + + const ticks tic = getticks(); s->nr_local_cells_with_tasks = 0; + s->nr_cells_with_particles = 0; - for (int i = 0; i < s->nr_cells; ++i) - if (cell_has_tasks(&s->cells_top[i])) { + for (int i = 0; i < s->nr_cells; ++i) { + struct cell *c = &s->cells_top[i]; + + if (cell_has_tasks(c)) { s->local_cells_with_tasks_top[s->nr_local_cells_with_tasks] = i; s->nr_local_cells_with_tasks++; } - if (s->e->verbose) - message("Have %d local top-level cells with tasks (total=%d)", - s->nr_local_cells_with_tasks, s->nr_cells); -} -/** - * @brief Construct the list of local top-level cells. - * - * This assumes the list has been pre-allocated at a regrid. - * - * @param s The #space. - */ -void space_list_local_cells(struct space *s) { - - s->nr_local_cells = 0; + const int has_particles = (c->hydro.count > 0) || (c->grav.count > 0) || + (c->stars.count > 0) || + (c->grav.multipole != NULL && c->grav.multipole->m_pole.M_000 > 0.f); - for (int i = 0; i < s->nr_cells; ++i) - if (s->cells_top[i].nodeID == engine_rank) { - s->local_cells_top[s->nr_local_cells] = i; - s->nr_local_cells++; + if (has_particles) { + s->cells_with_particles_top[s->nr_cells_with_particles] = i; + s->nr_cells_with_particles++; } + } + if (s->e->verbose) { + message("Have %d local top-level cells with tasks (total=%d)", + s->nr_local_cells_with_tasks, s->nr_cells); + message("Have %d top-level cells with particles (total=%d)", + s->nr_cells_with_particles, s->nr_cells); + } if (s->e->verbose) - message("Have %d local top-level cells (total=%d)", s->nr_local_cells, - s->nr_cells); + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } void space_synchronize_particle_positions_mapper(void *map_data, int nr_gparts, @@ -2646,11 +2697,17 @@ void space_synchronize_particle_positions_mapper(void *map_data, int nr_gparts, void space_synchronize_particle_positions(struct space *s) { + const ticks tic = getticks(); + if ((s->nr_gparts > 0 && s->nr_parts > 0) || (s->nr_gparts > 0 && s->nr_sparts > 0)) threadpool_map(&s->e->threadpool, space_synchronize_particle_positions_mapper, s->gparts, s->nr_gparts, sizeof(struct gpart), 0, (void *)s); + + if (s->e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } void space_first_init_parts_mapper(void *restrict map_data, int count, @@ -3612,6 +3669,8 @@ void space_clean(struct space *s) { free(s->multipoles_top); free(s->local_cells_top); free(s->local_cells_with_tasks_top); + free(s->cells_with_particles_top); + free(s->local_cells_with_particles_top); free(s->parts); free(s->xparts); free(s->gparts); @@ -3664,6 +3723,8 @@ void space_struct_restore(struct space *s, FILE *stream) { s->multipoles_sub = NULL; s->local_cells_top = NULL; s->local_cells_with_tasks_top = NULL; + s->cells_with_particles_top = NULL; + s->local_cells_with_particles_top = NULL; s->grav_top_level = NULL; #ifdef WITH_MPI s->parts_foreign = NULL; diff --git a/src/space.h b/src/space.h index 1aa5efe64e30fdab70168b1c8457feda1252b328..8b459d706bb978f7c43480829282e495d051a72f 100644 --- a/src/space.h +++ b/src/space.h @@ -119,6 +119,12 @@ struct space { /*! Number of *local* top-level cells with tasks */ int nr_local_cells_with_tasks; + /*! Number of top-level cells that have >0 particle (of any kind) */ + int nr_cells_with_particles; + + /*! Number of top-level cells that have >0 particle (of any kind) */ + int nr_local_cells_with_particles; + /*! The (level 0) cells themselves. */ struct cell *cells_top; @@ -137,6 +143,12 @@ struct space { /*! The indices of the *local* top-level cells with tasks */ int *local_cells_with_tasks_top; + /*! The indices of the top-level cells that have >0 particles (of any kind) */ + int *cells_with_particles_top; + + /*! The indices of the top-level cells that have >0 particles (of any kind) */ + int *local_cells_with_particles_top; + /*! The total number of parts in the space. */ size_t nr_parts, size_parts; @@ -247,11 +259,9 @@ void space_recycle_list(struct space *s, struct cell *cell_list_begin, struct cell *cell_list_end, struct gravity_tensors *multipole_list_begin, struct gravity_tensors *multipole_list_end); -void space_split(struct space *s, struct cell *cells, int nr_cells, - int verbose); +void space_split(struct space *s, int verbose); void space_split_mapper(void *map_data, int num_elements, void *extra_data); -void space_list_local_cells(struct space *s); -void space_list_cells_with_tasks(struct space *s); +void space_list_useful_top_level_cells(struct space *s); void space_parts_get_cell_index(struct space *s, int *ind, int *cell_counts, int *count_inibibited_parts, int verbose); void space_gparts_get_cell_index(struct space *s, int *gind, int *cell_counts,