diff --git a/src/cell.c b/src/cell.c index 0f4103673e9a6ba17eafa3c2ed859dd80e4b5425..bd1022f1fa23b5911c4056b602008601fa36ce68 100644 --- a/src/cell.c +++ b/src/cell.c @@ -98,6 +98,14 @@ int cell_getsize(struct cell *c) { */ int cell_link_parts(struct cell *c, struct part *parts) { +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Linking foreign particles in a local cell!"); + + if (c->hydro.parts != NULL) + error("Linking parts into a cell that was already linked"); +#endif + c->hydro.parts = parts; /* Fill the progeny recursively, depth-first. */ @@ -123,6 +131,14 @@ int cell_link_parts(struct cell *c, struct part *parts) { */ int cell_link_gparts(struct cell *c, struct gpart *gparts) { +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Linking foreign particles in a local cell!"); + + if (c->grav.parts != NULL) + error("Linking gparts into a cell that was already linked"); +#endif + c->grav.parts = gparts; /* Fill the progeny recursively, depth-first. */ @@ -148,6 +164,14 @@ int cell_link_gparts(struct cell *c, struct gpart *gparts) { */ int cell_link_sparts(struct cell *c, struct spart *sparts) { +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Linking foreign particles in a local cell!"); + + if (c->stars.parts != NULL) + error("Linking sparts into a cell that was already linked"); +#endif + c->stars.parts = sparts; /* Fill the progeny recursively, depth-first. */ @@ -163,6 +187,182 @@ int cell_link_sparts(struct cell *c, struct spart *sparts) { return c->stars.count; } +/** + * @brief Recurse down foreign cells until reaching one with hydro + * tasks; then trigger the linking of the #part array from that + * level. + * + * @param c The #cell. + * @param parts The #part array. + * + * @return The number of particles linked. + */ +int cell_link_foreign_parts(struct cell *c, struct part *parts) { + +#ifdef WITH_MPI + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Linking foreign particles in a local cell!"); +#endif + + /* Do we have a hydro task at this level? */ + if (c->mpi.hydro.recv_xv != NULL) { + + /* Recursively attach the parts */ + const int counts = cell_link_parts(c, parts); +#ifdef SWIFT_DEBUG_CHECKS + if (counts != c->hydro.count) + error("Something is wrong with the foreign counts"); +#endif + return counts; + } + + /* Go deeper to find the level where the tasks are */ + if (c->split) { + int count = 0; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + count += cell_link_foreign_parts(c->progeny[k], &parts[count]); + } + } + return count; + } else { + return 0; + } + +#else + error("Calling linking of foregin particles in non-MPI mode."); +#endif +} + +/** + * @brief Recurse down foreign cells until reaching one with gravity + * tasks; then trigger the linking of the #gpart array from that + * level. + * + * @param c The #cell. + * @param gparts The #gpart array. + * + * @return The number of particles linked. + */ +int cell_link_foreign_gparts(struct cell *c, struct gpart *gparts) { + +#ifdef WITH_MPI + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Linking foreign particles in a local cell!"); +#endif + + /* Do we have a hydro task at this level? */ + if (c->mpi.grav.recv != NULL) { + + /* Recursively attach the gparts */ + const int counts = cell_link_gparts(c, gparts); +#ifdef SWIFT_DEBUG_CHECKS + if (counts != c->grav.count) + error("Something is wrong with the foreign counts"); +#endif + return counts; + } + + /* Go deeper to find the level where the tasks are */ + if (c->split) { + int count = 0; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + count += cell_link_foreign_gparts(c->progeny[k], &gparts[count]); + } + } + return count; + } else { + return 0; + } + +#else + error("Calling linking of foregin particles in non-MPI mode."); +#endif +} + +/** + * @brief Recursively count the number of #part in foreign cells that + * are in cells with hydro-related tasks. + * + * @param c The #cell. + * + * @return The number of particles linked. + */ +int cell_count_parts_for_tasks(const struct cell *c) { + +#ifdef WITH_MPI + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Counting foreign particles in a local cell!"); +#endif + + /* Do we have a hydro task at this level? */ + if (c->mpi.hydro.recv_xv != NULL) { + return c->hydro.count; + } + + if (c->split) { + int count = 0; + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) { + count += cell_count_parts_for_tasks(c->progeny[k]); + } + } + return count; + } else { + return 0; + } + +#else + error("Calling linking of foregin particles in non-MPI mode."); +#endif +} + +/** + * @brief Recursively count the number of #gpart in foreign cells that + * are in cells with gravity-related tasks. + * + * @param c The #cell. + * + * @return The number of particles linked. + */ +int cell_count_gparts_for_tasks(const struct cell *c) { + +#ifdef WITH_MPI + +#ifdef SWIFT_DEBUG_CHECKS + if (c->nodeID == engine_rank) + error("Counting foreign particles in a local cell!"); +#endif + + /* Do we have a hydro task at this level? */ + if (c->mpi.grav.recv != NULL) { + return c->grav.count; + } + + if (c->split) { + int count = 0; + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) { + count += cell_count_gparts_for_tasks(c->progeny[k]); + } + } + return count; + } else { + return 0; + } + +#else + error("Calling linking of foregin particles in non-MPI mode."); +#endif +} + /** * @brief Pack the data of the given cell and all it's sub-cells. * diff --git a/src/cell.h b/src/cell.h index 8b00be8165a359f43bf13dc370d530f5b9046dc5..c5fbc9b8c02b0e008d337189fdbf582faf4fa600 100644 --- a/src/cell.h +++ b/src/cell.h @@ -691,6 +691,10 @@ int cell_getsize(struct cell *c); int cell_link_parts(struct cell *c, struct part *parts); int cell_link_gparts(struct cell *c, struct gpart *gparts); int cell_link_sparts(struct cell *c, struct spart *sparts); +int cell_link_foreign_parts(struct cell *c, struct part *parts); +int cell_link_foreign_gparts(struct cell *c, struct gpart *gparts); +int cell_count_parts_for_tasks(const struct cell *c); +int cell_count_gparts_for_tasks(const struct cell *c); void cell_clean_links(struct cell *c, void *data); void cell_make_multipoles(struct cell *c, integertime_t ti_current); void cell_check_multipole(struct cell *c); diff --git a/src/engine.c b/src/engine.c index 1cc5527751707ce497788d7798fe9144a6c8ee1a..9354da36ade1211209e8482e2c96bc93cab5213a 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1132,84 +1132,12 @@ void engine_exchange_cells(struct engine *e) { #ifdef WITH_MPI - struct space *s = e->s; - const int nr_proxies = e->nr_proxies; const int with_gravity = e->policy & engine_policy_self_gravity; const ticks tic = getticks(); /* Exchange the cell structure with neighbouring ranks. */ proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_gravity); - ticks tic2 = getticks(); - - /* Count the number of particles we need to import and re-allocate - the buffer if needed. */ - size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0; - for (int k = 0; k < nr_proxies; k++) - for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { - if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) - count_parts_in += e->proxies[k].cells_in[j]->hydro.count; - if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) - count_gparts_in += e->proxies[k].cells_in[j]->grav.count; - count_sparts_in += e->proxies[k].cells_in[j]->stars.count; - } - if (count_parts_in > s->size_parts_foreign) { - if (s->parts_foreign != NULL) free(s->parts_foreign); - s->size_parts_foreign = 1.1 * count_parts_in; - if (posix_memalign((void **)&s->parts_foreign, part_align, - sizeof(struct part) * s->size_parts_foreign) != 0) - error("Failed to allocate foreign part data."); - } - if (count_gparts_in > s->size_gparts_foreign) { - if (s->gparts_foreign != NULL) free(s->gparts_foreign); - s->size_gparts_foreign = 1.1 * count_gparts_in; - if (posix_memalign((void **)&s->gparts_foreign, gpart_align, - sizeof(struct gpart) * s->size_gparts_foreign) != 0) - error("Failed to allocate foreign gpart data."); - } - if (count_sparts_in > s->size_sparts_foreign) { - if (s->sparts_foreign != NULL) free(s->sparts_foreign); - s->size_sparts_foreign = 1.1 * count_sparts_in; - if (posix_memalign((void **)&s->sparts_foreign, spart_align, - sizeof(struct spart) * s->size_sparts_foreign) != 0) - error("Failed to allocate foreign spart data."); - } - - if (e->verbose) - message("Counting and allocating arrays took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - - tic2 = getticks(); - - /* Unpack the cells and link to the particle data. */ - struct part *parts = s->parts_foreign; - struct gpart *gparts = s->gparts_foreign; - struct spart *sparts = s->sparts_foreign; - for (int k = 0; k < nr_proxies; k++) { - for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { - - if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) { - cell_link_parts(e->proxies[k].cells_in[j], parts); - parts = &parts[e->proxies[k].cells_in[j]->hydro.count]; - } - - if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) { - cell_link_gparts(e->proxies[k].cells_in[j], gparts); - gparts = &gparts[e->proxies[k].cells_in[j]->grav.count]; - } - - cell_link_sparts(e->proxies[k].cells_in[j], sparts); - sparts = &sparts[e->proxies[k].cells_in[j]->stars.count]; - } - } - s->nr_parts_foreign = parts - s->parts_foreign; - s->nr_gparts_foreign = gparts - s->gparts_foreign; - s->nr_sparts_foreign = sparts - s->sparts_foreign; - - if (e->verbose) - message("Recursively linking arrays took %.3f %s.", - clocks_from_ticks(getticks() - tic2), clocks_getunit()); - if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -1824,6 +1752,124 @@ void engine_exchange_proxy_multipoles(struct engine *e) { #endif } +/** + * @brief Allocate memory for the foreign particles. + * + * We look into the proxies for cells that have tasks and count + * the number of particles in these cells. We then allocate + * memory and link all the cells that have tasks and all cells + * deeper in the tree. + * + * @param e The #engine. + */ +void engine_allocate_foreign_particles(struct engine *e) { + +#ifdef WITH_MPI + + const int nr_proxies = e->nr_proxies; + struct space *s = e->s; + ticks tic = getticks(); + + /* Count the number of particles we need to import and re-allocate + the buffer if needed. */ + size_t count_parts_in = 0, count_gparts_in = 0, count_sparts_in = 0; + for (int k = 0; k < nr_proxies; k++) { + for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { + + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) { + count_parts_in += cell_count_parts_for_tasks(e->proxies[k].cells_in[j]); + } + + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) { + count_gparts_in += + cell_count_gparts_for_tasks(e->proxies[k].cells_in[j]); + } + + /* For stars, we just use the numbers in the top-level cells */ + count_sparts_in += e->proxies[k].cells_in[j]->stars.count; + } + } + + if (e->verbose) + message("Counting number of foreign particles took %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + + tic = getticks(); + + /* Allocate space for the foreign particles we will receive */ + if (count_parts_in > s->size_parts_foreign) { + if (s->parts_foreign != NULL) free(s->parts_foreign); + s->size_parts_foreign = engine_foreign_alloc_margin * count_parts_in; + if (posix_memalign((void **)&s->parts_foreign, part_align, + sizeof(struct part) * s->size_parts_foreign) != 0) + error("Failed to allocate foreign part data."); + } + /* Allocate space for the foreign particles we will receive */ + if (count_gparts_in > s->size_gparts_foreign) { + if (s->gparts_foreign != NULL) free(s->gparts_foreign); + s->size_gparts_foreign = engine_foreign_alloc_margin * count_gparts_in; + if (posix_memalign((void **)&s->gparts_foreign, gpart_align, + sizeof(struct gpart) * s->size_gparts_foreign) != 0) + error("Failed to allocate foreign gpart data."); + } + /* Allocate space for the foreign particles we will receive */ + if (count_sparts_in > s->size_sparts_foreign) { + if (s->sparts_foreign != NULL) free(s->sparts_foreign); + s->size_sparts_foreign = engine_foreign_alloc_margin * count_sparts_in; + if (posix_memalign((void **)&s->sparts_foreign, spart_align, + sizeof(struct spart) * s->size_sparts_foreign) != 0) + error("Failed to allocate foreign spart data."); + } + + if (e->verbose) + message("Allocating %zd/%zd/%zd foreign part/gpart/spart (%zd/%zd/%zd MB)", + s->size_parts_foreign, s->size_gparts_foreign, + s->size_sparts_foreign, + s->size_parts_foreign * sizeof(struct part) / (1024 * 1024), + s->size_gparts_foreign * sizeof(struct gpart) / (1024 * 1024), + s->size_sparts_foreign * sizeof(struct spart) / (1024 * 1024)); + + /* Unpack the cells and link to the particle data. */ + struct part *parts = s->parts_foreign; + struct gpart *gparts = s->gparts_foreign; + struct spart *sparts = s->sparts_foreign; + for (int k = 0; k < nr_proxies; k++) { + for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { + + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_hydro) { + + const size_t count_parts = + cell_link_foreign_parts(e->proxies[k].cells_in[j], parts); + parts = &parts[count_parts]; + } + + if (e->proxies[k].cells_in_type[j] & proxy_cell_type_gravity) { + + const size_t count_gparts = + cell_link_foreign_gparts(e->proxies[k].cells_in[j], gparts); + gparts = &gparts[count_gparts]; + } + + /* For stars, we just use the numbers in the top-level cells */ + cell_link_sparts(e->proxies[k].cells_in[j], sparts); + sparts = &sparts[e->proxies[k].cells_in[j]->stars.count]; + } + } + + /* Update the counters */ + s->nr_parts_foreign = parts - s->parts_foreign; + s->nr_gparts_foreign = gparts - s->gparts_foreign; + s->nr_sparts_foreign = sparts - s->sparts_foreign; + + if (e->verbose) + message("Recursively linking foreign arrays took %.3f %s.", + clocks_from_ticks(getticks() - tic), clocks_getunit()); + +#else + error("SWIFT was not compiled with MPI support."); +#endif +} + /** * @brief Prints the number of tasks in the engine * diff --git a/src/engine.h b/src/engine.h index 757f2b00add656ed2ae7b40ebe431f0487cb9ad2..7e6cc7ffe49c90fbeffdaa0249507d04f2a35c78 100644 --- a/src/engine.h +++ b/src/engine.h @@ -101,6 +101,7 @@ enum engine_step_properties { #define engine_max_proxy_centre_frac 0.2 #define engine_redistribute_alloc_margin 1.2 #define engine_rebuild_link_alloc_margin 1.2 +#define engine_foreign_alloc_margin 1.05 #define engine_default_energy_file_name "energy" #define engine_default_timesteps_file_name "timesteps" #define engine_max_parts_per_ghost 1000 @@ -405,6 +406,7 @@ void engine_unskip(struct engine *e); void engine_drift_all(struct engine *e, const int drift_mpoles); void engine_drift_top_multipoles(struct engine *e); void engine_reconstruct_multipoles(struct engine *e); +void engine_allocate_foreign_particles(struct engine *e); void engine_print_stats(struct engine *e); void engine_check_for_dumps(struct engine *e); void engine_dump_snapshot(struct engine *e); diff --git a/src/engine_maketasks.c b/src/engine_maketasks.c index adcb3e69707691c5218ebc9834f1e8dd0226c80a..2175595cd149d9d5da5a3aa5f5341ff181fd58d6 100644 --- a/src/engine_maketasks.c +++ b/src/engine_maketasks.c @@ -2461,6 +2461,10 @@ void engine_maketasks(struct engine *e) { message("Creating recv tasks took %.3f %s.", clocks_from_ticks(getticks() - tic2), clocks_getunit()); } + + /* Allocate memory for foreign particles */ + engine_allocate_foreign_particles(e); + #endif /* Report the number of tasks we actually used */ diff --git a/tools/analyse_runtime.py b/tools/analyse_runtime.py index 9fc6ce8e08bd88ba6d2ba51dca495a9e1cb58352..68dc6776fd89bb90db21cee6826259984f96c9b1 100755 --- a/tools/analyse_runtime.py +++ b/tools/analyse_runtime.py @@ -73,6 +73,8 @@ labels = [ "Creating send tasks", "Exchanging cell tags", "Creating recv tasks", + "Counting number of foreign particles", + "Recursively linking foreign arrays", "Setting unlocks", "Ranking the tasks", "scheduler_reweight:", @@ -123,6 +125,8 @@ is_rebuild = [ 1, 1, 1, + 1, + 1, 0, 0, 0,