diff --git a/src/engine_fof.c b/src/engine_fof.c index e98a50e263d08623d5a469ca4cdd03129aa058ef..bab1715dc5c4eb7db7149ef6e3c8d8611421d17f 100644 --- a/src/engine_fof.c +++ b/src/engine_fof.c @@ -189,6 +189,8 @@ void engine_fof(struct engine *e, const int dump_results, /* Free the foreign particles */ space_free_foreign_parts(e->s, /*clear pointers=*/1); + /* Make a list of purely local groups to speed up the attaching */ + fof_build_list_of_purely_local_groups(e->fof_properties, e->s); #endif /* Finish the operations attaching the attachables to their groups */ diff --git a/src/fof.c b/src/fof.c index 2fe8cb9c930acd99cd443aaafb6f35ba6f41c9e2..e83ebec0cd104abf574e798f0f736a5d1038fec9 100644 --- a/src/fof.c +++ b/src/fof.c @@ -1222,6 +1222,8 @@ static INLINE void add_foreign_link_to_list( message("Re-allocating local group links from %d to %d elements.", *local_link_count, new_size); + + if (new_size < 0) error("Overflow in size of list of foreign links"); } /* Store the particle group properties for communication. */ @@ -3396,6 +3398,122 @@ void fof_link_attachable_particles(struct fof_props *props, clocks_from_ticks(getticks() - tic_total), clocks_getunit()); } +/** + * @brief Construct an array indicating whether a given root does not appear + * in the global list of fragments to link. + * + * Nothing to do here if not running with MPI or if there are no attacheables. + * + * @param props The properties fof the FOF scheme. + * @param s The #space we work with. + */ +void fof_build_list_of_purely_local_groups(struct fof_props *props, + const struct space *s) { + + /* Is there anything to attach? + * (The array we construct here is only useful with attacheables)*/ + if (!current_fof_attach_type) return; + +#ifdef WITH_MPI + + struct engine *e = s->e; + + /* Abort if only one node */ + if (e->nr_nodes == 1) return; + + /* Local copy of the variable set in the mapper */ + const size_t nr_gparts = s->nr_gparts; + size_t *restrict group_index = props->group_index; + const int group_link_count = props->group_link_count; + + /* Sum the total number of links across MPI domains over each MPI rank. */ + int global_group_link_count = 0; + MPI_Allreduce(&group_link_count, &global_group_link_count, 1, MPI_INT, + MPI_SUM, MPI_COMM_WORLD); + + if (global_group_link_count < 0) + error("Overflow of the size of the global list of foregin links"); + + struct fof_mpi *global_group_links = NULL; + int *displ = NULL, *group_link_counts = NULL; + + if (swift_memalign("fof_global_group_links", (void **)&global_group_links, + SWIFT_STRUCT_ALIGNMENT, + global_group_link_count * sizeof(struct fof_mpi)) != 0) + error("Error while allocating memory for the global list of group links"); + + if (posix_memalign((void **)&group_link_counts, SWIFT_STRUCT_ALIGNMENT, + e->nr_nodes * sizeof(int)) != 0) + error( + "Error while allocating memory for the number of group links on each " + "MPI rank"); + + if (posix_memalign((void **)&displ, SWIFT_STRUCT_ALIGNMENT, + e->nr_nodes * sizeof(int)) != 0) + error( + "Error while allocating memory for the displacement in memory for the " + "global group link list"); + + /* Gather the total number of links on each rank. */ + MPI_Allgather(&group_link_count, 1, MPI_INT, group_link_counts, 1, MPI_INT, + MPI_COMM_WORLD); + + /* Set the displacements into the global link list using the link counts from + * each rank */ + displ[0] = 0; + for (int i = 1; i < e->nr_nodes; i++) { + displ[i] = displ[i - 1] + group_link_counts[i - 1]; + if (displ[i] < 0) error("Number of group links overflowing!"); + } + + /* Gather the global link list on all ranks. */ + MPI_Allgatherv(props->group_links, group_link_count, fof_mpi_type, + global_group_links, group_link_counts, displ, fof_mpi_type, + MPI_COMM_WORLD); + + /* Clean up memory. */ + free(group_link_counts); + free(displ); + + /* We now have a list of all the fragment connections. + * We can iterate over the *local* groups to identify the ones which + * are *not* appearing in the list */ + + if (posix_memalign((void **)&props->is_purely_local, SWIFT_STRUCT_ALIGNMENT, + nr_gparts * sizeof(char)) != 0) + error("Error while allocating memory for the list of purely local groups"); + + /* Start by pretending every group is purely local */ + for (size_t i = 0; i < nr_gparts; ++i) props->is_purely_local[i] = 1; + + /* Now loop over the list of inter-rank connections and flag each halo present + * in the list */ + for (int k = 0; k < global_group_link_count; ++k) { + + const size_t group_i = global_group_links[k].group_i; + const size_t group_j = global_group_links[k].group_j; + + const size_t root_i = + fof_find_global(group_i - node_offset, group_index, nr_gparts); + const size_t root_j = + fof_find_global(group_j - node_offset, group_index, nr_gparts); + + if (is_local(root_i, nr_gparts)) { + const size_t local_root = root_i - node_offset; + props->is_purely_local[local_root] = 0; + } + + if (is_local(root_j, nr_gparts)) { + const size_t local_root = root_j - node_offset; + props->is_purely_local[local_root] = 0; + } + } + + /* Clean up the last allocated array */ + swift_free("fof_global_group_links", global_group_links); +#endif +} + /** * @brief Process all the attachable-linkable connections to add the * attachables to the groups they belong to. @@ -3412,14 +3530,15 @@ void fof_finalise_attachables(struct fof_props *props, const struct space *s) { const size_t nr_gparts = s->nr_gparts; - char *found_attachable_link = props->found_attachable_link; - size_t *restrict group_index = props->group_index; + char *restrict found_attachable_link = props->found_attachable_link; size_t *restrict attach_index = props->attach_index; + size_t *restrict group_index = props->group_index; size_t *restrict group_size = props->group_size; #ifdef WITH_MPI /* Get pointers to global arrays. */ + char *restrict is_purely_local = props->is_purely_local; int *restrict group_links_size = &props->group_links_size; int *restrict group_link_count = &props->group_link_count; struct fof_mpi **group_links = &props->group_links; @@ -3436,16 +3555,30 @@ void fof_finalise_attachables(struct fof_props *props, const struct space *s) { const size_t root_i = fof_find_global(group_index[i] - node_offset, group_index, nr_gparts); - /* Update the size of the group the particle belongs to */ + /* Update the size of the group the particle belongs to. + * The strategy emploed depends on where the root and particles are. */ if (is_local(root_j, nr_gparts)) { const size_t local_root = root_j - node_offset; - group_index[i] = local_root + node_offset; - group_size[local_root]++; + if (is_purely_local[local_root]) { /* All parties involved are local */ - } else { + /* We can directly attack the list of groups */ + group_index[i] = local_root + node_offset; + group_size[local_root]++; + } else { /* Group is involved in some cross-boundaries mixing */ + + /* Add to the list of links to be resolved globally later */ + add_foreign_link_to_list(group_link_count, group_links_size, + group_links, group_links, root_i, root_j, + /*size_i=*/1, + /*size_j=*/2); + } + + } else { /* Root is foreign */ + + /* Add to the list of links to be resolved globally later */ add_foreign_link_to_list(group_link_count, group_links_size, group_links, group_links, root_i, root_j, /*size_i=*/1, @@ -3454,6 +3587,9 @@ void fof_finalise_attachables(struct fof_props *props, const struct space *s) { } } + /* We can free the list of purely local groups */ + free(props->is_purely_local); + #else /* not WITH_MPI */ /* Loop over all the attachables and added them to the group they belong to */ @@ -3522,6 +3658,9 @@ void fof_link_foreign_fragments(struct fof_props *props, MPI_Allreduce(&group_link_count, &global_group_link_count, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + if (global_group_link_count < 0) + error("Overflow of the size of the global list of foreign links"); + struct fof_mpi *global_group_links = NULL; int *displ = NULL, *group_link_counts = NULL; diff --git a/src/fof.h b/src/fof.h index c7213e8532c7ff614c45d18ef8ea1ee8df1a5e7c..4496c233a2cf6b5fbb32ee6eabc2a5b7c1b1cd02 100644 --- a/src/fof.h +++ b/src/fof.h @@ -93,6 +93,9 @@ struct fof_props { /*! Has the particle found a linkable to attach to? */ char *found_attachable_link; + /*! Is the group purely local after linking the foreign particles? */ + char *is_purely_local; + /*! For attachable particles: distance to the current nearest linkable part */ float *distance_to_link; @@ -198,6 +201,8 @@ void fof_link_attachable_particles(struct fof_props *props, const struct space *s); void fof_finalise_attachables(struct fof_props *props, const struct space *s); void fof_link_foreign_fragments(struct fof_props *props, const struct space *s); +void fof_build_list_of_purely_local_groups(struct fof_props *props, + const struct space *s); void fof_compute_group_props(struct fof_props *props, const struct black_holes_props *bh_props, const struct phys_const *constants,