diff --git a/src/engine.c b/src/engine.c index 97eb8bd796e8e31971820f94b22f95b27e1c378e..d26953f1642ded4c916bb9cd7e98481688d3e211 100644 --- a/src/engine.c +++ b/src/engine.c @@ -3050,7 +3050,7 @@ void engine_collect_end_of_step_mapper(void *map_data, int num_elements, data->s_updated += s_updated; data->b_updated += b_updated; - /* Add the SFH information from this engine to the global data */ + /* Add the SFH information from this engine to the global data */ star_formation_logger_add(sfh_top, &sfh_updated); if (ti_hydro_end_min > e->ti_current) @@ -6490,6 +6490,9 @@ void engine_activate_fof_tasks(struct engine *e) { void engine_fof(struct engine *e, const int dump_results, const int seed_black_holes) { +#ifdef WITH_MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif ticks tic = getticks(); /* Compute number of DM particles */ @@ -6501,7 +6504,7 @@ void engine_fof(struct engine *e, const int dump_results, fof_allocate(e->s, total_nr_dmparts, e->fof_properties); message("FOF allocate (FOF SCALING) took: %.3f %s.", - clocks_from_ticks(getticks() - tic), clocks_getunit()); + clocks_from_ticks(getticks() - tic), clocks_getunit()); /* Make FOF tasks */ engine_make_fof_tasks(e); @@ -6510,14 +6513,16 @@ void engine_fof(struct engine *e, const int dump_results, engine_activate_fof_tasks(e); ticks tic_local_fof = getticks(); - + /* Perform local FOF tasks. */ engine_launch(e); message("Local FOF search (tasks FOF SCALING) took: %.3f %s.", - clocks_from_ticks(getticks() - tic_local_fof), clocks_getunit()); + clocks_from_ticks(getticks() - tic_local_fof), clocks_getunit()); - message("FOF allocate + task creation + local search (FOF SCALING) took: %.3f %s.", + message( + "FOF allocate + task creation + local search (FOF SCALING) took: %.3f " + "%s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); /* Perform FOF search over foreign particles and @@ -6535,7 +6540,9 @@ void engine_fof(struct engine *e, const int dump_results, /* ... and find the next FOF time */ if (seed_black_holes) engine_compute_next_fof_time(e); +#ifdef WITH_MPI MPI_Barrier(MPI_COMM_WORLD); +#endif if (engine_rank == 0) message("Complete FOF search took: %.3f %s.", diff --git a/src/fof.c b/src/fof.c index b5f4f61986aeb628432e2cb564cf99c2c91a319f..0941422ede3d596aa64d0c7375d34fd8bcbf32ac 100644 --- a/src/fof.c +++ b/src/fof.c @@ -50,6 +50,9 @@ #define UNION_BY_SIZE_OVER_MPI (1) #define FOF_COMPRESS_PATHS_MIN_LENGTH (2) +/* Are we timing the FOF? */ +//#define WITH_FOF_TIMING + /** * @brief Properties of a group used for black hole seeding */ @@ -385,28 +388,28 @@ __attribute__((always_inline)) INLINE static size_t fof_find_global( * @param nr_gparts The number of g-particles on this node. * @param group_index Array of group root indices. 
*/ -//__attribute__((always_inline)) INLINE static size_t fof_find_local( -// const size_t i, const size_t nr_gparts, const size_t *group_index) { -//#ifdef WITH_MPI -// size_t root = node_offset + i; -// -// while ((group_index[root - node_offset] != root) && -// (group_index[root - node_offset] >= node_offset) && -// (group_index[root - node_offset] < node_offset + nr_gparts)) { -// root = group_index[root - node_offset]; -// } -// -// return root - node_offset; -//#else -// size_t root = i; -// -// while ((group_index[root] != root) && (group_index[root] < nr_gparts)) { -// root = group_index[root]; -// } -// -// return root; -//#endif -//} +__attribute__((always_inline)) INLINE static size_t fof_find_local( + const size_t i, const size_t nr_gparts, const size_t *group_index) { +#ifdef WITH_MPI + size_t root = node_offset + i; + + while ((group_index[root - node_offset] != root) && + (group_index[root - node_offset] >= node_offset) && + (group_index[root - node_offset] < node_offset + nr_gparts)) { + root = group_index[root - node_offset]; + } + + return root - node_offset; +#else + size_t root = i; + + while ((group_index[root] != root) && (group_index[root] < nr_gparts)) { + root = group_index[root]; + } + + return root; +#endif +} /** * @brief Finds the local root ID of the group a particle exists in. @@ -2391,7 +2394,11 @@ void fof_search_tree(struct fof_props *props, const size_t nr_gparts = s->nr_gparts; const size_t min_group_size = props->min_group_size; - //const size_t group_id_offset = props->group_id_offset; +#ifndef WITH_FOF_TIMING + const size_t group_id_offset = props->group_id_offset; + const size_t group_id_default = props->group_id_default; +#endif + #ifdef WITH_MPI const int nr_nodes = s->e->nr_nodes; #endif @@ -2411,8 +2418,6 @@ void fof_search_tree(struct fof_props *props, if (engine_rank == 0 && verbose) message("Size of hash table element: %ld", sizeof(hashmap_element_t)); - //const size_t group_id_default = props->group_id_default; - #ifdef WITH_MPI /* Reset global variable */ @@ -2421,15 +2426,15 @@ void fof_search_tree(struct fof_props *props, /* Determine number of gparts on lower numbered MPI ranks */ long long nr_gparts_cumulative; long long nr_gparts_local = s->nr_gparts; - + ticks comms_tic = getticks(); MPI_Scan(&nr_gparts_local, &nr_gparts_cumulative, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); - - //if (verbose) + + // if (verbose) message("MPI_Scan Imbalance took: %.3f %s.", - clocks_from_ticks(getticks() - comms_tic), clocks_getunit()); + clocks_from_ticks(getticks() - comms_tic), clocks_getunit()); node_offset = nr_gparts_cumulative - nr_gparts_local; @@ -2450,8 +2455,8 @@ void fof_search_tree(struct fof_props *props, nr_gparts, sizeof(struct gpart), 0, s); message("FOF calc group size took (FOF SCALING): %.3f %s.", - clocks_from_ticks(getticks() - tic_calc_group_size), - clocks_getunit()); + clocks_from_ticks(getticks() - tic_calc_group_size), + clocks_getunit()); #ifdef WITH_MPI if (nr_nodes > 1) { @@ -2461,16 +2466,21 @@ void fof_search_tree(struct fof_props *props, /* Search for group links across MPI domains. 
*/ fof_search_foreign_cells(props, s); - message("fof_search_foreign_cells() took (FOF SCALING): %.3f %s.", + message("fof_search_foreign_cells() took (FOF SCALING): %.3f %s.", clocks_from_ticks(getticks() - tic_mpi), clocks_getunit()); - - message("fof_search_foreign_cells() + calc_group_size took (FOF SCALING): %.3f %s.", - clocks_from_ticks(getticks() - tic_total), clocks_getunit()); + + message( + "fof_search_foreign_cells() + calc_group_size took (FOF SCALING): %.3f " + "%s.", + clocks_from_ticks(getticks() - tic_total), clocks_getunit()); } #endif - size_t num_groups_local = 0;//, num_parts_in_groups_local = 0, - // max_group_size_local = 0; + size_t num_groups_local = 0; +#ifndef WITH_FOF_TIMING + size_t num_parts_in_groups_local = 0; + size_t max_group_size_local = 0; +#endif ticks tic_num_groups_calc = getticks(); @@ -2486,256 +2496,259 @@ void fof_search_tree(struct fof_props *props, num_groups_local++; #endif -// /* Find the total number of particles in groups. */ -// if (group_size[i] >= min_group_size) -// num_parts_in_groups_local += group_size[i]; -// -// /* Find the largest group. */ -// if (group_size[i] > max_group_size_local) -// max_group_size_local = group_size[i]; +#ifndef WITH_FOF_TIMING + /* Find the total number of particles in groups. */ + if (group_size[i] >= min_group_size) + num_parts_in_groups_local += group_size[i]; + + /* Find the largest group. */ + if (group_size[i] > max_group_size_local) + max_group_size_local = group_size[i]; +#endif } - - message("Calculating the total no. of local groups took: (FOF SCALING): %.3f %s.", - clocks_from_ticks(getticks() - tic_num_groups_calc), clocks_getunit()); + + message( + "Calculating the total no. of local groups took: (FOF SCALING): %.3f %s.", + clocks_from_ticks(getticks() - tic_num_groups_calc), clocks_getunit()); /* Sort the groups in descending order based upon size and re-label their IDs * 0-num_groups. */ -// struct group_length *high_group_sizes = NULL; -// int group_count = 0; -// -// if (swift_memalign("fof_high_group_sizes", (void **)&high_group_sizes, 32, -// num_groups_local * sizeof(struct group_length)) != 0) -// error("Failed to allocate list of large groups."); -// -// /* Store the group_sizes and their offset. */ -// for (size_t i = 0; i < nr_gparts; i++) { -// -//#ifdef WITH_MPI -// if (group_index[i] == i + node_offset && group_size[i] >= min_group_size) { -// high_group_sizes[group_count].index = node_offset + i; -// high_group_sizes[group_count++].size = group_size[i]; -// } -//#else -// if (group_index[i] == i && group_size[i] >= min_group_size) { -// high_group_sizes[group_count].index = i; -// high_group_sizes[group_count++].size = group_size[i]; -// } -//#endif -// } - -// ticks tic = getticks(); -// -// /* Find global properties. */ +#ifndef WITH_FOF_TIMING + struct group_length *high_group_sizes = NULL; + int group_count = 0; + + if (swift_memalign("fof_high_group_sizes", (void **)&high_group_sizes, 32, + num_groups_local * sizeof(struct group_length)) != 0) + error("Failed to allocate list of large groups."); + + /* Store the group_sizes and their offset. 
*/ + for (size_t i = 0; i < nr_gparts; i++) { + +#ifdef WITH_MPI + if (group_index[i] == i + node_offset && group_size[i] >= min_group_size) { + high_group_sizes[group_count].index = node_offset + i; + high_group_sizes[group_count++].size = group_size[i]; + } +#else + if (group_index[i] == i && group_size[i] >= min_group_size) { + high_group_sizes[group_count].index = i; + high_group_sizes[group_count++].size = group_size[i]; + } +#endif + } + + ticks tic = getticks(); + + /* Find global properties. */ #ifdef WITH_MPI MPI_Allreduce(&num_groups_local, &num_groups, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - message("Finding the total no. of groups took: (FOF SCALING): %.3f %s.", - clocks_from_ticks(getticks() - tic_num_groups_calc), clocks_getunit()); + message("Finding the total no. of groups took: (FOF SCALING): %.3f %s.", + clocks_from_ticks(getticks() - tic_num_groups_calc), + clocks_getunit()); - //MPI_Reduce(&num_parts_in_groups_local, &num_parts_in_groups, 1, MPI_INT, - // MPI_SUM, 0, MPI_COMM_WORLD); - //MPI_Reduce(&max_group_size_local, &max_group_size, 1, MPI_INT, MPI_MAX, 0, - // MPI_COMM_WORLD); + MPI_Reduce(&num_parts_in_groups_local, &num_parts_in_groups, 1, MPI_INT, + MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&max_group_size_local, &max_group_size, 1, MPI_INT, MPI_MAX, 0, + MPI_COMM_WORLD); #else num_groups = num_groups_local; - //num_parts_in_groups = num_parts_in_groups_local; - //max_group_size = max_group_size_local; + num_parts_in_groups = num_parts_in_groups_local; + max_group_size = max_group_size_local; #endif /* WITH_MPI */ props->num_groups = num_groups; -// -// /* Find number of groups on lower numbered MPI ranks */ -//#ifdef WITH_MPI -// long long nglocal = num_groups_local; -// long long ngsum; -// MPI_Scan(&nglocal, &ngsum, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); -// const size_t num_groups_prev = (size_t)(ngsum - nglocal); -//#endif /* WITH_MPI */ -// -// /* Sort local groups into descending order of size */ -// qsort(high_group_sizes, num_groups_local, sizeof(struct group_length), -// cmp_func_group_size); -// -// /* Set default group ID for all particles */ -// for (size_t i = 0; i < nr_gparts; i++) gparts[i].group_id = group_id_default; -// -// /* -// Assign final group IDs to local root particles where the global root is on -// this node and the group is large enough. Within a node IDs are assigned in -// descending order of particle number. -// */ -// for (size_t i = 0; i < num_groups_local; i++) { -//#ifdef WITH_MPI -// gparts[high_group_sizes[i].index - node_offset].group_id = -// group_id_offset + i + num_groups_prev; -//#else -// gparts[high_group_sizes[i].index].group_id = group_id_offset + i; -//#endif -// } -// -//#ifdef WITH_MPI -// /* -// Now, for each local root where the global root is on some other node -// AND the total size of the group is >= min_group_size we need to retrieve -// the gparts.group_id we just assigned to the global root. -// -// Will do that by sending the group_index of these lcoal roots to the node -// where their global root is stored and receiving back the new group_id -// associated with that particle. -// */ -// -// /* -// Identify local roots with global root on another node and large enough -// group_size. Store index of the local and global roots in these cases. -// -// NOTE: if group_size only contains the total FoF mass for global roots, -// then we have to communicate ALL fragments where the global root is not -// on this node. Hence the commented out extra conditions below. 
-// */ -// size_t nsend = 0; -// for (size_t i = 0; i < nr_gparts; i += 1) { -// if ((!is_local(group_index[i], -// nr_gparts))) { /* && (group_size[i] >= min_group_size)) { */ -// nsend += 1; -// } -// } -// struct fof_final_index *fof_index_send = -// (struct fof_final_index *)swift_malloc( -// "fof_index_send", sizeof(struct fof_final_index) * nsend); -// nsend = 0; -// for (size_t i = 0; i < nr_gparts; i += 1) { -// if ((!is_local(group_index[i], -// nr_gparts))) { /* && (group_size[i] >= min_group_size)) { */ -// fof_index_send[nsend].local_root = node_offset + i; -// fof_index_send[nsend].global_root = group_index[i]; -// nsend += 1; -// } -// } -// -// /* Sort by global root - this puts the groups in order of which node they're -// * stored on */ -// qsort(fof_index_send, nsend, sizeof(struct fof_final_index), -// compare_fof_final_index_global_root); -// -// /* Determine range of global indexes (i.e. particles) on each node */ -// size_t *num_on_node = (size_t *)malloc(nr_nodes * sizeof(size_t)); -// MPI_Allgather(&nr_gparts, sizeof(size_t), MPI_BYTE, num_on_node, -// sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); -// size_t *first_on_node = (size_t *)malloc(nr_nodes * sizeof(size_t)); -// first_on_node[0] = 0; -// for (int i = 1; i < nr_nodes; i += 1) -// first_on_node[i] = first_on_node[i - 1] + num_on_node[i - 1]; -// -// /* Determine how many entries go to each node */ -// int *sendcount = (int *)malloc(nr_nodes * sizeof(int)); -// for (int i = 0; i < nr_nodes; i += 1) sendcount[i] = 0; -// int dest = 0; -// for (size_t i = 0; i < nsend; i += 1) { -// while ((fof_index_send[i].global_root >= -// first_on_node[dest] + num_on_node[dest]) || -// (num_on_node[dest] == 0)) -// dest += 1; -// if (dest >= nr_nodes) error("Node index out of range!"); -// sendcount[dest] += 1; -// } -// -// int *recvcount = NULL, *sendoffset = NULL, *recvoffset = NULL; -// size_t nrecv = 0; -// -// fof_compute_send_recv_offsets(nr_nodes, sendcount, &recvcount, &sendoffset, -// &recvoffset, &nrecv); -// -// struct fof_final_index *fof_index_recv = -// (struct fof_final_index *)swift_malloc( -// "fof_index_recv", nrecv * sizeof(struct fof_final_index)); -// -// /* Exchange group indexes */ -// MPI_Alltoallv(fof_index_send, sendcount, sendoffset, fof_final_index_type, -// fof_index_recv, recvcount, recvoffset, fof_final_index_type, -// MPI_COMM_WORLD); -// -// /* For each received global root, look up the group ID we assigned and store -// * it in the struct */ -// for (size_t i = 0; i < nrecv; i += 1) { -// if ((fof_index_recv[i].global_root < node_offset) || -// (fof_index_recv[i].global_root >= node_offset + nr_gparts)) { -// error("Received global root index out of range!"); -// } -// fof_index_recv[i].global_root = -// gparts[fof_index_recv[i].global_root - node_offset].group_id; -// } -// -// /* Send the result back */ -// MPI_Alltoallv(fof_index_recv, recvcount, recvoffset, fof_final_index_type, -// fof_index_send, sendcount, sendoffset, fof_final_index_type, -// MPI_COMM_WORLD); -// -// /* Update local gparts.group_id */ -// for (size_t i = 0; i < nsend; i += 1) { -// if ((fof_index_send[i].local_root < node_offset) || -// (fof_index_send[i].local_root >= node_offset + nr_gparts)) { -// error("Sent local root index out of range!"); -// } -// gparts[fof_index_send[i].local_root - node_offset].group_id = -// fof_index_send[i].global_root; -// } -// -// free(sendcount); -// free(recvcount); -// free(sendoffset); -// free(recvoffset); -// swift_free("fof_index_send", fof_index_send); -// 
swift_free("fof_index_recv", fof_index_recv); -// -//#endif /* WITH_MPI */ -// -// /* Assign every particle the group_id of its local root. */ -// for (size_t i = 0; i < nr_gparts; i++) { -// const size_t root = fof_find_local(i, nr_gparts, group_index); -// gparts[i].group_id = gparts[root].group_id; -// } -// -// if (verbose) -// message("Group sorting took: %.3f %s.", clocks_from_ticks(getticks() - tic), -// clocks_getunit()); -// -// /* Allocate and initialise a group mass array. */ -// if (swift_memalign("group_mass", (void **)&props->group_mass, 32, -// num_groups_local * sizeof(double)) != 0) -// error("Failed to allocate list of group masses for FOF search."); -// -// bzero(props->group_mass, num_groups_local * sizeof(double)); -// -// ticks tic_seeding = getticks(); -// -// double *group_mass = props->group_mass; -//#ifdef WITH_MPI -// fof_calc_group_mass(props, s, num_groups_local, num_groups_prev, num_on_node, -// first_on_node, group_mass); -// free(num_on_node); -// free(first_on_node); -//#else -// fof_calc_group_mass(props, s, num_groups_local, 0, NULL, NULL, group_mass); -//#endif -// -// if (verbose) -// message("Black hole seeding took: %.3f %s.", -// clocks_from_ticks(getticks() - tic_seeding), clocks_getunit()); -// -// /* Dump group data. */ -// if (dump_results) { -// fof_dump_group_data(props, output_file_name, s, num_groups_local, -// high_group_sizes); -// } -// -// /* Seed black holes */ -// if (seed_black_holes) { -// fof_seed_black_holes(props, bh_props, constants, cosmo, s, num_groups_local, -// high_group_sizes); -// } - - /* Free the left-overs */ - //swift_free("fof_high_group_sizes", high_group_sizes); + + /* Find number of groups on lower numbered MPI ranks */ +#ifdef WITH_MPI + long long nglocal = num_groups_local; + long long ngsum; + MPI_Scan(&nglocal, &ngsum, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + const size_t num_groups_prev = (size_t)(ngsum - nglocal); +#endif /* WITH_MPI */ + + /* Sort local groups into descending order of size */ + qsort(high_group_sizes, num_groups_local, sizeof(struct group_length), + cmp_func_group_size); + + /* Set default group ID for all particles */ + for (size_t i = 0; i < nr_gparts; i++) gparts[i].group_id = group_id_default; + + /* Assign final group IDs to local root particles where the global root is + * on this node and the group is large enough. Within a node IDs are + * assigned in descending order of particle number. */ + for (size_t i = 0; i < num_groups_local; i++) { +#ifdef WITH_MPI + gparts[high_group_sizes[i].index - node_offset].group_id = + group_id_offset + i + num_groups_prev; +#else + gparts[high_group_sizes[i].index].group_id = group_id_offset + i; +#endif + } + +#ifdef WITH_MPI + + /* Now, for each local root where the global root is on some other node + * AND the total size of the group is >= min_group_size we need to + * retrieve the gparts.group_id we just assigned to the global root. + * + * Will do that by sending the group_index of these lcoal roots to the + * node where their global root is stored and receiving back the new + * group_id associated with that particle. + * + * Identify local roots with global root on another node and large enough + * group_size. Store index of the local and global roots in these cases. + * + * NOTE: if group_size only contains the total FoF mass for global roots, + * then we have to communicate ALL fragments where the global root is not + * on this node. 
Hence the commented out extra conditions below.*/ + size_t nsend = 0; + for (size_t i = 0; i < nr_gparts; i += 1) { + if ((!is_local(group_index[i], + nr_gparts))) { /* && (group_size[i] >= min_group_size)) { */ + nsend += 1; + } + } + struct fof_final_index *fof_index_send = + (struct fof_final_index *)swift_malloc( + "fof_index_send", sizeof(struct fof_final_index) * nsend); + nsend = 0; + for (size_t i = 0; i < nr_gparts; i += 1) { + if ((!is_local(group_index[i], + nr_gparts))) { /* && (group_size[i] >= min_group_size)) { */ + fof_index_send[nsend].local_root = node_offset + i; + fof_index_send[nsend].global_root = group_index[i]; + nsend += 1; + } + } + + /* Sort by global root - this puts the groups in order of which node they're + * stored on */ + qsort(fof_index_send, nsend, sizeof(struct fof_final_index), + compare_fof_final_index_global_root); + + /* Determine range of global indexes (i.e. particles) on each node */ + size_t *num_on_node = (size_t *)malloc(nr_nodes * sizeof(size_t)); + MPI_Allgather(&nr_gparts, sizeof(size_t), MPI_BYTE, num_on_node, + sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD); + size_t *first_on_node = (size_t *)malloc(nr_nodes * sizeof(size_t)); + first_on_node[0] = 0; + for (int i = 1; i < nr_nodes; i += 1) + first_on_node[i] = first_on_node[i - 1] + num_on_node[i - 1]; + + /* Determine how many entries go to each node */ + int *sendcount = (int *)malloc(nr_nodes * sizeof(int)); + for (int i = 0; i < nr_nodes; i += 1) sendcount[i] = 0; + int dest = 0; + for (size_t i = 0; i < nsend; i += 1) { + while ((fof_index_send[i].global_root >= + first_on_node[dest] + num_on_node[dest]) || + (num_on_node[dest] == 0)) + dest += 1; + if (dest >= nr_nodes) error("Node index out of range!"); + sendcount[dest] += 1; + } + + int *recvcount = NULL, *sendoffset = NULL, *recvoffset = NULL; + size_t nrecv = 0; + + fof_compute_send_recv_offsets(nr_nodes, sendcount, &recvcount, &sendoffset, + &recvoffset, &nrecv); + + struct fof_final_index *fof_index_recv = + (struct fof_final_index *)swift_malloc( + "fof_index_recv", nrecv * sizeof(struct fof_final_index)); + + /* Exchange group indexes */ + MPI_Alltoallv(fof_index_send, sendcount, sendoffset, fof_final_index_type, + fof_index_recv, recvcount, recvoffset, fof_final_index_type, + MPI_COMM_WORLD); + + /* For each received global root, look up the group ID we assigned and store + * it in the struct */ + for (size_t i = 0; i < nrecv; i += 1) { + if ((fof_index_recv[i].global_root < node_offset) || + (fof_index_recv[i].global_root >= node_offset + nr_gparts)) { + error("Received global root index out of range!"); + } + fof_index_recv[i].global_root = + gparts[fof_index_recv[i].global_root - node_offset].group_id; + } + + /* Send the result back */ + MPI_Alltoallv(fof_index_recv, recvcount, recvoffset, fof_final_index_type, + fof_index_send, sendcount, sendoffset, fof_final_index_type, + MPI_COMM_WORLD); + + /* Update local gparts.group_id */ + for (size_t i = 0; i < nsend; i += 1) { + if ((fof_index_send[i].local_root < node_offset) || + (fof_index_send[i].local_root >= node_offset + nr_gparts)) { + error("Sent local root index out of range!"); + } + gparts[fof_index_send[i].local_root - node_offset].group_id = + fof_index_send[i].global_root; + } + + free(sendcount); + free(recvcount); + free(sendoffset); + free(recvoffset); + swift_free("fof_index_send", fof_index_send); + swift_free("fof_index_recv", fof_index_recv); + +#endif /* WITH_MPI */ + + /* Assign every particle the group_id of its local root. 
*/ + for (size_t i = 0; i < nr_gparts; i++) { + const size_t root = fof_find_local(i, nr_gparts, group_index); + gparts[i].group_id = gparts[root].group_id; + } + + if (verbose) + message("Group sorting took: %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); + + /* Allocate and initialise a group mass array. */ + if (swift_memalign("group_mass", (void **)&props->group_mass, 32, + num_groups_local * sizeof(double)) != 0) + error("Failed to allocate list of group masses for FOF search."); + + bzero(props->group_mass, num_groups_local * sizeof(double)); + + ticks tic_seeding = getticks(); + + double *group_mass = props->group_mass; +#ifdef WITH_MPI + fof_calc_group_mass(props, s, num_groups_local, num_groups_prev, num_on_node, + first_on_node, group_mass); + free(num_on_node); + free(first_on_node); +#else + fof_calc_group_mass(props, s, num_groups_local, 0, NULL, NULL, group_mass); +#endif + + if (verbose) + message("Black hole seeding took: %.3f %s.", + clocks_from_ticks(getticks() - tic_seeding), clocks_getunit()); + + /* Dump group data. */ + if (dump_results) { + fof_dump_group_data(props, output_file_name, s, num_groups_local, + high_group_sizes); + } + + /* Seed black holes */ + if (seed_black_holes) { + fof_seed_black_holes(props, bh_props, constants, cosmo, s, num_groups_local, + high_group_sizes); + } +#endif + + /* Free the left-overs */ +#ifndef WITH_FOF_TIMING + swift_free("fof_high_group_sizes", high_group_sizes); +#endif swift_free("fof_group_index", props->group_index); swift_free("fof_group_size", props->group_size); swift_free("fof_group_mass", props->group_mass);
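Reviewer note, not part of the patch: the re-enabled fof_find_local() is a plain union-find root walk, and the final loop above propagates each root's group_id to its members. A minimal standalone sketch of the serial (non-MPI) branch, with toy arrays invented purely for illustration:

/* Standalone illustration only -- not part of src/fof.c. The array contents
 * below are made up; find_local_root() mirrors the non-MPI branch of
 * fof_find_local(). */
#include <stddef.h>
#include <stdio.h>

static size_t find_local_root(size_t i, size_t nr_gparts,
                              const size_t *group_index) {
  /* Follow the union-find links until a self-referencing root is reached. */
  size_t root = i;
  while ((group_index[root] != root) && (group_index[root] < nr_gparts)) {
    root = group_index[root];
  }
  return root;
}

int main(void) {
  /* Two toy groups: particles {0,1,2} rooted at 0, particles {3,4,5} at 3. */
  size_t group_index[] = {0, 0, 1, 3, 3, 4};
  size_t group_id[] = {10, 0, 0, 20, 0, 0}; /* IDs already set on the roots. */
  const size_t nr_gparts = sizeof(group_index) / sizeof(group_index[0]);

  /* Every particle inherits the group_id of its local root. */
  for (size_t i = 0; i < nr_gparts; i++) {
    const size_t root = find_local_root(i, nr_gparts, group_index);
    group_id[i] = group_id[root];
    printf("particle %zu -> root %zu -> group_id %zu\n", i, root, group_id[i]);
  }
  return 0;
}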