/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2018 James Willis (james.s.willis@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see .
*
******************************************************************************/
/* Config parameters. */
#include
#ifdef WITH_FOF
/* Some standard headers. */
#include
#include
#include
/* MPI headers. */
#ifdef WITH_MPI
#include
#endif
/* This object's header. */
#include "fof.h"
/* Local headers. */
#include "black_holes.h"
#include "common_io.h"
#include "engine.h"
#include "fof_catalogue_io.h"
#include "hashmap.h"
#include "memuse.h"
#include "proxy.h"
#include "threadpool.h"
#include "tools.h"
#include "tracers.h"
#define fof_props_default_group_id 2147483647
#define fof_props_default_group_id_offset 1
#define fof_props_default_group_link_size 20000
/* Constants. */
#define UNION_BY_SIZE_OVER_MPI (1)
#define FOF_COMPRESS_PATHS_MIN_LENGTH (2)
/* The FoF policy we are running */
int current_fof_linking_type;
/* The FoF policy for particles attached to the main type */
int current_fof_attach_type;
/* The FoF policy for particles ignored altogether */
int current_fof_ignore_type;
/* Are we timing calculating group properties in the FOF? */
// #define WITHOUT_GROUP_PROPS
/**
* @brief Properties of a group used for black hole seeding
*/
enum fof_halo_seeding_props {
fof_halo_has_no_gas = -1LL,
fof_halo_has_black_hole = -2LL,
fof_halo_has_too_low_mass = -3LL
};
#ifdef WITH_MPI
/* MPI types used for communications */
MPI_Datatype fof_mpi_type;
MPI_Datatype group_length_mpi_type;
MPI_Datatype fof_final_index_type;
MPI_Datatype fof_final_mass_type;
/*! Offset between the first particle on this MPI rank and the first particle in
* the global order */
size_t node_offset;
#endif
#ifdef SWIFT_DEBUG_CHECKS
static integertime_t ti_current;
#endif
void fof_set_current_types(const struct fof_props *props) {
/* Initialize the FoF linking mode */
current_fof_linking_type = 0;
for (int i = 0; i < swift_type_count; ++i)
if (props->fof_linking_types[i]) {
current_fof_linking_type |= (1 << (i + 1));
}
/* Initialize the FoF attaching mode */
current_fof_attach_type = 0;
for (int i = 0; i < swift_type_count; ++i)
if (props->fof_attach_types[i]) {
current_fof_attach_type |= (1 << (i + 1));
}
/* Construct the combined mask of ignored particles */
current_fof_ignore_type =
~(current_fof_linking_type | current_fof_attach_type);
}
/**
* @brief Initialise the properties of the FOF code.
*
* @param props the #fof_props structure to fill.
* @param params the parameter file parser.
* @param phys_const The physical constants in internal units.
* @param us The internal unit system.
* @param stand_alone_fof Are we initialising for stand-alone? (1) or
* on-the-fly? (0)
*/
void fof_init(struct fof_props *props, struct swift_params *params,
const struct phys_const *phys_const, const struct unit_system *us,
const int stand_alone_fof) {
/* Base name for the FOF output file */
parser_get_param_string(params, "FOF:basename", props->base_name);
/* Check that we can write outputs by testing if the output
* directory exists and is searchable and writable. */
char directory[PARSER_MAX_LINE_SIZE] = {0};
sprintf(directory, "%s", props->base_name);
const char *dirp = dirname(directory);
if (engine_rank == 0) safe_checkdir(dirp, /*create=*/1);
/* Read the minimum group size. */
props->min_group_size = parser_get_param_int(params, "FOF:min_group_size");
/* Read whether we're doing FoF calls to seed black holes. */
props->seed_black_holes_enabled =
parser_get_param_int(params, "FOF:seed_black_holes_enabled");
/* Read the default group ID of particles in groups below the minimum group
* size. */
props->group_id_default = parser_get_opt_param_int(
params, "FOF:group_id_default", fof_props_default_group_id);
/* Read the starting group ID. */
props->group_id_offset = parser_get_opt_param_int(
params, "FOF:group_id_offset", fof_props_default_group_id_offset);
/* Read the linking length ratio to the mean inter-particle separation. */
props->l_x_ratio =
parser_get_opt_param_double(params, "FOF:linking_length_ratio", -1.);
/* Read value of absolute linking length aksed by the user */
props->l_x_absolute =
parser_get_opt_param_double(params, "FOF:absolute_linking_length", -1.);
if (props->l_x_ratio == -1. && props->l_x_absolute <= 0.)
error("The FOF linking length can't be negative!");
if (props->l_x_ratio <= 0. && props->l_x_absolute == -1.)
error("The FOF linking length ratio can't be negative!");
if (!stand_alone_fof && props->seed_black_holes_enabled) {
/* Read the minimal halo mass for black hole seeding */
props->seed_halo_mass =
parser_get_param_double(params, "FOF:black_hole_seed_halo_mass_Msun");
/* Convert to internal units */
props->seed_halo_mass *= phys_const->const_solar_mass;
}
/* Read what particle types we want to run FOF on */
parser_get_param_int_array(params, "FOF:linking_types", swift_type_count,
props->fof_linking_types);
/* Read what particle types we want to attach to FOF groups */
parser_get_param_int_array(params, "FOF:attaching_types", swift_type_count,
props->fof_attach_types);
/* Check that there is something to do */
int sum = 0;
for (int i = 0; i < swift_type_count; ++i)
if (props->fof_linking_types[i]) sum++;
if (sum == 0) error("FOF must run on at least one type of particles!");
for (int i = 0; i < swift_type_count; ++i)
if (props->fof_linking_types[i] && props->fof_attach_types[i])
error("FOF can't use a type (%s) as both linking and attaching type!",
part_type_names[i]);
/* Set the current FOF types */
fof_set_current_types(props);
/* Report what we do */
if (engine_rank == 0) {
printf("FOF using the following types for linking:");
for (int i = 0; i < swift_type_count; ++i)
if (props->fof_linking_types[i]) printf("'%s' ", part_type_names[i]);
printf("\n");
printf("FOF using the following types for attaching:");
for (int i = 0; i < swift_type_count; ++i)
if (props->fof_attach_types[i]) printf("'%s' ", part_type_names[i]);
printf("\n");
printf("FOF ignoring the following types:");
for (int i = 0; i < swift_type_count; ++i)
if (current_fof_ignore_type & (1 << (i + 1)))
printf("'%s' ", part_type_names[i]);
printf("\n");
}
#if defined(WITH_MPI) && defined(UNION_BY_SIZE_OVER_MPI)
if (engine_rank == 0)
message(
"Performing FOF over MPI using union by size and union by rank "
"locally.");
#else
message("Performing FOF using union by rank.");
#endif
}
/**
* @brief Registers MPI types used by FOF.
*/
void fof_create_mpi_types(void) {
#ifdef WITH_MPI
if (MPI_Type_contiguous(sizeof(struct fof_mpi) / sizeof(unsigned char),
MPI_BYTE, &fof_mpi_type) != MPI_SUCCESS ||
MPI_Type_commit(&fof_mpi_type) != MPI_SUCCESS) {
error("Failed to create MPI type for fof.");
}
if (MPI_Type_contiguous(sizeof(struct group_length) / sizeof(unsigned char),
MPI_BYTE, &group_length_mpi_type) != MPI_SUCCESS ||
MPI_Type_commit(&group_length_mpi_type) != MPI_SUCCESS) {
error("Failed to create MPI type for group_length.");
}
/* Define type for sending fof_final_index struct */
if (MPI_Type_contiguous(sizeof(struct fof_final_index), MPI_BYTE,
&fof_final_index_type) != MPI_SUCCESS ||
MPI_Type_commit(&fof_final_index_type) != MPI_SUCCESS) {
error("Failed to create MPI type for fof_final_index.");
}
/* Define type for sending fof_final_mass struct */
if (MPI_Type_contiguous(sizeof(struct fof_final_mass), MPI_BYTE,
&fof_final_mass_type) != MPI_SUCCESS ||
MPI_Type_commit(&fof_final_mass_type) != MPI_SUCCESS) {
error("Failed to create MPI type for fof_final_mass.");
}
#else
error("Calling an MPI function in non-MPI code.");
#endif
}
/**
* @brief Mapper function to set the initial group indices.
*
* @param map_data The array of group indices.
* @param num_elements Chunk size.
* @param extra_data Pointer to first group index.
*/
void fof_set_initial_group_index_mapper(void *map_data, int num_elements,
void *extra_data) {
size_t *group_index = (size_t *)map_data;
size_t *group_index_start = (size_t *)extra_data;
const ptrdiff_t offset = group_index - group_index_start;
for (int i = 0; i < num_elements; ++i) {
group_index[i] = i + offset;
}
}
/**
* @brief Mapper function to set the initial attach group indices.
*
* @param map_data The array of attach group indices.
* @param num_elements Chunk size.
* @param extra_data Pointer to first group index.
*/
void fof_set_initial_attach_index_mapper(void *map_data, int num_elements,
void *extra_data) {
size_t *attach_index = (size_t *)map_data;
size_t *attach_index_start = (size_t *)extra_data;
const ptrdiff_t offset = attach_index - attach_index_start;
for (int i = 0; i < num_elements; ++i) {
attach_index[i] = i + offset;
}
}
/**
* @brief Mapper function to set the initial group sizes.
*
* @param map_data The array of group sizes.
* @param num_elements Chunk size.
* @param extra_data N/A.
*/
void fof_set_initial_group_size_mapper(void *map_data, int num_elements,
void *extra_data) {
size_t *group_size = (size_t *)map_data;
for (int i = 0; i < num_elements; ++i) {
group_size[i] = 1;
}
}
/**
* @brief Mapper function to set the initial distances.
*
* @param map_data The array of distance.
* @param num_elements Chunk size.
* @param extra_data N/A.
*/
void fof_set_initial_part_distances_mapper(void *map_data, int num_elements,
void *extra_data) {
float *distance = (float *)map_data;
for (int i = 0; i < num_elements; ++i) {
distance[i] = FLT_MAX;
}
}
/**
* @brief Mapper function to set the initial group IDs.
*
* @param map_data The array of #gpart%s.
* @param num_elements Chunk size.
* @param extra_data Pointer to the default group ID.
*/
void fof_set_initial_group_id_mapper(void *map_data, int num_elements,
void *extra_data) {
/* Unpack the information */
struct gpart *gparts = (struct gpart *)map_data;
const size_t group_id_default = *((size_t *)extra_data);
for (int i = 0; i < num_elements; ++i) {
gparts[i].fof_data.group_id = group_id_default;
}
}
/**
* @brief Allocate the memory and initialise the arrays for a FOF calculation.
*
* @param s The #space to act on.
* @param props The properties of the FOF structure.
*/
void fof_allocate(const struct space *s, struct fof_props *props) {
const int verbose = s->e->verbose;
const ticks total_tic = getticks();
/* Start by computing the mean inter DM particle separation */
/* Collect the mean mass of the non-background gpart */
double high_res_DM_mass = 0.;
long long num_high_res_DM = 0;
for (size_t i = 0; i < s->nr_gparts; ++i) {
const struct gpart *gp = &s->gparts[i];
if (gp->type == swift_type_dark_matter &&
gp->time_bin != time_bin_inhibited &&
gp->time_bin != time_bin_not_created) {
high_res_DM_mass += gp->mass;
num_high_res_DM++;
}
}
#ifdef WITH_MPI
/* Gather the information from all ranks */
MPI_Allreduce(MPI_IN_PLACE, &high_res_DM_mass, 1, MPI_DOUBLE, MPI_SUM,
MPI_COMM_WORLD);
MPI_Allreduce(MPI_IN_PLACE, &num_high_res_DM, 1, MPI_LONG_LONG, MPI_SUM,
MPI_COMM_WORLD);
#endif
high_res_DM_mass /= (double)num_high_res_DM;
/* Are we using the aboslute value or the one derived from the mean
inter-particle sepration? */
if (props->l_x_absolute != -1.) {
props->l_x2 = props->l_x_absolute * props->l_x_absolute;
if (s->e->nodeID == 0) {
message("Linking length is set to %e [internal units].",
props->l_x_absolute);
}
} else {
/* Safety check */
if (!(s->e->policy & engine_policy_cosmology))
error(
"Attempting to run FoF on a simulation using cosmological "
"information but cosmology was not initialised");
/* Calculate the mean inter-particle separation as if we were in
a scenario where the entire box was filled with high-resolution
particles */
const double Omega_cdm = s->e->cosmology->Omega_cdm;
const double Omega_b = s->e->cosmology->Omega_b;
const double Omega_m = Omega_cdm + Omega_b;
const double critical_density_0 = s->e->cosmology->critical_density_0;
double mean_matter_density;
if (s->with_hydro)
mean_matter_density = Omega_cdm * critical_density_0;
else
mean_matter_density = Omega_m * critical_density_0;
/* Mean inter-particle separation of the DM particles */
const double mean_inter_particle_sep =
cbrt(high_res_DM_mass / mean_matter_density);
/* Calculate the particle linking length based upon the mean inter-particle
* spacing of the DM particles. */
const double l_x = props->l_x_ratio * mean_inter_particle_sep;
props->l_x2 = l_x * l_x;
if (s->e->nodeID == 0) {
message(
"Linking length is set to %e [internal units] (%f of mean "
"inter-DM-particle separation).",
l_x, props->l_x_ratio);
}
}
#ifdef WITH_MPI
/* Check size of linking length against the top-level cell dimensions. */
if (props->l_x2 > s->width[0] * s->width[0])
error(
"Linking length greater than the width of a top-level cell. Need to "
"check more than one layer of top-level cells for links.");
#endif
/* Allocate and initialise a group index array. */
if (swift_memalign("fof_group_index", (void **)&props->group_index, 64,
s->nr_gparts * sizeof(size_t)) != 0)
error("Failed to allocate list of particle group indices for FOF search.");
/* Allocate and initialise a group index array for attachables. */
if (swift_memalign("fof_attach_index", (void **)&props->attach_index, 64,
s->nr_gparts * sizeof(size_t)) != 0)
error(
"Failed to allocate list of particle distances array for FOF search.");
/* Allocate and initialise a group index array for attachables. */
if (swift_memalign("fof_found_attach", (void **)&props->found_attachable_link,
64, s->nr_gparts * sizeof(char)) != 0)
error(
"Failed to allocate list of particle distances array for FOF search.");
/* Allocate and initialise the closest distance array. */
if (swift_memalign("fof_distance", (void **)&props->distance_to_link, 64,
s->nr_gparts * sizeof(float)) != 0)
error(
"Failed to allocate list of particle distances array for FOF search.");
/* Allocate and initialise a group size array. */
if (swift_memalign("fof_group_size", (void **)&props->group_size, 64,
s->nr_gparts * sizeof(size_t)) != 0)
error("Failed to allocate list of group size for FOF search.");
ticks tic = getticks();
/* Set initial group index */
threadpool_map(&s->e->threadpool, fof_set_initial_group_index_mapper,
props->group_index, s->nr_gparts, sizeof(size_t),
threadpool_auto_chunk_size, props->group_index);
if (verbose)
message("Setting initial group index took: %.3f %s.",
clocks_from_ticks(getticks() - tic), clocks_getunit());
tic = getticks();
/* Set initial attach index */
threadpool_map(&s->e->threadpool, fof_set_initial_attach_index_mapper,
props->attach_index, s->nr_gparts, sizeof(size_t),
threadpool_auto_chunk_size, props->attach_index);
bzero(props->found_attachable_link, s->nr_gparts * sizeof(char));
if (verbose)
message("Setting initial attach index took: %.3f %s.",
clocks_from_ticks(getticks() - tic), clocks_getunit());
tic = getticks();
/* Set initial distances */
threadpool_map(&s->e->threadpool, fof_set_initial_part_distances_mapper,
props->distance_to_link, s->nr_gparts, sizeof(float),
threadpool_auto_chunk_size, NULL);
if (verbose)
message("Setting initial distances took: %.3f %s.",
clocks_from_ticks(getticks() - tic), clocks_getunit());
tic = getticks();
/* Set initial group sizes */
threadpool_map(&s->e->threadpool, fof_set_initial_group_size_mapper,
props->group_size, s->nr_gparts, sizeof(size_t),
threadpool_auto_chunk_size, NULL);
if (verbose)
message("Setting initial group sizes took: %.3f %s.",
clocks_from_ticks(getticks() - tic), clocks_getunit());
#ifdef SWIFT_DEBUG_CHECKS
ti_current = s->e->ti_current;
#endif
if (verbose)
message("took %.3f %s.", clocks_from_ticks(getticks() - total_tic),
clocks_getunit());
}
/**
* @brief Comparison function for qsort call comparing group sizes.
*
* @param a The first #group_length object.
* @param b The second #group_length object.
* @return 1 if the size of the group b is larger than the size of group a, -1
* if a is the largest and 0 if they are equal.
*/
int cmp_func_group_size(const void *a, const void *b) {
struct group_length *a_group_size = (struct group_length *)a;
struct group_length *b_group_size = (struct group_length *)b;
if (b_group_size->size > a_group_size->size)
return 1;
else if (b_group_size->size < a_group_size->size)
return -1;
else
return 0;
}
#ifdef WITH_MPI
/**
* @brief Comparison function for qsort call comparing group global roots.
*
* @param a The first #fof_final_index object.
* @param b The second #fof_final_index object.
* @return 1 if the global of the group b is *smaller* than the global group of
* group a, -1 if a is the smaller one and 0 if they are equal.
*/
int compare_fof_final_index_global_root(const void *a, const void *b) {
struct fof_final_index *fof_final_index_a = (struct fof_final_index *)a;
struct fof_final_index *fof_final_index_b = (struct fof_final_index *)b;
if (fof_final_index_b->global_root < fof_final_index_a->global_root)
return 1;
else if (fof_final_index_b->global_root > fof_final_index_a->global_root)
return -1;
else
return 0;
}
/**
* @brief Comparison function for qsort call comparing group global roots
*
* @param a The first #fof_final_mass object.
* @param b The second #fof_final_mass object.
* @return 1 if the global of the group b is *smaller* than the global group of
* group a, -1 if a is the smaller one and 0 if they are equal.
*/
int compare_fof_final_mass_global_root(const void *a, const void *b) {
struct fof_final_mass *fof_final_mass_a = (struct fof_final_mass *)a;
struct fof_final_mass *fof_final_mass_b = (struct fof_final_mass *)b;
if (fof_final_mass_b->global_root < fof_final_mass_a->global_root)
return 1;
else if (fof_final_mass_b->global_root > fof_final_mass_a->global_root)
return -1;
else
return 0;
}
/**
* @brief Check whether a given group ID is on the local node.
*
* This function only makes sense in MPI mode.
*
* @param group_id The ID to check.
* @param nr_gparts The number of gparts on this node.
*/
__attribute__((always_inline)) INLINE static int is_local(
const size_t group_id, const size_t nr_gparts) {
#ifdef WITH_MPI
return (group_id >= node_offset && group_id < node_offset + nr_gparts);
#else
error("Calling MPI function in non-MPI mode");
return 1;
#endif
}
/**
* @brief Find the global root ID of a given particle
*
* This function only makes sense in MPI mode.
*
* @param i Index of the particle.
* @param group_index Array of group root indices.
* @param nr_gparts The number of g-particles on this node.
*/
__attribute__((always_inline)) INLINE static size_t fof_find_global(
const size_t i, const size_t *group_index, const size_t nr_gparts) {
#ifdef WITH_MPI
size_t root = node_offset + i;
if (!is_local(root, nr_gparts)) {
/* Non local --> This is the root */
return root;
} else {
/* Local --> Follow the links until we find the root */
while (root != group_index[root - node_offset]) {
root = group_index[root - node_offset];
if (!is_local(root, nr_gparts)) break;
}
}
/* Perform path compression. */
// int index = i;
// while(index != root) {
// int next = group_index[index];
// group_index[index] = root;
// index = next;
//}
return root;
#else
error("Calling MPI function in non-MPI mode");
return -1;
#endif
}
#endif /* WITH_MPI */
/**
* @brief Finds the local root ID of the group a particle exists in
* when group_index contains globally unique identifiers -
* i.e. we stop *before* we advance to a foreign root.
*
* Here we assume that the input i is a local index and we
* return the local index of the root.
*
* @param i Index of the particle.
* @param nr_gparts The number of g-particles on this node.
* @param group_index Array of group root indices.
*/
__attribute__((always_inline)) INLINE static size_t fof_find_local(
const size_t i, const size_t nr_gparts, const size_t *group_index) {
#ifdef WITH_MPI
size_t root = node_offset + i;
while ((group_index[root - node_offset] != root) &&
(group_index[root - node_offset] >= node_offset) &&
(group_index[root - node_offset] < node_offset + nr_gparts)) {
root = group_index[root - node_offset];
}
return root - node_offset;
#else
size_t root = i;
while ((group_index[root] != root) && (group_index[root] < nr_gparts)) {
root = group_index[root];
}
return root;
#endif
}
/**
* @brief Returns whether a #gpart is of the 'attachable' kind.
*/
__attribute__((always_inline)) INLINE static int gpart_is_attachable(
const struct gpart *gp) {
return current_fof_attach_type & (1 << (gp->type + 1));
}
/**
* @brief Returns whether a #gpart is of the 'linkable' kind.
*/
__attribute__((always_inline)) INLINE static int gpart_is_linkable(
const struct gpart *gp) {
return current_fof_linking_type & (1 << (gp->type + 1));
}
/**
* @brief Returns whether a #gpart is to be ignored by FOF.
*/
__attribute__((always_inline)) INLINE static int gpart_is_ignorable(
const struct gpart *gp) {
return current_fof_ignore_type & (1 << (gp->type + 1));
}
/**
* @brief Finds the local root ID of the group a particle exists in.
*
* We follow the group_index array until reaching the root of the group.
*
* Also performs path compression if the path is long.
*
* @param i The index of the particle.
* @param group_index Array of group root indices.
*/
__attribute__((always_inline)) INLINE static size_t fof_find(
const size_t i, size_t *group_index) {
size_t root = i;
int tree_depth = 0;
while (root != group_index[root]) {
#ifdef PATH_HALVING
atomic_cas(&group_index[root], group_index[root],
group_index[group_index[root]]);
#endif
root = group_index[root];
tree_depth++;
}
/* Only perform path compression on trees with a depth of
* FOF_COMPRESS_PATHS_MIN_LENGTH or higher. */
if (tree_depth >= FOF_COMPRESS_PATHS_MIN_LENGTH)
atomic_cas(&group_index[i], group_index[i], root);
return root;
}
/**
* @brief Atomically update the root of a group
*
* @param address The address of the value to update.
* @param y The new value to write.
*
* @return 1 If successful, 0 otherwise.
*/
__attribute__((always_inline)) INLINE static int atomic_update_root(
volatile size_t *address, const size_t y) {
size_t *size_t_ptr = (size_t *)address;
size_t old_val = *address;
size_t test_val = old_val;
size_t new_val = y;
/* atomic_cas returns old_val if *size_t_ptr has not changed since being
* read.*/
old_val = atomic_cas(size_t_ptr, test_val, new_val);
if (test_val == old_val)
return 1;
else
return 0;
}
/**
* @brief Unifies two groups by setting them to the same root.
*
* @param root_i The root of the first group. Will be updated.
* @param root_j The root of the second group.
* @param group_index The list of group roots.
*/
__attribute__((always_inline)) INLINE static void fof_union(
size_t *restrict root_i, const size_t root_j,
size_t *restrict group_index) {
int result = 0;
/* Loop until the root can be set to a new value. */
do {
size_t root_i_new = fof_find(*root_i, group_index);
const size_t root_j_new = fof_find(root_j, group_index);
/* Skip particles in the same group. */
if (root_i_new == root_j_new) return;
/* If the root ID of pj is lower than pi's root ID set pi's root to point to
* pj's. Otherwise set pj's root to point to pi's.*/
if (root_j_new < root_i_new) {
/* Updates the root and checks that its value has not been changed since
* being read. */
result = atomic_update_root(&group_index[root_i_new], root_j_new);
/* Update root_i on the fly. */
*root_i = root_j_new;
} else {
/* Updates the root and checks that its value has not been changed since
* being read. */
result = atomic_update_root(&group_index[root_j_new], root_i_new);
/* Update root_i on the fly. */
*root_i = root_i_new;
}
} while (result != 1);
}
/**
* @brief Compute th minimal distance between any two points in two cells.
*
* @param ci The first #cell.
* @param cj The second #cell.
* @param dim The size of the simulation domain.
*/
__attribute__((always_inline)) INLINE static double cell_min_dist(
const struct cell *restrict ci, const struct cell *restrict cj,
const double dim[3]) {
/* Get cell locations. */
const double cix_min = ci->loc[0];
const double ciy_min = ci->loc[1];
const double ciz_min = ci->loc[2];
const double cjx_min = cj->loc[0];
const double cjy_min = cj->loc[1];
const double cjz_min = cj->loc[2];
const double cix_max = ci->loc[0] + ci->width[0];
const double ciy_max = ci->loc[1] + ci->width[1];
const double ciz_max = ci->loc[2] + ci->width[2];
const double cjx_max = cj->loc[0] + cj->width[0];
const double cjy_max = cj->loc[1] + cj->width[1];
const double cjz_max = cj->loc[2] + cj->width[2];
double not_same_range[3];
/* If two cells are in the same range of coordinates along
any of the 3 axis, the distance along this axis is 0 */
if (ci->width[0] > cj->width[0]) {
if ((cix_min <= cjx_min) && (cjx_max <= cix_max))
not_same_range[0] = 0.;
else
not_same_range[0] = 1.;
} else {
if ((cjx_min <= cix_min) && (cix_max <= cjx_max))
not_same_range[0] = 0.;
else
not_same_range[0] = 1.;
}
if (ci->width[1] > cj->width[1]) {
if ((ciy_min <= cjy_min) && (cjy_max <= ciy_max))
not_same_range[1] = 0.;
else
not_same_range[1] = 1.;
} else {
if ((cjy_min <= ciy_min) && (ciy_max <= cjy_max))
not_same_range[1] = 0.;
else
not_same_range[1] = 1.;
}
if (ci->width[2] > cj->width[2]) {
if ((ciz_min <= cjz_min) && (cjz_max <= ciz_max))
not_same_range[2] = 0.;
else
not_same_range[2] = 1.;
} else {
if ((cjz_min <= ciz_min) && (ciz_max <= cjz_max))
not_same_range[2] = 0.;
else
not_same_range[2] = 1.;
}
/* Find the shortest distance between cells, remembering to account for
* periodic boundary conditions. */
double dx[3];
dx[0] = min4(fabs(nearest(cix_min - cjx_min, dim[0])),
fabs(nearest(cix_min - cjx_max, dim[0])),
fabs(nearest(cix_max - cjx_min, dim[0])),
fabs(nearest(cix_max - cjx_max, dim[0])));
dx[1] = min4(fabs(nearest(ciy_min - cjy_min, dim[1])),
fabs(nearest(ciy_min - cjy_max, dim[1])),
fabs(nearest(ciy_max - cjy_min, dim[1])),
fabs(nearest(ciy_max - cjy_max, dim[1])));
dx[2] = min4(fabs(nearest(ciz_min - cjz_min, dim[2])),
fabs(nearest(ciz_min - cjz_max, dim[2])),
fabs(nearest(ciz_max - cjz_min, dim[2])),
fabs(nearest(ciz_max - cjz_max, dim[2])));
double r2 = 0.;
for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k] * not_same_range[k];
return r2;
}
#ifdef WITH_MPI
/* Add a group to the hash table. */
__attribute__((always_inline)) INLINE static void hashmap_add_group(
const size_t group_id, const size_t group_offset, hashmap_t *map) {
int created_new_element = 0;
hashmap_value_t *offset =
hashmap_get_new(map, group_id, &created_new_element);
if (offset != NULL) {
/* If the element is a new entry set its value. */
if (created_new_element) {
(*offset).value_st = group_offset;
}
} else
error("Couldn't find key (%zu) or create new one.", group_id);
}
/* Find a group in the hash table. */
__attribute__((always_inline)) INLINE static size_t hashmap_find_group_offset(
const size_t group_id, hashmap_t *map) {
hashmap_value_t *group_offset = hashmap_get(map, group_id);
if (group_offset == NULL)
error("Couldn't find key (%zu) or create new one.", group_id);
return (size_t)(*group_offset).value_st;
}
/* Compute send/recv offsets for MPI communication. */
__attribute__((always_inline)) INLINE static void fof_compute_send_recv_offsets(
const int nr_nodes, int *sendcount, int **recvcount, int **sendoffset,
int **recvoffset, size_t *nrecv) {
/* Determine number of entries to receive */
*recvcount = (int *)malloc(nr_nodes * sizeof(int));
MPI_Alltoall(sendcount, 1, MPI_INT, *recvcount, 1, MPI_INT, MPI_COMM_WORLD);
/* Compute send/recv offsets */
*sendoffset = (int *)malloc(nr_nodes * sizeof(int));
(*sendoffset)[0] = 0;
for (int i = 1; i < nr_nodes; i++)
(*sendoffset)[i] = (*sendoffset)[i - 1] + sendcount[i - 1];
*recvoffset = (int *)malloc(nr_nodes * sizeof(int));
(*recvoffset)[0] = 0;
for (int i = 1; i < nr_nodes; i++)
(*recvoffset)[i] = (*recvoffset)[i - 1] + (*recvcount)[i - 1];
/* Allocate receive buffer */
*nrecv = 0;
for (int i = 0; i < nr_nodes; i++) (*nrecv) += (*recvcount)[i];
}
#endif /* WITH_MPI */
/**
* @brief Perform a FOF search using union-find on a given leaf-cell
*
* @param props The properties fof the FOF scheme.
* @param l_x2 The square of the FOF linking length.
* @param space_gparts The start of the #gpart array in the #space structure.
* @param c The #cell in which to perform FOF.
*/
void fof_search_self_cell(const struct fof_props *props, const double l_x2,
const struct gpart *const space_gparts,
const struct cell *c) {
#ifdef SWIFT_DEBUG_CHECKS
if (c->split) error("Performing the FOF search at a non-leaf level!");
#endif
const size_t count = c->grav.count;
const struct gpart *gparts = c->grav.parts;
/* Index of particles in the global group list */
size_t *const group_index = props->group_index;
/* Make a list of particle offsets into the global gparts array. */
size_t *const offset = group_index + (ptrdiff_t)(gparts - space_gparts);
#ifdef SWIFT_DEBUG_CHECKS
if (c->nodeID != engine_rank)
error("Performing self FOF search on foreign cell.");
#endif
/* Loop over particles and find which particles belong in the same group. */
for (size_t i = 0; i < count; i++) {
const struct gpart *pi = &gparts[i];
/* Ignore inhibited particles */
if (pi->time_bin >= time_bin_inhibited) continue;
/* Check whether we ignore this particle type altogether */
if (gpart_is_ignorable(pi)) continue;
#ifdef SWIFT_DEBUG_CHECKS
if (pi->ti_drift != ti_current)
error("Running FOF on an un-drifted particle!");
#endif
const double pix = pi->x[0];
const double piy = pi->x[1];
const double piz = pi->x[2];
/* Find the root of pi. */
size_t root_i = fof_find(offset[i], group_index);
/* Get the nature of the linking */
const int is_link_i = gpart_is_linkable(pi);
for (size_t j = i + 1; j < count; j++) {
const struct gpart *pj = &gparts[j];
/* Ignore inhibited particles */
if (pj->time_bin >= time_bin_inhibited) continue;
/* Check whether we ignore this particle type altogether */
if (gpart_is_ignorable(pj)) continue;
/* Get the nature of the linking */
const int is_link_j = gpart_is_linkable(pj);
/* Both particles must be of the linking kind */
if (!(is_link_i && is_link_j)) continue;
#ifdef SWIFT_DEBUG_CHECKS
if (pj->ti_drift != ti_current)
error("Running FOF on an un-drifted particle!");
#endif
/* Find the root of pj. */
const size_t root_j = fof_find(offset[j], group_index);
/* Skip particles in the same group. */
if (root_i == root_j) continue;
const double pjx = pj->x[0];
const double pjy = pj->x[1];
const double pjz = pj->x[2];
/* Compute the pairwise distance */
float dx[3], r2 = 0.0f;
dx[0] = pix - pjx;
dx[1] = piy - pjy;
dx[2] = piz - pjz;
for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k];
/* Hit or miss? */
if (r2 < l_x2) {
/* Merge the groups` */
fof_union(&root_i, root_j, group_index);
}
}
}
}
/**
* @brief Perform a FOF search using union-find between two cells
*
* @param props The properties fof the FOF scheme.
* @param dim The dimension of the simulation volume.
* @param l_x2 The square of the FOF linking length.
* @param periodic Are we using periodic BCs?
* @param space_gparts The start of the #gpart array in the #space structure.
* @param ci The first #cell in which to perform FOF.
* @param cj The second #cell in which to perform FOF.
*/
void fof_search_pair_cells(const struct fof_props *props, const double dim[3],
const double l_x2, const int periodic,
const struct gpart *const space_gparts,
const struct cell *restrict ci,
const struct cell *restrict cj) {
const size_t count_i = ci->grav.count;
const size_t count_j = cj->grav.count;
const struct gpart *gparts_i = ci->grav.parts;
const struct gpart *gparts_j = cj->grav.parts;
/* Index of particles in the global group list */
size_t *const group_index = props->group_index;
/* Make a list of particle offsets into the global gparts array. */
size_t *const offset_i = group_index + (ptrdiff_t)(gparts_i - space_gparts);
size_t *const offset_j = group_index + (ptrdiff_t)(gparts_j - space_gparts);
#ifdef SWIFT_DEBUG_CHECKS
if (offset_j > offset_i && (offset_j < offset_i + count_i))
error("Overlapping cells");
if (offset_i > offset_j && (offset_i < offset_j + count_j))
error("Overlapping cells");
if (ci->nodeID != cj->nodeID) error("Searching foreign cells!");
#endif
/* Account for boundary conditions.*/
double shift[3] = {0.0, 0.0, 0.0};
/* Get the relative distance between the pairs, wrapping. */
double diff[3];
for (int k = 0; k < 3; k++) {
diff[k] = cj->loc[k] - ci->loc[k];
if (periodic && diff[k] < -dim[k] * 0.5)
shift[k] = dim[k];
else if (periodic && diff[k] > dim[k] * 0.5)
shift[k] = -dim[k];
else
shift[k] = 0.0;
diff[k] += shift[k];
}
/* Loop over particles and find which particles belong in the same group. */
for (size_t i = 0; i < count_i; i++) {
const struct gpart *restrict pi = &gparts_i[i];
/* Ignore inhibited particles */
if (pi->time_bin >= time_bin_inhibited) continue;
/* Check whether we ignore this particle type altogether */
if (gpart_is_ignorable(pi)) continue;
#ifdef SWIFT_DEBUG_CHECKS
if (pi->ti_drift != ti_current)
error("Running FOF on an un-drifted particle!");
#endif
const double pix = pi->x[0] - shift[0];
const double piy = pi->x[1] - shift[1];
const double piz = pi->x[2] - shift[2];
/* Find the root of pi. */
size_t root_i = fof_find(offset_i[i], group_index);
/* Get the nature of the linking */
const int is_link_i = gpart_is_linkable(pi);
for (size_t j = 0; j < count_j; j++) {
const struct gpart *restrict pj = &gparts_j[j];
/* Ignore inhibited particles */
if (pj->time_bin >= time_bin_inhibited) continue;
/* Check whether we ignore this particle type altogether */
if (gpart_is_ignorable(pj)) continue;
/* Get the nature of the linking */
const int is_link_j = gpart_is_linkable(pj);
/* At least one of the particles has to be of linking type */
if (!(is_link_i && is_link_j)) continue;
#ifdef SWIFT_DEBUG_CHECKS
if (pj->ti_drift != ti_current)
error("Running FOF on an un-drifted particle!");
#endif
/* Find the root of pj. */
const size_t root_j = fof_find(offset_j[j], group_index);
/* Skip particles in the same group. */
if (root_i == root_j) continue;
const double pjx = pj->x[0];
const double pjy = pj->x[1];
const double pjz = pj->x[2];
/* Compute pairwise distance (periodic BCs were accounted
for by the shift vector) */
float dx[3], r2 = 0.0f;
dx[0] = pix - pjx;
dx[1] = piy - pjy;
dx[2] = piz - pjz;
for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k];
/* Hit or miss? */
if (r2 < l_x2) {
/* Merge the groups */
fof_union(&root_i, root_j, group_index);
}
}
}
}
#ifdef WITH_MPI
/**
* @brief Add a local<->foreign pair in range to the list of links
*
* Possibly reallocates the local_group_links if we run out of space.
*/
static INLINE void add_foreign_link_to_list(
int *local_link_count, int *group_links_size, struct fof_mpi **group_links,
struct fof_mpi **local_group_links, const size_t root_i,
const size_t root_j, const size_t size_i, const size_t size_j) {
/* If the group_links array is not big enough re-allocate it. */
if (*local_link_count + 1 > *group_links_size) {
const int new_size = 2 * (*group_links_size);
*group_links_size = new_size;
(*group_links) = (struct fof_mpi *)realloc(
*group_links, new_size * sizeof(struct fof_mpi));
/* Reset the local pointer */
(*local_group_links) = *group_links;
message("Re-allocating local group links from %d to %d elements.",
*local_link_count, new_size);
if (new_size < 0) error("Overflow in size of list of foreign links");
}
/* Store the particle group properties for communication. */
(*local_group_links)[*local_link_count].group_i = root_i;
(*local_group_links)[*local_link_count].group_i_size = size_i;
(*local_group_links)[*local_link_count].group_j = root_j;
(*local_group_links)[*local_link_count].group_j_size = size_j;
(*local_link_count)++;
}
#endif
/* Perform a FOF search between a local and foreign cell using the Union-Find
* algorithm. Store any links found between particles.*/
void fof_search_pair_cells_foreign(
const struct fof_props *props, const double dim[3], const double l_x2,
const int periodic, const struct gpart *const space_gparts,
const size_t nr_gparts, const struct cell *restrict ci,
const struct cell *restrict cj, int *restrict link_count,
struct fof_mpi **group_links, int *restrict group_links_size) {
#ifdef WITH_MPI
const size_t count_i = ci->grav.count;
const size_t count_j = cj->grav.count;
const struct gpart *gparts_i = ci->grav.parts;
const struct gpart *gparts_j = cj->grav.parts;
/* Get local pointers */
const size_t *restrict group_index = props->group_index;
const size_t *restrict group_size = props->group_size;
/* Values local to this function to avoid dereferencing */
struct fof_mpi *local_group_links = *group_links;
int local_link_count = *link_count;
/* Make a list of particle offsets into the global gparts array. */
const size_t *const offset_i =
group_index + (ptrdiff_t)(gparts_i - space_gparts);
#ifdef SWIFT_DEBUG_CHECKS
/* Check whether cells are local to the node. */
const int ci_local = (ci->nodeID == engine_rank);
const int cj_local = (cj->nodeID == engine_rank);
if ((ci_local && cj_local) || (!ci_local && !cj_local))
error(
"FOF search of foreign cells called on two local cells or two foreign "
"cells.");
if (!ci_local) {
error("Cell ci, is not local.");
}
#endif
double shift[3] = {0.0, 0.0, 0.0};
/* Get the relative distance between the pairs, wrapping. */
for (int k = 0; k < 3; k++) {
const double diff = cj->loc[k] - ci->loc[k];
if (periodic && diff < -dim[k] / 2)
shift[k] = dim[k];
else if (periodic && diff > dim[k] / 2)
shift[k] = -dim[k];
else
shift[k] = 0.0;
}
/* Loop over particles and find which particles belong in the same group. */
for (size_t i = 0; i < count_i; i++) {
const struct gpart *pi = &gparts_i[i];
/* Ignore inhibited particles */
if (pi->time_bin >= time_bin_inhibited) continue;
/* Check whether we ignore this particle type altogether */
if (gpart_is_ignorable(pi)) continue;
#ifdef SWIFT_DEBUG_CHECKS
if (pi->ti_drift != ti_current)
error("Running FOF on an un-drifted particle!");
#endif
const double pix = pi->x[0] - shift[0];
const double piy = pi->x[1] - shift[1];
const double piz = pi->x[2] - shift[2];
/* Find the root of pi. */
const size_t root_i =
fof_find_global(offset_i[i] - node_offset, group_index, nr_gparts);
/* Get the nature of the linking */
const int is_link_i = gpart_is_linkable(pi);
for (size_t j = 0; j < count_j; j++) {
const struct gpart *pj = &gparts_j[j];
/* Ignore inhibited particles */
if (pj->time_bin >= time_bin_inhibited) continue;
/* Check whether we ignore this particle type altogether */
if (gpart_is_ignorable(pj)) continue;
/* Get the nature of the linking */
const int is_link_j = gpart_is_linkable(pj);
/* Only consider linkable<->linkable pairs */
if (!(is_link_i && is_link_j)) continue;
#ifdef SWIFT_DEBUG_CHECKS
if (pj->ti_drift != ti_current)
error("Running FOF on an un-drifted particle!");
#endif
const double pjx = pj->x[0];
const double pjy = pj->x[1];
const double pjz = pj->x[2];
/* Compute pairwise distance (periodic BCs were accounted
for by the shift vector) */
float dx[3], r2 = 0.0f;
dx[0] = pix - pjx;
dx[1] = piy - pjy;
dx[2] = piz - pjz;
for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k];
/* Hit or miss? */
if (r2 < l_x2) {
/* Check that the links have not already been added to the list. */
for (int l = 0; l < local_link_count; l++) {
if (local_group_links[l].group_i == root_i &&
local_group_links[l].group_j == pj->fof_data.group_id) {
continue;
}
}
/* Add a possible link to the list */
add_foreign_link_to_list(
&local_link_count, group_links_size, group_links,
&local_group_links, root_i, pj->fof_data.group_id,
group_size[root_i - node_offset], pj->fof_data.group_size);
}
}
}
/* Update the returned values */
*link_count = local_link_count;
#else
error("Calling MPI function in non-MPI mode.");
#endif
}
/**
* @brief Recursively perform a union-find FOF between two cells.
*
* If cells are more distant than the linking length, we abort early.
*
* @param props The properties fof the FOF scheme.
* @param dim The dimension of the space.
* @param search_r2 the square of the FOF linking length.
* @param periodic Are we using periodic BCs?
* @param space_gparts The start of the #gpart array in the #space structure.
* @param ci The first #cell in which to perform FOF.
* @param cj The second #cell in which to perform FOF.
*/
void rec_fof_search_pair(const struct fof_props *props, const double dim[3],
const double search_r2, const int periodic,
const struct gpart *const space_gparts,
struct cell *restrict ci, struct cell *restrict cj) {
/* Find the shortest distance between cells, remembering to account for
* boundary conditions. */
const double r2 = cell_min_dist(ci, cj, dim);
#ifdef SWIFT_DEBUG_CHECKS
if (ci == cj) error("Pair FOF called on same cell!!!");
#endif
/* Return if cells are out of range of each other. */
if (r2 > search_r2) return;
/* Recurse on both cells if they are both split. */
if (ci->split && cj->split) {
for (int k = 0; k < 8; k++) {
if (ci->progeny[k] != NULL) {
for (int l = 0; l < 8; l++)
if (cj->progeny[l] != NULL)
rec_fof_search_pair(props, dim, search_r2, periodic, space_gparts,
ci->progeny[k], cj->progeny[l]);
}
}
} else if (ci->split) {
for (int k = 0; k < 8; k++) {
if (ci->progeny[k] != NULL)
rec_fof_search_pair(props, dim, search_r2, periodic, space_gparts,
ci->progeny[k], cj);
}
} else if (cj->split) {
for (int k = 0; k < 8; k++) {
if (cj->progeny[k] != NULL)
rec_fof_search_pair(props, dim, search_r2, periodic, space_gparts, ci,
cj->progeny[k]);
}
} else {
/* Perform FOF search between pairs of cells that are within the linking
* length and not the same cell. */
fof_search_pair_cells(props, dim, search_r2, periodic, space_gparts, ci,
cj);
}
}
#ifdef WITH_MPI
/* Recurse on a pair of cells (one local, one foreign) and perform a FOF search
* between cells that are within range. */
void rec_fof_search_pair_foreign(
const struct fof_props *props, const double dim[3], const double search_r2,
const int periodic, const struct gpart *const space_gparts,
const size_t nr_gparts, const struct cell *ci, const struct cell *cj,
int *restrict link_count, struct fof_mpi **group_links,
int *restrict group_links_size) {
#ifdef SWIFT_DEBUG_CHECKS
if (ci == cj) error("Pair FOF called on same cell!!!");
if (ci->nodeID == cj->nodeID) error("Fully local pair!");
#endif
/* Find the shortest distance between cells, remembering to account for
* boundary conditions. */
const double r2 = cell_min_dist(ci, cj, dim);
/* Return if cells are out of range of each other. */
if (r2 > search_r2) return;
/* Recurse on both cells if they are both split. */
if (ci->split && cj->split) {
for (int k = 0; k < 8; k++) {
if (ci->progeny[k] != NULL) {
for (int l = 0; l < 8; l++)
if (cj->progeny[l] != NULL)
rec_fof_search_pair_foreign(props, dim, search_r2, periodic,
space_gparts, nr_gparts, ci->progeny[k],
cj->progeny[l], link_count, group_links,
group_links_size);
}
}
} else if (ci->split) {
for (int k = 0; k < 8; k++) {
if (ci->progeny[k] != NULL)
rec_fof_search_pair_foreign(props, dim, search_r2, periodic,
space_gparts, nr_gparts, ci->progeny[k], cj,
link_count, group_links, group_links_size);
}
} else if (cj->split) {
for (int k = 0; k < 8; k++) {
if (cj->progeny[k] != NULL)
rec_fof_search_pair_foreign(props, dim, search_r2, periodic,
space_gparts, nr_gparts, ci, cj->progeny[k],
link_count, group_links, group_links_size);
}
} else {
/* Perform FOF search between pairs of cells that are within the linking
* length and not the same cell. */
fof_search_pair_cells_foreign(props, dim, search_r2, periodic, space_gparts,
nr_gparts, ci, cj, link_count, group_links,
group_links_size);
}
}
#endif /* WITH_MPI */
/**
* @brief Recursively perform a union-find FOF on a cell.
*
* @param props The properties fof the FOF scheme.
* @param dim The dimension of the space.
* @param space_gparts The start of the #gpart array in the #space structure.
* @param search_r2 the square of the FOF linking length.
* @param periodic Are we using periodic BCs?
* @param c The #cell in which to perform FOF.
*/
void rec_fof_search_self(const struct fof_props *props, const double dim[3],
const double search_r2, const int periodic,
const struct gpart *const space_gparts,
struct cell *c) {
/* Recurse? */
if (c->split) {
/* Loop over all progeny. Perform pair and self recursion on progenies.*/
for (int k = 0; k < 8; k++) {
if (c->progeny[k] != NULL) {
rec_fof_search_self(props, dim, search_r2, periodic, space_gparts,
c->progeny[k]);
for (int l = k + 1; l < 8; l++)
if (c->progeny[l] != NULL)
rec_fof_search_pair(props, dim, search_r2, periodic, space_gparts,
c->progeny[k], c->progeny[l]);
}
}
}
/* Otherwise, compute self-interaction. */
else
fof_search_self_cell(props, search_r2, space_gparts, c);
}
/**
* @brief Perform the attaching operation using union-find on a given leaf-cell
*
* @param props The properties fof the FOF scheme.
* @param l_x2 The square of the FOF linking length.
* @param space_gparts The start of the #gpart array in the #space structure.
* @param nr_gparts The number of #gpart in the local #space structure.
* @param c The #cell in which to perform FOF.
*/
void fof_attach_self_cell(const struct fof_props *props, const double l_x2,
const struct gpart *const space_gparts,
const size_t nr_gparts, const struct cell *c) {
#ifdef SWIFT_DEBUG_CHECKS
if (c->split) error("Performing the FOF search at a non-leaf level!");
#endif
const size_t count = c->grav.count;
struct gpart *gparts = (struct gpart *)c->grav.parts;
/* Make a list of particle offsets into the global gparts array. */
size_t *const group_index = props->group_index;
#ifndef WITH_MPI
size_t *const index_offset = group_index + (ptrdiff_t)(gparts - space_gparts);
#endif
size_t *const attach_index = props->attach_index;
size_t *const attach_offset =
attach_index + (ptrdiff_t)(gparts - space_gparts);
char *const found_attach_index = props->found_attachable_link;
char *const found_attach_offset =
found_attach_index + (ptrdiff_t)(gparts - space_gparts);
/* Distances of particles in the global list */
float *const offset_dist =
props->distance_to_link + (ptrdiff_t)(gparts - space_gparts);
#ifdef SWIFT_DEBUG_CHECKS
if (c->nodeID != engine_rank)
error("Performing self FOF search on foreign cell.");
#endif
/* Loop over particles and find which particles belong in the same group. */
for (size_t i = 0; i < count; i++) {
struct gpart *pi = &gparts[i];
/* Ignore inhibited particles */
if (pi->time_bin >= time_bin_inhibited) continue;
/* Check whether we ignore this particle type altogether */
if (gpart_is_ignorable(pi)) continue;
#ifdef SWIFT_DEBUG_CHECKS
if (pi->ti_drift != ti_current)
error("Running FOF on an un-drifted particle!");
#endif
const double pix = pi->x[0];
const double piy = pi->x[1];
const double piz = pi->x[2];
/* Find the root of pi. */
#ifdef WITH_MPI
const size_t root_i = fof_find_global(
i + (ptrdiff_t)(gparts - space_gparts), group_index, nr_gparts);
#else
const size_t root_i = fof_find(index_offset[i], group_index);
#endif
/* Get the nature of the linking */
const int is_link_i = gpart_is_linkable(pi);
const int is_attach_i = gpart_is_attachable(pi);
#ifdef SWIFT_DEBUG_CHECKS
if (is_link_i && is_attach_i)
error("Particle cannot be both linkable and attachable!");
#endif
for (size_t j = i + 1; j < count; j++) {
struct gpart *pj = &gparts[j];
/* Ignore inhibited particles */
if (pj->time_bin >= time_bin_inhibited) continue;
/* Check whether we ignore this particle type altogether */
if (gpart_is_ignorable(pj)) continue;
/* Get the nature of the linking */
const int is_link_j = gpart_is_linkable(pj);
const int is_attach_j = gpart_is_attachable(pj);
#ifdef SWIFT_DEBUG_CHECKS
if (is_link_j && is_attach_j)
error("Particle cannot be both linkable and attachable!");
#endif
/* We only want link<->attach pairs */
if (is_attach_i && is_attach_j) continue;
if (is_link_i && is_link_j) continue;
#ifdef SWIFT_DEBUG_CHECKS
if (pj->ti_drift != ti_current)
error("Running FOF on an un-drifted particle!");
#endif
/* Find the root of pi. */
#ifdef WITH_MPI
const size_t root_j = fof_find_global(
j + (ptrdiff_t)(gparts - space_gparts), group_index, nr_gparts);
#else
const size_t root_j = fof_find(index_offset[j], group_index);
#endif
const double pjx = pj->x[0];
const double pjy = pj->x[1];
const double pjz = pj->x[2];
/* Compute the pairwise distance */
float dx[3], r2 = 0.0f;
dx[0] = pix - pjx;
dx[1] = piy - pjy;
dx[2] = piz - pjz;
for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k];
/* Hit or miss? */
if (r2 < l_x2) {
/* Now that we are within the linking length,
* decide what to do based on linking types */
if (is_link_i && is_link_j) {
#ifdef SWIFT_DEBUG_CHECKS
error("Fundamental logic error!");
#endif
} else if (is_link_i && is_attach_j) {
/* We got a linkable and an attachable.
* See whether it is closer and if so re-link.
* This is safe to do as the attachables are never roots and
* nothing is attached to them */
const float dist = sqrtf(r2);
if (dist < offset_dist[j]) {
/* Store the new min dist */
offset_dist[j] = dist;
/* Store the current best root */
attach_offset[j] = root_i;
found_attach_offset[j] = 1;
}
} else if (is_link_j && is_attach_i) {
/* We got a linkable and an attachable.
* See whether it is closer and if so re-link.
* This is safe to do as the attachables are never roots and
* nothing is attached to them */
const float dist = sqrtf(r2);
if (dist < offset_dist[i]) {
/* Store the new min dist */
offset_dist[i] = dist;
/* Store the current best root */
attach_offset[i] = root_j;
found_attach_offset[i] = 1;
}
} else {
#ifdef SWIFT_DEBUG_CHECKS
error("Fundamental logic error!");
#endif
}
}
}
}
}
/**
* @brief Perform the attaching operation using union-find between two cells
*
* @param props The properties fof the FOF scheme.
* @param dim The dimension of the simulation volume.
* @param l_x2 The square of the FOF linking length.
* @param periodic Are we using periodic BCs?
* @param space_gparts The start of the #gpart array in the #space structure.
* @param nr_gparts The number of #gpart in the local #space structure.
* @param ci The first #cell in which to perform FOF.
* @param cj The second #cell in which to perform FOF.
* @param ci_local Is the #cell ci on the local MPI rank?
* @param cj_local Is the #cell cj on the local MPI rank?
*/
void fof_attach_pair_cells(const struct fof_props *props, const double dim[3],
const double l_x2, const int periodic,
const struct gpart *const space_gparts,
const size_t nr_gparts,
const struct cell *restrict ci,
const struct cell *restrict cj, const int ci_local,
const int cj_local) {
const size_t count_i = ci->grav.count;
const size_t count_j = cj->grav.count;
struct gpart *gparts_i = (struct gpart *)ci->grav.parts;
struct gpart *gparts_j = (struct gpart *)cj->grav.parts;
/* Index of particles in the global group list */
size_t *const group_index = props->group_index;
/* Make a list of particle offsets into the global gparts array. */
size_t *const index_offset_i =
group_index + (ptrdiff_t)(gparts_i - space_gparts);
size_t *const index_offset_j =
group_index + (ptrdiff_t)(gparts_j - space_gparts);
size_t *const attach_offset_i =
props->attach_index + (ptrdiff_t)(gparts_i - space_gparts);
size_t *const attach_offset_j =
props->attach_index + (ptrdiff_t)(gparts_j - space_gparts);
char *const found_attach_offset_i =
props->found_attachable_link + (ptrdiff_t)(gparts_i - space_gparts);
char *const found_attach_offset_j =
props->found_attachable_link + (ptrdiff_t)(gparts_j - space_gparts);
/* Distances of particles in the global list */
float *const offset_dist_i =
props->distance_to_link + (ptrdiff_t)(gparts_i - space_gparts);
float *const offset_dist_j =
props->distance_to_link + (ptrdiff_t)(gparts_j - space_gparts);
#ifdef SWIFT_DEBUG_CHECKS
if (index_offset_j > index_offset_i &&
(index_offset_j < index_offset_i + count_i) && (ci->nodeID == cj->nodeID))
error("Overlapping cells");
if (index_offset_i > index_offset_j &&
(index_offset_i < index_offset_j + count_j) && (ci->nodeID == cj->nodeID))
error("Overlapping cells");
#endif
/* Account for boundary conditions.*/
double shift[3] = {0.0, 0.0, 0.0};
/* Get the relative distance between the pairs, wrapping. */
double diff[3];
for (int k = 0; k < 3; k++) {
diff[k] = cj->loc[k] - ci->loc[k];
if (periodic && diff[k] < -dim[k] * 0.5)
shift[k] = dim[k];
else if (periodic && diff[k] > dim[k] * 0.5)
shift[k] = -dim[k];
else
shift[k] = 0.0;
diff[k] += shift[k];
}
/* Loop over particles and find which particles belong in the same group. */
for (size_t i = 0; i < count_i; i++) {
struct gpart *restrict pi = &gparts_i[i];
/* Ignore inhibited particles */
if (pi->time_bin >= time_bin_inhibited) continue;
/* Check whether we ignore this particle type altogether */
if (gpart_is_ignorable(pi)) continue;
#ifdef SWIFT_DEBUG_CHECKS
if (pi->ti_drift != ti_current)
error("Running FOF on an un-drifted particle!");
#endif
const double pix = pi->x[0] - shift[0];
const double piy = pi->x[1] - shift[1];
const double piz = pi->x[2] - shift[2];
/* Find the root of pi. */
#ifdef WITH_MPI
size_t root_i;
if (ci_local) {
root_i = fof_find_global(index_offset_i[i] - node_offset, group_index,
nr_gparts);
} else {
root_i = pi->fof_data.group_id;
}
#else
const size_t root_i = fof_find(index_offset_i[i], group_index);
#endif
/* Get the nature of the linking */
const int is_link_i = gpart_is_linkable(pi);
const int is_attach_i = gpart_is_attachable(pi);
#ifdef SWIFT_DEBUG_CHECKS
if (is_link_i && is_attach_i)
error("Particle cannot be both linkable and attachable!");
#endif
for (size_t j = 0; j < count_j; j++) {
struct gpart *restrict pj = &gparts_j[j];
/* Ignore inhibited particles */
if (pj->time_bin >= time_bin_inhibited) continue;
/* Check whether we ignore this particle type altogether */
if (gpart_is_ignorable(pj)) continue;
/* Get the nature of the linking */
const int is_link_j = gpart_is_linkable(pj);
const int is_attach_j = gpart_is_attachable(pj);
#ifdef SWIFT_DEBUG_CHECKS
if (is_link_j && is_attach_j)
error("Particle cannot be both linkable and attachable!");
#endif
/* We only want link<->attach pairs */
if (is_attach_i && is_attach_j) continue;
if (is_link_i && is_link_j) continue;
#ifdef SWIFT_DEBUG_CHECKS
if (pj->ti_drift != ti_current)
error("Running FOF on an un-drifted particle!");
#endif
/* Find the root of pj. */
#ifdef WITH_MPI
size_t root_j;
if (cj_local) {
root_j = fof_find_global(index_offset_j[j] - node_offset, group_index,
nr_gparts);
} else {
root_j = pj->fof_data.group_id;
}
#else
const size_t root_j = fof_find(index_offset_j[j], group_index);
#endif
const double pjx = pj->x[0];
const double pjy = pj->x[1];
const double pjz = pj->x[2];
/* Compute pairwise distance (periodic BCs were accounted
for by the shift vector) */
float dx[3], r2 = 0.0f;
dx[0] = pix - pjx;
dx[1] = piy - pjy;
dx[2] = piz - pjz;
for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k];
/* Hit or miss? */
if (r2 < l_x2) {
/* Now that we are within the linking length,
* decide what to do based on linking types */
if (is_link_i && is_link_j) {
#ifdef SWIFT_DEBUG_CHECKS
error("Fundamental logic error!");
#endif
} else if (is_link_i && is_attach_j) {
/* We got a linkable and an attachable.
* See whether it is closer and if so re-link.
* This is safe to do as the attachables are never roots and
* nothing is attached to them */
if (cj_local) {
const float dist = sqrtf(r2);
if (dist < offset_dist_j[j]) {
/* Store the new min dist */
offset_dist_j[j] = dist;
/* Store the current best root */
attach_offset_j[j] = root_i;
found_attach_offset_j[j] = 1;
}
}
} else if (is_link_j && is_attach_i) {
/* We got a linkable and an attachable.
* See whether it is closer and if so re-link.
* This is safe to do as the attachables are never roots and
* nothing is attached to them */
if (ci_local) {
const float dist = sqrtf(r2);
if (dist < offset_dist_i[i]) {
/* Store the new min dist */
offset_dist_i[i] = dist;
/* Store the current best root */
attach_offset_i[i] = root_j;
found_attach_offset_i[i] = 1;
}
}
} else {
#ifdef SWIFT_DEBUG_CHECKS
error("Fundamental logic error!");
#endif
}
}
}
}
}
/**
* @brief Recursively perform a union-find attaching between two cells.
*
* If cells are more distant than the linking length, we abort early.
*
* @param props The properties fof the FOF scheme.
* @param dim The dimension of the space.
* @param attach_r2 the square of the FOF linking length.
* @param periodic Are we using periodic BCs?
* @param space_gparts The start of the #gpart array in the #space structure.
* @param nr_gparts The number of #gpart in the local #space structure.
* @param ci The first #cell in which to perform FOF.
* @param cj The second #cell in which to perform FOF.
* @param ci_local Is the #cell ci on the local MPI rank?
* @param cj_local Is the #cell cj on the local MPI rank?
*/
void rec_fof_attach_pair(const struct fof_props *props, const double dim[3],
const double attach_r2, const int periodic,
const struct gpart *const space_gparts,
const size_t nr_gparts, struct cell *restrict ci,
struct cell *restrict cj, const int ci_local,
const int cj_local) {
/* Find the shortest distance between cells, remembering to account for
* boundary conditions. */
const double r2 = cell_min_dist(ci, cj, dim);
#ifdef SWIFT_DEBUG_CHECKS
if (ci == cj) error("Pair FOF called on same cell!!!");
#endif
/* Return if cells are out of range of each other. */
if (r2 > attach_r2) return;
/* Recurse on both cells if they are both split. */
if (ci->split && cj->split) {
for (int k = 0; k < 8; k++) {
if (ci->progeny[k] != NULL) {
for (int l = 0; l < 8; l++)
if (cj->progeny[l] != NULL)
rec_fof_attach_pair(props, dim, attach_r2, periodic, space_gparts,
nr_gparts, ci->progeny[k], cj->progeny[l],
ci_local, cj_local);
}
}
} else if (ci->split) {
for (int k = 0; k < 8; k++) {
if (ci->progeny[k] != NULL)
rec_fof_attach_pair(props, dim, attach_r2, periodic, space_gparts,
nr_gparts, ci->progeny[k], cj, ci_local, cj_local);
}
} else if (cj->split) {
for (int k = 0; k < 8; k++) {
if (cj->progeny[k] != NULL)
rec_fof_attach_pair(props, dim, attach_r2, periodic, space_gparts,
nr_gparts, ci, cj->progeny[k], ci_local, cj_local);
}
} else {
/* Perform FOF attach between pairs of cells that are within the linking
* length and not the same cell. */
fof_attach_pair_cells(props, dim, attach_r2, periodic, space_gparts,
nr_gparts, ci, cj, ci_local, cj_local);
}
}
/**
* @brief Recursively perform a the attaching operation on a cell.
*
* @param props The properties fof the FOF scheme.
* @param dim The dimension of the space.
* @param attach_r2 the square of the FOF linking length.
* @param periodic Are we using periodic BCs?
* @param space_gparts The start of the #gpart array in the #space structure.
* @param nr_gparts The number of #gpart in the local #space structure.
* @param c The #cell in which to perform FOF.
*/
void rec_fof_attach_self(const struct fof_props *props, const double dim[3],
const double attach_r2, const int periodic,
const struct gpart *const space_gparts,
const size_t nr_gparts, struct cell *c) {
/* Recurse? */
if (c->split) {
/* Loop over all progeny. Perform pair and self recursion on progenies.*/
for (int k = 0; k < 8; k++) {
if (c->progeny[k] != NULL) {
rec_fof_attach_self(props, dim, attach_r2, periodic, space_gparts,
nr_gparts, c->progeny[k]);
for (int l = k + 1; l < 8; l++)
if (c->progeny[l] != NULL)
rec_fof_attach_pair(props, dim, attach_r2, periodic, space_gparts,
nr_gparts, c->progeny[k], c->progeny[l],
/*ci_local=*/1,
/*cj_local=*/1);
}
}
} else {
/* Otherwise, compute self-interaction. */
fof_attach_self_cell(props, attach_r2, space_gparts, nr_gparts, c);
}
}
/* Mapper function to atomically update the group size array. */
void fof_update_group_size_mapper(hashmap_key_t key, hashmap_value_t *value,
void *data) {
size_t *group_size = (size_t *)data;
/* Use key to index into group size array. */
atomic_add(&group_size[key], value->value_st);
}
/**
* @brief Mapper function to calculate the group sizes.
*
* @param map_data An array of #gpart%s.
* @param num_elements Chunk size.
* @param extra_data Pointer to a #space.
*/
void fof_calc_group_size_mapper(void *map_data, int num_elements,
void *extra_data) {
/* Retrieve mapped data. */
struct space *s = (struct space *)extra_data;
struct gpart *gparts = (struct gpart *)map_data;
size_t *restrict group_index = s->e->fof_properties->group_index;
size_t *restrict group_size = s->e->fof_properties->group_size;
/* Offset into gparts array. */
const ptrdiff_t gparts_offset = (ptrdiff_t)(gparts - s->gparts);
size_t *const group_index_offset = group_index + gparts_offset;
/* Create hash table. */
hashmap_t map;
hashmap_init(&map);
for (int ind = 0; ind < num_elements; ind++) {
const hashmap_key_t root =
(hashmap_key_t)fof_find(group_index_offset[ind], group_index);
const size_t gpart_index = gparts_offset + ind;
/* Only add particles which aren't the root of a group. Stops groups of size
* 1 being added to the hash table. */
if (root != gpart_index) {
hashmap_value_t *size = hashmap_get(&map, root);
if (size != NULL)
(*size).value_st++;
else
error("Couldn't find key (%zu) or create new one.", root);
}
}
/* Update the group size array. */
if (map.size > 0)
hashmap_iterate(&map, fof_update_group_size_mapper, group_size);
hashmap_free(&map);
}
/* Mapper function to atomically update the group mass array. */
static INLINE void fof_update_group_mass_iterator(hashmap_key_t key,
hashmap_value_t *value,
void *data) {
double *group_mass = (double *)data;
/* Use key to index into group mass array. */
atomic_add_d(&group_mass[key], value->value_dbl);
}
/* Mapper function to atomically update the group size array. */
static INLINE void fof_update_group_size_iterator(hashmap_key_t key,
hashmap_value_t *value,
void *data) {
long long *group_size = (long long *)data;
/* Use key to index into group mass array. */
atomic_add(&group_size[key], value->value_st);
}
/**
* @brief Mapper function to calculate the group masses.
*
* @param map_data An array of #gpart%s.
* @param num_elements Chunk size.
* @param extra_data Pointer to a #space.
*/
void fof_calc_group_mass_mapper(void *map_data, int num_elements,
void *extra_data) {
/* Retrieve mapped data. */
struct space *s = (struct space *)extra_data;
struct gpart *gparts = (struct gpart *)map_data;
double *group_mass = s->e->fof_properties->group_mass;
long long *group_size = s->e->fof_properties->final_group_size;
const size_t group_id_default = s->e->fof_properties->group_id_default;
const size_t group_id_offset = s->e->fof_properties->group_id_offset;
/* Create hash table. */
hashmap_t map;
hashmap_init(&map);
/* Loop over particles and increment the group mass for groups above
* min_group_size. */
for (int ind = 0; ind < num_elements; ind++) {
/* Only check groups above the minimum size. */
if (gparts[ind].fof_data.group_id != group_id_default) {
hashmap_key_t index = gparts[ind].fof_data.group_id - group_id_offset;
hashmap_value_t *data = hashmap_get(&map, index);
/* Update group mass */
if (data != NULL) {
(*data).value_dbl += gparts[ind].mass;
(*data).value_st += 1;
} else
error("Couldn't find key (%zu) or create new one.", index);
}
}
/* Update the group mass array. */
if (map.size > 0)
hashmap_iterate(&map, fof_update_group_mass_iterator, group_mass);
if (map.size > 0)
hashmap_iterate(&map, fof_update_group_size_iterator, group_size);
hashmap_free(&map);
}
#ifdef WITH_MPI
/* Mapper function to unpack hash table into array. */
void fof_unpack_group_mass_mapper(hashmap_key_t key, hashmap_value_t *value,
void *data) {
struct fof_mass_send_hashmap *fof_mass_send =
(struct fof_mass_send_hashmap *)data;
struct fof_final_mass *mass_send = fof_mass_send->mass_send;
size_t *nsend = &fof_mass_send->nsend;
/* Store elements from hash table in array. */
mass_send[*nsend].global_root = key;
mass_send[*nsend].group_mass = value->value_dbl;
mass_send[*nsend].final_group_size = value->value_ll;
mass_send[*nsend].first_position[0] = value->value_array2_dbl[0];
mass_send[*nsend].first_position[1] = value->value_array2_dbl[1];
mass_send[*nsend].first_position[2] = value->value_array2_dbl[2];
mass_send[*nsend].centre_of_mass[0] = value->value_array_dbl[0];
mass_send[*nsend].centre_of_mass[1] = value->value_array_dbl[1];
mass_send[*nsend].centre_of_mass[2] = value->value_array_dbl[2];
mass_send[*nsend].max_part_density_index = value->value_st;
mass_send[*nsend].max_part_density = value->value_flt;
(*nsend)++;
}
#endif /* WITH_MPI */
/**
* @brief Calculates the total mass and CoM of each group above min_group_size
* and finds the densest particle for black hole seeding.
*/
void fof_calc_group_mass(struct fof_props *props, const struct space *s,
const int seed_black_holes,
const size_t num_groups_local,
const size_t num_groups_prev,
size_t *restrict num_on_node,
size_t *restrict first_on_node,
double *restrict group_mass) {
const size_t nr_gparts = s->nr_gparts;
struct gpart *gparts = s->gparts;
const struct part *parts = s->parts;
const size_t group_id_offset = props->group_id_offset;
const size_t group_id_default = props->group_id_default;
const double seed_halo_mass = props->seed_halo_mass;
const int periodic = s->periodic;
const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
#ifdef WITH_MPI
size_t *group_index = props->group_index;
const int nr_nodes = s->e->nr_nodes;
/* Direct pointers to the arrays */
long long *max_part_density_index = props->max_part_density_index;
float *max_part_density = props->max_part_density;
double *centre_of_mass = props->group_centre_of_mass;
double *first_position = props->group_first_position;
long long *final_group_size = props->final_group_size;
/* Start the hash map */
hashmap_t map;
hashmap_init(&map);
/* Collect information about the local particles and update the local AND
* foreign group fragments */
for (size_t i = 0; i < nr_gparts; i++) {
/* Ignore inhibited particles */
if (gparts[i].time_bin >= time_bin_inhibited) continue;
/* Check whether we ignore this particle type altogether */
if (gpart_is_ignorable(&gparts[i])) continue;
/* Check if the particle is in a group above the threshold. */
if (gparts[i].fof_data.group_id != group_id_default) {
const size_t root = fof_find_global(i, group_index, nr_gparts);
if (is_local(root, nr_gparts)) {
/* The root is local */
const size_t index =
gparts[i].fof_data.group_id - group_id_offset - num_groups_prev;
/* Update group mass */
group_mass[index] += gparts[i].mass;
/* Update group size */
final_group_size[index]++;
} else {
/* The root is *not* local */
/* Get the root in the foreign hashmap (create if necessary) */
hashmap_value_t *const data = hashmap_get(&map, (hashmap_key_t)root);
if (data == NULL)
error("Couldn't find key (%zu) or create new one.", root);
/* Compute the centre of mass */
const double mass = gparts[i].mass;
double x[3] = {gparts[i].x[0], gparts[i].x[1], gparts[i].x[2]};
/* Add mass fragments of groups */
data->value_dbl += mass;
/* Increase fragment size */
data->value_ll++;
/* Record the first particle of this fragment that we encounter so we
* we can use it as reference frame for the centre of mass calculation
*/
if (data->value_array2_dbl[0] == (double)(-FLT_MAX)) {
data->value_array2_dbl[0] = gparts[i].x[0];
data->value_array2_dbl[1] = gparts[i].x[1];
data->value_array2_dbl[2] = gparts[i].x[2];
}
if (periodic) {
x[0] = nearest(x[0] - data->value_array2_dbl[0], dim[0]);
x[1] = nearest(x[1] - data->value_array2_dbl[1], dim[1]);
x[2] = nearest(x[2] - data->value_array2_dbl[2], dim[2]);
}
data->value_array_dbl[0] += mass * x[0];
data->value_array_dbl[1] += mass * x[1];
data->value_array_dbl[2] += mass * x[2];
/* Also accumulate the densest gas particle and its index */
if (gparts[i].type == swift_type_gas &&
data->value_st != fof_halo_has_black_hole) {
const size_t gas_index = -gparts[i].id_or_neg_offset;
const float rho_com = hydro_get_comoving_density(&parts[gas_index]);
/* Update index if a denser gas particle is found. */
if (rho_com > data->value_flt) {
data->value_flt = rho_com;
data->value_st = gas_index;
}
} else if (gparts[i].type == swift_type_black_hole) {
/* If there is already a black hole in the fragment we don't need to
create a new one. */
data->value_st = fof_halo_has_black_hole;
data->value_flt = 0.f;
}
} /* Foreign root */
} /* Particle is in a group */
} /* Loop over particles */
size_t nsend = map.size;
struct fof_mass_send_hashmap hashmap_mass_send = {NULL, 0};
/* Allocate and initialise a mass array. */
if (posix_memalign((void **)&hashmap_mass_send.mass_send, 32,
nsend * sizeof(struct fof_final_mass)) != 0)
error("Failed to allocate list of group masses for FOF search.");
hashmap_mass_send.nsend = 0;
struct fof_final_mass *fof_mass_send = hashmap_mass_send.mass_send;
/* Unpack mass fragments and roots from hash table. */
if (map.size > 0)
hashmap_iterate(&map, fof_unpack_group_mass_mapper, &hashmap_mass_send);
nsend = hashmap_mass_send.nsend;
#ifdef SWIFT_DEBUG_CHECKS
if (nsend != map.size)
error("No. of mass fragments to send != elements in hash table.");
#endif
hashmap_free(&map);
/* Sort by global root - this puts the groups in order of which node they're
* stored on */
qsort(fof_mass_send, nsend, sizeof(struct fof_final_mass),
compare_fof_final_mass_global_root);
/* Determine how many entries go to each node */
int *sendcount = (int *)calloc(nr_nodes, sizeof(int));
int dest = 0;
for (size_t i = 0; i < nsend; i += 1) {
while ((fof_mass_send[i].global_root >=
first_on_node[dest] + num_on_node[dest]) ||
(num_on_node[dest] == 0))
dest += 1;
if (dest >= nr_nodes) error("Node index out of range!");
sendcount[dest] += 1;
}
int *recvcount = NULL, *sendoffset = NULL, *recvoffset = NULL;
size_t nrecv = 0;
fof_compute_send_recv_offsets(nr_nodes, sendcount, &recvcount, &sendoffset,
&recvoffset, &nrecv);
struct fof_final_mass *fof_mass_recv =
(struct fof_final_mass *)malloc(nrecv * sizeof(struct fof_final_mass));
/* Exchange group mass */
MPI_Alltoallv(fof_mass_send, sendcount, sendoffset, fof_final_mass_type,
fof_mass_recv, recvcount, recvoffset, fof_final_mass_type,
MPI_COMM_WORLD);
/* For each received global root, look up the group ID we assigned and
* increment the group mass */
for (size_t i = 0; i < nrecv; i++) {
#ifdef SWIFT_DEBUG_CHECKS
if ((fof_mass_recv[i].global_root < node_offset) ||
(fof_mass_recv[i].global_root >= node_offset + nr_gparts)) {
error("Received global root index out of range!");
}
#endif
const size_t local_root_index = fof_mass_recv[i].global_root - node_offset;
const size_t local_group_offset = group_id_offset + num_groups_prev;
const size_t index =
gparts[local_root_index].fof_data.group_id - local_group_offset;
group_mass[index] += fof_mass_recv[i].group_mass;
final_group_size[index] += fof_mass_recv[i].final_group_size;
}
/* Loop over particles, densest particle in each *local* group.
* We can do this now as we eventually have the total group mass */
for (size_t i = 0; i < nr_gparts; i++) {
/* Ignore inhibited particles */
if (gparts[i].time_bin >= time_bin_inhibited) continue;
/* Check whether we ignore this particle type altogether */
if (current_fof_ignore_type & (1 << (gparts[i].type + 1))) continue;
/* Only check groups above the minimum mass threshold. */
if (gparts[i].fof_data.group_id != group_id_default) {
const size_t root = fof_find_global(i, group_index, nr_gparts);
if (is_local(root, nr_gparts)) {
const size_t index =
gparts[i].fof_data.group_id - group_id_offset - num_groups_prev;
/* Compute the centre of mass */
const double mass = gparts[i].mass;
double x[3] = {gparts[i].x[0], gparts[i].x[1], gparts[i].x[2]};
/* Record the first particle of this group that we encounter so we
* can use it as reference frame for the centre of mass calculation */
if (first_position[index * 3 + 0] == (double)(-FLT_MAX)) {
first_position[index * 3 + 0] = x[0];
first_position[index * 3 + 1] = x[1];
first_position[index * 3 + 2] = x[2];
}
if (periodic) {
x[0] = nearest(x[0] - first_position[index * 3 + 0], dim[0]);
x[1] = nearest(x[1] - first_position[index * 3 + 1], dim[1]);
x[2] = nearest(x[2] - first_position[index * 3 + 2], dim[2]);
}
centre_of_mass[index * 3 + 0] += mass * x[0];
centre_of_mass[index * 3 + 1] += mass * x[1];
centre_of_mass[index * 3 + 2] += mass * x[2];
/* Check haloes above the seeding threshold */
if (group_mass[index] > seed_halo_mass) {
/* Find the densest gas particle.
* Account for groups that already have a black hole and groups that
* contain no gas. */
if (gparts[i].type == swift_type_gas &&
max_part_density_index[index] != fof_halo_has_black_hole) {
const size_t gas_index = -gparts[i].id_or_neg_offset;
const float rho_com = hydro_get_comoving_density(&parts[gas_index]);
/* Update index if a denser gas particle is found. */
if (rho_com > max_part_density[index]) {
max_part_density_index[index] = gas_index;
max_part_density[index] = rho_com;
}
}
/* If there is already a black hole in the group we don't need to
create a new one. */
else if (gparts[i].type == swift_type_black_hole) {
max_part_density_index[index] = fof_halo_has_black_hole;
}
} else {
max_part_density_index[index] = fof_halo_has_too_low_mass;
}
}
}
}
/* For each received global root, look up the group ID we assigned and find
* the global maximum gas density */
for (size_t i = 0; i < nrecv; i++) {
const size_t local_root_index = fof_mass_recv[i].global_root - node_offset;
const size_t local_group_offset = group_id_offset + num_groups_prev;
const size_t index =
gparts[local_root_index].fof_data.group_id - local_group_offset;
double fragment_mass = fof_mass_recv[i].group_mass;
double fragment_centre_of_mass[3] = {
fof_mass_recv[i].centre_of_mass[0] / fof_mass_recv[i].group_mass,
fof_mass_recv[i].centre_of_mass[1] / fof_mass_recv[i].group_mass,
fof_mass_recv[i].centre_of_mass[2] / fof_mass_recv[i].group_mass};
fragment_centre_of_mass[0] += fof_mass_recv[i].first_position[0];
fragment_centre_of_mass[1] += fof_mass_recv[i].first_position[1];
fragment_centre_of_mass[2] += fof_mass_recv[i].first_position[2];
if (periodic) {
fragment_centre_of_mass[0] = nearest(
fragment_centre_of_mass[0] - first_position[3 * index + 0], dim[0]);
fragment_centre_of_mass[1] = nearest(
fragment_centre_of_mass[1] - first_position[3 * index + 1], dim[1]);
fragment_centre_of_mass[2] = nearest(
fragment_centre_of_mass[2] - first_position[3 * index + 2], dim[2]);
}
centre_of_mass[index * 3 + 0] += fragment_mass * fragment_centre_of_mass[0];
centre_of_mass[index * 3 + 1] += fragment_mass * fragment_centre_of_mass[1];
centre_of_mass[index * 3 + 2] += fragment_mass * fragment_centre_of_mass[2];
/* Only seed groups above the mass threshold. */
if (group_mass[index] > seed_halo_mass) {
/* Only check groups that don't already contain a black hole. */
if (max_part_density_index[index] != fof_halo_has_black_hole) {
/* Find the densest particle in each group using the densest particle
* from each group fragment. */
if (fof_mass_recv[i].max_part_density > max_part_density[index]) {
max_part_density[index] = fof_mass_recv[i].max_part_density;
max_part_density_index[index] =
fof_mass_recv[i].max_part_density_index;
}
}
/* If there is already a black hole in the group we don't need to create a
new one. */
else if (fof_mass_recv[i].max_part_density_index ==
fof_halo_has_black_hole) {
max_part_density_index[index] = fof_halo_has_black_hole;
}
} else {
max_part_density_index[index] = fof_halo_has_too_low_mass;
}
}
/* For each received global root, look up the group ID we assigned and send
* the global maximum gas density index back */
for (size_t i = 0; i < nrecv; i++) {
#ifdef SWIFT_DEBUG_CHECKS
if ((fof_mass_recv[i].global_root < node_offset) ||
(fof_mass_recv[i].global_root >= node_offset + nr_gparts)) {
error("Received global root index out of range!");
}
#endif
const size_t local_root_index = fof_mass_recv[i].global_root - node_offset;
const size_t local_group_offset = group_id_offset + num_groups_prev;
const size_t index =
gparts[local_root_index].fof_data.group_id - local_group_offset;
/* If the densest particle found locally is not the global max, make sure we
* don't seed two black holes. */
if (max_part_density_index[index] ==
fof_mass_recv[i].max_part_density_index) {
/* If the local index has been set to a foreign index then we don't need
* to seed a black hole locally. */
max_part_density_index[index] = fof_halo_has_black_hole;
} else {
/* The densest particle is on the same node as the global root so we don't
need to seed a black hole on the other node. */
fof_mass_recv[i].max_part_density_index = fof_halo_has_black_hole;
}
}
/* Send the result back */
MPI_Alltoallv(fof_mass_recv, recvcount, recvoffset, fof_final_mass_type,
fof_mass_send, sendcount, sendoffset, fof_final_mass_type,
MPI_COMM_WORLD);
int extra_seed_count = 0;
size_t density_index_size = num_groups_local;
/* Add the index of the densest particle to the local list if the global root
* is not on this node. */
for (size_t i = 0; i < nsend; i++) {
/* Only add the index if:
* 1) there is not already a black hole in the group
* AND
* 2) there is gas in the group. */
if (fof_mass_send[i].max_part_density_index >= 0) {
/* Re-allocate the list if it's needed. */
if (num_groups_local + extra_seed_count >= density_index_size) {
const size_t new_size = 2 * density_index_size;
max_part_density_index = (long long *)realloc(
max_part_density_index, new_size * sizeof(long long));
message(
"Re-allocating max_part_density_index from %zu to %zu elements.",
density_index_size, new_size);
density_index_size = new_size;
props->max_part_density_index = max_part_density_index;
}
/* Add particle index onto the end of the array. */
max_part_density_index[num_groups_local + extra_seed_count] =
fof_mass_send[i].max_part_density_index;
extra_seed_count++;
}
}
props->extra_bh_seed_count = extra_seed_count;
free(sendcount);
free(recvcount);
free(sendoffset);
free(recvoffset);
free(fof_mass_send);
free(fof_mass_recv);
#else
/* Increment the group mass for groups above min_group_size. */
threadpool_map(&s->e->threadpool, fof_calc_group_mass_mapper, gparts,
nr_gparts, sizeof(struct gpart), threadpool_auto_chunk_size,
(struct space *)s);
/* Direct pointers to the arrays */
long long *max_part_density_index = props->max_part_density_index;
float *max_part_density = props->max_part_density;
double *centre_of_mass = props->group_centre_of_mass;
double *first_position = props->group_first_position;
/* Loop over particles, compute CoM and find the densest particle in each
* group. */
/* JSW TODO: Parallelise with threadpool*/
for (size_t i = 0; i < nr_gparts; i++) {
/* Ignore inhibited particles */
if (gparts[i].time_bin >= time_bin_inhibited) continue;
/* Check whether we ignore this particle type altogether */
if (gpart_is_ignorable(&gparts[i])) continue;
const size_t index = gparts[i].fof_data.group_id - group_id_offset;
/* Only check groups above the minimum mass threshold. */
if (gparts[i].fof_data.group_id != group_id_default) {
/* Compute the centre of mass */
const double mass = gparts[i].mass;
double x[3] = {gparts[i].x[0], gparts[i].x[1], gparts[i].x[2]};
/* Record the first particle of this group that we encounter so we
* can use it as reference frame for the centre of mass calculation */
if (first_position[index * 3 + 0] == (double)(-FLT_MAX)) {
first_position[index * 3 + 0] = x[0];
first_position[index * 3 + 1] = x[1];
first_position[index * 3 + 2] = x[2];
}
if (periodic) {
x[0] = nearest(x[0] - first_position[index * 3 + 0], dim[0]);
x[1] = nearest(x[1] - first_position[index * 3 + 1], dim[1]);
x[2] = nearest(x[2] - first_position[index * 3 + 2], dim[2]);
}
centre_of_mass[index * 3 + 0] += mass * x[0];
centre_of_mass[index * 3 + 1] += mass * x[1];
centre_of_mass[index * 3 + 2] += mass * x[2];
/* Check haloes above the seeding threshold */
if (group_mass[index] > seed_halo_mass) {
/* Find the densest gas particle.
* Account for groups that already have a black hole and groups that
* contain no gas. */
if (gparts[i].type == swift_type_gas &&
max_part_density_index[index] != fof_halo_has_black_hole) {
const size_t gas_index = -gparts[i].id_or_neg_offset;
const float rho_com = hydro_get_comoving_density(&parts[gas_index]);
/* Update index if a denser gas particle is found. */
if (rho_com > max_part_density[index]) {
max_part_density_index[index] = gas_index;
max_part_density[index] = rho_com;
}
}
/* If there is already a black hole in the group we don't need to
create a new one. */
else if (gparts[i].type == swift_type_black_hole) {
max_part_density_index[index] = fof_halo_has_black_hole;
}
} else {
max_part_density_index[index] = fof_halo_has_too_low_mass;
}
}
}
props->extra_bh_seed_count = 0;
#endif
}
/**
* @brief Mapper function to perform FOF search.
*
* @param map_data An array of #cell pair indices.
* @param num_elements Chunk size.
* @param extra_data Pointer to a #space.
*/
void fof_find_foreign_links_mapper(void *map_data, int num_elements,
void *extra_data) {
#ifdef WITH_MPI
/* Retrieve mapped data. */
struct space *s = (struct space *)extra_data;
const int periodic = s->periodic;
const size_t nr_gparts = s->nr_gparts;
const struct gpart *const gparts = s->gparts;
const struct engine *e = s->e;
struct fof_props *props = e->fof_properties;
struct cell_pair_indices *cell_pairs = (struct cell_pair_indices *)map_data;
const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
const double search_r2 = props->l_x2;
/* Store links in an array local to this thread. */
int local_link_count = 0;
int local_group_links_size = props->group_links_size / e->nr_threads;
/* Init the local group links buffer. */
struct fof_mpi *local_group_links = (struct fof_mpi *)swift_calloc(
"fof_local_group_links", sizeof(struct fof_mpi), local_group_links_size);
if (local_group_links == NULL)
error("Failed to allocate temporary group links buffer.");
/* Loop over all pairs of local and foreign cells, recurse then perform a
* FOF search. */
for (int ind = 0; ind < num_elements; ind++) {
/* Get the local and foreign cells to recurse on. */
const struct cell *restrict local_cell = cell_pairs[ind].local;
const struct cell *restrict foreign_cell = cell_pairs[ind].foreign;
rec_fof_search_pair_foreign(props, dim, search_r2, periodic, gparts,
nr_gparts, local_cell, foreign_cell,
&local_link_count, &local_group_links,
&local_group_links_size);
}
/* Add links found by this thread to the global link list. */
/* Lock to prevent race conditions while adding to the global link list.*/
if (lock_lock(&s->lock) == 0) {
/* Get pointers to global arrays. */
int *restrict group_links_size = &props->group_links_size;
int *restrict group_link_count = &props->group_link_count;
struct fof_mpi **group_links = &props->group_links;
/* If the global group_links array is not big enough re-allocate it. */
if (*group_link_count + local_link_count > *group_links_size) {
const int old_size = *group_links_size;
const int new_size =
max(*group_link_count + local_link_count, 2 * old_size);
(*group_links) = (struct fof_mpi *)realloc(
*group_links, new_size * sizeof(struct fof_mpi));
*group_links_size = new_size;
message("Re-allocating global group links from %d to %d elements.",
old_size, new_size);
}
/* Copy the local links to the global list. */
for (int i = 0; i < local_link_count; i++) {
int found = 0;
/* Check that the links have not already been added to the list by another
* thread. */
for (int l = 0; l < *group_link_count; l++) {
if ((*group_links)[l].group_i == local_group_links[i].group_i &&
(*group_links)[l].group_j == local_group_links[i].group_j) {
found = 1;
break;
}
}
if (!found) {
(*group_links)[*group_link_count].group_i =
local_group_links[i].group_i;
(*group_links)[*group_link_count].group_i_size =
local_group_links[i].group_i_size;
(*group_links)[*group_link_count].group_j =
local_group_links[i].group_j;
(*group_links)[*group_link_count].group_j_size =
local_group_links[i].group_j_size;
(*group_link_count) = (*group_link_count) + 1;
}
}
}
/* Release lock. */
if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space");
swift_free("fof_local_group_links", local_group_links);
#endif
}
void fof_finalise_group_data(struct fof_props *props,
const struct group_length *group_sizes,
const struct gpart *gparts, const int periodic,
const double dim[3], const int num_groups) {
size_t *group_size =
(size_t *)swift_malloc("fof_group_size", num_groups * sizeof(size_t));
size_t *group_index =
(size_t *)swift_malloc("fof_group_index", num_groups * sizeof(size_t));
double *group_centre_of_mass = (double *)swift_malloc(
"fof_group_centre_of_mass", 3 * num_groups * sizeof(double));
for (int i = 0; i < num_groups; i++) {
const size_t group_offset = group_sizes[i].index;
/* Centre of mass, including possible box wrapping */
double CoM[3] = {
props->group_centre_of_mass[i * 3 + 0] / props->group_mass[i],
props->group_centre_of_mass[i * 3 + 1] / props->group_mass[i],
props->group_centre_of_mass[i * 3 + 2] / props->group_mass[i]};
if (periodic) {
CoM[0] =
box_wrap(CoM[0] + props->group_first_position[i * 3 + 0], 0., dim[0]);
CoM[1] =
box_wrap(CoM[1] + props->group_first_position[i * 3 + 1], 0., dim[1]);
CoM[2] =
box_wrap(CoM[2] + props->group_first_position[i * 3 + 2], 0., dim[2]);
}
#ifdef WITH_MPI
group_index[i] = gparts[group_offset - node_offset].fof_data.group_id;
group_size[i] = props->group_size[group_offset - node_offset];
#else
group_index[i] = gparts[group_offset].fof_data.group_id;
group_size[i] = props->group_size[group_offset];
#endif
group_centre_of_mass[i * 3 + 0] = CoM[0];
group_centre_of_mass[i * 3 + 1] = CoM[1];
group_centre_of_mass[i * 3 + 2] = CoM[2];
}
swift_free("fof_group_centre_of_mass", props->group_centre_of_mass);
swift_free("fof_group_size", props->group_size);
swift_free("fof_group_index", props->group_index);
props->group_centre_of_mass = group_centre_of_mass;
props->group_size = group_size;
props->group_index = group_index;
}
/**
* @brief Seed black holes from gas particles in the haloes on the local MPI
* rank that passed the criteria.
*
* @param props The properties of the FOF scheme.
* @param bh_props The properties of the black hole scheme.
* @param constants The physical constants.
* @param cosmo The cosmological model.
* @param s The @space we act on.
* @param num_groups_local The number of groups on the current MPI rank.
* @param group_sizes List of groups sorted in size order.
*/
void fof_seed_black_holes(const struct fof_props *props,
const struct black_holes_props *bh_props,
const struct phys_const *constants,
const struct cosmology *cosmo, struct space *s,
const int num_groups_local) {
const long long *max_part_density_index = props->max_part_density_index;
/* Count the number of black holes to seed */
int num_seed_black_holes = 0;
for (int i = 0; i < num_groups_local + props->extra_bh_seed_count; i++) {
if (max_part_density_index[i] >= 0) ++num_seed_black_holes;
}
#ifdef WITH_MPI
int total_num_seed_black_holes = 0;
/* Sum the total number of black holes over each MPI rank. */
MPI_Reduce(&num_seed_black_holes, &total_num_seed_black_holes, 1, MPI_INT,
MPI_SUM, 0, MPI_COMM_WORLD);
#else
int total_num_seed_black_holes = num_seed_black_holes;
#endif
if (engine_rank == 0)
message("Seeding %d black hole(s)", total_num_seed_black_holes);
/* Anything to do this time on this rank? */
if (num_seed_black_holes == 0) return;
/* Do we need to reallocate the black hole array for the new particles? */
if (s->nr_bparts + num_seed_black_holes > s->size_bparts) {
const size_t nr_bparts_new = s->nr_bparts + num_seed_black_holes;
s->size_bparts = engine_parts_size_grow * nr_bparts_new;
struct bpart *bparts_new = NULL;
if (swift_memalign("bparts", (void **)&bparts_new, bpart_align,
sizeof(struct bpart) * s->size_bparts) != 0)
error("Failed to allocate new bpart data.");
memcpy(bparts_new, s->bparts, sizeof(struct bpart) * s->nr_bparts);
swift_free("bparts", s->bparts);
s->bparts = bparts_new;
}
int k = s->nr_bparts;
/* Loop over the local groups */
for (int i = 0; i < num_groups_local + props->extra_bh_seed_count; i++) {
const long long part_index = max_part_density_index[i];
/* Should we seed? */
if (part_index >= 0) {
/* Handle on the particle to convert */
struct part *p = &s->parts[part_index];
struct xpart *xp = &s->xparts[part_index];
struct gpart *gp = p->gpart;
/* Let's destroy the gas particle */
p->time_bin = time_bin_inhibited;
p->gpart = NULL;
/* Mark the gpart as black hole */
gp->type = swift_type_black_hole;
/* Basic properties of the black hole */
struct bpart *bp = &s->bparts[k];
bzero(bp, sizeof(struct bpart));
bp->time_bin = gp->time_bin;
/* Re-link things */
bp->gpart = gp;
gp->id_or_neg_offset = -(bp - s->bparts);
/* Synchronize masses, positions and velocities */
bp->mass = gp->mass;
bp->x[0] = gp->x[0];
bp->x[1] = gp->x[1];
bp->x[2] = gp->x[2];
bp->v[0] = gp->v_full[0];
bp->v[1] = gp->v_full[1];
bp->v[2] = gp->v_full[2];
/* Set a smoothing length */
bp->h = p->h;
/* Save the ID */
bp->id = p->id;
/* Save the tree depth */
bp->depth_h = p->depth_h;
#ifdef SWIFT_DEBUG_CHECKS
bp->ti_kick = p->ti_kick;
bp->ti_drift = p->ti_drift;
#endif
/* Copy over all the gas properties that we want */
black_holes_create_from_gas(bp, bh_props, constants, cosmo, p, xp,
s->e->ti_current);
tracers_first_init_bpart(bp, s->e->internal_units,
s->e->physical_constants, cosmo);
/* Move to the next BH slot */
k++;
}
}
#ifdef SWIFT_DEBUG_CHECKS
if ((int)(s->nr_bparts) + num_seed_black_holes != k) {
error("Seeded the wrong number of black holes!");
}
#endif
/* Update the count of black holes. */
s->nr_bparts = k;
}
/* Dump FOF group data. */
void fof_dump_group_data(const struct fof_props *props, const int my_rank,
const int nr_nodes, const char *out_file_name,
struct space *s, const int num_groups) {
FILE *file = NULL;
struct part *parts = s->parts;
long long *final_group_size = props->final_group_size;
size_t *group_index = props->group_index;
double *group_mass = props->group_mass;
double *group_centre_of_mass = props->group_centre_of_mass;
const long long *max_part_density_index = props->max_part_density_index;
const float *max_part_density = props->max_part_density;
for (int rank = 0; rank < nr_nodes; ++rank) {
#ifdef WITH_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
if (rank == my_rank) {
const char *mode;
if (my_rank == 0)
mode = "w";
else
mode = "a";
file = fopen(out_file_name, mode);
if (file == NULL)
error("Could not open the file '%s' with mode '%s'.", out_file_name,
mode);
if (my_rank == 0) {
fprintf(file, "# %8s %12s %12s %12s %12s %12s %12s %24s %24s \n",
"Group ID", "Group Size", "Group Mass", "CoM_x", "CoM_y",
"CoM_z", "Max Density", "Max Density Local Index",
"Particle ID");
fprintf(file,
"#-------------------------------------------------------------"
"------"
"------------------------------\n");
}
for (int i = 0; i < num_groups; i++) {
const long long part_id = props->max_part_density_index[i] >= 0
? parts[max_part_density_index[i]].id
: -1;
fprintf(file, " %8zu %12lld %12e %12e %12e %12e %12e %24lld %24lld\n",
group_index[i], final_group_size[i], group_mass[i],
group_centre_of_mass[i * 3 + 0],
group_centre_of_mass[i * 3 + 1],
group_centre_of_mass[i * 3 + 2], max_part_density[i],
max_part_density_index[i], part_id);
}
/* Dump the extra black hole seeds. */
for (int i = num_groups; i < num_groups + props->extra_bh_seed_count;
i++) {
const long long part_id = max_part_density_index[i] >= 0
? parts[max_part_density_index[i]].id
: -1;
fprintf(file, " %8zu %12zu %12e %12e %12e %12e %12e %24lld %24lld\n",
0UL, 0UL, 0., 0., 0., 0., 0., 0LL, part_id);
}
fclose(file);
}
}
}
struct mapper_data {
size_t *group_index;
size_t *group_size;
float *distance_to_link;
size_t nr_gparts;
struct gpart *space_gparts;
};
/**
* @brief Mapper function to set the roots of #gpart%s going to other MPI ranks.
*
* @param map_data The list of outgoing local cells.
* @param num_elements Chunk size.
* @param extra_data Pointer to mapper data.
*/
void fof_set_outgoing_root_mapper(void *map_data, int num_elements,
void *extra_data) {
#ifdef WITH_MPI
/* Unpack the data */
struct cell **local_cells = (struct cell **)map_data;
const struct mapper_data *data = (struct mapper_data *)extra_data;
const size_t *const group_index = data->group_index;
const size_t *const group_size = data->group_size;
const size_t nr_gparts = data->nr_gparts;
const struct gpart *const space_gparts = data->space_gparts;
/* Loop over the out-going local cells */
for (int i = 0; i < num_elements; ++i) {
/* Get the cell and its gparts */
struct cell *local_cell = local_cells[i];
struct gpart *gparts = local_cell->grav.parts;
/* Make a list of particle offsets into the global gparts array. */
const size_t *const offset =
group_index + (ptrdiff_t)(gparts - space_gparts);
/* Set each particle's root and group properties found in the local FOF.*/
for (int k = 0; k < local_cell->grav.count; k++) {
/* TODO: Can we skip ignorable particles here?
* Likely makes no difference */
/* Recall we did alter the group_index with a global_offset.
* We need to remove that here as we want the *local* root */
const size_t root =
fof_find_global(offset[k] - node_offset, group_index, nr_gparts);
/* TODO: Could we call fof_find() here instead?
* Likely yes but we don't want path compression at this stage.
* So, probably not */
gparts[k].fof_data.group_id = root;
gparts[k].fof_data.group_size = group_size[root - node_offset];
}
}
#else
error("Calling MPI function in non-MPI mode");
#endif
}
/**
* @brief Search foreign cells for links and communicate any found to the
* appropriate node.
*
* @param props the properties of the FOF scheme.
* @param s Pointer to a #space.
*/
void fof_search_foreign_cells(struct fof_props *props, const struct space *s) {
#ifdef WITH_MPI
struct engine *e = s->e;
const int verbose = e->verbose;
/* Abort if only one node */
if (e->nr_nodes == 1) return;
size_t *restrict group_index = props->group_index;
size_t *restrict group_size = props->group_size;
const size_t nr_gparts = s->nr_gparts;
const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
const double search_r2 = props->l_x2;
const ticks tic_total = getticks();
ticks tic = getticks();
/* Make group IDs globally unique. */
for (size_t i = 0; i < nr_gparts; i++) group_index[i] += node_offset;
struct cell_pair_indices *cell_pairs = NULL;
int cell_pair_count = 0;
props->group_links_size = fof_props_default_group_link_size;
int num_cells_out = 0;
int num_cells_in = 0;
/* Find the maximum no. of cell pairs that can communicate. */
for (int i = 0; i < e->nr_proxies; i++) {
for (int j = 0; j < e->proxies[i].nr_cells_out; j++) {
/* Only include gravity cells. */
if (e->proxies[i].cells_out_type[j] & proxy_cell_type_gravity)
num_cells_out++;
}
for (int j = 0; j < e->proxies[i].nr_cells_in; j++) {
/* Only include gravity cells. */
if (e->proxies[i].cells_in_type[j] & proxy_cell_type_gravity)
num_cells_in++;
}
}
if (verbose)
message(
"Finding max no. of cells + offset IDs"
"took: %.3f %s.",
clocks_from_ticks(getticks() - tic), clocks_getunit());
const int cell_pair_size = num_cells_in * num_cells_out;
/* Allocate memory for all the possible cell links */
if (swift_memalign("fof_group_links", (void **)&props->group_links,
SWIFT_STRUCT_ALIGNMENT,
props->group_links_size * sizeof(struct fof_mpi)) != 0)
error("Error while allocating memory for FOF links over an MPI domain");
if (swift_memalign("fof_cell_pairs", (void **)&cell_pairs,
SWIFT_STRUCT_ALIGNMENT,
cell_pair_size * sizeof(struct cell_pair_indices)) != 0)
error("Error while allocating memory for FOF cell pair indices");
ticks tic_pairs = getticks();
/* Loop over cells_in and cells_out for each proxy and find which cells are in
* range of each other to perform the FOF search. Store local cells that are
* touching foreign cells in a list. */
for (int i = 0; i < e->nr_proxies; i++) {
/* Only find links across an MPI domain on one rank. */
if (engine_rank == min(engine_rank, e->proxies[i].nodeID)) {
for (int j = 0; j < e->proxies[i].nr_cells_out; j++) {
/* Skip non-gravity cells. */
if (!(e->proxies[i].cells_out_type[j] & proxy_cell_type_gravity))
continue;
struct cell *restrict local_cell = e->proxies[i].cells_out[j];
/* Skip empty cells. */
if (local_cell->grav.count == 0) continue;
for (int k = 0; k < e->proxies[i].nr_cells_in; k++) {
/* Skip non-gravity cells. */
if (!(e->proxies[i].cells_in_type[k] & proxy_cell_type_gravity))
continue;
struct cell *restrict foreign_cell = e->proxies[i].cells_in[k];
/* Skip empty cells. */
if (foreign_cell->grav.count == 0) continue;
/* Add candidates in range to the list of pairs of cells to treat. */
const double r2 = cell_min_dist(local_cell, foreign_cell, dim);
if (r2 < search_r2) {
cell_pairs[cell_pair_count].local = local_cell;
cell_pairs[cell_pair_count].foreign = foreign_cell;
cell_pair_count++;
}
}
}
}
}
if (verbose)
message("Finding local/foreign cell pairs took: %.3f %s.",
clocks_from_ticks(getticks() - tic_pairs), clocks_getunit());
const ticks tic_set_roots = getticks();
/* Set the root of outgoing particles. */
/* Allocate array of outgoing cells and populate it */
struct cell **local_cells =
(struct cell **)malloc(num_cells_out * sizeof(struct cell *));
int count = 0;
for (int i = 0; i < e->nr_proxies; i++) {
for (int j = 0; j < e->proxies[i].nr_cells_out; j++) {
/* Only include gravity cells. */
if (e->proxies[i].cells_out_type[j] & proxy_cell_type_gravity) {
local_cells[count] = e->proxies[i].cells_out[j];
++count;
}
}
}
/* Now set the *local* roots of all the gparts we are sending */
struct mapper_data data;
data.group_index = group_index;
data.group_size = group_size;
data.nr_gparts = nr_gparts;
data.space_gparts = s->gparts;
threadpool_map(&e->threadpool, fof_set_outgoing_root_mapper, local_cells,
num_cells_out, sizeof(struct cell **),
threadpool_auto_chunk_size, &data);
if (verbose)
message("Initialising particle roots took: %.3f %s.",
clocks_from_ticks(getticks() - tic_set_roots), clocks_getunit());
free(local_cells);
if (verbose)
message(
"Finding local/foreign cell pairs and initialising particle roots "
"took: %.3f %s.",
clocks_from_ticks(getticks() - tic), clocks_getunit());
/* Activate the tasks exchanging all the required gparts */
engine_activate_gpart_comms(e);
ticks local_fof_tic = getticks();
/* Wait for all the communication tasks to be ready */
MPI_Barrier(MPI_COMM_WORLD);
if (verbose)
message("Local FOF imbalance: %.3f %s.",
clocks_from_ticks(getticks() - local_fof_tic), clocks_getunit());
tic = getticks();
/* Perform send and receive tasks. */
engine_launch(e, "fof comms");
if (verbose)
message("MPI send/recv comms took: %.3f %s.",
clocks_from_ticks(getticks() - tic), clocks_getunit());
/* We have now recevied the foreign particles. Each particle received
* carries information about its own *foreign* (to us) root and the
* size of the group fragment it belongs too its original foreign rank. */
tic = getticks();
props->group_link_count = 0;
/* Perform search of group links between local and foreign cells with the
* threadpool. */
threadpool_map(&s->e->threadpool, fof_find_foreign_links_mapper, cell_pairs,
cell_pair_count, sizeof(struct cell_pair_indices), 1,
(struct space *)s);
/* Clean up memory used by foreign particles. */
swift_free("fof_cell_pairs", cell_pairs);
if (verbose)
message("Searching for foreign links took: %.3f %s.",
clocks_from_ticks(getticks() - tic), clocks_getunit());
tic = getticks();
const ticks comms_tic = getticks();
MPI_Barrier(MPI_COMM_WORLD);
if (verbose)
message("Imbalance took: %.3f %s.",
clocks_from_ticks(getticks() - comms_tic), clocks_getunit());
if (verbose)
message("fof_search_foreign_cells() took (FOF SCALING): %.3f %s.",
clocks_from_ticks(getticks() - tic_total), clocks_getunit());
#endif /* WITH_MPI */
}
/**
* @brief Run all the tasks attaching the attachables to their
* nearest linkable particle.
*
* @param props The properties fof the FOF scheme.
* @param s The #space we work with.
*/
void fof_link_attachable_particles(struct fof_props *props,
const struct space *s) {
/* Is there anything to attach? */
if (!current_fof_attach_type) return;
const ticks tic_total = getticks();
/* Activate the tasks attaching attachable particles to the linkable ones */
engine_activate_fof_attach_tasks(s->e);
/* Perform FOF tasks for attachable particles. */
engine_launch(s->e, "fof");
if (s->e->verbose)
message("fof_link_attachable_particles() took (FOF SCALING): %.3f %s.",
clocks_from_ticks(getticks() - tic_total), clocks_getunit());
}
/**
* @brief Construct an array indicating whether a given root does not appear
* in the global list of fragments to link.
*
* Nothing to do here if not running with MPI or if there are no attacheables.
*
* @param props The properties fof the FOF scheme.
* @param s The #space we work with.
*/
void fof_build_list_of_purely_local_groups(struct fof_props *props,
const struct space *s) {
/* Is there anything to attach?
* (The array we construct here is only useful with attacheables)*/
if (!current_fof_attach_type) return;
#ifdef WITH_MPI
struct engine *e = s->e;
/* Abort if only one node */
if (e->nr_nodes == 1) return;
/* Local copy of the variable set in the mapper */
const size_t nr_gparts = s->nr_gparts;
size_t *restrict group_index = props->group_index;
const int group_link_count = props->group_link_count;
/* Sum the total number of links across MPI domains over each MPI rank. */
int global_group_link_count = 0;
MPI_Allreduce(&group_link_count, &global_group_link_count, 1, MPI_INT,
MPI_SUM, MPI_COMM_WORLD);
if (global_group_link_count < 0)
error("Overflow of the size of the global list of foregin links");
struct fof_mpi *global_group_links = NULL;
int *displ = NULL, *group_link_counts = NULL;
if (swift_memalign("fof_global_group_links", (void **)&global_group_links,
SWIFT_STRUCT_ALIGNMENT,
global_group_link_count * sizeof(struct fof_mpi)) != 0)
error("Error while allocating memory for the global list of group links");
if (posix_memalign((void **)&group_link_counts, SWIFT_STRUCT_ALIGNMENT,
e->nr_nodes * sizeof(int)) != 0)
error(
"Error while allocating memory for the number of group links on each "
"MPI rank");
if (posix_memalign((void **)&displ, SWIFT_STRUCT_ALIGNMENT,
e->nr_nodes * sizeof(int)) != 0)
error(
"Error while allocating memory for the displacement in memory for the "
"global group link list");
/* Gather the total number of links on each rank. */
MPI_Allgather(&group_link_count, 1, MPI_INT, group_link_counts, 1, MPI_INT,
MPI_COMM_WORLD);
/* Set the displacements into the global link list using the link counts from
* each rank */
displ[0] = 0;
for (int i = 1; i < e->nr_nodes; i++) {
displ[i] = displ[i - 1] + group_link_counts[i - 1];
if (displ[i] < 0) error("Number of group links overflowing!");
}
/* Gather the global link list on all ranks. */
MPI_Allgatherv(props->group_links, group_link_count, fof_mpi_type,
global_group_links, group_link_counts, displ, fof_mpi_type,
MPI_COMM_WORLD);
/* Clean up memory. */
free(group_link_counts);
free(displ);
/* We now have a list of all the fragment connections.
* We can iterate over the *local* groups to identify the ones which
* are *not* appearing in the list */
if (posix_memalign((void **)&props->is_purely_local, SWIFT_STRUCT_ALIGNMENT,
nr_gparts * sizeof(char)) != 0)
error("Error while allocating memory for the list of purely local groups");
/* Start by pretending every group is purely local */
for (size_t i = 0; i < nr_gparts; ++i) props->is_purely_local[i] = 1;
/* Now loop over the list of inter-rank connections and flag each halo present
* in the list */
for (int k = 0; k < global_group_link_count; ++k) {
const size_t group_i = global_group_links[k].group_i;
const size_t group_j = global_group_links[k].group_j;
const size_t root_i =
fof_find_global(group_i - node_offset, group_index, nr_gparts);
const size_t root_j =
fof_find_global(group_j - node_offset, group_index, nr_gparts);
if (is_local(root_i, nr_gparts)) {
const size_t local_root = root_i - node_offset;
props->is_purely_local[local_root] = 0;
}
if (is_local(root_j, nr_gparts)) {
const size_t local_root = root_j - node_offset;
props->is_purely_local[local_root] = 0;
}
}
/* Clean up the last allocated array */
swift_free("fof_global_group_links", global_group_links);
#endif
}
/**
* @brief Process all the attachable-linkable connections to add the
* attachables to the groups they belong to.
*
* @param props The properties fof the FOF scheme.
* @param s The #space we work with.
*/
void fof_finalise_attachables(struct fof_props *props, const struct space *s) {
/* Is there anything to attach? */
if (!current_fof_attach_type) return;
const ticks tic_total = getticks();
const size_t nr_gparts = s->nr_gparts;
char *restrict found_attachable_link = props->found_attachable_link;
size_t *restrict attach_index = props->attach_index;
size_t *restrict group_index = props->group_index;
size_t *restrict group_size = props->group_size;
#ifdef WITH_MPI
/* Get pointers to global arrays. */
char *restrict is_purely_local = props->is_purely_local;
int *restrict group_links_size = &props->group_links_size;
int *restrict group_link_count = &props->group_link_count;
struct fof_mpi **group_links = &props->group_links;
/* Loop over all the attachables and added them to the group they belong to */
for (size_t i = 0; i < nr_gparts; ++i) {
const struct gpart *gp = &s->gparts[i];
if (gpart_is_attachable(gp) && found_attachable_link[i]) {
/* Update its root */
const size_t root_j = attach_index[i];
const size_t root_i =
fof_find_global(group_index[i] - node_offset, group_index, nr_gparts);
/* Update the size of the group the particle belongs to.
* The strategy emploed depends on where the root and particles are. */
if (is_local(root_j, nr_gparts)) {
const size_t local_root = root_j - node_offset;
if (is_purely_local[local_root]) { /* All parties involved are local */
/* We can directly attack the list of groups */
group_index[i] = local_root + node_offset;
group_size[local_root]++;
} else { /* Group is involved in some cross-boundaries mixing */
/* Add to the list of links to be resolved globally later */
add_foreign_link_to_list(group_link_count, group_links_size,
group_links, group_links, root_i, root_j,
/*size_i=*/1,
/*size_j=*/2);
}
} else { /* Root is foreign */
/* Add to the list of links to be resolved globally later */
add_foreign_link_to_list(group_link_count, group_links_size,
group_links, group_links, root_i, root_j,
/*size_i=*/1,
/*size_j=*/2);
}
}
}
/* We can free the list of purely local groups */
free(props->is_purely_local);
#else /* not WITH_MPI */
/* Loop over all the attachables and added them to the group they belong to */
for (size_t i = 0; i < nr_gparts; ++i) {
const struct gpart *gp = &s->gparts[i];
if (gpart_is_attachable(gp) && found_attachable_link[i]) {
const size_t root = attach_index[i];
/* Update its root */
group_index[i] = root;
/* Update the size of the group the particle belongs to */
group_size[root]++;
}
}
#endif /* WITH_MPI */
if (s->e->verbose)
message("fof_finalise_attachables() took (FOF SCALING): %.3f %s.",
clocks_from_ticks(getticks() - tic_total), clocks_getunit());
}
/**
* @brief Process all the group fragments spanning more than
* one rank to link them.
*
* This is the final global union-find pass which concludes
* the MPI-FOF-algorithm.
*
* @param props The properties fof the FOF scheme.
* @param s The #space we work with.
*/
void fof_link_foreign_fragments(struct fof_props *props,
const struct space *s) {
#ifdef WITH_MPI
struct engine *e = s->e;
const int verbose = e->verbose;
/* Abort if only one node */
if (e->nr_nodes == 1) return;
const size_t nr_gparts = s->nr_gparts;
size_t *restrict group_index = props->group_index;
size_t *restrict group_size = props->group_size;
const ticks tic_total = getticks();
ticks tic = getticks();
const ticks comms_tic = getticks();
if (verbose)
message(
"Searching %zu gravity particles for cross-node links with l_x: %lf",
nr_gparts, sqrt(props->l_x2));
/* Local copy of the variable set in the mapper */
const int group_link_count = props->group_link_count;
/* Sum the total number of links across MPI domains over each MPI rank. */
int global_group_link_count = 0;
MPI_Allreduce(&group_link_count, &global_group_link_count, 1, MPI_INT,
MPI_SUM, MPI_COMM_WORLD);
if (global_group_link_count < 0)
error("Overflow of the size of the global list of foreign links");
struct fof_mpi *global_group_links = NULL;
int *displ = NULL, *group_link_counts = NULL;
if (swift_memalign("fof_global_group_links", (void **)&global_group_links,
SWIFT_STRUCT_ALIGNMENT,
global_group_link_count * sizeof(struct fof_mpi)) != 0)
error("Error while allocating memory for the global list of group links");
if (posix_memalign((void **)&group_link_counts, SWIFT_STRUCT_ALIGNMENT,
e->nr_nodes * sizeof(int)) != 0)
error(
"Error while allocating memory for the number of group links on each "
"MPI rank");
if (posix_memalign((void **)&displ, SWIFT_STRUCT_ALIGNMENT,
e->nr_nodes * sizeof(int)) != 0)
error(
"Error while allocating memory for the displacement in memory for the "
"global group link list");
/* Gather the total number of links on each rank. */
MPI_Allgather(&group_link_count, 1, MPI_INT, group_link_counts, 1, MPI_INT,
MPI_COMM_WORLD);
/* Set the displacements into the global link list using the link counts from
* each rank */
displ[0] = 0;
for (int i = 1; i < e->nr_nodes; i++) {
displ[i] = displ[i - 1] + group_link_counts[i - 1];
if (displ[i] < 0) error("Number of group links overflowing!");
}
/* Gather the global link list on all ranks. */
MPI_Allgatherv(props->group_links, group_link_count, fof_mpi_type,
global_group_links, group_link_counts, displ, fof_mpi_type,
MPI_COMM_WORLD);
/* Clean up memory. */
free(group_link_counts);
free(displ);
swift_free("fof_group_links", props->group_links);
props->group_links = NULL;
if (verbose) {
message("Communication took: %.3f %s.",
clocks_from_ticks(getticks() - comms_tic), clocks_getunit());
message("Global comms took: %.3f %s.", clocks_from_ticks(getticks() - tic),
clocks_getunit());
}
tic = getticks();
/* Transform the group IDs to a local list going from 0-group_count so a
* union-find can be performed.
* Each member of a link is stored separately --> Need 2x as many entries */
size_t *global_group_index = NULL, *global_group_id = NULL,
*global_group_size = NULL;
const int global_group_list_size = 2 * global_group_link_count;
if (swift_memalign("fof_global_group_index", (void **)&global_group_index,
SWIFT_STRUCT_ALIGNMENT,
global_group_list_size * sizeof(size_t)) != 0)
error(
"Error while allocating memory for the displacement in memory for the "
"global group link list");
if (swift_memalign("fof_global_group_id", (void **)&global_group_id,
SWIFT_STRUCT_ALIGNMENT,
global_group_list_size * sizeof(size_t)) != 0)
error(
"Error while allocating memory for the displacement in memory for the "
"global group link list");
if (swift_memalign("fof_global_group_size", (void **)&global_group_size,
SWIFT_STRUCT_ALIGNMENT,
global_group_list_size * sizeof(size_t)) != 0)
error(
"Error while allocating memory for the displacement in memory for the "
"global group link list");
bzero(global_group_size, global_group_list_size * sizeof(size_t));
/* Create hash table. */
hashmap_t map;
hashmap_init(&map);
/* Store each group ID and its properties. */
int group_count = 0;
for (int k = 0; k < global_group_link_count; k++) {
const size_t group_i = global_group_links[k].group_i;
const size_t group_j = global_group_links[k].group_j;
global_group_size[group_count] += global_group_links[k].group_i_size;
global_group_id[group_count] = group_i;
hashmap_add_group(group_i, group_count, &map);
group_count++;
global_group_size[group_count] += global_group_links[k].group_j_size;
global_group_id[group_count] = group_j;
hashmap_add_group(group_j, group_count, &map);
group_count++;
}
if (verbose)
message("Global list compression took: %.3f %s.",
clocks_from_ticks(getticks() - tic), clocks_getunit());
tic = getticks();
/* Create a global_group_index list of groups across MPI domains so that you
* can perform a union-find locally on each node.
* The value of which is an offset into global_group_id, which is the actual
* root. */
for (int i = 0; i < group_count; i++) global_group_index[i] = i;
/* Store the original group size before incrementing in the Union-Find. */
size_t *orig_global_group_size = NULL;
if (swift_memalign("fof_orig_global_group_size",
(void **)&orig_global_group_size, SWIFT_STRUCT_ALIGNMENT,
group_count * sizeof(size_t)) != 0)
error(
"Error while allocating memory for the displacement in memory for the "
"global group link list");
memcpy(orig_global_group_size, global_group_size,
group_count * sizeof(size_t));
/* Perform a union-find on the group links. */
for (int k = 0; k < global_group_link_count; k++) {
/* Use the hash table to find the group offsets in the index array. */
const size_t find_i =
hashmap_find_group_offset(global_group_links[k].group_i, &map);
const size_t find_j =
hashmap_find_group_offset(global_group_links[k].group_j, &map);
/* Use the offset to find the group's root. */
const size_t root_i = fof_find(find_i, global_group_index);
const size_t root_j = fof_find(find_j, global_group_index);
const size_t group_i = global_group_id[root_i];
const size_t group_j = global_group_id[root_j];
if (group_i == group_j) continue;
/* Update roots accordingly. */
const size_t size_i = global_group_size[root_i];
const size_t size_j = global_group_size[root_j];
#ifdef UNION_BY_SIZE_OVER_MPI
if (size_i < size_j) {
global_group_index[root_i] = root_j;
global_group_size[root_j] += size_i;
} else {
global_group_index[root_j] = root_i;
global_group_size[root_i] += size_j;
}
#else
if (group_j < group_i) {
global_group_index[root_i] = root_j;
global_group_size[root_j] += size_i;
} else {
global_group_index[root_j] = root_i;
global_group_size[root_i] += size_j;
}
#endif
}
hashmap_free(&map);
if (verbose)
message("global_group_index construction took: %.3f %s.",
clocks_from_ticks(getticks() - tic), clocks_getunit());
tic = getticks();
/* Update each group locally with new root information. */
for (int i = 0; i < group_count; i++) {
const size_t group_id = global_group_id[i];
const size_t offset = fof_find(global_group_index[i], global_group_index);
const size_t new_root = global_group_id[offset];
/* If the group is local update its root and size. */
if (is_local(group_id, nr_gparts) && new_root != group_id) {
group_index[group_id - node_offset] = new_root;
group_size[group_id - node_offset] -= orig_global_group_size[i];
}
/* If the group linked to a local root update its size. */
if (is_local(new_root, nr_gparts) && new_root != group_id) {
/* Use group sizes before Union-Find */
group_size[new_root - node_offset] += orig_global_group_size[i];
}
}
if (verbose)
message("Updating groups locally took: %.3f %s.",
clocks_from_ticks(getticks() - tic), clocks_getunit());
/* Clean up memory. */
swift_free("fof_global_group_links", global_group_links);
swift_free("fof_global_group_index", global_group_index);
swift_free("fof_global_group_size", global_group_size);
swift_free("fof_global_group_id", global_group_id);
swift_free("fof_orig_global_group_size", orig_global_group_size);
if (verbose) {
message("link_foreign_fragmens() took (FOF SCALING): %.3f %s.",
clocks_from_ticks(getticks() - tic_total), clocks_getunit());
}
#endif /* WITH_MPI */
}
/**
* @brief Compute the local size of each FOF group fragment.
*
* @param props The properties of the FOF scheme.
* @param s The #space containing the particles.
*/
void fof_compute_local_sizes(struct fof_props *props, struct space *s) {
const int verbose = s->e->verbose;
struct gpart *gparts = s->gparts;
const size_t nr_gparts = s->nr_gparts;
const ticks tic_total = getticks();
if (engine_rank == 0 && verbose)
message("Size of hash table element: %ld", sizeof(hashmap_element_t));
#ifdef WITH_MPI
const ticks comms_tic = getticks();
/* Determine number of gparts on lower numbered MPI ranks */
const long long nr_gparts_local = s->nr_gparts;
long long nr_gparts_cumulative;
MPI_Scan(&nr_gparts_local, &nr_gparts_cumulative, 1, MPI_LONG_LONG, MPI_SUM,
MPI_COMM_WORLD);
if (verbose)
message("MPI_Scan Imbalance took: %.3f %s.",
clocks_from_ticks(getticks() - comms_tic), clocks_getunit());
/* Reset global variable containing the rank particle count offset */
node_offset = nr_gparts_cumulative - nr_gparts_local;
#endif
/* Compute the group sizes of the local fragments
* (in non-MPI land that is the final group size of the haloes) */
const ticks tic_calc_group_size = getticks();
threadpool_map(&s->e->threadpool, fof_calc_group_size_mapper, gparts,
nr_gparts, sizeof(struct gpart), threadpool_auto_chunk_size,
s);
if (verbose)
message("FOF calc group size took (FOF SCALING): %.3f %s.",
clocks_from_ticks(getticks() - tic_calc_group_size),
clocks_getunit());
if (verbose)
message("took %.3f %s.", clocks_from_ticks(getticks() - tic_total),
clocks_getunit());
}
/**
* @brief Compute all the group properties
*
* @param props The properties of the FOF scheme.
* @param bh_props The properties of the black hole scheme.
* @param constants The physical constants in internal units.
* @param cosmo The current cosmological model.
* @param s The #space containing the particles.
* @param dump_debug_results Are we writing txt-file debug catalogues including
* BH-seeding info?
* @param dump_results Do we want to write the group catalogue to a hdf5 file?
* @param seed_black_holes Do we want to seed black holes in haloes?
*/
void fof_compute_group_props(struct fof_props *props,
const struct black_holes_props *bh_props,
const struct phys_const *constants,
const struct cosmology *cosmo, struct space *s,
const int dump_results,
const int dump_debug_results,
const int seed_black_holes) {
const int verbose = s->e->verbose;
#ifdef WITH_MPI
const int nr_nodes = s->e->nr_nodes;
#endif
const ticks tic_total = getticks();
struct gpart *gparts = s->gparts;
const size_t nr_gparts = s->nr_gparts;
const size_t min_group_size = props->min_group_size;
const size_t group_id_offset = props->group_id_offset;
const size_t group_id_default = props->group_id_default;
size_t num_groups_local = 0;
size_t num_parts_in_groups_local = 0;
size_t max_group_size_local = 0;
/* Local copy of the arrays */
size_t *restrict group_index = props->group_index;
size_t *restrict group_size = props->group_size;
const ticks tic_num_groups_calc = getticks();
for (size_t i = 0; i < nr_gparts; i++) {
#ifdef WITH_MPI
/* Find the total number of groups. */
if (group_index[i] == i + node_offset && group_size[i] >= min_group_size)
num_groups_local++;
#else
/* Find the total number of groups. */
if (group_index[i] == i && group_size[i] >= min_group_size)
num_groups_local++;
#endif
/* Find the total number of particles in groups. */
if (group_size[i] >= min_group_size)
num_parts_in_groups_local += group_size[i];
/* Find the largest group. */
if (group_size[i] > max_group_size_local)
max_group_size_local = group_size[i];
}
if (verbose)
message(
"Calculating the total no. of local groups took: (FOF SCALING): %.3f "
"%s.",
clocks_from_ticks(getticks() - tic_num_groups_calc), clocks_getunit());
/* Sort the groups in descending order based upon size and re-label their
* IDs 0-num_groups. */
struct group_length *high_group_sizes = NULL;
int group_count = 0;
if (swift_memalign("fof_high_group_sizes", (void **)&high_group_sizes, 32,
num_groups_local * sizeof(struct group_length)) != 0)
error("Failed to allocate list of large groups.");
/* Store the group_sizes and their offset. */
for (size_t i = 0; i < nr_gparts; i++) {
#ifdef WITH_MPI
if (group_index[i] == i + node_offset && group_size[i] >= min_group_size) {
high_group_sizes[group_count].index = node_offset + i;
high_group_sizes[group_count++].size = group_size[i];
}
#else
if (group_index[i] == i && group_size[i] >= min_group_size) {
high_group_sizes[group_count].index = i;
high_group_sizes[group_count++].size = group_size[i];
}
#endif
}
ticks tic = getticks();
/* Find global properties. */
long long num_groups = 0, num_parts_in_groups = 0, max_group_size = 0;
#ifdef WITH_MPI
MPI_Allreduce(&num_groups_local, &num_groups, 1, MPI_LONG_LONG_INT, MPI_SUM,
MPI_COMM_WORLD);
if (verbose)
message("Finding the total no. of groups took: (FOF SCALING): %.3f %s.",
clocks_from_ticks(getticks() - tic_num_groups_calc),
clocks_getunit());
MPI_Reduce(&num_parts_in_groups_local, &num_parts_in_groups, 1,
MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&max_group_size_local, &max_group_size, 1, MPI_LONG_LONG_INT,
MPI_MAX, 0, MPI_COMM_WORLD);
#else
num_groups = num_groups_local;
num_parts_in_groups = num_parts_in_groups_local;
max_group_size = max_group_size_local;
#endif /* WITH_MPI */
props->num_groups = num_groups;
/* Find number of groups on lower numbered MPI ranks */
#ifdef WITH_MPI
long long nglocal = num_groups_local;
long long ngsum;
MPI_Scan(&nglocal, &ngsum, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
const size_t num_groups_prev = (size_t)(ngsum - nglocal);
#endif /* WITH_MPI */
if (verbose)
message("Finding the total no. of groups took: (FOF SCALING): %.3f %s.",
clocks_from_ticks(getticks() - tic_num_groups_calc),
clocks_getunit());
/* Sort local groups into descending order of size */
qsort(high_group_sizes, num_groups_local, sizeof(struct group_length),
cmp_func_group_size);
tic = getticks();
/* Set default group ID for all particles */
threadpool_map(&s->e->threadpool, fof_set_initial_group_id_mapper, s->gparts,
s->nr_gparts, sizeof(struct gpart), threadpool_auto_chunk_size,
(void *)&group_id_default);
if (verbose)
message("Setting default group ID took: %.3f %s.",
clocks_from_ticks(getticks() - tic), clocks_getunit());
/* Assign final group IDs to local root particles where the global root is
* on this node and the group is large enough. Within a node IDs are
* assigned in descending order of particle number. */
for (size_t i = 0; i < num_groups_local; i++) {
#ifdef WITH_MPI
gparts[high_group_sizes[i].index - node_offset].fof_data.group_id =
group_id_offset + i + num_groups_prev;
#else
gparts[high_group_sizes[i].index].fof_data.group_id = group_id_offset + i;
#endif
}
#ifdef WITH_MPI
/* Now, for each local root where the global root is on some other node
* AND the total size of the group is >= min_group_size we need to
* retrieve the gparts.group_id we just assigned to the global root.
*
* Will do that by sending the group_index of these lcoal roots to the
* node where their global root is stored and receiving back the new
* group_id associated with that particle.
*
* Identify local roots with global root on another node and large enough
* group_size. Store index of the local and global roots in these cases.
*
* NOTE: if group_size only contains the total FoF mass for global roots,
* then we have to communicate ALL fragments where the global root is not
* on this node. Hence the commented out extra conditions below.*/
size_t nsend = 0;
for (size_t i = 0; i < nr_gparts; i += 1) {
if ((!is_local(group_index[i],
nr_gparts))) { /* && (group_size[i] >= min_group_size)) { */
nsend += 1;
}
}
struct fof_final_index *fof_index_send =
(struct fof_final_index *)swift_malloc(
"fof_index_send", sizeof(struct fof_final_index) * nsend);
nsend = 0;
for (size_t i = 0; i < nr_gparts; i += 1) {
if ((!is_local(group_index[i],
nr_gparts))) { /* && (group_size[i] >= min_group_size)) { */
fof_index_send[nsend].local_root = node_offset + i;
fof_index_send[nsend].global_root = group_index[i];
nsend += 1;
}
}
/* Sort by global root - this puts the groups in order of which node they're
* stored on */
qsort(fof_index_send, nsend, sizeof(struct fof_final_index),
compare_fof_final_index_global_root);
/* Determine range of global indexes (i.e. particles) on each node */
size_t *num_on_node = (size_t *)malloc(nr_nodes * sizeof(size_t));
MPI_Allgather(&nr_gparts, sizeof(size_t), MPI_BYTE, num_on_node,
sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
size_t *first_on_node = (size_t *)malloc(nr_nodes * sizeof(size_t));
first_on_node[0] = 0;
for (int i = 1; i < nr_nodes; i += 1)
first_on_node[i] = first_on_node[i - 1] + num_on_node[i - 1];
/* Determine how many entries go to each node */
int *sendcount = (int *)malloc(nr_nodes * sizeof(int));
for (int i = 0; i < nr_nodes; i += 1) sendcount[i] = 0;
int dest = 0;
for (size_t i = 0; i < nsend; i += 1) {
while ((fof_index_send[i].global_root >=
first_on_node[dest] + num_on_node[dest]) ||
(num_on_node[dest] == 0))
dest += 1;
if (dest >= nr_nodes) error("Node index out of range!");
sendcount[dest] += 1;
}
int *recvcount = NULL, *sendoffset = NULL, *recvoffset = NULL;
size_t nrecv = 0;
fof_compute_send_recv_offsets(nr_nodes, sendcount, &recvcount, &sendoffset,
&recvoffset, &nrecv);
struct fof_final_index *fof_index_recv =
(struct fof_final_index *)swift_malloc(
"fof_index_recv", nrecv * sizeof(struct fof_final_index));
/* Exchange group indexes */
MPI_Alltoallv(fof_index_send, sendcount, sendoffset, fof_final_index_type,
fof_index_recv, recvcount, recvoffset, fof_final_index_type,
MPI_COMM_WORLD);
/* For each received global root, look up the group ID we assigned and store
* it in the struct */
for (size_t i = 0; i < nrecv; i += 1) {
if ((fof_index_recv[i].global_root < node_offset) ||
(fof_index_recv[i].global_root >= node_offset + nr_gparts)) {
error("Received global root index out of range!");
}
fof_index_recv[i].global_root =
gparts[fof_index_recv[i].global_root - node_offset].fof_data.group_id;
}
/* Send the result back */
MPI_Alltoallv(fof_index_recv, recvcount, recvoffset, fof_final_index_type,
fof_index_send, sendcount, sendoffset, fof_final_index_type,
MPI_COMM_WORLD);
/* Update local gparts.group_id */
for (size_t i = 0; i < nsend; i += 1) {
if ((fof_index_send[i].local_root < node_offset) ||
(fof_index_send[i].local_root >= node_offset + nr_gparts)) {
error("Sent local root index out of range!");
}
gparts[fof_index_send[i].local_root - node_offset].fof_data.group_id =
fof_index_send[i].global_root;
}
free(sendcount);
free(recvcount);
free(sendoffset);
free(recvoffset);
swift_free("fof_index_send", fof_index_send);
swift_free("fof_index_recv", fof_index_recv);
#endif /* WITH_MPI */
/* Assign every particle the group_id of its local root. */
for (size_t i = 0; i < nr_gparts; i++) {
const size_t root = fof_find_local(i, nr_gparts, group_index);
gparts[i].fof_data.group_id = gparts[root].fof_data.group_id;
}
if (verbose)
message("Group sorting took: %.3f %s.", clocks_from_ticks(getticks() - tic),
clocks_getunit());
/* Allocate and initialise a group mass and centre of mass array. */
if (swift_memalign("fof_group_mass", (void **)&props->group_mass, 32,
num_groups_local * sizeof(double)) != 0)
error("Failed to allocate list of group masses for FOF search.");
if (swift_memalign("fof_group_size", (void **)&props->final_group_size, 32,
num_groups_local * sizeof(long long)) != 0)
error("Failed to allocate list of group masses for FOF search.");
if (swift_memalign("fof_group_centre_of_mass",
(void **)&props->group_centre_of_mass, 32,
num_groups_local * 3 * sizeof(double)) != 0)
error("Failed to allocate list of group CoM for FOF search.");
if (swift_memalign("fof_group_first_position",
(void **)&props->group_first_position, 32,
num_groups_local * 3 * sizeof(double)) != 0)
error("Failed to allocate list of group first positions for FOF search.");
bzero(props->group_mass, num_groups_local * sizeof(double));
bzero(props->final_group_size, num_groups_local * sizeof(long long));
bzero(props->group_centre_of_mass, num_groups_local * 3 * sizeof(double));
for (size_t i = 0; i < 3 * num_groups_local; i++) {
props->group_first_position[i] = -FLT_MAX;
}
/* Allocate and initialise arrays to identify the densest gas particle. */
if (swift_memalign("fof_max_part_density_index",
(void **)&props->max_part_density_index, 32,
num_groups_local * sizeof(long long)) != 0)
error(
"Failed to allocate list of max group density indices for FOF "
"search.");
if (swift_memalign("fof_max_part_density", (void **)&props->max_part_density,
32, num_groups_local * sizeof(float)) != 0)
error("Failed to allocate list of max group densities for FOF search.");
/* No densest particle found so far */
bzero(props->max_part_density, num_groups_local * sizeof(float));
/* Start by assuming that the haloes have no gas */
for (size_t i = 0; i < num_groups_local; i++) {
props->max_part_density_index[i] = fof_halo_has_no_gas;
}
const ticks tic_seeding = getticks();
#ifdef WITH_MPI
fof_calc_group_mass(props, s, seed_black_holes, num_groups_local,
num_groups_prev, num_on_node, first_on_node,
props->group_mass);
free(num_on_node);
free(first_on_node);
#else
fof_calc_group_mass(props, s, seed_black_holes, num_groups_local,
/*num_groups_prev=*/0, /*num_on_node=*/NULL,
/*first_on_node=*/NULL, props->group_mass);
#endif
/* Finalise the group data before dump */
fof_finalise_group_data(props, high_group_sizes, s->gparts, s->periodic,
s->dim, num_groups_local);
if (verbose)
message("Computing group properties took: %.3f %s.",
clocks_from_ticks(getticks() - tic_seeding), clocks_getunit());
/* Dump group data. */
if (dump_results) {
#ifdef HAVE_HDF5
write_fof_hdf5_catalogue(props, (long long)num_groups_local, s->e);
#else
error("Can't dump hdf5 catalogues with hdf5 switched off!");
#endif
}
if (dump_debug_results) {
char output_file_name[PARSER_MAX_LINE_SIZE];
snprintf(output_file_name, PARSER_MAX_LINE_SIZE, "%s", props->base_name);
#ifdef WITH_MPI
snprintf(output_file_name + strlen(output_file_name), FILENAME_BUFFER_SIZE,
"_mpi.dat");
#else
snprintf(output_file_name + strlen(output_file_name), FILENAME_BUFFER_SIZE,
".dat");
#endif
fof_dump_group_data(props, s->e->nodeID, s->e->nr_nodes, output_file_name,
s, num_groups_local);
}
/* Seed black holes */
if (seed_black_holes) {
fof_seed_black_holes(props, bh_props, constants, cosmo, s,
num_groups_local);
}
/* Free the left-overs */
swift_free("fof_high_group_sizes", high_group_sizes);
swift_free("fof_group_mass", props->group_mass);
swift_free("fof_group_size", props->final_group_size);
swift_free("fof_group_centre_of_mass", props->group_centre_of_mass);
swift_free("fof_group_first_position", props->group_first_position);
swift_free("fof_max_part_density_index", props->max_part_density_index);
swift_free("fof_max_part_density", props->max_part_density);
props->group_mass = NULL;
props->final_group_size = NULL;
props->group_centre_of_mass = NULL;
props->max_part_density_index = NULL;
props->max_part_density = NULL;
swift_free("fof_distance", props->distance_to_link);
swift_free("fof_group_index", props->group_index);
swift_free("fof_attach_index", props->attach_index);
swift_free("fof_found_attach", props->found_attachable_link);
swift_free("fof_group_size", props->group_size);
props->group_index = NULL;
props->group_size = NULL;
if (engine_rank == 0) {
message(
"No. of groups: %lld. No. of particles in groups: %lld. No. of "
"particles not in groups: %lld.",
num_groups, num_parts_in_groups,
s->e->total_nr_gparts - num_parts_in_groups);
message("Largest group by size: %lld", max_group_size);
}
if (verbose)
message("took %.3f %s.", clocks_from_ticks(getticks() - tic_total),
clocks_getunit());
#ifdef WITH_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
}
void fof_struct_dump(const struct fof_props *props, FILE *stream) {
struct fof_props temp = *props;
temp.num_groups = 0;
temp.group_link_count = 0;
temp.group_links_size = 0;
temp.group_index = NULL;
temp.group_size = NULL;
temp.group_mass = NULL;
temp.final_group_size = NULL;
temp.group_centre_of_mass = NULL;
temp.max_part_density_index = NULL;
temp.max_part_density = NULL;
temp.group_links = NULL;
restart_write_blocks((void *)&temp, sizeof(struct fof_props), 1, stream,
"fof_props", "fof_props");
}
void fof_struct_restore(struct fof_props *props, FILE *stream) {
restart_read_blocks((void *)props, sizeof(struct fof_props), 1, stream, NULL,
"fof_props");
fof_set_current_types(props);
}
#endif /* WITH_FOF */