/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2018 James Willis (james.s.willis@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see .
 *
 ******************************************************************************/
/* Config parameters. */
#include 
#ifdef WITH_FOF
/* Some standard headers. */
#include 
#include 
#include 
/* MPI headers. */
#ifdef WITH_MPI
#include 
#endif
/* This object's header. */
#include "fof.h"
/* Local headers. */
#include "black_holes.h"
#include "common_io.h"
#include "engine.h"
#include "fof_catalogue_io.h"
#include "hashmap.h"
#include "memuse.h"
#include "proxy.h"
#include "threadpool.h"
#include "tools.h"
#include "tracers.h"
#define fof_props_default_group_id 2147483647
#define fof_props_default_group_id_offset 1
#define fof_props_default_group_link_size 20000
/* Constants. */
#define UNION_BY_SIZE_OVER_MPI (1)
#define FOF_COMPRESS_PATHS_MIN_LENGTH (2)
/* The FoF policy we are running */
int current_fof_linking_type;
/* The FoF policy for particles attached to the main type */
int current_fof_attach_type;
/* The FoF policy for particles ignored altogether */
int current_fof_ignore_type;
/* Are we timing calculating group properties in the FOF? */
// #define WITHOUT_GROUP_PROPS
/**
 * @brief Properties of a group used for black hole seeding
 */
enum fof_halo_seeding_props {
  fof_halo_has_no_gas = -1LL,
  fof_halo_has_black_hole = -2LL,
  fof_halo_has_too_low_mass = -3LL
};
#ifdef WITH_MPI
/* MPI types used for communications */
MPI_Datatype fof_mpi_type;
MPI_Datatype group_length_mpi_type;
MPI_Datatype fof_final_index_type;
/*! Offset between the first particle on this MPI rank and the first particle in
 * the global order */
size_t node_offset;
#endif
#ifdef SWIFT_DEBUG_CHECKS
static integertime_t ti_current;
#endif
void fof_set_current_types(const struct fof_props *props) {
  /* Initialize the FoF linking mode */
  current_fof_linking_type = 0;
  for (int i = 0; i < swift_type_count; ++i)
    if (props->fof_linking_types[i]) {
      current_fof_linking_type |= (1 << (i + 1));
    }
  /* Initialize the FoF attaching mode */
  current_fof_attach_type = 0;
  for (int i = 0; i < swift_type_count; ++i)
    if (props->fof_attach_types[i]) {
      current_fof_attach_type |= (1 << (i + 1));
    }
  /* Construct the combined mask of ignored particles */
  current_fof_ignore_type =
      ~(current_fof_linking_type | current_fof_attach_type);
}
/**
 * @brief Initialise the properties of the FOF code.
 *
 * @param props the #fof_props structure to fill.
 * @param params the parameter file parser.
 * @param phys_const The physical constants in internal units.
 * @param us The internal unit system.
 * @param stand_alone_fof Are we initialising for stand-alone? (1) or
 * on-the-fly? (0)
 */
void fof_init(struct fof_props *props, struct swift_params *params,
              const struct phys_const *phys_const, const struct unit_system *us,
              const int stand_alone_fof) {
  /* Base name for the FOF output file */
  parser_get_param_string(params, "FOF:basename", props->base_name);
  /* Check that we can write outputs by testing if the output
   * directory exists and is searchable and writable. */
  char directory[PARSER_MAX_LINE_SIZE] = {0};
  sprintf(directory, "%s", props->base_name);
  const char *dirp = dirname(directory);
  if (engine_rank == 0) safe_checkdir(dirp, /*create=*/1);
  /* Read the minimum group size. */
  props->min_group_size = parser_get_param_int(params, "FOF:min_group_size");
  /* Read whether we're doing FoF calls to seed black holes. */
  props->seed_black_holes_enabled =
      parser_get_param_int(params, "FOF:seed_black_holes_enabled");
  /* Read the default group ID of particles in groups below the minimum group
   * size. */
  props->group_id_default = parser_get_opt_param_int(
      params, "FOF:group_id_default", fof_props_default_group_id);
  /* Read the starting group ID. */
  props->group_id_offset = parser_get_opt_param_int(
      params, "FOF:group_id_offset", fof_props_default_group_id_offset);
  /* Read the linking length ratio to the mean inter-particle separation. */
  props->l_x_ratio =
      parser_get_opt_param_double(params, "FOF:linking_length_ratio", -1.);
  /* Read value of absolute linking length aksed by the user */
  props->l_x_absolute =
      parser_get_opt_param_double(params, "FOF:absolute_linking_length", -1.);
  if (props->l_x_ratio == -1. && props->l_x_absolute <= 0.)
    error("The FOF linking length can't be negative!");
  if (props->l_x_ratio <= 0. && props->l_x_absolute == -1.)
    error("The FOF linking length ratio can't be negative!");
  if (!stand_alone_fof && props->seed_black_holes_enabled) {
    /* Read the minimal halo mass for black hole seeding */
    props->seed_halo_mass =
        parser_get_param_double(params, "FOF:black_hole_seed_halo_mass_Msun");
    /* Convert to internal units */
    props->seed_halo_mass *= phys_const->const_solar_mass;
  }
  /* Read what particle types we want to run FOF on */
  parser_get_param_int_array(params, "FOF:linking_types", swift_type_count,
                             props->fof_linking_types);
  /* Read what particle types we want to attach to FOF groups */
  parser_get_param_int_array(params, "FOF:attaching_types", swift_type_count,
                             props->fof_attach_types);
  /* Check that there is something to do */
  int sum = 0;
  for (int i = 0; i < swift_type_count; ++i)
    if (props->fof_linking_types[i]) sum++;
  if (sum == 0) error("FOF must run on at least one type of particles!");
  for (int i = 0; i < swift_type_count; ++i)
    if (props->fof_linking_types[i] && props->fof_attach_types[i])
      error("FOF can't use a type (%s) as both linking and attaching type!",
            part_type_names[i]);
  /* Set the current FOF types */
  fof_set_current_types(props);
  /* Report what we do */
  if (engine_rank == 0) {
    printf("FOF using the following types for linking:");
    for (int i = 0; i < swift_type_count; ++i)
      if (props->fof_linking_types[i]) printf("'%s' ", part_type_names[i]);
    printf("\n");
    printf("FOF using the following types for attaching:");
    for (int i = 0; i < swift_type_count; ++i)
      if (props->fof_attach_types[i]) printf("'%s' ", part_type_names[i]);
    printf("\n");
    printf("FOF ignoring the following types:");
    for (int i = 0; i < swift_type_count; ++i)
      if (current_fof_ignore_type & (1 << (i + 1)))
        printf("'%s' ", part_type_names[i]);
    printf("\n");
  }
#if defined(WITH_MPI) && defined(UNION_BY_SIZE_OVER_MPI)
  if (engine_rank == 0)
    message(
        "Performing FOF over MPI using union by size and union by rank "
        "locally.");
#else
  message("Performing FOF using union by rank.");
#endif
}
/**
 * @brief Registers MPI types used by FOF.
 */
void fof_create_mpi_types(void) {
#ifdef WITH_MPI
  if (MPI_Type_contiguous(sizeof(struct fof_mpi) / sizeof(unsigned char),
                          MPI_BYTE, &fof_mpi_type) != MPI_SUCCESS ||
      MPI_Type_commit(&fof_mpi_type) != MPI_SUCCESS) {
    error("Failed to create MPI type for fof.");
  }
  if (MPI_Type_contiguous(sizeof(struct group_length) / sizeof(unsigned char),
                          MPI_BYTE, &group_length_mpi_type) != MPI_SUCCESS ||
      MPI_Type_commit(&group_length_mpi_type) != MPI_SUCCESS) {
    error("Failed to create MPI type for group_length.");
  }
  /* Define type for sending fof_final_index struct */
  if (MPI_Type_contiguous(sizeof(struct fof_final_index), MPI_BYTE,
                          &fof_final_index_type) != MPI_SUCCESS ||
      MPI_Type_commit(&fof_final_index_type) != MPI_SUCCESS) {
    error("Failed to create MPI type for fof_final_index.");
  }
#else
  error("Calling an MPI function in non-MPI code.");
#endif
}
/**
 * @brief Mapper function to set the initial group indices.
 *
 * @param map_data The array of group indices.
 * @param num_elements Chunk size.
 * @param extra_data Pointer to first group index.
 */
void fof_set_initial_group_index_mapper(void *map_data, int num_elements,
                                        void *extra_data) {
  size_t *group_index = (size_t *)map_data;
  size_t *group_index_start = (size_t *)extra_data;
  const ptrdiff_t offset = group_index - group_index_start;
  for (int i = 0; i < num_elements; ++i) {
    group_index[i] = i + offset;
  }
}
/**
 * @brief Mapper function to set the initial attach group indices.
 *
 * @param map_data The array of attach group indices.
 * @param num_elements Chunk size.
 * @param extra_data Pointer to first group index.
 */
void fof_set_initial_attach_index_mapper(void *map_data, int num_elements,
                                         void *extra_data) {
  size_t *attach_index = (size_t *)map_data;
  size_t *attach_index_start = (size_t *)extra_data;
  const ptrdiff_t offset = attach_index - attach_index_start;
  for (int i = 0; i < num_elements; ++i) {
    attach_index[i] = i + offset;
  }
}
/**
 * @brief Mapper function to set the initial group sizes.
 *
 * @param map_data The array of group sizes.
 * @param num_elements Chunk size.
 * @param extra_data N/A.
 */
void fof_set_initial_group_size_mapper(void *map_data, int num_elements,
                                       void *extra_data) {
  size_t *group_size = (size_t *)map_data;
  for (int i = 0; i < num_elements; ++i) {
    group_size[i] = 1;
  }
}
/**
 * @brief Mapper function to set the initial distances.
 *
 * @param map_data The array of distance.
 * @param num_elements Chunk size.
 * @param extra_data N/A.
 */
void fof_set_initial_part_distances_mapper(void *map_data, int num_elements,
                                           void *extra_data) {
  float *distance = (float *)map_data;
  for (int i = 0; i < num_elements; ++i) {
    distance[i] = FLT_MAX;
  }
}
/**
 * @brief Mapper function to set the initial group IDs.
 *
 * @param map_data The array of #gpart%s.
 * @param num_elements Chunk size.
 * @param extra_data Pointer to the default group ID.
 */
void fof_set_initial_group_id_mapper(void *map_data, int num_elements,
                                     void *extra_data) {
  /* Unpack the information */
  struct gpart *gparts = (struct gpart *)map_data;
  const size_t group_id_default = *((size_t *)extra_data);
  for (int i = 0; i < num_elements; ++i) {
    gparts[i].fof_data.group_id = group_id_default;
  }
}
/**
 * @brief Allocate the memory and initialise the arrays for a FOF calculation.
 *
 * @param s The #space to act on.
 * @param props The properties of the FOF structure.
 */
void fof_allocate(const struct space *s, struct fof_props *props) {
  const int verbose = s->e->verbose;
  const ticks total_tic = getticks();
  /* Start by computing the mean inter DM particle separation */
  /* Collect the mean mass of the non-background gpart */
  double high_res_DM_mass = 0.;
  long long num_high_res_DM = 0;
  for (size_t i = 0; i < s->nr_gparts; ++i) {
    const struct gpart *gp = &s->gparts[i];
    if (gp->type == swift_type_dark_matter &&
        gp->time_bin != time_bin_inhibited &&
        gp->time_bin != time_bin_not_created) {
      high_res_DM_mass += gp->mass;
      num_high_res_DM++;
    }
  }
#ifdef WITH_MPI
  /* Gather the information from all ranks */
  MPI_Allreduce(MPI_IN_PLACE, &high_res_DM_mass, 1, MPI_DOUBLE, MPI_SUM,
                MPI_COMM_WORLD);
  MPI_Allreduce(MPI_IN_PLACE, &num_high_res_DM, 1, MPI_LONG_LONG, MPI_SUM,
                MPI_COMM_WORLD);
#endif
  high_res_DM_mass /= (double)num_high_res_DM;
  /* Are we using the aboslute value or the one derived from the mean
     inter-particle sepration? */
  if (props->l_x_absolute != -1.) {
    props->l_x2 = props->l_x_absolute * props->l_x_absolute;
    if (s->e->nodeID == 0) {
      message("Linking length is set to %e [internal units].",
              props->l_x_absolute);
    }
  } else {
    /* Safety check */
    if (!(s->e->policy & engine_policy_cosmology))
      error(
          "Attempting to run FoF on a simulation using cosmological "
          "information but cosmology was not initialised");
    /* Calculate the mean inter-particle separation as if we were in
       a scenario where the entire box was filled with high-resolution
         particles */
    const double Omega_cdm = s->e->cosmology->Omega_cdm;
    const double Omega_b = s->e->cosmology->Omega_b;
    const double Omega_m = Omega_cdm + Omega_b;
    const double critical_density_0 = s->e->cosmology->critical_density_0;
    double mean_matter_density;
    if (s->with_hydro)
      mean_matter_density = Omega_cdm * critical_density_0;
    else
      mean_matter_density = Omega_m * critical_density_0;
    /* Mean inter-particle separation of the DM particles */
    const double mean_inter_particle_sep =
        cbrt(high_res_DM_mass / mean_matter_density);
    /* Calculate the particle linking length based upon the mean inter-particle
     * spacing of the DM particles. */
    const double l_x = props->l_x_ratio * mean_inter_particle_sep;
    props->l_x2 = l_x * l_x;
    if (s->e->nodeID == 0) {
      message(
          "Linking length is set to %e [internal units] (%f of mean "
          "inter-DM-particle separation).",
          l_x, props->l_x_ratio);
    }
  }
#ifdef WITH_MPI
  /* Check size of linking length against the top-level cell dimensions. */
  if (props->l_x2 > s->width[0] * s->width[0])
    error(
        "Linking length greater than the width of a top-level cell. Need to "
        "check more than one layer of top-level cells for links.");
#endif
  /* Allocate and initialise a group index array. */
  if (swift_memalign("fof_group_index", (void **)&props->group_index, 64,
                     s->nr_gparts * sizeof(size_t)) != 0)
    error("Failed to allocate list of particle group indices for FOF search.");
  /* Allocate and initialise a group index array for attachables. */
  if (swift_memalign("fof_attach_index", (void **)&props->attach_index, 64,
                     s->nr_gparts * sizeof(size_t)) != 0)
    error(
        "Failed to allocate list of particle distances array for FOF search.");
  /* Allocate and initialise a group index array for attachables. */
  if (swift_memalign("fof_found_attach", (void **)&props->found_attachable_link,
                     64, s->nr_gparts * sizeof(char)) != 0)
    error(
        "Failed to allocate list of particle distances array for FOF search.");
  /* Allocate and initialise the closest distance array. */
  if (swift_memalign("fof_distance", (void **)&props->distance_to_link, 64,
                     s->nr_gparts * sizeof(float)) != 0)
    error(
        "Failed to allocate list of particle distances array for FOF search.");
  /* Allocate and initialise a group size array. */
  if (swift_memalign("fof_group_size", (void **)&props->group_size, 64,
                     s->nr_gparts * sizeof(size_t)) != 0)
    error("Failed to allocate list of group size for FOF search.");
  ticks tic = getticks();
  /* Set initial group index */
  threadpool_map(&s->e->threadpool, fof_set_initial_group_index_mapper,
                 props->group_index, s->nr_gparts, sizeof(size_t),
                 threadpool_auto_chunk_size, props->group_index);
  if (verbose)
    message("Setting initial group index took: %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());
  tic = getticks();
  /* Set initial attach index */
  threadpool_map(&s->e->threadpool, fof_set_initial_attach_index_mapper,
                 props->attach_index, s->nr_gparts, sizeof(size_t),
                 threadpool_auto_chunk_size, props->attach_index);
  bzero(props->found_attachable_link, s->nr_gparts * sizeof(char));
  if (verbose)
    message("Setting initial attach index took: %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());
  tic = getticks();
  /* Set initial distances */
  threadpool_map(&s->e->threadpool, fof_set_initial_part_distances_mapper,
                 props->distance_to_link, s->nr_gparts, sizeof(float),
                 threadpool_auto_chunk_size, NULL);
  if (verbose)
    message("Setting initial distances took: %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());
  tic = getticks();
  /* Set initial group sizes */
  threadpool_map(&s->e->threadpool, fof_set_initial_group_size_mapper,
                 props->group_size, s->nr_gparts, sizeof(size_t),
                 threadpool_auto_chunk_size, NULL);
  if (verbose)
    message("Setting initial group sizes took: %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());
#ifdef SWIFT_DEBUG_CHECKS
  ti_current = s->e->ti_current;
#endif
  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - total_tic),
            clocks_getunit());
}
/**
 * @brief Comparison function for qsort call comparing group sizes.
 *
 * @param a The first #group_length object.
 * @param b The second #group_length object.
 * @return 1 if the size of the group b is larger than the size of group a, -1
 * if a is the largest and 0 if they are equal.
 */
int cmp_func_group_size(const void *a, const void *b) {
  struct group_length *a_group_size = (struct group_length *)a;
  struct group_length *b_group_size = (struct group_length *)b;
  if (b_group_size->size > a_group_size->size)
    return 1;
  else if (b_group_size->size < a_group_size->size)
    return -1;
  else
    return 0;
}
#ifdef WITH_MPI
/**
 * @brief Comparison function for qsort call comparing group global roots.
 *
 * @param a The first #fof_final_index object.
 * @param b The second #fof_final_index object.
 * @return 1 if the global of the group b is *smaller* than the global group of
 * group a, -1 if a is the smaller one and 0 if they are equal.
 */
int compare_fof_final_index_global_root(const void *a, const void *b) {
  struct fof_final_index *fof_final_index_a = (struct fof_final_index *)a;
  struct fof_final_index *fof_final_index_b = (struct fof_final_index *)b;
  if (fof_final_index_b->global_root < fof_final_index_a->global_root)
    return 1;
  else if (fof_final_index_b->global_root > fof_final_index_a->global_root)
    return -1;
  else
    return 0;
}
/**
 * @brief Check whether a given group ID is on the local node.
 *
 * This function only makes sense in MPI mode.
 *
 * @param group_id The ID to check.
 * @param nr_gparts The number of gparts on this node.
 */
__attribute__((always_inline)) INLINE static int is_local(
    const size_t group_id, const size_t nr_gparts) {
#ifdef WITH_MPI
  return (group_id >= node_offset && group_id < node_offset + nr_gparts);
#else
  error("Calling MPI function in non-MPI mode");
  return 1;
#endif
}
/**
 * @brief Find the global root ID of a given particle
 *
 * This function only makes sense in MPI mode.
 *
 * @param i Index of the particle.
 * @param group_index Array of group root indices.
 * @param nr_gparts The number of g-particles on this node.
 */
__attribute__((always_inline)) INLINE static size_t fof_find_global(
    const size_t i, const size_t *group_index, const size_t nr_gparts) {
#ifdef WITH_MPI
  size_t root = node_offset + i;
  if (!is_local(root, nr_gparts)) {
    /* Non local --> This is the root */
    return root;
  } else {
    /* Local --> Follow the links until we find the root */
    while (root != group_index[root - node_offset]) {
      root = group_index[root - node_offset];
      if (!is_local(root, nr_gparts)) break;
    }
  }
  /* Perform path compression. */
  // int index = i;
  // while(index != root) {
  //  int next = group_index[index];
  //  group_index[index] = root;
  //  index = next;
  //}
  return root;
#else
  error("Calling MPI function in non-MPI mode");
  return -1;
#endif
}
#endif /* WITH_MPI */
/**
 * @brief   Finds the local root ID of the group a particle exists in
 * when group_index contains globally unique identifiers -
 * i.e. we stop *before* we advance to a foreign root.
 *
 * Here we assume that the input i is a local index and we
 * return the local index of the root.
 *
 * @param i Index of the particle.
 * @param nr_gparts The number of g-particles on this node.
 * @param group_index Array of group root indices.
 */
__attribute__((always_inline)) INLINE static size_t fof_find_local(
    const size_t i, const size_t nr_gparts, const size_t *group_index) {
#ifdef WITH_MPI
  size_t root = node_offset + i;
  while ((group_index[root - node_offset] != root) &&
         (group_index[root - node_offset] >= node_offset) &&
         (group_index[root - node_offset] < node_offset + nr_gparts)) {
    root = group_index[root - node_offset];
  }
  return root - node_offset;
#else
  size_t root = i;
  while ((group_index[root] != root) && (group_index[root] < nr_gparts)) {
    root = group_index[root];
  }
  return root;
#endif
}
/**
 * @brief Returns whether a #gpart is of the 'attachable' kind.
 */
__attribute__((always_inline)) INLINE static int gpart_is_attachable(
    const struct gpart *gp) {
  return current_fof_attach_type & (1 << (gp->type + 1));
}
/**
 * @brief Returns whether a #gpart is of the 'linkable' kind.
 */
__attribute__((always_inline)) INLINE static int gpart_is_linkable(
    const struct gpart *gp) {
  return current_fof_linking_type & (1 << (gp->type + 1));
}
/**
 * @brief Returns whether a #gpart is to be ignored by FOF.
 */
__attribute__((always_inline)) INLINE static int gpart_is_ignorable(
    const struct gpart *gp) {
  return current_fof_ignore_type & (1 << (gp->type + 1));
}
/**
 * @brief Returns whether a foreign #gpart is of the 'attachable' kind.
 */
__attribute__((always_inline)) INLINE static int gpart_foreign_is_attachable(
    const struct gpart_fof_foreign *gp) {
  return current_fof_attach_type & (1 << (gp->type + 1));
}
/**
 * @brief Returns whether a foreign #gpart is of the 'linkable' kind.
 */
__attribute__((always_inline)) INLINE static int gpart_foreign_is_linkable(
    const struct gpart_fof_foreign *gp) {
  return current_fof_linking_type & (1 << (gp->type + 1));
}
/**
 * @brief Returns whether a foreign #gpart is to be ignored by FOF.
 */
__attribute__((always_inline)) INLINE static int gpart_foreign_is_ignorable(
    const struct gpart_fof_foreign *gp) {
  return current_fof_ignore_type & (1 << (gp->type + 1));
}
/**
 * @brief Finds the local root ID of the group a particle exists in.
 *
 * We follow the group_index array until reaching the root of the group.
 *
 * Also performs path compression if the path is long.
 *
 * @param i The index of the particle.
 * @param group_index Array of group root indices.
 */
__attribute__((always_inline)) INLINE static size_t fof_find(
    const size_t i, size_t *group_index) {
  size_t root = i;
  int tree_depth = 0;
  while (root != group_index[root]) {
#ifdef PATH_HALVING
    atomic_cas(&group_index[root], group_index[root],
               group_index[group_index[root]]);
#endif
    root = group_index[root];
    tree_depth++;
  }
  /* Only perform path compression on trees with a depth of
   * FOF_COMPRESS_PATHS_MIN_LENGTH or higher. */
  if (tree_depth >= FOF_COMPRESS_PATHS_MIN_LENGTH)
    atomic_cas(&group_index[i], group_index[i], root);
  return root;
}
/**
 * @brief Atomically update the root of a group
 *
 * @param address The address of the value to update.
 * @param y The new value to write.
 *
 * @return 1 If successful, 0 otherwise.
 */
__attribute__((always_inline)) INLINE static int atomic_update_root(
    volatile size_t *address, const size_t y) {
  size_t *size_t_ptr = (size_t *)address;
  size_t old_val = *address;
  size_t test_val = old_val;
  size_t new_val = y;
  /* atomic_cas returns old_val if *size_t_ptr has not changed since being
   * read.*/
  old_val = atomic_cas(size_t_ptr, test_val, new_val);
  if (test_val == old_val)
    return 1;
  else
    return 0;
}
/**
 * @brief Unifies two groups by setting them to the same root.
 *
 * @param root_i The root of the first group. Will be updated.
 * @param root_j The root of the second group.
 * @param group_index The list of group roots.
 */
__attribute__((always_inline)) INLINE static void fof_union(
    size_t *restrict root_i, const size_t root_j,
    size_t *restrict group_index) {
  int result = 0;
  /* Loop until the root can be set to a new value. */
  do {
    size_t root_i_new = fof_find(*root_i, group_index);
    const size_t root_j_new = fof_find(root_j, group_index);
    /* Skip particles in the same group. */
    if (root_i_new == root_j_new) return;
    /* If the root ID of pj is lower than pi's root ID set pi's root to point to
     * pj's. Otherwise set pj's root to point to pi's.*/
    if (root_j_new < root_i_new) {
      /* Updates the root and checks that its value has not been changed since
       * being read. */
      result = atomic_update_root(&group_index[root_i_new], root_j_new);
      /* Update root_i on the fly. */
      *root_i = root_j_new;
    } else {
      /* Updates the root and checks that its value has not been changed since
       * being read. */
      result = atomic_update_root(&group_index[root_j_new], root_i_new);
      /* Update root_i on the fly. */
      *root_i = root_i_new;
    }
  } while (result != 1);
}
/**
 * @brief Compute th minimal distance between any two points in two cells.
 *
 * @param ci The first #cell.
 * @param cj The second #cell.
 * @param dim The size of the simulation domain.
 */
__attribute__((always_inline)) INLINE static double cell_min_dist(
    const struct cell *restrict ci, const struct cell *restrict cj,
    const double dim[3]) {
  /* Get cell locations. */
  const double cix_min = ci->loc[0];
  const double ciy_min = ci->loc[1];
  const double ciz_min = ci->loc[2];
  const double cjx_min = cj->loc[0];
  const double cjy_min = cj->loc[1];
  const double cjz_min = cj->loc[2];
  const double cix_max = ci->loc[0] + ci->width[0];
  const double ciy_max = ci->loc[1] + ci->width[1];
  const double ciz_max = ci->loc[2] + ci->width[2];
  const double cjx_max = cj->loc[0] + cj->width[0];
  const double cjy_max = cj->loc[1] + cj->width[1];
  const double cjz_max = cj->loc[2] + cj->width[2];
  double not_same_range[3];
  /* If two cells are in the same range of coordinates along
     any of the 3 axis, the distance along this axis is 0 */
  if (ci->width[0] > cj->width[0]) {
    if ((cix_min <= cjx_min) && (cjx_max <= cix_max))
      not_same_range[0] = 0.;
    else
      not_same_range[0] = 1.;
  } else {
    if ((cjx_min <= cix_min) && (cix_max <= cjx_max))
      not_same_range[0] = 0.;
    else
      not_same_range[0] = 1.;
  }
  if (ci->width[1] > cj->width[1]) {
    if ((ciy_min <= cjy_min) && (cjy_max <= ciy_max))
      not_same_range[1] = 0.;
    else
      not_same_range[1] = 1.;
  } else {
    if ((cjy_min <= ciy_min) && (ciy_max <= cjy_max))
      not_same_range[1] = 0.;
    else
      not_same_range[1] = 1.;
  }
  if (ci->width[2] > cj->width[2]) {
    if ((ciz_min <= cjz_min) && (cjz_max <= ciz_max))
      not_same_range[2] = 0.;
    else
      not_same_range[2] = 1.;
  } else {
    if ((cjz_min <= ciz_min) && (ciz_max <= cjz_max))
      not_same_range[2] = 0.;
    else
      not_same_range[2] = 1.;
  }
  /* Find the shortest distance between cells, remembering to account for
   * periodic boundary conditions. */
  double dx[3];
  dx[0] = min4(fabs(nearest(cix_min - cjx_min, dim[0])),
               fabs(nearest(cix_min - cjx_max, dim[0])),
               fabs(nearest(cix_max - cjx_min, dim[0])),
               fabs(nearest(cix_max - cjx_max, dim[0])));
  dx[1] = min4(fabs(nearest(ciy_min - cjy_min, dim[1])),
               fabs(nearest(ciy_min - cjy_max, dim[1])),
               fabs(nearest(ciy_max - cjy_min, dim[1])),
               fabs(nearest(ciy_max - cjy_max, dim[1])));
  dx[2] = min4(fabs(nearest(ciz_min - cjz_min, dim[2])),
               fabs(nearest(ciz_min - cjz_max, dim[2])),
               fabs(nearest(ciz_max - cjz_min, dim[2])),
               fabs(nearest(ciz_max - cjz_max, dim[2])));
  double r2 = 0.;
  for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k] * not_same_range[k];
  return r2;
}
#ifdef WITH_MPI
/* Add a group to the hash table. */
__attribute__((always_inline)) INLINE static void hashmap_add_group(
    const size_t group_id, const size_t group_offset, hashmap_t *map) {
  int created_new_element = 0;
  hashmap_value_t *offset =
      hashmap_get_new(map, group_id, &created_new_element);
  if (offset != NULL) {
    /* If the element is a new entry set its value. */
    if (created_new_element) {
      (*offset).value_st = group_offset;
    }
  } else
    error("Couldn't find key (%zu) or create new one.", group_id);
}
/* Find a group in the hash table. */
__attribute__((always_inline)) INLINE static size_t hashmap_find_group_offset(
    const size_t group_id, hashmap_t *map) {
  hashmap_value_t *group_offset = hashmap_get(map, group_id);
  if (group_offset == NULL)
    error("Couldn't find key (%zu) or create new one.", group_id);
  return (size_t)(*group_offset).value_st;
}
/* Compute send/recv offsets for MPI communication. */
__attribute__((always_inline)) INLINE static void fof_compute_send_recv_offsets(
    const int nr_nodes, int *sendcount, int **recvcount, int **sendoffset,
    int **recvoffset, size_t *nrecv) {
  /* Determine number of entries to receive */
  *recvcount = (int *)malloc(nr_nodes * sizeof(int));
  MPI_Alltoall(sendcount, 1, MPI_INT, *recvcount, 1, MPI_INT, MPI_COMM_WORLD);
  /* Compute send/recv offsets */
  *sendoffset = (int *)malloc(nr_nodes * sizeof(int));
  (*sendoffset)[0] = 0;
  for (int i = 1; i < nr_nodes; i++)
    (*sendoffset)[i] = (*sendoffset)[i - 1] + sendcount[i - 1];
  *recvoffset = (int *)malloc(nr_nodes * sizeof(int));
  (*recvoffset)[0] = 0;
  for (int i = 1; i < nr_nodes; i++)
    (*recvoffset)[i] = (*recvoffset)[i - 1] + (*recvcount)[i - 1];
  /* Allocate receive buffer */
  *nrecv = 0;
  for (int i = 0; i < nr_nodes; i++) (*nrecv) += (*recvcount)[i];
}
#endif /* WITH_MPI */
/**
 * @brief Perform a FOF search using union-find on a given leaf-cell
 *
 * @param props The properties fof the FOF scheme.
 * @param l_x2 The square of the FOF linking length.
 * @param space_gparts The start of the #gpart array in the #space structure.
 * @param c The #cell in which to perform FOF.
 */
void fof_search_self_cell(const struct fof_props *props, const double l_x2,
                          const struct gpart *const space_gparts,
                          const struct cell *c) {
#ifdef SWIFT_DEBUG_CHECKS
  if (c->split) error("Performing the FOF search at a non-leaf level!");
#endif
  const size_t count = c->grav.count;
  const struct gpart *gparts = c->grav.parts;
  /* Index of particles in the global group list */
  size_t *const group_index = props->group_index;
  /* Make a list of particle offsets into the global gparts array. */
  size_t *const offset = group_index + (ptrdiff_t)(gparts - space_gparts);
#ifdef SWIFT_DEBUG_CHECKS
  if (c->nodeID != engine_rank)
    error("Performing self FOF search on foreign cell.");
#endif
  /* Loop over particles and find which particles belong in the same group. */
  for (size_t i = 0; i < count; i++) {
    const struct gpart *pi = &gparts[i];
    /* Ignore inhibited particles */
    if (pi->time_bin >= time_bin_inhibited) continue;
    /* Check whether we ignore this particle type altogether */
    if (gpart_is_ignorable(pi)) continue;
#ifdef SWIFT_DEBUG_CHECKS
    if (pi->ti_drift != ti_current)
      error("Running FOF on an un-drifted particle!");
#endif
    const double pix = pi->x[0];
    const double piy = pi->x[1];
    const double piz = pi->x[2];
    /* Find the root of pi. */
    size_t root_i = fof_find(offset[i], group_index);
    /* Get the nature of the linking */
    const int is_link_i = gpart_is_linkable(pi);
    for (size_t j = i + 1; j < count; j++) {
      const struct gpart *pj = &gparts[j];
      /* Ignore inhibited particles */
      if (pj->time_bin >= time_bin_inhibited) continue;
      /* Check whether we ignore this particle type altogether */
      if (gpart_is_ignorable(pj)) continue;
      /* Get the nature of the linking */
      const int is_link_j = gpart_is_linkable(pj);
      /* Both particles must be of the linking kind */
      if (!(is_link_i && is_link_j)) continue;
#ifdef SWIFT_DEBUG_CHECKS
      if (pj->ti_drift != ti_current)
        error("Running FOF on an un-drifted particle!");
#endif
      /* Find the root of pj. */
      const size_t root_j = fof_find(offset[j], group_index);
      /* Skip particles in the same group. */
      if (root_i == root_j) continue;
      const double pjx = pj->x[0];
      const double pjy = pj->x[1];
      const double pjz = pj->x[2];
      /* Compute the pairwise distance */
      float dx[3], r2 = 0.0f;
      dx[0] = pix - pjx;
      dx[1] = piy - pjy;
      dx[2] = piz - pjz;
      for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k];
      /* Hit or miss? */
      if (r2 < l_x2) {
        /* Merge the groups` */
        fof_union(&root_i, root_j, group_index);
      }
    }
  }
}
/**
 * @brief Perform a FOF search using union-find between two cells
 *
 * @param props The properties fof the FOF scheme.
 * @param dim The dimension of the simulation volume.
 * @param l_x2 The square of the FOF linking length.
 * @param periodic Are we using periodic BCs?
 * @param space_gparts The start of the #gpart array in the #space structure.
 * @param ci The first #cell in which to perform FOF.
 * @param cj The second #cell in which to perform FOF.
 */
void fof_search_pair_cells(const struct fof_props *props, const double dim[3],
                           const double l_x2, const int periodic,
                           const struct gpart *const space_gparts,
                           const struct cell *restrict ci,
                           const struct cell *restrict cj) {
  const size_t count_i = ci->grav.count;
  const size_t count_j = cj->grav.count;
  const struct gpart *gparts_i = ci->grav.parts;
  const struct gpart *gparts_j = cj->grav.parts;
  /* Index of particles in the global group list */
  size_t *const group_index = props->group_index;
  /* Make a list of particle offsets into the global gparts array. */
  size_t *const offset_i = group_index + (ptrdiff_t)(gparts_i - space_gparts);
  size_t *const offset_j = group_index + (ptrdiff_t)(gparts_j - space_gparts);
#ifdef SWIFT_DEBUG_CHECKS
  if (offset_j > offset_i && (offset_j < offset_i + count_i))
    error("Overlapping cells");
  if (offset_i > offset_j && (offset_i < offset_j + count_j))
    error("Overlapping cells");
  if (ci->nodeID != cj->nodeID) error("Searching foreign cells!");
#endif
  /* Account for boundary conditions.*/
  double shift[3] = {0.0, 0.0, 0.0};
  /* Get the relative distance between the pairs, wrapping. */
  double diff[3];
  for (int k = 0; k < 3; k++) {
    diff[k] = cj->loc[k] - ci->loc[k];
    if (periodic && diff[k] < -dim[k] * 0.5)
      shift[k] = dim[k];
    else if (periodic && diff[k] > dim[k] * 0.5)
      shift[k] = -dim[k];
    else
      shift[k] = 0.0;
    diff[k] += shift[k];
  }
  /* Loop over particles and find which particles belong in the same group. */
  for (size_t i = 0; i < count_i; i++) {
    const struct gpart *restrict pi = &gparts_i[i];
    /* Ignore inhibited particles */
    if (pi->time_bin >= time_bin_inhibited) continue;
    /* Check whether we ignore this particle type altogether */
    if (gpart_is_ignorable(pi)) continue;
#ifdef SWIFT_DEBUG_CHECKS
    if (pi->ti_drift != ti_current)
      error("Running FOF on an un-drifted particle!");
#endif
    const double pix = pi->x[0] - shift[0];
    const double piy = pi->x[1] - shift[1];
    const double piz = pi->x[2] - shift[2];
    /* Find the root of pi. */
    size_t root_i = fof_find(offset_i[i], group_index);
    /* Get the nature of the linking */
    const int is_link_i = gpart_is_linkable(pi);
    for (size_t j = 0; j < count_j; j++) {
      const struct gpart *restrict pj = &gparts_j[j];
      /* Ignore inhibited particles */
      if (pj->time_bin >= time_bin_inhibited) continue;
      /* Check whether we ignore this particle type altogether */
      if (gpart_is_ignorable(pj)) continue;
      /* Get the nature of the linking */
      const int is_link_j = gpart_is_linkable(pj);
      /* At least one of the particles has to be of linking type */
      if (!(is_link_i && is_link_j)) continue;
#ifdef SWIFT_DEBUG_CHECKS
      if (pj->ti_drift != ti_current)
        error("Running FOF on an un-drifted particle!");
#endif
      /* Find the root of pj. */
      const size_t root_j = fof_find(offset_j[j], group_index);
      /* Skip particles in the same group. */
      if (root_i == root_j) continue;
      const double pjx = pj->x[0];
      const double pjy = pj->x[1];
      const double pjz = pj->x[2];
      /* Compute pairwise distance (periodic BCs were accounted
       for by the shift vector) */
      float dx[3], r2 = 0.0f;
      dx[0] = pix - pjx;
      dx[1] = piy - pjy;
      dx[2] = piz - pjz;
      for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k];
      /* Hit or miss? */
      if (r2 < l_x2) {
        /* Merge the groups */
        fof_union(&root_i, root_j, group_index);
      }
    }
  }
}
#ifdef WITH_MPI
/**
 * @brief Add a local<->foreign pair in range to the list of links
 *
 * Possibly reallocates the local_group_links if we run out of space.
 */
static INLINE void add_foreign_link_to_list(
    int *local_link_count, int *group_links_size, struct fof_mpi **group_links,
    struct fof_mpi **local_group_links, const size_t root_i,
    const size_t root_j, const size_t size_i, const size_t size_j) {
  /* If the group_links array is not big enough re-allocate it. */
  if (*local_link_count + 1 > *group_links_size) {
    const int new_size = 2 * (*group_links_size);
    *group_links_size = new_size;
    (*group_links) = (struct fof_mpi *)realloc(
        *group_links, new_size * sizeof(struct fof_mpi));
    /* Reset the local pointer */
    (*local_group_links) = *group_links;
    message("Re-allocating local group links from %d to %d elements.",
            *local_link_count, new_size);
    if (new_size < 0) error("Overflow in size of list of foreign links");
  }
  /* Store the particle group properties for communication. */
  (*local_group_links)[*local_link_count].group_i = root_i;
  (*local_group_links)[*local_link_count].group_i_size = size_i;
  (*local_group_links)[*local_link_count].group_j = root_j;
  (*local_group_links)[*local_link_count].group_j_size = size_j;
  (*local_link_count)++;
}
#endif
/* Perform a FOF search between a local and foreign cell using the Union-Find
 * algorithm. Store any links found between particles.*/
void fof_search_pair_cells_foreign(
    const struct fof_props *props, const double dim[3], const double l_x2,
    const int periodic, const struct gpart *const space_gparts,
    const size_t nr_gparts, const struct cell *restrict ci,
    const struct cell *restrict cj, int *restrict link_count,
    struct fof_mpi **group_links, int *restrict group_links_size) {
#ifdef WITH_MPI
  const size_t count_i = ci->grav.count;
  const size_t count_j = cj->grav.count;
  const struct gpart *gparts_i = ci->grav.parts;
  const struct gpart_fof_foreign *gparts_j = cj->grav.parts_fof_foreign;
  /* Get local pointers */
  const size_t *restrict group_index = props->group_index;
  const size_t *restrict group_size = props->group_size;
  /* Values local to this function to avoid dereferencing */
  struct fof_mpi *local_group_links = *group_links;
  int local_link_count = *link_count;
  /* Make a list of particle offsets into the global gparts array. */
  const size_t *const offset_i =
      group_index + (ptrdiff_t)(gparts_i - space_gparts);
#ifdef SWIFT_DEBUG_CHECKS
  /* Check whether cells are local to the node. */
  const int ci_local = (ci->nodeID == engine_rank);
  const int cj_local = (cj->nodeID == engine_rank);
  if ((ci_local && cj_local) || (!ci_local && !cj_local))
    error(
        "FOF search of foreign cells called on two local cells or two foreign "
        "cells.");
  if (!ci_local) {
    error("Cell ci is not local!");
  }
  if (cj_local) {
    error("Cell cj is local!");
  }
#endif
  double shift[3] = {0.0, 0.0, 0.0};
  /* Get the relative distance between the pairs, wrapping. */
  for (int k = 0; k < 3; k++) {
    const double diff = cj->loc[k] - ci->loc[k];
    if (periodic && diff < -dim[k] / 2)
      shift[k] = dim[k];
    else if (periodic && diff > dim[k] / 2)
      shift[k] = -dim[k];
    else
      shift[k] = 0.0;
  }
  /* Loop over particles and find which particles belong in the same group. */
  for (size_t i = 0; i < count_i; i++) {
    const struct gpart *pi = &gparts_i[i];
    /* Ignore inhibited particles */
    if (pi->time_bin >= time_bin_inhibited) continue;
    /* Check whether we ignore this particle type altogether */
    if (gpart_is_ignorable(pi)) continue;
#ifdef SWIFT_DEBUG_CHECKS
    if (pi->ti_drift != ti_current)
      error("Running FOF on an un-drifted particle!");
#endif
    const double pix = pi->x[0] - shift[0];
    const double piy = pi->x[1] - shift[1];
    const double piz = pi->x[2] - shift[2];
    /* Find the root of pi. */
    const size_t root_i =
        fof_find_global(offset_i[i] - node_offset, group_index, nr_gparts);
    /* Get the nature of the linking */
    const int is_link_i = gpart_is_linkable(pi);
    for (size_t j = 0; j < count_j; j++) {
      const struct gpart_fof_foreign *pj = &gparts_j[j];
      /* Ignore inhibited particles */
      if (pj->time_bin >= time_bin_inhibited) continue;
      /* Check whether we ignore this particle type altogether */
      if (gpart_foreign_is_ignorable(pj)) continue;
      /* Get the nature of the linking */
      const int is_link_j = gpart_foreign_is_linkable(pj);
      /* Only consider linkable<->linkable pairs */
      if (!(is_link_i && is_link_j)) continue;
#ifdef SWIFT_DEBUG_CHECKS
      if (pj->ti_drift != ti_current)
        error("Running FOF on an un-drifted particle!");
#endif
      const double pjx = pj->x[0];
      const double pjy = pj->x[1];
      const double pjz = pj->x[2];
      /* Compute pairwise distance (periodic BCs were accounted
       for by the shift vector) */
      float dx[3], r2 = 0.0f;
      dx[0] = pix - pjx;
      dx[1] = piy - pjy;
      dx[2] = piz - pjz;
      for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k];
      /* Hit or miss? */
      if (r2 < l_x2) {
        /* Check that the links have not already been added to the list. */
        for (int l = 0; l < local_link_count; l++) {
          if (local_group_links[l].group_i == root_i &&
              local_group_links[l].group_j == pj->fof_data.group_id) {
            continue;
          }
        }
        /* Add a possible link to the list */
        add_foreign_link_to_list(
            &local_link_count, group_links_size, group_links,
            &local_group_links, root_i, pj->fof_data.group_id,
            group_size[root_i - node_offset], pj->fof_data.group_size);
      }
    }
  }
  /* Update the returned values */
  *link_count = local_link_count;
#else
  error("Calling MPI function in non-MPI mode.");
#endif
}
/**
 * @brief Recursively perform a union-find FOF between two cells.
 *
 * If cells are more distant than the linking length, we abort early.
 *
 * @param props The properties fof the FOF scheme.
 * @param dim The dimension of the space.
 * @param search_r2 the square of the FOF linking length.
 * @param periodic Are we using periodic BCs?
 * @param space_gparts The start of the #gpart array in the #space structure.
 * @param ci The first #cell in which to perform FOF.
 * @param cj The second #cell in which to perform FOF.
 */
void rec_fof_search_pair(const struct fof_props *props, const double dim[3],
                         const double search_r2, const int periodic,
                         const struct gpart *const space_gparts,
                         struct cell *restrict ci, struct cell *restrict cj) {
  /* Find the shortest distance between cells, remembering to account for
   * boundary conditions. */
  const double r2 = cell_min_dist(ci, cj, dim);
#ifdef SWIFT_DEBUG_CHECKS
  if (ci == cj) error("Pair FOF called on same cell!!!");
#endif
  /* Return if cells are out of range of each other. */
  if (r2 > search_r2) return;
  /* Recurse on both cells if they are both split. */
  if (ci->split && cj->split) {
    for (int k = 0; k < 8; k++) {
      if (ci->progeny[k] != NULL) {
        for (int l = 0; l < 8; l++)
          if (cj->progeny[l] != NULL)
            rec_fof_search_pair(props, dim, search_r2, periodic, space_gparts,
                                ci->progeny[k], cj->progeny[l]);
      }
    }
  } else if (ci->split) {
    for (int k = 0; k < 8; k++) {
      if (ci->progeny[k] != NULL)
        rec_fof_search_pair(props, dim, search_r2, periodic, space_gparts,
                            ci->progeny[k], cj);
    }
  } else if (cj->split) {
    for (int k = 0; k < 8; k++) {
      if (cj->progeny[k] != NULL)
        rec_fof_search_pair(props, dim, search_r2, periodic, space_gparts, ci,
                            cj->progeny[k]);
    }
  } else {
    /* Perform FOF search between pairs of cells that are within the linking
     * length and not the same cell. */
    fof_search_pair_cells(props, dim, search_r2, periodic, space_gparts, ci,
                          cj);
  }
}
#ifdef WITH_MPI
/* Recurse on a pair of cells (one local, one foreign) and perform a FOF search
 * between cells that are within range. */
void rec_fof_search_pair_foreign(
    const struct fof_props *props, const double dim[3], const double search_r2,
    const int periodic, const struct gpart *const space_gparts,
    const size_t nr_gparts, const struct cell *ci, const struct cell *cj,
    int *restrict link_count, struct fof_mpi **group_links,
    int *restrict group_links_size) {
#ifdef SWIFT_DEBUG_CHECKS
  if (ci == cj) error("Pair FOF called on same cell!!!");
  if (ci->nodeID == cj->nodeID) error("Fully local pair!");
#endif
  /* Find the shortest distance between cells, remembering to account for
   * boundary conditions. */
  const double r2 = cell_min_dist(ci, cj, dim);
  /* Return if cells are out of range of each other. */
  if (r2 > search_r2) return;
  /* Recurse on both cells if they are both split. */
  if (ci->split && cj->split) {
    for (int k = 0; k < 8; k++) {
      if (ci->progeny[k] != NULL) {
        for (int l = 0; l < 8; l++)
          if (cj->progeny[l] != NULL)
            rec_fof_search_pair_foreign(props, dim, search_r2, periodic,
                                        space_gparts, nr_gparts, ci->progeny[k],
                                        cj->progeny[l], link_count, group_links,
                                        group_links_size);
      }
    }
  } else if (ci->split) {
    for (int k = 0; k < 8; k++) {
      if (ci->progeny[k] != NULL)
        rec_fof_search_pair_foreign(props, dim, search_r2, periodic,
                                    space_gparts, nr_gparts, ci->progeny[k], cj,
                                    link_count, group_links, group_links_size);
    }
  } else if (cj->split) {
    for (int k = 0; k < 8; k++) {
      if (cj->progeny[k] != NULL)
        rec_fof_search_pair_foreign(props, dim, search_r2, periodic,
                                    space_gparts, nr_gparts, ci, cj->progeny[k],
                                    link_count, group_links, group_links_size);
    }
  } else {
    /* Perform FOF search between pairs of cells that are within the linking
     * length and not the same cell. */
    fof_search_pair_cells_foreign(props, dim, search_r2, periodic, space_gparts,
                                  nr_gparts, ci, cj, link_count, group_links,
                                  group_links_size);
  }
}
#endif /* WITH_MPI */
/**
 * @brief Recursively perform a union-find FOF on a cell.
 *
 * @param props The properties fof the FOF scheme.
 * @param dim The dimension of the space.
 * @param space_gparts The start of the #gpart array in the #space structure.
 * @param search_r2 the square of the FOF linking length.
 * @param periodic Are we using periodic BCs?
 * @param c The #cell in which to perform FOF.
 */
void rec_fof_search_self(const struct fof_props *props, const double dim[3],
                         const double search_r2, const int periodic,
                         const struct gpart *const space_gparts,
                         struct cell *c) {
  /* Recurse? */
  if (c->split) {
    /* Loop over all progeny. Perform pair and self recursion on progenies.*/
    for (int k = 0; k < 8; k++) {
      if (c->progeny[k] != NULL) {
        rec_fof_search_self(props, dim, search_r2, periodic, space_gparts,
                            c->progeny[k]);
        for (int l = k + 1; l < 8; l++)
          if (c->progeny[l] != NULL)
            rec_fof_search_pair(props, dim, search_r2, periodic, space_gparts,
                                c->progeny[k], c->progeny[l]);
      }
    }
  }
  /* Otherwise, compute self-interaction. */
  else
    fof_search_self_cell(props, search_r2, space_gparts, c);
}
/**
 * @brief Perform the attaching operation using union-find on a given leaf-cell
 *
 * @param props The properties fof the FOF scheme.
 * @param l_x2 The square of the FOF linking length.
 * @param space_gparts The start of the #gpart array in the #space structure.
 * @param nr_gparts The number of #gpart in the local #space structure.
 * @param c The #cell in which to perform FOF.
 */
void fof_attach_self_cell(const struct fof_props *props, const double l_x2,
                          const struct gpart *const space_gparts,
                          const size_t nr_gparts, const struct cell *c) {
#ifdef SWIFT_DEBUG_CHECKS
  if (c->split) error("Performing the FOF search at a non-leaf level!");
#endif
  const size_t count = c->grav.count;
  struct gpart *gparts = (struct gpart *)c->grav.parts;
  /* Distances of particles in the global list */
  float *const offset_dist =
      props->distance_to_link + (ptrdiff_t)(gparts - space_gparts);
#ifdef SWIFT_DEBUG_CHECKS
  if (c->nodeID != engine_rank)
    error("Performing self FOF search on foreign cell.");
#endif
  /* Loop over particles and find which particles belong in the same group. */
  for (size_t i = 0; i < count; i++) {
    struct gpart *pi = &gparts[i];
    /* Ignore inhibited particles */
    if (pi->time_bin >= time_bin_inhibited) continue;
    /* Check whether we ignore this particle type altogether */
    if (gpart_is_ignorable(pi)) continue;
#ifdef SWIFT_DEBUG_CHECKS
    if (pi->ti_drift != ti_current)
      error("Running FOF on an un-drifted particle!");
#endif
    const double pix = pi->x[0];
    const double piy = pi->x[1];
    const double piz = pi->x[2];
    /* Get the nature of the linking */
    const int is_link_i = gpart_is_linkable(pi);
    const int is_attach_i = gpart_is_attachable(pi);
#ifdef SWIFT_DEBUG_CHECKS
    if (is_link_i && is_attach_i)
      error("Particle cannot be both linkable and attachable!");
#endif
    for (size_t j = i + 1; j < count; j++) {
      struct gpart *pj = &gparts[j];
      /* Ignore inhibited particles */
      if (pj->time_bin >= time_bin_inhibited) continue;
      /* Check whether we ignore this particle type altogether */
      if (gpart_is_ignorable(pj)) continue;
      /* Get the nature of the linking */
      const int is_link_j = gpart_is_linkable(pj);
      const int is_attach_j = gpart_is_attachable(pj);
#ifdef SWIFT_DEBUG_CHECKS
      if (is_link_j && is_attach_j)
        error("Particle cannot be both linkable and attachable!");
#endif
      /* We only want link<->attach pairs */
      if (is_attach_i && is_attach_j) continue;
      if (is_link_i && is_link_j) continue;
#ifdef SWIFT_DEBUG_CHECKS
      if (pj->ti_drift != ti_current)
        error("Running FOF on an un-drifted particle!");
#endif
      const double pjx = pj->x[0];
      const double pjy = pj->x[1];
      const double pjz = pj->x[2];
      /* Compute the pairwise distance */
      float dx[3], r2 = 0.0f;
      dx[0] = pix - pjx;
      dx[1] = piy - pjy;
      dx[2] = piz - pjz;
      for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k];
      /* Hit or miss? */
      if (r2 < l_x2) {
        /* Now that we are within the linking length,
         * decide what to do based on linking types */
        if (is_link_i && is_link_j) {
#ifdef SWIFT_DEBUG_CHECKS
          error("Fundamental logic error!");
#endif
        } else if (is_link_i && is_attach_j) {
          /* We got a linkable and an attachable.
           * See whether it is closer and if so re-link.
           * This is safe to do as the attachables are never roots and
           * nothing is attached to them */
          const float dist = sqrtf(r2);
          if (dist < offset_dist[j]) {
            /* Store the new min dist */
            offset_dist[j] = dist;
            /* Store the current best root */
            pj->fof_data.group_id = pi->fof_data.group_id;
          }
        } else if (is_link_j && is_attach_i) {
          /* We got a linkable and an attachable.
           * See whether it is closer and if so re-link.
           * This is safe to do as the attachables are never roots and
           * nothing is attached to them */
          const float dist = sqrtf(r2);
          if (dist < offset_dist[i]) {
            /* Store the new min dist */
            offset_dist[i] = dist;
            /* Store the current best root */
            pi->fof_data.group_id = pj->fof_data.group_id;
          }
        } else {
#ifdef SWIFT_DEBUG_CHECKS
          error("Fundamental logic error!");
#endif
        }
      }
    }
  }
}
/**
 * @brief Perform the attaching operation using union-find between two cells
 *
 * @param props The properties fof the FOF scheme.
 * @param dim The dimension of the simulation volume.
 * @param l_x2 The square of the FOF linking length.
 * @param periodic Are we using periodic BCs?
 * @param space_gparts The start of the #gpart array in the #space structure.
 * @param nr_gparts The number of #gpart in the local #space structure.
 * @param ci The first #cell in which to perform FOF.
 * @param cj The second #cell in which to perform FOF.
 */
void fof_attach_pair_cells_both_local(const struct fof_props *props,
                                      const double dim[3], const double l_x2,
                                      const int periodic,
                                      const struct gpart *const space_gparts,
                                      const size_t nr_gparts,
                                      const struct cell *restrict ci,
                                      const struct cell *restrict cj) {
#ifdef SWIFT_DEBUG_CHECKS
  if (ci->nodeID != engine_rank) error("ci not local!");
  if (cj->nodeID != engine_rank) error("cj not local!");
#endif
  const size_t count_i = ci->grav.count;
  const size_t count_j = cj->grav.count;
  struct gpart *gparts_i = (struct gpart *)ci->grav.parts;
  struct gpart *gparts_j = (struct gpart *)cj->grav.parts;
  /* Distances of particles in the global list */
  float *const offset_dist_i =
      props->distance_to_link + (ptrdiff_t)(gparts_i - space_gparts);
  float *const offset_dist_j =
      props->distance_to_link + (ptrdiff_t)(gparts_j - space_gparts);
  /* Account for boundary conditions.*/
  double shift[3] = {0.0, 0.0, 0.0};
  /* Get the relative distance between the pairs, wrapping. */
  double diff[3];
  for (int k = 0; k < 3; k++) {
    diff[k] = cj->loc[k] - ci->loc[k];
    if (periodic && diff[k] < -dim[k] * 0.5)
      shift[k] = dim[k];
    else if (periodic && diff[k] > dim[k] * 0.5)
      shift[k] = -dim[k];
    else
      shift[k] = 0.0;
    diff[k] += shift[k];
  }
  /* Loop over particles and find which particles belong in the same group. */
  for (size_t i = 0; i < count_i; i++) {
    struct gpart *restrict pi = &gparts_i[i];
    /* Ignore inhibited particles */
    if (pi->time_bin >= time_bin_inhibited) continue;
    /* Check whether we ignore this particle type altogether */
    if (gpart_is_ignorable(pi)) continue;
#ifdef SWIFT_DEBUG_CHECKS
    if (pi->ti_drift != ti_current)
      error("Running FOF on an un-drifted particle!");
#endif
    const double pix = pi->x[0] - shift[0];
    const double piy = pi->x[1] - shift[1];
    const double piz = pi->x[2] - shift[2];
    /* Get the nature of the linking */
    const int is_link_i = gpart_is_linkable(pi);
    const int is_attach_i = gpart_is_attachable(pi);
#ifdef SWIFT_DEBUG_CHECKS
    if (is_link_i && is_attach_i)
      error("Particle cannot be both linkable and attachable!");
#endif
    for (size_t j = 0; j < count_j; j++) {
      struct gpart *restrict pj = &gparts_j[j];
      /* Ignore inhibited particles */
      if (pj->time_bin >= time_bin_inhibited) continue;
      /* Check whether we ignore this particle type altogether */
      if (gpart_is_ignorable(pj)) continue;
      /* Get the nature of the linking */
      const int is_link_j = gpart_is_linkable(pj);
      const int is_attach_j = gpart_is_attachable(pj);
#ifdef SWIFT_DEBUG_CHECKS
      if (is_link_j && is_attach_j)
        error("Particle cannot be both linkable and attachable!");
#endif
      /* We only want link<->attach pairs */
      if (is_attach_i && is_attach_j) continue;
      if (is_link_i && is_link_j) continue;
#ifdef SWIFT_DEBUG_CHECKS
      if (pj->ti_drift != ti_current)
        error("Running FOF on an un-drifted particle!");
#endif
      const double pjx = pj->x[0];
      const double pjy = pj->x[1];
      const double pjz = pj->x[2];
      /* Compute pairwise distance (periodic BCs were accounted
       for by the shift vector) */
      float dx[3], r2 = 0.0f;
      dx[0] = pix - pjx;
      dx[1] = piy - pjy;
      dx[2] = piz - pjz;
      for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k];
      /* Hit or miss? */
      if (r2 < l_x2) {
        /* Now that we are within the linking length,
         * decide what to do based on linking types */
        if (is_link_i && is_link_j) {
#ifdef SWIFT_DEBUG_CHECKS
          error("Fundamental logic error!");
#endif
        } else if (is_link_i && is_attach_j) {
          /* We got a linkable and an attachable.
           * See whether it is closer and if so re-link.
           * This is safe to do as the attachables are never roots and
           * nothing is attached to them */
          const float dist = sqrtf(r2);
          if (dist < offset_dist_j[j]) {
            /* Store the new min dist */
            offset_dist_j[j] = dist;
            /* Store the current best root */
            pj->fof_data.group_id = pi->fof_data.group_id;
          }
        } else if (is_link_j && is_attach_i) {
          /* We got a linkable and an attachable.
           * See whether it is closer and if so re-link.
           * This is safe to do as the attachables are never roots and
           * nothing is attached to them */
          const float dist = sqrtf(r2);
          if (dist < offset_dist_i[i]) {
            /* Store the new min dist */
            offset_dist_i[i] = dist;
            /* Store the current best root */
            pi->fof_data.group_id = pj->fof_data.group_id;
          }
        } else {
#ifdef SWIFT_DEBUG_CHECKS
          error("Fundamental logic error!");
#endif
        }
      }
    }
  }
}
void fof_attach_pair_cells_ci_local(const struct fof_props *props,
                                    const double dim[3], const double l_x2,
                                    const int periodic,
                                    const struct gpart *const space_gparts,
                                    const size_t nr_gparts,
                                    const struct cell *restrict ci,
                                    const struct cell *restrict cj) {
#ifdef SWIFT_DEBUG_CHECKS
  if (ci->nodeID != engine_rank) error("ci not local!");
  if (cj->nodeID == engine_rank) error("cj local!");
#endif
  const size_t count_i = ci->grav.count;
  const size_t count_j = cj->grav.count;
  struct gpart *gparts_i = ci->grav.parts;
  struct gpart_fof_foreign *gparts_j = cj->grav.parts_fof_foreign;
  /* Distances of particles in the global list */
  float *const offset_dist_i =
      props->distance_to_link + (ptrdiff_t)(gparts_i - space_gparts);
  /* Account for boundary conditions.*/
  double shift[3] = {0.0, 0.0, 0.0};
  /* Get the relative distance between the pairs, wrapping. */
  double diff[3];
  for (int k = 0; k < 3; k++) {
    diff[k] = cj->loc[k] - ci->loc[k];
    if (periodic && diff[k] < -dim[k] * 0.5)
      shift[k] = dim[k];
    else if (periodic && diff[k] > dim[k] * 0.5)
      shift[k] = -dim[k];
    else
      shift[k] = 0.0;
    diff[k] += shift[k];
  }
  /* Loop over particles and find which particles belong in the same group. */
  for (size_t i = 0; i < count_i; i++) {
    struct gpart *restrict pi = &gparts_i[i];
    /* Ignore inhibited particles */
    if (pi->time_bin >= time_bin_inhibited) continue;
    /* Check whether we ignore this particle type altogether */
    if (gpart_is_ignorable(pi)) continue;
#ifdef SWIFT_DEBUG_CHECKS
    if (pi->ti_drift != ti_current)
      error("Running FOF on an un-drifted particle!");
#endif
    const double pix = pi->x[0] - shift[0];
    const double piy = pi->x[1] - shift[1];
    const double piz = pi->x[2] - shift[2];
    /* Get the nature of the linking */
    const int is_link_i = gpart_is_linkable(pi);
    const int is_attach_i = gpart_is_attachable(pi);
#ifdef SWIFT_DEBUG_CHECKS
    if (is_link_i && is_attach_i)
      error("Particle cannot be both linkable and attachable!");
#endif
    for (size_t j = 0; j < count_j; j++) {
      struct gpart_fof_foreign *restrict pj = &gparts_j[j];
      /* Ignore inhibited particles */
      if (pj->time_bin >= time_bin_inhibited) continue;
      /* Check whether we ignore this particle type altogether */
      if (gpart_foreign_is_ignorable(pj)) continue;
      /* Get the nature of the linking */
      const int is_link_j = gpart_foreign_is_linkable(pj);
      const int is_attach_j = gpart_foreign_is_attachable(pj);
#ifdef SWIFT_DEBUG_CHECKS
      if (is_link_j && is_attach_j)
        error("Particle cannot be both linkable and attachable!");
#endif
      /* We only want link<->attach pairs */
      if (is_attach_i && is_attach_j) continue;
      if (is_link_i && is_link_j) continue;
#ifdef SWIFT_DEBUG_CHECKS
      if (pj->ti_drift != ti_current)
        error("Running FOF on an un-drifted particle!");
#endif
      const double pjx = pj->x[0];
      const double pjy = pj->x[1];
      const double pjz = pj->x[2];
      /* Compute pairwise distance (periodic BCs were accounted
       for by the shift vector) */
      float dx[3], r2 = 0.0f;
      dx[0] = pix - pjx;
      dx[1] = piy - pjy;
      dx[2] = piz - pjz;
      for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k];
      /* Hit or miss? */
      if (r2 < l_x2) {
        /* Now that we are within the linking length,
         * decide what to do based on linking types */
        if (is_link_i && is_link_j) {
#ifdef SWIFT_DEBUG_CHECKS
          error("Fundamental logic error!");
#endif
        } else if (is_link_i && is_attach_j) {
          /* Nothing to do here. The reverse action will be done
             in the converse call on the other node. */
        } else if (is_link_j && is_attach_i) {
          /* We got a linkable and an attachable.
           * See whether it is closer and if so re-link.
           * This is safe to do as the attachables are never roots and
           * nothing is attached to them */
          const float dist = sqrtf(r2);
          if (dist < offset_dist_i[i]) {
            /* Store the new min dist */
            offset_dist_i[i] = dist;
            /* Store the current best root */
            pi->fof_data.group_id = pj->fof_data.group_id;
          }
        } else {
#ifdef SWIFT_DEBUG_CHECKS
          error("Fundamental logic error!");
#endif
        }
      }
    }
  }
}
void fof_attach_pair_cells_cj_local(const struct fof_props *props,
                                    const double dim[3], const double l_x2,
                                    const int periodic,
                                    const struct gpart *const space_gparts,
                                    const size_t nr_gparts,
                                    const struct cell *restrict ci,
                                    const struct cell *restrict cj) {
#ifdef SWIFT_DEBUG_CHECKS
  if (ci->nodeID == engine_rank) error("ci local!");
  if (cj->nodeID != engine_rank) error("cj not local!");
#endif
  const size_t count_i = ci->grav.count;
  const size_t count_j = cj->grav.count;
  struct gpart_fof_foreign *gparts_i = ci->grav.parts_fof_foreign;
  struct gpart *gparts_j = cj->grav.parts;
  /* Distances of particles in the global list */
  float *const offset_dist_j =
      props->distance_to_link + (ptrdiff_t)(gparts_j - space_gparts);
  /* Account for boundary conditions.*/
  double shift[3] = {0.0, 0.0, 0.0};
  /* Get the relative distance between the pairs, wrapping. */
  double diff[3];
  for (int k = 0; k < 3; k++) {
    diff[k] = cj->loc[k] - ci->loc[k];
    if (periodic && diff[k] < -dim[k] * 0.5)
      shift[k] = dim[k];
    else if (periodic && diff[k] > dim[k] * 0.5)
      shift[k] = -dim[k];
    else
      shift[k] = 0.0;
    diff[k] += shift[k];
  }
  /* Loop over particles and find which particles belong in the same group. */
  for (size_t i = 0; i < count_i; i++) {
    struct gpart_fof_foreign *restrict pi = &gparts_i[i];
    /* Ignore inhibited particles */
    if (pi->time_bin >= time_bin_inhibited) continue;
    /* Check whether we ignore this particle type altogether */
    if (gpart_foreign_is_ignorable(pi)) continue;
#ifdef SWIFT_DEBUG_CHECKS
    if (pi->ti_drift != ti_current)
      error("Running FOF on an un-drifted particle!");
#endif
    const double pix = pi->x[0] - shift[0];
    const double piy = pi->x[1] - shift[1];
    const double piz = pi->x[2] - shift[2];
    /* Get the nature of the linking */
    const int is_link_i = gpart_foreign_is_linkable(pi);
    const int is_attach_i = gpart_foreign_is_attachable(pi);
#ifdef SWIFT_DEBUG_CHECKS
    if (is_link_i && is_attach_i)
      error("Particle cannot be both linkable and attachable!");
#endif
    for (size_t j = 0; j < count_j; j++) {
      struct gpart *restrict pj = &gparts_j[j];
      /* Ignore inhibited particles */
      if (pj->time_bin >= time_bin_inhibited) continue;
      /* Check whether we ignore this particle type altogether */
      if (gpart_is_ignorable(pj)) continue;
      /* Get the nature of the linking */
      const int is_link_j = gpart_is_linkable(pj);
      const int is_attach_j = gpart_is_attachable(pj);
#ifdef SWIFT_DEBUG_CHECKS
      if (is_link_j && is_attach_j)
        error("Particle cannot be both linkable and attachable!");
#endif
      /* We only want link<->attach pairs */
      if (is_attach_i && is_attach_j) continue;
      if (is_link_i && is_link_j) continue;
#ifdef SWIFT_DEBUG_CHECKS
      if (pj->ti_drift != ti_current)
        error("Running FOF on an un-drifted particle!");
#endif
      const double pjx = pj->x[0];
      const double pjy = pj->x[1];
      const double pjz = pj->x[2];
      /* Compute pairwise distance (periodic BCs were accounted
       for by the shift vector) */
      float dx[3], r2 = 0.0f;
      dx[0] = pix - pjx;
      dx[1] = piy - pjy;
      dx[2] = piz - pjz;
      for (int k = 0; k < 3; k++) r2 += dx[k] * dx[k];
      /* Hit or miss? */
      if (r2 < l_x2) {
        /* Now that we are within the linking length,
         * decide what to do based on linking types */
        if (is_link_i && is_link_j) {
#ifdef SWIFT_DEBUG_CHECKS
          error("Fundamental logic error!");
#endif
        } else if (is_link_i && is_attach_j) {
          /* We got a linkable and an attachable.
           * See whether it is closer and if so re-link.
           * This is safe to do as the attachables are never roots and
           * nothing is attached to them */
          const float dist = sqrtf(r2);
          if (dist < offset_dist_j[j]) {
            /* Store the new min dist */
            offset_dist_j[j] = dist;
            /* Store the current best root */
            pj->fof_data.group_id = pi->fof_data.group_id;
          }
        } else if (is_link_j && is_attach_i) {
          /* Nothing to do here. The reverse action will be done
             in the converse call on the other node. */
        } else {
#ifdef SWIFT_DEBUG_CHECKS
          error("Fundamental logic error!");
#endif
        }
      }
    }
  }
}
/**
 * @brief Recursively perform a union-find attaching between two cells.
 *
 * If cells are more distant than the linking length, we abort early.
 *
 * @param props The properties fof the FOF scheme.
 * @param dim The dimension of the space.
 * @param attach_r2 the square of the FOF linking length.
 * @param periodic Are we using periodic BCs?
 * @param space_gparts The start of the #gpart array in the #space structure.
 * @param nr_gparts The number of #gpart in the local #space structure.
 * @param ci The first #cell in which to perform FOF.
 * @param cj The second #cell in which to perform FOF.
 * @param ci_local Is the #cell ci on the local MPI rank?
 * @param cj_local Is the #cell cj on the local MPI rank?
 */
void rec_fof_attach_pair(const struct fof_props *props, const double dim[3],
                         const double attach_r2, const int periodic,
                         const struct gpart *const space_gparts,
                         const size_t nr_gparts, struct cell *restrict ci,
                         struct cell *restrict cj, const int ci_local,
                         const int cj_local) {
  /* Find the shortest distance between cells, remembering to account for
   * boundary conditions. */
  const double r2 = cell_min_dist(ci, cj, dim);
#ifdef SWIFT_DEBUG_CHECKS
  if (ci == cj) error("Pair FOF called on same cell!!!");
#endif
  /* Return if cells are out of range of each other. */
  if (r2 > attach_r2) return;
  /* Recurse on both cells if they are both split. */
  if (ci->split && cj->split) {
    for (int k = 0; k < 8; k++) {
      if (ci->progeny[k] != NULL) {
        for (int l = 0; l < 8; l++)
          if (cj->progeny[l] != NULL)
            rec_fof_attach_pair(props, dim, attach_r2, periodic, space_gparts,
                                nr_gparts, ci->progeny[k], cj->progeny[l],
                                ci_local, cj_local);
      }
    }
  } else if (ci->split) {
    for (int k = 0; k < 8; k++) {
      if (ci->progeny[k] != NULL)
        rec_fof_attach_pair(props, dim, attach_r2, periodic, space_gparts,
                            nr_gparts, ci->progeny[k], cj, ci_local, cj_local);
    }
  } else if (cj->split) {
    for (int k = 0; k < 8; k++) {
      if (cj->progeny[k] != NULL)
        rec_fof_attach_pair(props, dim, attach_r2, periodic, space_gparts,
                            nr_gparts, ci, cj->progeny[k], ci_local, cj_local);
    }
  } else {
    /* Perform FOF attach between pairs of cells that are within the linking
     * length and not the same cell. */
    if (ci_local && cj_local) {
      fof_attach_pair_cells_both_local(props, dim, attach_r2, periodic,
                                       space_gparts, nr_gparts, ci, cj);
    } else if (ci_local && !cj_local) {
      fof_attach_pair_cells_ci_local(props, dim, attach_r2, periodic,
                                     space_gparts, nr_gparts, ci, cj);
    } else if (!ci_local && cj_local) {
      fof_attach_pair_cells_cj_local(props, dim, attach_r2, periodic,
                                     space_gparts, nr_gparts, ci, cj);
    } else {
      error("Error in the recursion logic");
    }
  }
}
/**
 * @brief Recursively perform a the attaching operation on a cell.
 *
 * @param props The properties fof the FOF scheme.
 * @param dim The dimension of the space.
 * @param attach_r2 the square of the FOF linking length.
 * @param periodic Are we using periodic BCs?
 * @param space_gparts The start of the #gpart array in the #space structure.
 * @param nr_gparts The number of #gpart in the local #space structure.
 * @param c The #cell in which to perform FOF.
 */
void rec_fof_attach_self(const struct fof_props *props, const double dim[3],
                         const double attach_r2, const int periodic,
                         const struct gpart *const space_gparts,
                         const size_t nr_gparts, struct cell *c) {
  /* Recurse? */
  if (c->split) {
    /* Loop over all progeny. Perform pair and self recursion on progenies.*/
    for (int k = 0; k < 8; k++) {
      if (c->progeny[k] != NULL) {
        rec_fof_attach_self(props, dim, attach_r2, periodic, space_gparts,
                            nr_gparts, c->progeny[k]);
        for (int l = k + 1; l < 8; l++)
          if (c->progeny[l] != NULL)
            rec_fof_attach_pair(props, dim, attach_r2, periodic, space_gparts,
                                nr_gparts, c->progeny[k], c->progeny[l],
                                /*ci_local=*/1,
                                /*cj_local=*/1);
      }
    }
  } else {
    /* Otherwise, compute self-interaction. */
    fof_attach_self_cell(props, attach_r2, space_gparts, nr_gparts, c);
  }
}
/* Mapper function to atomically update the group size array. */
void fof_update_group_size_mapper(hashmap_key_t key, hashmap_value_t *value,
                                  void *data) {
  size_t *group_size = (size_t *)data;
  /* Use key to index into group size array. */
  atomic_add(&group_size[key], value->value_st);
}
/**
 * @brief Mapper function to calculate the group sizes.
 *
 * @param map_data An array of #gpart%s.
 * @param num_elements Chunk size.
 * @param extra_data Pointer to a #space.
 */
void fof_calc_group_size_mapper(void *map_data, int num_elements,
                                void *extra_data) {
  /* Retrieve mapped data. */
  struct space *s = (struct space *)extra_data;
  struct gpart *gparts = (struct gpart *)map_data;
  size_t *restrict group_index = s->e->fof_properties->group_index;
  size_t *restrict group_size = s->e->fof_properties->group_size;
  /* Offset into gparts array. */
  const ptrdiff_t gparts_offset = (ptrdiff_t)(gparts - s->gparts);
  size_t *const group_index_offset = group_index + gparts_offset;
  /* Create hash table. */
  hashmap_t map;
  hashmap_init(&map);
  for (int ind = 0; ind < num_elements; ind++) {
    const hashmap_key_t root =
        (hashmap_key_t)fof_find(group_index_offset[ind], group_index);
    const size_t gpart_index = gparts_offset + ind;
    /* Only add particles which aren't the root of a group. Stops groups of size
     * 1 being added to the hash table. */
    if (root != gpart_index) {
      hashmap_value_t *size = hashmap_get(&map, root);
      if (size != NULL)
        (*size).value_st++;
      else
        error("Couldn't find key (%zu) or create new one.", root);
    }
  }
  /* Update the group size array. */
  if (map.size > 0)
    hashmap_iterate(&map, fof_update_group_size_mapper, group_size);
  hashmap_free(&map);
}
/**
 * @brief Mapper function to perform FOF search.
 *
 * @param map_data An array of #cell pair indices.
 * @param num_elements Chunk size.
 * @param extra_data Pointer to a #space.
 */
void fof_find_foreign_links_mapper(void *map_data, int num_elements,
                                   void *extra_data) {
#ifdef WITH_MPI
  /* Retrieve mapped data. */
  struct space *s = (struct space *)extra_data;
  const int periodic = s->periodic;
  const size_t nr_gparts = s->nr_gparts;
  const struct gpart *const gparts = s->gparts;
  const struct engine *e = s->e;
  struct fof_props *props = e->fof_properties;
  struct cell_pair_indices *cell_pairs = (struct cell_pair_indices *)map_data;
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
  const double search_r2 = props->l_x2;
  /* Store links in an array local to this thread. */
  int local_link_count = 0;
  int local_group_links_size = props->group_links_size / e->nr_threads;
  /* Init the local group links buffer. */
  struct fof_mpi *local_group_links = (struct fof_mpi *)swift_calloc(
      "fof_local_group_links", sizeof(struct fof_mpi), local_group_links_size);
  if (local_group_links == NULL)
    error("Failed to allocate temporary group links buffer.");
  /* Loop over all pairs of local and foreign cells, recurse then perform a
   * FOF search. */
  for (int ind = 0; ind < num_elements; ind++) {
    /* Get the local and foreign cells to recurse on. */
    const struct cell *restrict local_cell = cell_pairs[ind].local;
    const struct cell *restrict foreign_cell = cell_pairs[ind].foreign;
    rec_fof_search_pair_foreign(props, dim, search_r2, periodic, gparts,
                                nr_gparts, local_cell, foreign_cell,
                                &local_link_count, &local_group_links,
                                &local_group_links_size);
  }
  /* Add links found by this thread to the global link list. */
  /* Lock to prevent race conditions while adding to the global link list.*/
  if (lock_lock(&s->lock) == 0) {
    /* Get pointers to global arrays. */
    int *restrict group_links_size = &props->group_links_size;
    int *restrict group_link_count = &props->group_link_count;
    struct fof_mpi **group_links = &props->group_links;
    /* If the global group_links array is not big enough re-allocate it. */
    if (*group_link_count + local_link_count > *group_links_size) {
      const int old_size = *group_links_size;
      const int new_size =
          max(*group_link_count + local_link_count, 2 * old_size);
      (*group_links) = (struct fof_mpi *)realloc(
          *group_links, new_size * sizeof(struct fof_mpi));
      *group_links_size = new_size;
      message("Re-allocating global group links from %d to %d elements.",
              old_size, new_size);
    }
    /* Copy the local links to the global list. */
    for (int i = 0; i < local_link_count; i++) {
      int found = 0;
      /* Check that the links have not already been added to the list by another
       * thread. */
      for (int l = 0; l < *group_link_count; l++) {
        if ((*group_links)[l].group_i == local_group_links[i].group_i &&
            (*group_links)[l].group_j == local_group_links[i].group_j) {
          found = 1;
          break;
        }
      }
      if (!found) {
        (*group_links)[*group_link_count].group_i =
            local_group_links[i].group_i;
        (*group_links)[*group_link_count].group_i_size =
            local_group_links[i].group_i_size;
        (*group_links)[*group_link_count].group_j =
            local_group_links[i].group_j;
        (*group_links)[*group_link_count].group_j_size =
            local_group_links[i].group_j_size;
        (*group_link_count) = (*group_link_count) + 1;
      }
    }
  }
  /* Release lock. */
  if (lock_unlock(&s->lock) != 0) error("Failed to unlock the space");
  swift_free("fof_local_group_links", local_group_links);
#endif
}
/**
 * @brief Compute the group properties for all groups.
 *
 * At the end of the process, all MPI ranks have all the information
 * about every group. We compute:
 * - Group size,
 * - Group total mass,
 * - Group centre of mass,
 * - Maximal gas particle density,
 * - Whether a group has a BH particle or not.
 *
 * TODO: Possible improvement is to delay the allocation of the CoM array
 * until after the min/max density arrays have played their role.
 * TODO: Can the loop over particles be threadpoolized?
 */
void fof_calc_group_mass(struct fof_props *props, const struct space *s,
                         int *restrict number_of_local_seeds,
                         int *restrict number_of_global_seeds) {
  const size_t nr_gparts = s->nr_gparts;
  const struct gpart *gparts = s->gparts;
  const struct part *parts = s->parts;
  const size_t group_id_default = props->group_id_default;
  const int periodic = s->periodic;
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
  const double seed_halo_mass = props->seed_halo_mass;
  /* Direct pointers to the arrays */
  long long *final_group_size = props->final_group_size;
  double *group_mass = props->group_mass;
  double *centre_of_mass = props->group_centre_of_mass;
  float *radii = props->group_radii;
  char *has_black_hole = props->has_black_hole;
  float *max_part_density = props->max_part_density;
  /* Temporary arrays to help with the CoMs */
  float *max_positions, *min_positions;
  if (swift_memalign("fof_group_max_position", (void **)&max_positions, 32,
                     props->num_groups * 3 * sizeof(double)) != 0)
    error("Unable to allocate memory for the max positions");
  if (swift_memalign("fof_group_min_position", (void **)&min_positions, 32,
                     props->num_groups * 3 * sizeof(double)) != 0)
    error("Unable to allocate memory for the min positions");
  /* Initialise the min/max arrays to the limits */
  for (size_t i = 0; i < 3 * (size_t)props->num_groups; ++i) {
    min_positions[i] = DBL_MAX;
    max_positions[i] = -DBL_MAX;
  }
  /* Collect information about the local particles and update the array of
   * properties. Recall the array is as big as all the haloes accross
   * all domains */
  for (size_t i = 0; i < nr_gparts; i++) {
    /* Ignore inhibited particles */
    if (gparts[i].time_bin >= time_bin_inhibited) continue;
    /* Check whether we ignore this particle type altogether */
    if (gpart_is_ignorable(&gparts[i])) continue;
    /* Ignore particles not in groups */
    if (gparts[i].fof_data.group_id == group_id_default) continue;
    if (gparts[i].fof_data.group_id > (size_t)props->num_groups)
      error("Found an invalid group ID!");
    /* Entry into the global list of group properties */
    const size_t index = gparts[i].fof_data.group_id - 1;
    /********************
     * We know in which group this particle is: compute props
     ********************/
    /* Count the number of particles */
    final_group_size[index]++;
    /* Add to the total mass */
    group_mass[index] += gparts[i].mass;
    /* Get the min/max position along each axis */
    min_positions[index * 3 + 0] =
        fmin(min_positions[index * 3 + 0], gparts[i].x[0]);
    min_positions[index * 3 + 1] =
        fmin(min_positions[index * 3 + 1], gparts[i].x[1]);
    min_positions[index * 3 + 2] =
        fmin(min_positions[index * 3 + 2], gparts[i].x[2]);
    max_positions[index * 3 + 0] =
        fmax(max_positions[index * 3 + 0], gparts[i].x[0]);
    max_positions[index * 3 + 1] =
        fmax(max_positions[index * 3 + 1], gparts[i].x[1]);
    max_positions[index * 3 + 2] =
        fmax(max_positions[index * 3 + 2], gparts[i].x[2]);
    /* Check whether there is a black hole */
    if (gparts[i].type == swift_type_black_hole) {
      has_black_hole[index] = 1;
    }
    /* Idntify the densest gas particle in the group */
    if (gparts[i].type == swift_type_gas) {
      const size_t gas_index = -gparts[i].id_or_neg_offset;
      const float rho_com = hydro_get_comoving_density(&parts[gas_index]);
      max_part_density[index] = fmaxf(rho_com, max_part_density[index]);
    }
  }
#ifdef WITH_MPI
  /* Now all-reduce the local fragments so that everyone has a full catalog */
  MPI_Allreduce(MPI_IN_PLACE, final_group_size, props->num_groups,
                MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
  MPI_Allreduce(MPI_IN_PLACE, group_mass, props->num_groups, MPI_DOUBLE,
                MPI_SUM, MPI_COMM_WORLD);
  MPI_Allreduce(MPI_IN_PLACE, min_positions, 3 * props->num_groups, MPI_DOUBLE,
                MPI_MIN, MPI_COMM_WORLD);
  MPI_Allreduce(MPI_IN_PLACE, max_positions, 3 * props->num_groups, MPI_DOUBLE,
                MPI_MAX, MPI_COMM_WORLD);
  MPI_Allreduce(MPI_IN_PLACE, has_black_hole, props->num_groups, MPI_CHAR,
                MPI_MAX, MPI_COMM_WORLD);
  MPI_Allreduce(MPI_IN_PLACE, max_part_density, props->num_groups, MPI_FLOAT,
                MPI_MAX, MPI_COMM_WORLD);
#endif
  *number_of_local_seeds = 0;
  *number_of_global_seeds = 0;
  /* We can now do a second pass to compute the centre of mass
   * Because of periodic BCs, we need to shift the positions in cases where the
   * (max-min) is larger than 1/2 of the box size */
  for (size_t i = 0; i < nr_gparts; i++) {
    /* Ignore inhibited particles */
    if (gparts[i].time_bin >= time_bin_inhibited) continue;
    /* Check whether we ignore this particle type altogether */
    if (gpart_is_ignorable(&gparts[i])) continue;
    /* Ignore particles not in groups */
    if (gparts[i].fof_data.group_id == group_id_default) continue;
    /* Entry into the global list of group properties */
    const size_t index = gparts[i].fof_data.group_id - 1;
    const int halo_is_on_edge[3] = {
        max_positions[index * 3 + 0] - min_positions[index * 3 + 0] >
            0.5 * dim[0],
        max_positions[index * 3 + 1] - min_positions[index * 3 + 1] >
            0.5 * dim[1],
        max_positions[index * 3 + 2] - min_positions[index * 3 + 2] >
            0.5 * dim[2]};
    /* Get particle position, including necessary wrapping */
    double x[3] = {gparts[i].x[0], gparts[i].x[1], gparts[i].x[2]};
    if (periodic) {
      for (int k = 0; k < 3; k++) {
        if (halo_is_on_edge[k]) {
          x[k] = box_wrap(x[k] + 0.5 * dim[k], 0., dim[k]);
        }
      }
    }
    /* Centre of mass */
    centre_of_mass[index * 3 + 0] += gparts[i].mass * x[0];
    centre_of_mass[index * 3 + 1] += gparts[i].mass * x[1];
    centre_of_mass[index * 3 + 2] += gparts[i].mass * x[2];
    /* Should we seed a BH in this group? */
    if (!has_black_hole[index] && group_mass[index] > seed_halo_mass) {
      /* Is this a gas particle? */
      if (gparts[i].type == swift_type_gas) {
        const size_t gas_index = -gparts[i].id_or_neg_offset;
        const float rho_com = hydro_get_comoving_density(&parts[gas_index]);
        /* Is this the gas paricle which is the densest? */
        if (rho_com == max_part_density[index]) {
          (*number_of_local_seeds)++;
        }
      }
    }
  }
#ifdef WITH_MPI
  /* Now all-reduce the CoMs so that everyone has a full catalog */
  MPI_Allreduce(MPI_IN_PLACE, centre_of_mass, 3 * props->num_groups, MPI_DOUBLE,
                MPI_SUM, MPI_COMM_WORLD);
  MPI_Allreduce(number_of_local_seeds, number_of_global_seeds, 1, MPI_INT,
                MPI_SUM, MPI_COMM_WORLD);
#else
  *number_of_global_seeds = *number_of_local_seeds;
#endif
  /* Finalise the operations on the group catalog */
  for (size_t i = 0; i < (size_t)props->num_groups; ++i) {
    centre_of_mass[i * 3 + 0] /= group_mass[i];
    centre_of_mass[i * 3 + 1] /= group_mass[i];
    centre_of_mass[i * 3 + 2] /= group_mass[i];
    const int halo_is_on_edge[3] = {
        max_positions[i * 3 + 0] - min_positions[i * 3 + 0] > 0.5 * dim[0],
        max_positions[i * 3 + 1] - min_positions[i * 3 + 1] > 0.5 * dim[1],
        max_positions[i * 3 + 2] - min_positions[i * 3 + 2] > 0.5 * dim[2]};
    /* Undo the half-box periodic shift */
    if (periodic) {
      for (int k = 0; k < 3; k++) {
        if (halo_is_on_edge[k]) {
          centre_of_mass[i * 3 + k] -= 0.5 * dim[k];
        }
        centre_of_mass[i * 3 + k] =
            box_wrap(centre_of_mass[i * 3 + k], 0., dim[k]);
      }
    }
    /* Relabel the group ids */
    props->final_group_index[i] = i + 1;
  }
  // Calculate the maximum radius for each FOF
  for (size_t i = 0; i < nr_gparts; i++) {
    /* Ignore particles not in groups */
    if (gparts[i].fof_data.group_id == group_id_default) continue;
    /* Entry into the global list of group properties */
    const size_t index = gparts[i].fof_data.group_id - 1;
    /* Set CoM as the origin*/
    double x[3] = {gparts[i].x[0], gparts[i].x[1], gparts[i].x[2]};
    for (int k = 0; k < 3; k++) {
      if (periodic) {
        x[k] = box_wrap(x[k] + (dim[k] / 2.) - centre_of_mass[index * 3 + k],
                        0., dim[k]);
        x[k] -= dim[k] / 2.;
      } else {
        x[k] -= centre_of_mass[index * 3 + k];
      }
    }
    /* Calculate the radius*/
    float r = sqrtf((x[0] * x[0]) + (x[1] * x[1]) + (x[2] * x[2]));
    radii[index] = fmax(radii[index], r);
  }
#ifdef WITH_MPI
  MPI_Allreduce(MPI_IN_PLACE, radii, props->num_groups, MPI_FLOAT, MPI_MAX,
                MPI_COMM_WORLD);
#endif
  /* Free temporary arrays */
  swift_free("max_positions", max_positions);
  swift_free("min_positions", min_positions);
}
/**
 * @brief Seed black holes from gas particles in the haloes on the local MPI
 * rank that passed the criteria.
 *
 * @param props The properties of the FOF scheme.
 * @param bh_props The properties of the black hole scheme.
 * @param constants The physical constants.
 * @param cosmo The cosmological model.
 * @param s The @space we act on.
 * @param number_of_local_seeds Number of BHs to create on this rank.
 * @param number_of_global_seeds Number of BHs to create in total.
 */
void fof_seed_black_holes(const struct fof_props *props,
                          const struct black_holes_props *bh_props,
                          const struct phys_const *constants,
                          const struct cosmology *cosmo, struct space *s,
                          const int number_of_local_seeds,
                          const int number_of_global_seeds) {
  if (engine_rank == 0)
    message("Seeding %d black hole(s)", number_of_global_seeds);
  /* Anything to do this time on this rank? */
  if (number_of_local_seeds == 0) return;
  /* Do we need to reallocate the black hole array for the new particles? */
  if (s->nr_bparts + number_of_local_seeds > s->size_bparts) {
    const size_t nr_bparts_new = s->nr_bparts + number_of_local_seeds;
    s->size_bparts = engine_parts_size_grow * nr_bparts_new;
    struct bpart *bparts_new = NULL;
    if (swift_memalign("bparts", (void **)&bparts_new, bpart_align,
                       sizeof(struct bpart) * s->size_bparts) != 0)
      error("Failed to allocate new bpart data.");
    memcpy(bparts_new, s->bparts, sizeof(struct bpart) * s->nr_bparts);
    swift_free("bparts", s->bparts);
    s->bparts = bparts_new;
  }
  const size_t nr_gparts = s->nr_gparts;
  struct gpart *gparts = s->gparts;
  struct part *parts = s->parts;
  struct xpart *xparts = s->xparts;
  struct bpart *bparts = s->bparts;
  const size_t group_id_default = props->group_id_default;
  const double seed_halo_mass = props->seed_halo_mass;
  /* Direct pointers to the arrays */
  double *group_mass = props->group_mass;
  char *has_black_hole = props->has_black_hole;
  float *max_part_density = props->max_part_density;
  size_t k = s->nr_bparts;
  for (size_t i = 0; i < nr_gparts; i++) {
    /* Check whether this is a gas particle */
    if (gparts[i].type != swift_type_gas) continue;
    /* Ignore inhibited particles */
    if (gparts[i].time_bin >= time_bin_inhibited) continue;
    /* Ignore particles not in groups */
    if (gparts[i].fof_data.group_id == group_id_default) continue;
    if (gparts[i].fof_data.group_id > (size_t)props->num_groups)
      error("Found an invalid group ID!");
    /* Get the density of this particle */
    const size_t gas_index = -gparts[i].id_or_neg_offset;
    const float rho_com = hydro_get_comoving_density(&parts[gas_index]);
    /* Entry into the global list of group properties of this particle */
    const size_t index = gparts[i].fof_data.group_id - 1;
    /* Should we seed a BH in this group? */
    if (!has_black_hole[index] && group_mass[index] > seed_halo_mass) {
      /* Does it match the max density for this group?
       * (i.e. is it the particle we identified as the one to convert?) */
      if (rho_com == max_part_density[index]) {
        /* Handle on the particle to convert */
        struct part *p = &parts[gas_index];
        struct xpart *xp = &xparts[gas_index];
        struct gpart *gp = p->gpart;
#ifdef SWIFT_DEBUG_CHECKS
        if (gp != &gparts[i]) error("Weird gas<->gpart linking error!");
#endif
        /* Let's destroy the gas particle */
        p->time_bin = time_bin_inhibited;
        p->gpart = NULL;
        /* Mark the gpart as black hole */
        gp->type = swift_type_black_hole;
        /* Basic properties of the black hole */
        struct bpart *bp = &bparts[k];
        bzero(bp, sizeof(struct bpart));
        bp->time_bin = gp->time_bin;
        /* Re-link things */
        bp->gpart = gp;
        gp->id_or_neg_offset = -(bp - s->bparts);
        /* Synchronize masses, positions and velocities */
        bp->mass = gp->mass;
        bp->x[0] = gp->x[0];
        bp->x[1] = gp->x[1];
        bp->x[2] = gp->x[2];
        bp->v[0] = gp->v_full[0];
        bp->v[1] = gp->v_full[1];
        bp->v[2] = gp->v_full[2];
        /* Set a smoothing length */
        bp->h = p->h;
        /* Save the ID */
        bp->id = p->id;
        /* Save the tree depth */
        bp->depth_h = p->depth_h;
#ifdef SWIFT_DEBUG_CHECKS
        bp->ti_kick = p->ti_kick;
        bp->ti_drift = p->ti_drift;
#endif
        /* Copy over all the gas properties that we want */
        black_holes_create_from_gas(bp, bh_props, constants, cosmo, p, xp,
                                    s->e->ti_current);
        tracers_first_init_bpart(bp, s->e->internal_units,
                                 s->e->physical_constants, cosmo);
        /* Move to the next BH slot */
        k++;
      }
    }
  }
#ifdef SWIFT_DEBUG_CHECKS
  if (s->nr_bparts + number_of_local_seeds != k) {
    error("Seeded the wrong number of black holes!");
  }
#endif
  /* Update the count of black holes. */
  s->nr_bparts = k;
}
/* Dump FOF group data. */
void fof_dump_group_data(const struct fof_props *props, const int my_rank,
                         const int nr_nodes, const char *out_file_name,
                         struct space *s, const int num_groups) {
  FILE *file = NULL;
  long long *final_group_size = props->final_group_size;
  long long *group_index = props->final_group_index;
  double *group_mass = props->group_mass;
  double *group_centre_of_mass = props->group_centre_of_mass;
  float *group_radii = props->group_radii;
  for (int rank = 0; rank < nr_nodes; ++rank) {
    if (rank == 0) {
      const char *mode;
      if (my_rank == 0)
        mode = "w";
      else
        mode = "a";
      file = fopen(out_file_name, mode);
      if (file == NULL)
        error("Could not open the file '%s' with mode '%s'.", out_file_name,
              mode);
      if (my_rank == 0) {
        fprintf(file, "# %8s %12s %12s %12s %12s %12s %12s %12s %24s %24s \n",
                "Group ID", "Group Size", "Group Mass", "Group Radii", "CoM_x",
                "CoM_y", "CoM_z", "Max Density", "Max Density Local Index",
                "Particle ID");
        fprintf(file,
                "#-------------------------------------------------------------"
                "------"
                "------------------------------\n");
      }
      for (int i = 0; i < num_groups; i++) {
        fprintf(file,
                "  %8lld %12lld %12e %12e %12e %12e %12e %12e %24lld %24lld\n",
                group_index[i], final_group_size[i], group_mass[i],
                group_radii[i], group_centre_of_mass[i * 3 + 0],
                group_centre_of_mass[i * 3 + 1],
                group_centre_of_mass[i * 3 + 2], 0., -1ll, -1ll);
      }
      fclose(file);
    }
  }
}
struct mapper_data {
  size_t *group_index;
  size_t *group_size;
  float *distance_to_link;
  size_t nr_gparts;
  struct gpart *space_gparts;
};
/**
 * @brief Mapper function to set the roots of #gpart%s going to other MPI ranks.
 *
 * @param map_data The list of outgoing local cells.
 * @param num_elements Chunk size.
 * @param extra_data Pointer to mapper data.
 */
void fof_set_outgoing_root_mapper(void *map_data, int num_elements,
                                  void *extra_data) {
#ifdef WITH_MPI
  /* Unpack the data */
  struct cell **local_cells = (struct cell **)map_data;
  const struct mapper_data *data = (struct mapper_data *)extra_data;
  const size_t *const group_index = data->group_index;
  const size_t *const group_size = data->group_size;
  const size_t nr_gparts = data->nr_gparts;
  const struct gpart *const space_gparts = data->space_gparts;
  /* Loop over the out-going local cells */
  for (int i = 0; i < num_elements; ++i) {
    /* Get the cell and its gparts */
    struct cell *local_cell = local_cells[i];
    struct gpart *gparts = local_cell->grav.parts;
    /* Make a list of particle offsets into the global gparts array. */
    const size_t *const offset =
        group_index + (ptrdiff_t)(gparts - space_gparts);
    /* Set each particle's root and group properties found in the local FOF.*/
    for (int k = 0; k < local_cell->grav.count; k++) {
      /* TODO: Can we skip ignorable particles here?
       * Likely makes no difference */
      /* Recall we did alter the group_index with a global_offset.
       * We need to remove that here as we want the *local* root */
      const size_t root =
          fof_find_global(offset[k] - node_offset, group_index, nr_gparts);
      /* TODO: Could we call fof_find() here instead?
       * Likely yes but we  don't want path compression at this stage.
       * So, probably not */
      gparts[k].fof_data.group_id = root;
      gparts[k].fof_data.group_size = group_size[root - node_offset];
    }
  }
#else
  error("Calling MPI function in non-MPI mode");
#endif
}
/**
 * @brief Search foreign cells for links and communicate any found to the
 * appropriate node.
 *
 * @param props the properties of the FOF scheme.
 * @param s Pointer to a #space.
 */
void fof_search_foreign_cells(struct fof_props *props, const struct space *s) {
#ifdef WITH_MPI
  struct engine *e = s->e;
  const int verbose = e->verbose;
  /* Abort if only one node */
  if (e->nr_nodes == 1) return;
  size_t *restrict group_index = props->group_index;
  size_t *restrict group_size = props->group_size;
  const size_t nr_gparts = s->nr_gparts;
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
  const double search_r2 = props->l_x2;
  const ticks tic_total = getticks();
  ticks tic = getticks();
  /* Make group IDs globally unique. */
  for (size_t i = 0; i < nr_gparts; i++) group_index[i] += node_offset;
  struct cell_pair_indices *cell_pairs = NULL;
  int cell_pair_count = 0;
  props->group_links_size = fof_props_default_group_link_size;
  int num_cells_out = 0;
  int num_cells_in = 0;
  /* Find the maximum no. of cell pairs that can communicate. */
  for (int i = 0; i < e->nr_proxies; i++) {
    for (int j = 0; j < e->proxies[i].nr_cells_out; j++) {
      /* Only include gravity cells. */
      if (e->proxies[i].cells_out_type[j] & proxy_cell_type_gravity)
        num_cells_out++;
    }
    for (int j = 0; j < e->proxies[i].nr_cells_in; j++) {
      /* Only include gravity cells. */
      if (e->proxies[i].cells_in_type[j] & proxy_cell_type_gravity)
        num_cells_in++;
    }
  }
  if (verbose)
    message(
        "Finding max no. of cells + offset IDs"
        "took: %.3f %s.",
        clocks_from_ticks(getticks() - tic), clocks_getunit());
  const int cell_pair_size = num_cells_in * num_cells_out;
  /* Allocate memory for all the possible cell links */
  if (swift_memalign("fof_group_links", (void **)&props->group_links,
                     SWIFT_STRUCT_ALIGNMENT,
                     props->group_links_size * sizeof(struct fof_mpi)) != 0)
    error("Error while allocating memory for FOF links over an MPI domain");
  if (swift_memalign("fof_cell_pairs", (void **)&cell_pairs,
                     SWIFT_STRUCT_ALIGNMENT,
                     cell_pair_size * sizeof(struct cell_pair_indices)) != 0)
    error("Error while allocating memory for FOF cell pair indices");
  ticks tic_pairs = getticks();
  /* Loop over cells_in and cells_out for each proxy and find which cells are in
   * range of each other to perform the FOF search. Store local cells that are
   * touching foreign cells in a list. */
  for (int i = 0; i < e->nr_proxies; i++) {
    /* Only find links across an MPI domain on one rank. */
    if (engine_rank == min(engine_rank, e->proxies[i].nodeID)) {
      for (int j = 0; j < e->proxies[i].nr_cells_out; j++) {
        /* Skip non-gravity cells. */
        if (!(e->proxies[i].cells_out_type[j] & proxy_cell_type_gravity))
          continue;
        struct cell *restrict local_cell = e->proxies[i].cells_out[j];
        /* Skip empty cells. */
        if (local_cell->grav.count == 0) continue;
        for (int k = 0; k < e->proxies[i].nr_cells_in; k++) {
          /* Skip non-gravity cells. */
          if (!(e->proxies[i].cells_in_type[k] & proxy_cell_type_gravity))
            continue;
          struct cell *restrict foreign_cell = e->proxies[i].cells_in[k];
          /* Skip empty cells. */
          if (foreign_cell->grav.count == 0) continue;
          /* Add candidates in range to the list of pairs of cells to treat. */
          const double r2 = cell_min_dist(local_cell, foreign_cell, dim);
          if (r2 < search_r2) {
            cell_pairs[cell_pair_count].local = local_cell;
            cell_pairs[cell_pair_count].foreign = foreign_cell;
            cell_pair_count++;
          }
        }
      }
    }
  }
  if (verbose)
    message("Finding local/foreign cell pairs took: %.3f %s.",
            clocks_from_ticks(getticks() - tic_pairs), clocks_getunit());
  const ticks tic_set_roots = getticks();
  /* Set the root of outgoing particles. */
  /* Allocate array of outgoing cells and populate it */
  struct cell **local_cells =
      (struct cell **)malloc(num_cells_out * sizeof(struct cell *));
  int count = 0;
  for (int i = 0; i < e->nr_proxies; i++) {
    for (int j = 0; j < e->proxies[i].nr_cells_out; j++) {
      /* Only include gravity cells. */
      if (e->proxies[i].cells_out_type[j] & proxy_cell_type_gravity) {
        local_cells[count] = e->proxies[i].cells_out[j];
        ++count;
      }
    }
  }
  /* Now set the *local* roots of all the gparts we are sending */
  struct mapper_data data;
  data.group_index = group_index;
  data.group_size = group_size;
  data.nr_gparts = nr_gparts;
  data.space_gparts = s->gparts;
  threadpool_map(&e->threadpool, fof_set_outgoing_root_mapper, local_cells,
                 num_cells_out, sizeof(struct cell **),
                 threadpool_auto_chunk_size, &data);
  if (verbose)
    message("Initialising particle roots took: %.3f %s.",
            clocks_from_ticks(getticks() - tic_set_roots), clocks_getunit());
  free(local_cells);
  if (verbose)
    message(
        "Finding local/foreign cell pairs and initialising particle roots "
        "took: %.3f %s.",
        clocks_from_ticks(getticks() - tic), clocks_getunit());
  /* Activate the tasks exchanging all the required gparts */
  engine_activate_gpart_comms(e);
  ticks local_fof_tic = getticks();
  /* Wait for all the communication tasks to be ready */
  MPI_Barrier(MPI_COMM_WORLD);
  if (verbose)
    message("Local FOF imbalance: %.3f %s.",
            clocks_from_ticks(getticks() - local_fof_tic), clocks_getunit());
  tic = getticks();
  /* Perform send and receive tasks. */
  engine_launch(e, "fof comms");
  if (verbose)
    message("MPI send/recv comms took: %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());
  /* We have now recevied the foreign particles. Each particle received
   * carries information about its own *foreign* (to us) root and the
   * size of the group fragment it belongs too its original foreign rank. */
  tic = getticks();
  props->group_link_count = 0;
  /* Perform search of group links between local and foreign cells with the
   * threadpool. */
  threadpool_map(&s->e->threadpool, fof_find_foreign_links_mapper, cell_pairs,
                 cell_pair_count, sizeof(struct cell_pair_indices), 1,
                 (struct space *)s);
  /* Clean up memory used by foreign particles. */
  swift_free("fof_cell_pairs", cell_pairs);
  if (verbose)
    message("Searching for foreign links took: %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());
  tic = getticks();
  const ticks comms_tic = getticks();
  MPI_Barrier(MPI_COMM_WORLD);
  if (verbose)
    message("Imbalance took: %.3f %s.",
            clocks_from_ticks(getticks() - comms_tic), clocks_getunit());
  if (verbose)
    message("fof_search_foreign_cells() took (FOF SCALING): %.3f %s.",
            clocks_from_ticks(getticks() - tic_total), clocks_getunit());
#endif /* WITH_MPI */
}
/**
 * @brief Run all the tasks attaching the attachables to their
 * nearest linkable particle.
 *
 * @param props The properties fof the FOF scheme.
 * @param s The #space we work with.
 */
void fof_link_attachable_particles(struct fof_props *props,
                                   const struct space *s) {
  /* Is there anything to attach? */
  if (!current_fof_attach_type) return;
  const ticks tic_total = getticks();
  /* Activate the tasks attaching attachable particles to the linkable ones */
  engine_activate_fof_attach_tasks(s->e);
  /* Perform FOF tasks for attachable particles. */
  engine_launch(s->e, "fof");
  if (s->e->verbose)
    message("fof_link_attachable_particles() took (FOF SCALING): %.3f %s.",
            clocks_from_ticks(getticks() - tic_total), clocks_getunit());
}
/**
 * @brief Process all the group fragments spanning more than
 * one rank to link them.
 *
 * This is the final global union-find pass which concludes
 * the MPI-FOF-algorithm.
 *
 * @param props The properties fof the FOF scheme.
 * @param s The #space we work with.
 */
void fof_link_foreign_fragments(struct fof_props *props,
                                const struct space *s) {
#ifdef WITH_MPI
  struct engine *e = s->e;
  const int verbose = e->verbose;
  /* Abort if only one node */
  if (e->nr_nodes == 1) return;
  const size_t nr_gparts = s->nr_gparts;
  size_t *restrict group_index = props->group_index;
  size_t *restrict group_size = props->group_size;
  const ticks tic_total = getticks();
  ticks tic = getticks();
  const ticks comms_tic = getticks();
  if (verbose)
    message(
        "Searching %zu gravity particles for cross-node links with l_x: %lf",
        nr_gparts, sqrt(props->l_x2));
  /* Local copy of the variable set in the mapper */
  const int group_link_count = props->group_link_count;
  /* Sum the total number of links across MPI domains over each MPI rank. */
  int global_group_link_count = 0;
  MPI_Allreduce(&group_link_count, &global_group_link_count, 1, MPI_INT,
                MPI_SUM, MPI_COMM_WORLD);
  if (global_group_link_count < 0)
    error("Overflow of the size of the global list of foreign links");
  struct fof_mpi *global_group_links = NULL;
  int *displ = NULL, *group_link_counts = NULL;
  if (swift_memalign("fof_global_group_links", (void **)&global_group_links,
                     SWIFT_STRUCT_ALIGNMENT,
                     global_group_link_count * sizeof(struct fof_mpi)) != 0)
    error("Error while allocating memory for the global list of group links");
  if (posix_memalign((void **)&group_link_counts, SWIFT_STRUCT_ALIGNMENT,
                     e->nr_nodes * sizeof(int)) != 0)
    error(
        "Error while allocating memory for the number of group links on each "
        "MPI rank");
  if (posix_memalign((void **)&displ, SWIFT_STRUCT_ALIGNMENT,
                     e->nr_nodes * sizeof(int)) != 0)
    error(
        "Error while allocating memory for the displacement in memory for the "
        "global group link list");
  /* Gather the total number of links on each rank. */
  MPI_Allgather(&group_link_count, 1, MPI_INT, group_link_counts, 1, MPI_INT,
                MPI_COMM_WORLD);
  /* Set the displacements into the global link list using the link counts from
   * each rank */
  displ[0] = 0;
  for (int i = 1; i < e->nr_nodes; i++) {
    displ[i] = displ[i - 1] + group_link_counts[i - 1];
    if (displ[i] < 0) error("Number of group links overflowing!");
  }
  /* Gather the global link list on all ranks. */
  MPI_Allgatherv(props->group_links, group_link_count, fof_mpi_type,
                 global_group_links, group_link_counts, displ, fof_mpi_type,
                 MPI_COMM_WORLD);
  /* Clean up memory. */
  free(group_link_counts);
  free(displ);
  swift_free("fof_group_links", props->group_links);
  props->group_links = NULL;
  if (verbose) {
    message("Communication took: %.3f %s.",
            clocks_from_ticks(getticks() - comms_tic), clocks_getunit());
    message("Global comms took: %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
  }
  tic = getticks();
  /* Transform the group IDs to a local list going from 0-group_count so a
   * union-find can be performed.
   * Each member of a link is stored separately --> Need 2x as many entries */
  size_t *global_group_index = NULL, *global_group_id = NULL,
         *global_group_size = NULL;
  const int global_group_list_size = 2 * global_group_link_count;
  if (swift_memalign("fof_global_group_index", (void **)&global_group_index,
                     SWIFT_STRUCT_ALIGNMENT,
                     global_group_list_size * sizeof(size_t)) != 0)
    error(
        "Error while allocating memory for the displacement in memory for the "
        "global group link list");
  if (swift_memalign("fof_global_group_id", (void **)&global_group_id,
                     SWIFT_STRUCT_ALIGNMENT,
                     global_group_list_size * sizeof(size_t)) != 0)
    error(
        "Error while allocating memory for the displacement in memory for the "
        "global group link list");
  if (swift_memalign("fof_global_group_size", (void **)&global_group_size,
                     SWIFT_STRUCT_ALIGNMENT,
                     global_group_list_size * sizeof(size_t)) != 0)
    error(
        "Error while allocating memory for the displacement in memory for the "
        "global group link list");
  bzero(global_group_size, global_group_list_size * sizeof(size_t));
  /* Create hash table. */
  hashmap_t map;
  hashmap_init(&map);
  /* Store each group ID and its properties. */
  int group_count = 0;
  for (int k = 0; k < global_group_link_count; k++) {
    const size_t group_i = global_group_links[k].group_i;
    const size_t group_j = global_group_links[k].group_j;
    global_group_size[group_count] += global_group_links[k].group_i_size;
    global_group_id[group_count] = group_i;
    hashmap_add_group(group_i, group_count, &map);
    group_count++;
    global_group_size[group_count] += global_group_links[k].group_j_size;
    global_group_id[group_count] = group_j;
    hashmap_add_group(group_j, group_count, &map);
    group_count++;
  }
  if (verbose)
    message("Global list compression took: %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());
  tic = getticks();
  /* Create a global_group_index list of groups across MPI domains so that you
   * can perform a union-find locally on each node.
   * The value of which is an offset into global_group_id, which is the actual
   * root. */
  for (int i = 0; i < group_count; i++) global_group_index[i] = i;
  /* Store the original group size before incrementing in the Union-Find. */
  size_t *orig_global_group_size = NULL;
  if (swift_memalign("fof_orig_global_group_size",
                     (void **)&orig_global_group_size, SWIFT_STRUCT_ALIGNMENT,
                     group_count * sizeof(size_t)) != 0)
    error(
        "Error while allocating memory for the displacement in memory for the "
        "global group link list");
  memcpy(orig_global_group_size, global_group_size,
         group_count * sizeof(size_t));
  /* Perform a union-find on the group links. */
  for (int k = 0; k < global_group_link_count; k++) {
    /* Use the hash table to find the group offsets in the index array. */
    const size_t find_i =
        hashmap_find_group_offset(global_group_links[k].group_i, &map);
    const size_t find_j =
        hashmap_find_group_offset(global_group_links[k].group_j, &map);
    /* Use the offset to find the group's root. */
    const size_t root_i = fof_find(find_i, global_group_index);
    const size_t root_j = fof_find(find_j, global_group_index);
    const size_t group_i = global_group_id[root_i];
    const size_t group_j = global_group_id[root_j];
    if (group_i == group_j) continue;
    /* Update roots accordingly. */
    const size_t size_i = global_group_size[root_i];
    const size_t size_j = global_group_size[root_j];
#ifdef UNION_BY_SIZE_OVER_MPI
    if (size_i < size_j) {
      global_group_index[root_i] = root_j;
      global_group_size[root_j] += size_i;
    } else {
      global_group_index[root_j] = root_i;
      global_group_size[root_i] += size_j;
    }
#else
    if (group_j < group_i) {
      global_group_index[root_i] = root_j;
      global_group_size[root_j] += size_i;
    } else {
      global_group_index[root_j] = root_i;
      global_group_size[root_i] += size_j;
    }
#endif
  }
  hashmap_free(&map);
  if (verbose)
    message("global_group_index construction took: %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());
  tic = getticks();
  /* Update each group locally with new root information. */
  for (int i = 0; i < group_count; i++) {
    const size_t group_id = global_group_id[i];
    const size_t offset = fof_find(global_group_index[i], global_group_index);
    const size_t new_root = global_group_id[offset];
    /* If the group is local update its root and size. */
    if (is_local(group_id, nr_gparts) && new_root != group_id) {
      group_index[group_id - node_offset] = new_root;
      group_size[group_id - node_offset] -= orig_global_group_size[i];
    }
    /* If the group linked to a local root update its size. */
    if (is_local(new_root, nr_gparts) && new_root != group_id) {
      /* Use group sizes before Union-Find */
      group_size[new_root - node_offset] += orig_global_group_size[i];
    }
  }
  if (verbose)
    message("Updating groups locally took: %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());
  /* Clean up memory. */
  swift_free("fof_global_group_links", global_group_links);
  swift_free("fof_global_group_index", global_group_index);
  swift_free("fof_global_group_size", global_group_size);
  swift_free("fof_global_group_id", global_group_id);
  swift_free("fof_orig_global_group_size", orig_global_group_size);
  if (verbose) {
    message("link_foreign_fragmens() took (FOF SCALING): %.3f %s.",
            clocks_from_ticks(getticks() - tic_total), clocks_getunit());
  }
#endif /* WITH_MPI */
}
/**
 * @brief Compute the local size of each FOF group fragment.
 *
 * @param props The properties of the FOF scheme.
 * @param s The #space containing the particles.
 */
void fof_compute_local_sizes(struct fof_props *props, struct space *s) {
  const int verbose = s->e->verbose;
  struct gpart *gparts = s->gparts;
  const size_t nr_gparts = s->nr_gparts;
  const ticks tic_total = getticks();
  if (engine_rank == 0 && verbose)
    message("Size of hash table element: %ld", sizeof(hashmap_element_t));
#ifdef WITH_MPI
  const ticks comms_tic = getticks();
  /* Determine number of gparts on lower numbered MPI ranks */
  const long long nr_gparts_local = s->nr_gparts;
  long long nr_gparts_cumulative;
  MPI_Scan(&nr_gparts_local, &nr_gparts_cumulative, 1, MPI_LONG_LONG, MPI_SUM,
           MPI_COMM_WORLD);
  if (verbose)
    message("MPI_Scan Imbalance took: %.3f %s.",
            clocks_from_ticks(getticks() - comms_tic), clocks_getunit());
  /* Reset global variable containing the rank particle count offset */
  node_offset = nr_gparts_cumulative - nr_gparts_local;
#endif
  /* Compute the group sizes of the local fragments
   * (in non-MPI land that is the final group size of the haloes) */
  const ticks tic_calc_group_size = getticks();
  threadpool_map(&s->e->threadpool, fof_calc_group_size_mapper, gparts,
                 nr_gparts, sizeof(struct gpart), threadpool_auto_chunk_size,
                 s);
  if (verbose)
    message("FOF calc group size took (FOF SCALING): %.3f %s.",
            clocks_from_ticks(getticks() - tic_calc_group_size),
            clocks_getunit());
  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic_total),
            clocks_getunit());
}
/**
 * @brief Compute all the group properties
 *
 * @param props The properties of the FOF scheme.
 * @param bh_props The properties of the black hole scheme.
 * @param constants The physical constants in internal units.
 * @param cosmo The current cosmological model.
 * @param s The #space containing the particles.
 * @param dump_debug_results Are we writing txt-file debug catalogues including
 * BH-seeding info?
 * @param dump_results Do we want to write the group catalogue to a hdf5 file?
 * @param seed_black_holes Do we want to seed black holes in haloes?
 */
void fof_assign_group_ids(struct fof_props *props, struct space *s) {
  const int verbose = s->e->verbose;
#ifdef WITH_MPI
  const int nr_nodes = s->e->nr_nodes;
#endif
  const ticks tic_total = getticks();
  struct gpart *gparts = s->gparts;
  const size_t nr_gparts = s->nr_gparts;
  const size_t min_group_size = props->min_group_size;
  const size_t group_id_offset = props->group_id_offset;
  const size_t group_id_default = props->group_id_default;
  size_t num_groups_local = 0;
  size_t num_parts_in_groups_local = 0;
  size_t max_group_size_local = 0;
  /* Local copy of the arrays */
  size_t *restrict group_index = props->group_index;
  size_t *restrict group_size = props->group_size;
  const ticks tic_num_groups_calc = getticks();
  for (size_t i = 0; i < nr_gparts; i++) {
#ifdef WITH_MPI
    /* Find the total number of groups. */
    if (group_index[i] == i + node_offset && group_size[i] >= min_group_size)
      num_groups_local++;
#else
    /* Find the total number of groups. */
    if (group_index[i] == i && group_size[i] >= min_group_size)
      num_groups_local++;
#endif
    /* Find the total number of particles in groups. */
    if (group_size[i] >= min_group_size)
      num_parts_in_groups_local += group_size[i];
    /* Find the largest group. */
    if (group_size[i] > max_group_size_local)
      max_group_size_local = group_size[i];
  }
  if (verbose)
    message(
        "Calculating the total no. of local groups took: (FOF SCALING): %.3f "
        "%s.",
        clocks_from_ticks(getticks() - tic_num_groups_calc), clocks_getunit());
  /* Sort the groups in descending order based upon size and re-label their
   * IDs 0-num_groups. */
  struct group_length *high_group_sizes = NULL;
  size_t group_count = 0;
  if (swift_memalign("fof_high_group_sizes", (void **)&high_group_sizes, 32,
                     num_groups_local * sizeof(struct group_length)) != 0)
    error("Failed to allocate list of large groups.");
  /* Store the group_sizes and their offset. */
  for (size_t i = 0; i < nr_gparts; i++) {
#ifdef WITH_MPI
    if (group_index[i] == i + node_offset && group_size[i] >= min_group_size) {
      high_group_sizes[group_count].index = node_offset + i;
      high_group_sizes[group_count++].size = group_size[i];
    }
#else
    if (group_index[i] == i && group_size[i] >= min_group_size) {
      high_group_sizes[group_count].index = i;
      high_group_sizes[group_count++].size = group_size[i];
    }
#endif
  }
  props->high_group_sizes = high_group_sizes;
  ticks tic = getticks();
  /* Find global properties. */
  long long num_groups = 0, num_parts_in_groups = 0, max_group_size = 0;
#ifdef WITH_MPI
  MPI_Allreduce(&num_groups_local, &num_groups, 1, MPI_LONG_LONG_INT, MPI_SUM,
                MPI_COMM_WORLD);
  MPI_Reduce(&num_parts_in_groups_local, &num_parts_in_groups, 1,
             MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&max_group_size_local, &max_group_size, 1, MPI_LONG_LONG_INT,
             MPI_MAX, 0, MPI_COMM_WORLD);
#else
  num_groups = num_groups_local;
  num_parts_in_groups = num_parts_in_groups_local;
  max_group_size = max_group_size_local;
#endif /* WITH_MPI */
  props->num_groups = num_groups;
  // message("num_groups_local=%zd", num_groups_local);
  /* Find number of groups on lower numbered MPI ranks */
#ifdef WITH_MPI
  long long nglocal = num_groups_local;
  long long ngsum;
  MPI_Scan(&nglocal, &ngsum, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
  props->num_groups_prev = (size_t)(ngsum - nglocal);
#endif /* WITH_MPI */
  if (verbose)
    message("Finding the total no. of groups took: (FOF SCALING): %.3f %s.",
            clocks_from_ticks(getticks() - tic_num_groups_calc),
            clocks_getunit());
  /* Sort local groups into descending order of size */
  qsort(high_group_sizes, num_groups_local, sizeof(struct group_length),
        cmp_func_group_size);
  tic = getticks();
  /* Set default group ID for all particles */
  threadpool_map(&s->e->threadpool, fof_set_initial_group_id_mapper, s->gparts,
                 s->nr_gparts, sizeof(struct gpart), threadpool_auto_chunk_size,
                 (void *)&group_id_default);
  if (verbose)
    message("Setting default group ID took: %.3f %s.",
            clocks_from_ticks(getticks() - tic), clocks_getunit());
  /* Assign final group IDs to local root particles where the global root is
   * on this node and the group is large enough. Within a node IDs are
   * assigned in descending order of particle number. */
  for (size_t i = 0; i < num_groups_local; i++) {
#ifdef WITH_MPI
    gparts[high_group_sizes[i].index - node_offset].fof_data.group_id =
        group_id_offset + i + props->num_groups_prev;
#else
    gparts[high_group_sizes[i].index].fof_data.group_id = group_id_offset + i;
#endif
  }
#ifdef WITH_MPI
  /* Now, for each local root where the global root is on some other node
   * AND the total size of the group is >= min_group_size we need to
   * retrieve the gparts.group_id we just assigned to the global root.
   *
   * Will do that by sending the group_index of these local roots to the
   * node where their global root is stored and receiving back the new
   * group_id associated with that particle.
   *
   * Identify local roots with global root on another node and large enough
   * group_size. Store index of the local and global roots in these cases.
   *
   * NOTE: if group_size only contains the total FoF mass for global roots,
   * then we have to communicate ALL fragments where the global root is not
   * on this node. Hence the commented out extra conditions below.*/
  size_t nsend = 0;
  for (size_t i = 0; i < nr_gparts; i += 1) {
    if ((!is_local(group_index[i],
                   nr_gparts))) { /* && (group_size[i] >= min_group_size)) { */
      nsend += 1;
    }
  }
  struct fof_final_index *fof_index_send =
      (struct fof_final_index *)swift_malloc(
          "fof_index_send", sizeof(struct fof_final_index) * nsend);
  nsend = 0;
  for (size_t i = 0; i < nr_gparts; i += 1) {
    if ((!is_local(group_index[i],
                   nr_gparts))) { /* && (group_size[i] >= min_group_size)) { */
      fof_index_send[nsend].local_root = node_offset + i;
      fof_index_send[nsend].global_root = group_index[i];
      nsend += 1;
    }
  }
  /* Sort by global root - this puts the groups in order of which node they're
   * stored on */
  qsort(fof_index_send, nsend, sizeof(struct fof_final_index),
        compare_fof_final_index_global_root);
  /* Determine range of global indexes (i.e. particles) on each node */
  props->num_on_node =
      (size_t *)swift_malloc("fof_num_on_node", nr_nodes * sizeof(size_t));
  MPI_Allgather(&nr_gparts, sizeof(size_t), MPI_BYTE, props->num_on_node,
                sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD);
  props->first_on_node =
      (size_t *)swift_malloc("fof_first_on_node", nr_nodes * sizeof(size_t));
  props->first_on_node[0] = 0;
  for (int i = 1; i < nr_nodes; i++)
    props->first_on_node[i] =
        props->first_on_node[i - 1] + props->num_on_node[i - 1];
  /* Determine how many entries go to each node */
  int *sendcount = (int *)calloc(nr_nodes, sizeof(int));
  int dest = 0;
  for (size_t i = 0; i < nsend; i += 1) {
    while ((fof_index_send[i].global_root >=
            props->first_on_node[dest] + props->num_on_node[dest]) ||
           (props->num_on_node[dest] == 0))
      dest += 1;
    if (dest >= nr_nodes) error("Node index out of range!");
    sendcount[dest] += 1;
  }
  int *recvcount = NULL, *sendoffset = NULL, *recvoffset = NULL;
  size_t nrecv = 0;
  fof_compute_send_recv_offsets(nr_nodes, sendcount, &recvcount, &sendoffset,
                                &recvoffset, &nrecv);
  struct fof_final_index *fof_index_recv =
      (struct fof_final_index *)swift_malloc(
          "fof_index_recv", nrecv * sizeof(struct fof_final_index));
  /* Exchange group indexes */
  MPI_Alltoallv(fof_index_send, sendcount, sendoffset, fof_final_index_type,
                fof_index_recv, recvcount, recvoffset, fof_final_index_type,
                MPI_COMM_WORLD);
  /* For each received global root, look up the group ID we assigned and store
   * it in the struct */
  for (size_t i = 0; i < nrecv; i += 1) {
    if ((fof_index_recv[i].global_root < node_offset) ||
        (fof_index_recv[i].global_root >= node_offset + nr_gparts)) {
      error("Received global root index out of range!");
    }
    fof_index_recv[i].global_root =
        gparts[fof_index_recv[i].global_root - node_offset].fof_data.group_id;
  }
  /* Send the result back */
  MPI_Alltoallv(fof_index_recv, recvcount, recvoffset, fof_final_index_type,
                fof_index_send, sendcount, sendoffset, fof_final_index_type,
                MPI_COMM_WORLD);
  /* Update local gparts.group_id */
  for (size_t i = 0; i < nsend; i += 1) {
    if ((fof_index_send[i].local_root < node_offset) ||
        (fof_index_send[i].local_root >= node_offset + nr_gparts)) {
      error("Sent local root index out of range!");
    }
    gparts[fof_index_send[i].local_root - node_offset].fof_data.group_id =
        fof_index_send[i].global_root;
  }
  free(sendcount);
  free(recvcount);
  free(sendoffset);
  free(recvoffset);
  swift_free("fof_index_send", fof_index_send);
  swift_free("fof_index_recv", fof_index_recv);
#endif /* WITH_MPI */
  size_t max_id = 0;
  /* Assign every particle the group_id of its local root. */
  for (size_t i = 0; i < nr_gparts; i++) {
    if (gpart_is_ignorable(&gparts[i])) continue;
    if (gpart_is_attachable(&gparts[i])) continue;
    const size_t root = fof_find_local(i, nr_gparts, group_index);
    gparts[i].fof_data.group_id = gparts[root].fof_data.group_id;
    if (gparts[i].fof_data.group_id != fof_props_default_group_id)
      max_id = max(max_id, gparts[i].fof_data.group_id);
  }
  /* Give some info */
  if (engine_rank == 0) {
    message(
        "No. of groups: %lld. No. of particles in groups: %lld. No. of "
        "particles not in groups: %lld.",
        num_groups, num_parts_in_groups,
        s->e->total_nr_gparts - num_parts_in_groups);
    message("Largest group (linkables only) by size: %lld", max_group_size);
  }
  /* Free data we are done with */
  swift_free("fof_group_index", props->group_index);
  props->group_index = NULL;
  if (verbose)
    message("took: %.3f %s.", clocks_from_ticks(getticks() - tic_total),
            clocks_getunit());
}
/**
 * @brief Compute all the group properties
 *
 * @param props The properties of the FOF scheme.
 * @param bh_props The properties of the black hole scheme.
 * @param constants The physical constants in internal units.
 * @param cosmo The current cosmological model.
 * @param s The #space containing the particles.
 * @param dump_debug_results Are we writing txt-file debug catalogues including
 * BH-seeding info?
 * @param dump_results Do we want to write the group catalogue to a hdf5 file?
 * @param seed_black_holes Do we want to seed black holes in haloes?
 */
void fof_compute_group_props(struct fof_props *props,
                             const struct black_holes_props *bh_props,
                             const struct phys_const *constants,
                             const struct cosmology *cosmo, struct space *s,
                             const int dump_results,
                             const int dump_debug_results,
                             const int seed_black_holes) {
  const int verbose = s->e->verbose;
  const ticks tic_total = getticks();
  const size_t num_groups = props->num_groups;
  /* Allocate and initialise a group mass and centre of mass array
     for *all* groups. */
  if (swift_memalign("fof_group_mass", (void **)&props->group_mass, 32,
                     num_groups * sizeof(double)) != 0)
    error("Failed to allocate list of group masses for FOF search.");
  if (swift_memalign("fof_group_size", (void **)&props->final_group_size, 32,
                     num_groups * sizeof(long long)) != 0)
    error("Failed to allocate list of group masses for FOF search.");
  if (swift_memalign("fof_group_index", (void **)&props->final_group_index, 32,
                     num_groups * sizeof(long long)) != 0)
    error("Failed to allocate list of group masses for FOF search.");
  if (swift_memalign("fof_has_black_hole", (void **)&props->has_black_hole, 32,
                     num_groups * sizeof(char)) != 0)
    error("Failed to allocate list of black holes for FOF search.");
  if (swift_memalign("fof_group_centre_of_mass",
                     (void **)&props->group_centre_of_mass, 32,
                     num_groups * 3 * sizeof(double)) != 0)
    error("Failed to allocate list of group CoM for FOF search.");
  if (swift_memalign("fof_group_radii", (void **)&props->group_radii, 32,
                     num_groups * sizeof(float)) != 0)
    error("Failed to allocate list of group radii for FOF search.");
  if (swift_memalign("fof_max_part_density", (void **)&props->max_part_density,
                     32, num_groups * sizeof(float)) != 0)
    error("Failed to allocate list of max group densities for FOF search.");
  bzero(props->group_mass, num_groups * sizeof(double));
  bzero(props->final_group_size, num_groups * sizeof(long long));
  bzero(props->final_group_index, num_groups * sizeof(long long));
  bzero(props->has_black_hole, num_groups * sizeof(char));
  bzero(props->group_centre_of_mass, num_groups * 3 * sizeof(double));
  bzero(props->group_radii, num_groups * sizeof(float));
  bzero(props->max_part_density, num_groups * sizeof(float));
  const ticks tic_props = getticks();
  /* Now, compute all things */
  int number_of_local_seeds = 0, number_of_global_seeds = 0;
  fof_calc_group_mass(props, s, &number_of_local_seeds,
                      &number_of_global_seeds);
  if (verbose)
    message("Computing group properties took: %.3f %s.",
            clocks_from_ticks(getticks() - tic_props), clocks_getunit());
  /* All MPI ranks now have information about all haloes, even
   * the ones where there are no local particles in. */
  /* Dump group data (only rank 0 since everyone has everything anyway). */
  if (dump_results && engine_rank == 0) {
#ifdef HAVE_HDF5
    write_fof_hdf5_catalogue(props, s->e);
#else
    error("Can't dump hdf5 catalogues with hdf5 switched off!");
#endif
  }
  if (dump_debug_results && engine_rank == 0) {
    char output_file_name[PARSER_MAX_LINE_SIZE];
    snprintf(output_file_name, PARSER_MAX_LINE_SIZE, "%s", props->base_name);
#ifdef WITH_MPI
    snprintf(output_file_name + strlen(output_file_name), FILENAME_BUFFER_SIZE,
             "_mpi.dat");
#else
    snprintf(output_file_name + strlen(output_file_name), FILENAME_BUFFER_SIZE,
             ".dat");
#endif
    fof_dump_group_data(props, s->e->nodeID, s->e->nr_nodes, output_file_name,
                        s, num_groups);
  }
  /* Seed black holes */
  if (seed_black_holes) {
    fof_seed_black_holes(props, bh_props, constants, cosmo, s,
                         number_of_local_seeds, number_of_global_seeds);
  }
  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic_total),
            clocks_getunit());
}
/**
 * @brief Free all the arrays we allocated on the way
 */
void fof_free_arrays(struct fof_props *props) {
  swift_free("fof_high_group_sizes", props->high_group_sizes);
  swift_free("fof_group_mass", props->group_mass);
  swift_free("fof_group_size", props->final_group_size);
  swift_free("fof_group_index", props->final_group_index);
  swift_free("fof_group_centre_of_mass", props->group_centre_of_mass);
  swift_free("fof_group_radii", props->group_radii);
  swift_free("fof_max_part_density", props->max_part_density);
  swift_free("fof_has_black_hole", props->has_black_hole);
  swift_free("fof_distance", props->distance_to_link);
  swift_free("fof_attach_index", props->attach_index);
  swift_free("fof_found_attach", props->found_attachable_link);
  swift_free("fof_group_size", props->group_size);
  props->group_mass = NULL;
  props->final_group_size = NULL;
  props->final_group_index = NULL;
  props->group_centre_of_mass = NULL;
  props->group_radii = NULL;
  props->max_part_density = NULL;
  props->has_black_hole = NULL;
  props->group_size = NULL;
#ifdef WITH_MPI
  swift_free("fof_num_on_node", props->num_on_node);
  swift_free("fof_first_on_node", props->first_on_node);
  props->num_on_node = NULL;
  props->first_on_node = NULL;
#endif
}
void fof_struct_dump(const struct fof_props *props, FILE *stream) {
  struct fof_props temp = *props;
  temp.num_groups = 0;
  temp.group_link_count = 0;
  temp.group_links_size = 0;
  temp.group_index = NULL;
  temp.group_size = NULL;
  temp.group_mass = NULL;
  temp.final_group_size = NULL;
  temp.group_centre_of_mass = NULL;
  temp.group_radii = NULL;
  temp.max_part_density = NULL;
  temp.group_links = NULL;
  restart_write_blocks((void *)&temp, sizeof(struct fof_props), 1, stream,
                       "fof_props", "fof_props");
}
void fof_struct_restore(struct fof_props *props, FILE *stream) {
  restart_read_blocks((void *)props, sizeof(struct fof_props), 1, stream, NULL,
                      "fof_props");
  fof_set_current_types(props);
}
#endif /* WITH_FOF */