diff --git a/src/fof.c b/src/fof.c
index af530af6aadfd8a8c1ea8529330156f84278a534..fd6a5cc0ce2a8fb6200ec510fcdbf44624b4ec07 100644
--- a/src/fof.c
+++ b/src/fof.c
@@ -52,6 +52,9 @@
 #define UNION_BY_SIZE_OVER_MPI (1)
 #define FOF_COMPRESS_PATHS_MIN_LENGTH (2)
 
+/* Are we timing the FOF search itself, without calculating group properties? */
+//#define WITHOUT_GROUP_PROPS
+
 /**
  * @brief Properties of a group used for black hole seeding
  */
@@ -177,6 +180,60 @@ void fof_create_mpi_types() {
 #endif
 }
 
+/**
+ * @brief Mapper function to set the initial group indices.
+ *
+ * @param map_data The array of group indices.
+ * @param num_elements Chunk size.
+ * @param extra_data Pointer to the first group index.
+ */
+void fof_set_initial_group_index_mapper(void *map_data, int num_elements,
+                                        void *extra_data) {
+  size_t *group_index = (size_t *)map_data;
+  size_t *group_index_start = (size_t *)extra_data;
+
+  const ptrdiff_t offset = group_index - group_index_start;
+
+  for (int i = 0; i < num_elements; ++i) {
+    group_index[i] = i + offset;
+  }
+}
+
+/**
+ * @brief Mapper function to set the initial group sizes.
+ *
+ * @param map_data The array of group sizes.
+ * @param num_elements Chunk size.
+ * @param extra_data N/A.
+ */
+void fof_set_initial_group_size_mapper(void *map_data, int num_elements,
+                                       void *extra_data) {
+
+  size_t *group_size = (size_t *)map_data;
+  for (int i = 0; i < num_elements; ++i) {
+    group_size[i] = 1;
+  }
+}
+
+/**
+ * @brief Mapper function to set the initial group IDs.
+ *
+ * @param map_data The array of #gpart%s.
+ * @param num_elements Chunk size.
+ * @param extra_data Pointer to the default group ID.
+ */
+void fof_set_initial_group_id_mapper(void *map_data, int num_elements,
+                                     void *extra_data) {
+
+  /* Unpack the information */
+  struct gpart *gparts = (struct gpart *)map_data;
+  const size_t group_id_default = *((size_t *)extra_data);
+
+  for (int i = 0; i < num_elements; ++i) {
+    gparts[i].fof_data.group_id = group_id_default;
+  }
+}
+
 /**
  * @brief Allocate the memory and initialise the arrays for a FOF calculation.
  *
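For orientation: SWIFT's threadpool hands each mapper a contiguous chunk of the parent array, and `fof_set_initial_group_index_mapper` recovers the chunk's global offset by pointer arithmetic against the array start passed through `extra_data`. A minimal Python sketch of the same chunked-initialisation pattern (illustrative only; the names, and the use of slices instead of pointers, are assumptions, not SWIFT API):

```python
import numpy as np
from concurrent.futures import ThreadPoolExecutor

def set_initial_group_index(chunk, offset):
    # Each particle starts as the root of its own group: group_index[i] = i.
    chunk[:] = np.arange(offset, offset + len(chunk))

group_index = np.empty(1000, dtype=np.int64)
chunk_size = 256
with ThreadPoolExecutor() as pool:
    for start in range(0, len(group_index), chunk_size):
        # numpy slices are views, so each task writes its region in place.
        pool.submit(set_initial_group_index,
                    group_index[start:start + chunk_size], start)

assert (group_index == np.arange(1000)).all()
```

Because every chunk writes only its own slice, no synchronisation is needed; the C mappers rely on the same property.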
@@ -193,6 +250,7 @@ void fof_allocate(const struct space *s, const long long total_nr_DM_particles,
   const double mean_inter_particle_sep =
       s->dim[0] / cbrt((double)total_nr_DM_particles);
   const double l_x = props->l_x_ratio * mean_inter_particle_sep;
+  int verbose = s->e->verbose;
 
   /* Are we using the absolute value or the one derived from the mean
      inter-particle separation? */
@@ -222,32 +280,36 @@ void fof_allocate(const struct space *s, const long long total_nr_DM_particles,
         "check more than one layer of top-level cells for links.");
 #endif
 
-  const size_t nr_local_gparts = s->nr_gparts;
-  struct gpart *gparts = s->gparts;
-
   /* Allocate and initialise a group index array. */
   if (swift_memalign("fof_group_index", (void **)&props->group_index, 64,
-                     nr_local_gparts * sizeof(size_t)) != 0)
+                     s->nr_gparts * sizeof(size_t)) != 0)
     error("Failed to allocate list of particle group indices for FOF search.");
 
   /* Allocate and initialise a group size array. */
   if (swift_memalign("fof_group_size", (void **)&props->group_size, 64,
-                     nr_local_gparts * sizeof(size_t)) != 0)
+                     s->nr_gparts * sizeof(size_t)) != 0)
     error("Failed to allocate list of group size for FOF search.");
 
-  /* Set initial group ID of the gparts */
-  const size_t group_id_default = props->group_id_default;
-  for (size_t i = 0; i < nr_local_gparts; i++) {
-    gparts[i].fof_data.group_id = group_id_default;
-  }
+  ticks tic = getticks();
 
-  /* Set initial group index and group size */
-  size_t *group_index = props->group_index;
-  size_t *group_size = props->group_size;
-  for (size_t i = 0; i < nr_local_gparts; i++) {
-    group_index[i] = i;
-    group_size[i] = 1;
-  }
+  /* Set initial group index */
+  threadpool_map(&s->e->threadpool, fof_set_initial_group_index_mapper,
+                 props->group_index, s->nr_gparts, sizeof(size_t), 0,
+                 props->group_index);
+
+  if (verbose)
+    message("Setting initial group index took: %.3f %s.",
+            clocks_from_ticks(getticks() - tic), clocks_getunit());
+
+  tic = getticks();
+
+  /* Set initial group sizes */
+  threadpool_map(&s->e->threadpool, fof_set_initial_group_size_mapper,
+                 props->group_size, s->nr_gparts, sizeof(size_t), 0, NULL);
+
+  if (verbose)
+    message("Setting initial group sizes took: %.3f %s.",
+            clocks_from_ticks(getticks() - tic), clocks_getunit());
 
 #ifdef SWIFT_DEBUG_CHECKS
   ti_current = s->e->ti_current;
@@ -375,6 +437,7 @@ __attribute__((always_inline)) INLINE static size_t fof_find_global(
 
 #endif /* WITH_MPI */
 
+#ifndef WITHOUT_GROUP_PROPS
 /**
  * @brief Finds the local root ID of the group a particle exists in
  * when group_index contains globally unique identifiers -
@@ -409,6 +472,7 @@ __attribute__((always_inline)) INLINE static size_t fof_find_local(
   return root;
 #endif
 }
+#endif /* #ifndef WITHOUT_GROUP_PROPS */
 
 /**
  * @brief Finds the local root ID of the group a particle exists in.
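`fof_find_local` and `fof_find_global` are the find half of a union-find structure: they walk `group_index` up to the fragment's root and compress paths at least `FOF_COMPRESS_PATHS_MIN_LENGTH` long. A hedged sketch of that operation in Python (illustrative only; the real code also handles the local/global offset bookkeeping):

```python
def fof_find(i, group_index, min_length=2):
    # Walk the pointers up to the root of i's group.
    root = i
    depth = 0
    while group_index[root] != root:
        root = group_index[root]
        depth += 1
    # Compress sufficiently long paths so later finds are near O(1).
    if depth >= min_length:
        while group_index[i] != root:
            group_index[i], i = root, group_index[i]
    return root

group_index = [1, 2, 2]          # 0 -> 1 -> 2 (root)
assert fof_find(0, group_index) == 2
assert group_index[0] == 2       # the path was compressed
```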
@@ -1979,6 +2043,58 @@ void fof_dump_group_data(const struct fof_props *props,
   fclose(file);
 }
 
+struct mapper_data {
+  size_t *group_index;
+  size_t *group_size;
+  size_t nr_gparts;
+  struct gpart *space_gparts;
+};
+
+/**
+ * @brief Mapper function to set the roots of #gpart%s going to other MPI ranks.
+ *
+ * @param map_data The list of outgoing local cells.
+ * @param num_elements Chunk size.
+ * @param extra_data Pointer to mapper data.
+ */
+void fof_set_outgoing_root_mapper(void *map_data, int num_elements,
+                                  void *extra_data) {
+#ifdef WITH_MPI
+
+  /* Unpack the data */
+  struct cell **local_cells = (struct cell **)map_data;
+  const struct mapper_data *data = (struct mapper_data *)extra_data;
+  const size_t *const group_index = data->group_index;
+  const size_t *const group_size = data->group_size;
+  const size_t nr_gparts = data->nr_gparts;
+  const struct gpart *const space_gparts = data->space_gparts;
+
+  /* Loop over the out-going local cells */
+  for (int i = 0; i < num_elements; ++i) {
+
+    /* Get the cell and its gparts */
+    struct cell *local_cell = local_cells[i];
+    struct gpart *gparts = local_cell->grav.parts;
+
+    /* Make a list of particle offsets into the global gparts array. */
+    const size_t *const offset =
+        group_index + (ptrdiff_t)(gparts - space_gparts);
+
+    /* Set each particle's root and group properties found in the local FOF. */
+    for (int k = 0; k < local_cell->grav.count; k++) {
+      const size_t root =
+          fof_find_global(offset[k] - node_offset, group_index, nr_gparts);
+
+      gparts[k].fof_data.group_id = root;
+      gparts[k].fof_data.group_size = group_size[root - node_offset];
+    }
+  }
+
+#else
+  error("Calling MPI function in non-MPI mode");
+#endif
+}
+
 /**
  * @brief Search foreign cells for links and communicate any found to the
  * appropriate node.
@@ -2031,6 +2147,12 @@ void fof_search_foreign_cells(struct fof_props *props, const struct space *s) {
     }
   }
 
+  if (verbose)
+    message(
+        "Finding max no. of cells + offset IDs "
+        "took: %.3f %s.",
+        clocks_from_ticks(getticks() - tic), clocks_getunit());
+
   const int cell_pair_size = num_cells_in * num_cells_out;
 
   if (swift_memalign("fof_group_links", (void **)&props->group_links,
@@ -2043,6 +2165,8 @@ void fof_search_foreign_cells(struct fof_props *props, const struct space *s) {
                      cell_pair_size * sizeof(struct cell_pair_indices)) != 0)
     error("Error while allocating memory for FOF cell pair indices");
 
+  ticks tic_pairs = getticks();
+
   /* Loop over cells_in and cells_out for each proxy and find which cells are in
    * range of each other to perform the FOF search. Store local cells that are
    * touching foreign cells in a list. */
@@ -2085,26 +2209,47 @@ void fof_search_foreign_cells(struct fof_props *props, const struct space *s) {
     }
   }
 
+  if (verbose)
+    message(
+        "Finding local/foreign cell pairs "
+        "took: %.3f %s.",
+        clocks_from_ticks(getticks() - tic_pairs), clocks_getunit());
+
+  ticks tic_set_roots = getticks();
+
   /* Set the root of outgoing particles. */
-  for (int i = 0; i < e->nr_proxies; i++) {
 
+  /* Allocate array of outgoing cells and populate it */
+  struct cell **local_cells = malloc(num_cells_out * sizeof(struct cell *));
+  int count = 0;
+  for (int i = 0; i < e->nr_proxies; i++) {
     for (int j = 0; j < e->proxies[i].nr_cells_out; j++) {
 
-      struct cell *restrict local_cell = e->proxies[i].cells_out[j];
-      struct gpart *gparts = local_cell->grav.parts;
-
-      /* Make a list of particle offsets into the global gparts array. */
-      size_t *const offset = group_index + (ptrdiff_t)(gparts - s->gparts);
+      /* Only include gravity cells. */
+      if (e->proxies[i].cells_out_type[j] & proxy_cell_type_gravity) {
 
-      /* Set each particle's root and group properties found in the local FOF.*/
-      for (int k = 0; k < local_cell->grav.count; k++) {
-        const size_t root =
-            fof_find_global(offset[k] - node_offset, group_index, nr_gparts);
-        gparts[k].fof_data.group_id = root;
-        gparts[k].fof_data.group_size = group_size[root - node_offset];
+        local_cells[count] = e->proxies[i].cells_out[j];
+        ++count;
       }
     }
   }
+
+  /* Now set the roots */
+  struct mapper_data data;
+  data.group_index = group_index;
+  data.group_size = group_size;
+  data.nr_gparts = nr_gparts;
+  data.space_gparts = s->gparts;
+  threadpool_map(&e->threadpool, fof_set_outgoing_root_mapper, local_cells,
+                 num_cells_out, sizeof(struct cell **), 0, &data);
+
+  if (verbose)
+    message(
+        "Initialising particle roots "
+        "took: %.3f %s.",
+        clocks_from_ticks(getticks() - tic_set_roots), clocks_getunit());
+
+  free(local_cells);
 
   if (verbose)
     message(
@@ -2112,6 +2257,7 @@ void fof_search_foreign_cells(struct fof_props *props, const struct space *s) {
         "took: %.3f %s.",
         clocks_from_ticks(getticks() - tic), clocks_getunit());
 
+  /* Activate the tasks exchanging all the required gparts */
   engine_activate_gpart_comms(e);
 
   ticks local_fof_tic = getticks();
@@ -2395,7 +2541,11 @@ void fof_search_tree(struct fof_props *props,
   const size_t nr_gparts = s->nr_gparts;
   const size_t min_group_size = props->min_group_size;
+#ifndef WITHOUT_GROUP_PROPS
   const size_t group_id_offset = props->group_id_offset;
+  const size_t group_id_default = props->group_id_default;
+#endif
+
 #ifdef WITH_MPI
   const int nr_nodes = s->e->nr_nodes;
 #endif
@@ -2415,8 +2565,6 @@ void fof_search_tree(struct fof_props *props,
   if (engine_rank == 0 && verbose)
     message("Size of hash table element: %ld", sizeof(hashmap_element_t));
 
-  const size_t group_id_default = props->group_id_default;
-
 #ifdef WITH_MPI
 
   /* Reset global variable */
@@ -2425,8 +2573,16 @@ void fof_search_tree(struct fof_props *props,
   /* Determine number of gparts on lower numbered MPI ranks */
   long long nr_gparts_cumulative;
   long long nr_gparts_local = s->nr_gparts;
+
+  ticks comms_tic = getticks();
+
   MPI_Scan(&nr_gparts_local, &nr_gparts_cumulative, 1, MPI_LONG_LONG, MPI_SUM,
            MPI_COMM_WORLD);
+
+  if (verbose)
+    message("MPI_Scan (incl. imbalance) took: %.3f %s.",
+            clocks_from_ticks(getticks() - comms_tic), clocks_getunit());
+
   node_offset = nr_gparts_cumulative - nr_gparts_local;
 
   snprintf(output_file_name + strlen(output_file_name), FILENAME_BUFFER_SIZE,
@@ -2444,11 +2600,10 @@ void fof_search_tree(struct fof_props *props,
 
   threadpool_map(&s->e->threadpool, fof_calc_group_size_mapper, gparts,
                  nr_gparts, sizeof(struct gpart), 0, s);
-
-  if (verbose)
-    message("FOF calc group size took (scaling): %.3f %s.",
-            clocks_from_ticks(getticks() - tic_calc_group_size),
-            clocks_getunit());
+  if (verbose)
+    message("FOF calc group size took (FOF SCALING): %.3f %s.",
+            clocks_from_ticks(getticks() - tic_calc_group_size),
+            clocks_getunit());
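The `node_offset` computed above is an exclusive prefix sum: the inclusive `MPI_Scan` of the local counts, minus the local count, gives the number of gparts on lower-numbered ranks. For example, counts (4, 7, 5) on ranks 0-2 scan to (4, 11, 16) and give offsets (0, 4, 11). The same computation with mpi4py (a sketch with a made-up local count, not SWIFT code):

```python
from mpi4py import MPI

comm = MPI.COMM_WORLD
nr_gparts_local = 1000 + comm.rank             # stand-in for the local count
cumulative = comm.scan(nr_gparts_local)        # inclusive prefix sum (MPI_SUM)
node_offset = cumulative - nr_gparts_local     # gparts on lower-numbered ranks
print(comm.rank, node_offset)
```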
@@ -2458,14 +2613,25 @@ void fof_search_tree(struct fof_props *props,
 #ifdef WITH_MPI
   if (nr_nodes > 1) {
 
     /* Search for group links across MPI domains. */
     fof_search_foreign_cells(props, s);
 
-    if (verbose)
-      message("fof_search_foreign_cells() took: %.3f %s.",
-              clocks_from_ticks(getticks() - tic_mpi), clocks_getunit());
+    if (verbose) {
+      message("fof_search_foreign_cells() took (FOF SCALING): %.3f %s.",
+              clocks_from_ticks(getticks() - tic_mpi), clocks_getunit());
+
+      message(
+          "fof_search_foreign_cells() + calc_group_size took (FOF SCALING): "
+          "%.3f %s.",
+          clocks_from_ticks(getticks() - tic_total), clocks_getunit());
+    }
   }
 #endif
 
-  size_t num_groups_local = 0, num_parts_in_groups_local = 0,
-      max_group_size_local = 0;
+  size_t num_groups_local = 0;
+#ifndef WITHOUT_GROUP_PROPS
+  size_t num_parts_in_groups_local = 0;
+  size_t max_group_size_local = 0;
+#endif
+
+  ticks tic_num_groups_calc = getticks();
 
   for (size_t i = 0; i < nr_gparts; i++) {
 
@@ -2479,6 +2645,7 @@ void fof_search_tree(struct fof_props *props,
       num_groups_local++;
 #endif
 
+#ifndef WITHOUT_GROUP_PROPS
     /* Find the total number of particles in groups. */
     if (group_size[i] >= min_group_size)
       num_parts_in_groups_local += group_size[i];
@@ -2486,10 +2653,17 @@ void fof_search_tree(struct fof_props *props,
     /* Find the largest group. */
     if (group_size[i] > max_group_size_local)
       max_group_size_local = group_size[i];
+#endif
   }
 
+  if (verbose)
+    message(
+        "Calculating the total no. of local groups took (FOF SCALING): %.3f %s.",
+        clocks_from_ticks(getticks() - tic_num_groups_calc), clocks_getunit());
+
   /* Sort the groups in descending order based upon size and re-label their IDs
    * 0-num_groups. */
+#ifndef WITHOUT_GROUP_PROPS
   struct group_length *high_group_sizes = NULL;
   int group_count = 0;
 
@@ -2514,22 +2688,36 @@ void fof_search_tree(struct fof_props *props,
   }
 
   ticks tic = getticks();
+#endif /* #ifndef WITHOUT_GROUP_PROPS */
 
   /* Find global properties. */
 #ifdef WITH_MPI
   MPI_Allreduce(&num_groups_local, &num_groups, 1, MPI_INT, MPI_SUM,
                 MPI_COMM_WORLD);
+
+  if (verbose)
+    message("Finding the total no. of groups took (FOF SCALING): %.3f %s.",
+            clocks_from_ticks(getticks() - tic_num_groups_calc),
+            clocks_getunit());
+
+#ifndef WITHOUT_GROUP_PROPS
   MPI_Reduce(&num_parts_in_groups_local, &num_parts_in_groups, 1, MPI_INT,
              MPI_SUM, 0, MPI_COMM_WORLD);
   MPI_Reduce(&max_group_size_local, &max_group_size, 1, MPI_INT, MPI_MAX, 0,
              MPI_COMM_WORLD);
+#endif /* #ifndef WITHOUT_GROUP_PROPS */
 #else
   num_groups = num_groups_local;
+
+#ifndef WITHOUT_GROUP_PROPS
   num_parts_in_groups = num_parts_in_groups_local;
   max_group_size = max_group_size_local;
+#endif /* #ifndef WITHOUT_GROUP_PROPS */
 #endif /* WITH_MPI */
   props->num_groups = num_groups;
 
+#ifndef WITHOUT_GROUP_PROPS
+
   /* Find number of groups on lower numbered MPI ranks */
 #ifdef WITH_MPI
   long long nglocal = num_groups_local;
@@ -2538,19 +2726,28 @@ void fof_search_tree(struct fof_props *props,
   const size_t num_groups_prev = (size_t)(ngsum - nglocal);
 #endif /* WITH_MPI */
 
+  if (verbose)
+    message(
+        "Finding the total no. of groups (incl. rank offsets) took "
+        "(FOF SCALING): %.3f %s.",
+        clocks_from_ticks(getticks() - tic_num_groups_calc),
+        clocks_getunit());
+
   /* Sort local groups into descending order of size */
   qsort(high_group_sizes, num_groups_local, sizeof(struct group_length),
-      cmp_func_group_size);
+        cmp_func_group_size);
+
+  tic = getticks();
 
   /* Set default group ID for all particles */
-  for (size_t i = 0; i < nr_gparts; i++)
-    gparts[i].fof_data.group_id = group_id_default;
+  threadpool_map(&s->e->threadpool, fof_set_initial_group_id_mapper, s->gparts,
+                 s->nr_gparts, sizeof(struct gpart), 0,
+                 (void *)&group_id_default);
+
+  if (verbose)
+    message("Setting default group ID took: %.3f %s.",
+            clocks_from_ticks(getticks() - tic), clocks_getunit());
 
-  /*
-    Assign final group IDs to local root particles where the global root is on
-    this node and the group is large enough. Within a node IDs are assigned in
-    descending order of particle number.
-  */
+  /* Assign final group IDs to local root particles where the global root is
+   * on this node and the group is large enough. Within a node IDs are
+   * assigned in descending order of particle number. */
   for (size_t i = 0; i < num_groups_local; i++) {
 #ifdef WITH_MPI
     gparts[high_group_sizes[i].index - node_offset].fof_data.group_id =
@@ -2561,24 +2758,21 @@ void fof_search_tree(struct fof_props *props,
   }
 
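The loop above implements the numbering described in the comment: after the `qsort`, local group `i` (in descending order of size) receives `group_id_offset + num_groups_prev + i`, where `num_groups_prev` counts groups on lower-numbered ranks. A toy illustration of the numbering (all values made up):

```python
sizes = [12, 5, 40, 7]                     # local group sizes
group_id_offset, num_groups_prev = 1, 10   # illustrative values
order = sorted(range(len(sizes)), key=lambda g: -sizes[g])
group_id = {g: group_id_offset + num_groups_prev + i
            for i, g in enumerate(order)}
assert group_id == {2: 11, 0: 12, 3: 13, 1: 14}
```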
 #ifdef WITH_MPI
-  /*
-    Now, for each local root where the global root is on some other node
-    AND the total size of the group is >= min_group_size we need to retrieve
-    the gparts.group_id we just assigned to the global root.
-
-    Will do that by sending the group_index of these lcoal roots to the node
-    where their global root is stored and receiving back the new group_id
-    associated with that particle.
-  */
-
-  /*
-    Identify local roots with global root on another node and large enough
-    group_size. Store index of the local and global roots in these cases.
-
-    NOTE: if group_size only contains the total FoF mass for global roots,
-    then we have to communicate ALL fragments where the global root is not
-    on this node. Hence the commented out extra conditions below.
-  */
+
+  /* Now, for each local root where the global root is on some other node
+   * AND the total size of the group is >= min_group_size we need to
+   * retrieve the gparts.group_id we just assigned to the global root.
+   *
+   * Will do that by sending the group_index of these local roots to the
+   * node where their global root is stored and receiving back the new
+   * group_id associated with that particle.
+   *
+   * Identify local roots with global root on another node and large enough
+   * group_size. Store index of the local and global roots in these cases.
+   *
+   * NOTE: if group_size only contains the total FoF mass for global roots,
+   * then we have to communicate ALL fragments where the global root is not
+   * on this node. Hence the commented out extra conditions below. */
   size_t nsend = 0;
   for (size_t i = 0; i < nr_gparts; i += 1) {
     if ((!is_local(group_index[i],
@@ -2723,6 +2917,7 @@ void fof_search_tree(struct fof_props *props,
 
   /* Free the left-overs */
   swift_free("fof_high_group_sizes", high_group_sizes);
+#endif /* #ifndef WITHOUT_GROUP_PROPS */
   swift_free("fof_group_index", props->group_index);
   swift_free("fof_group_size", props->group_size);
   swift_free("fof_group_mass", props->group_mass);
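The patch also adds a new tool, shown below, that tiles an ICs file to build larger test boxes. Its core is the replica indexing: copy (i, j, k) of the box is written to a contiguous block of the output arrays and shifted by one box length per dimension index. A standalone sketch of that indexing (toy data, not part of the patch):

```python
import numpy as np

def replica_block(pos_orig, box_size, replicate, i, j, k):
    # Flatten the 3-D replica index into a block offset, then shift a copy
    # of the original positions by one box length per dimension index.
    offset = (i * replicate + j) * replicate + k
    return pos_orig + np.array([i, j, k]) * box_size, offset

pos_orig = np.random.rand(8, 3)        # toy positions in a unit box
block, offset = replica_block(pos_orig, 1.0, 4, 1, 2, 3)
assert offset == 1 * 16 + 2 * 4 + 3    # block 27 of 64
```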
+""" + +import h5py as h +import numpy as np +import matplotlib + +matplotlib.use("Agg") +from pylab import * +import os.path +from tqdm import tqdm +from tqdm import trange +import time +from numba import jit, prange +from swiftsimio import Writer +from swiftsimio.units import cosmo_units + +replicate = 1 +box_size = 1 +num_parts = 1 + +@jit(nopython=True, nogil=True) +def shift_pos(pos, pos_orig, i, j, k): + + offset = i * replicate * replicate + j * replicate + k + + # Copy original particle positions + pos[offset * num_parts:(offset + 1) * num_parts] = pos_orig + + # Shift positions + shift = [i * box_size, j * box_size, k * box_size] + + for n in range(offset * num_parts, (offset + 1) * num_parts): + pos[n][0] += shift[0] + pos[n][1] += shift[1] + pos[n][2] += shift[2] + +@jit(nopython=True, parallel=True, nogil=True) +def parallel_replicate(pos, pos_orig): + for i in prange(0,replicate): + for j in prange(0,replicate): + for k in prange(0,replicate): + shift_pos(pos, pos_orig, i, j, k) + +def main(): + + # Parse command line arguments + if len(sys.argv) < 3: + print("Error: pass input file and replication factor (integer) as arguments.") + print("python replicate_ICs.py EAGLE_ICs_50.hdf5 4") + sys.exit() + else: + inputFile = sys.argv[1] + global replicate + replicate = int(sys.argv[2]) + if os.path.exists(inputFile) != 1: + print("\n{} does not exist!\n".format(inputFile1)) + sys.exit() + + # Open ICs + ics_file = h.File(inputFile, "r") + + replicate_factor = replicate * replicate * replicate + + global box_size, num_parts + box_size = ics_file["/Header"].attrs["BoxSize"] + num_parts = ics_file["/Header"].attrs["NumPart_Total"][1] + + print("Box size: {}".format(box_size)) + print("No. of original particles: {}".format(num_parts)) + print("New box size: {}".format(box_size * replicate)) + print("No. of replicated particles: {}".format(num_parts * replicate_factor)) + + # Read input file fields + pos_orig = ics_file["/PartType1/Coordinates"][:, :] + mass_orig = ics_file["/PartType1/Masses"][:][0] + + # Create new arrays + global pos, vel, mass, u, ids + vel = pos = zeros((num_parts * replicate_factor, 3)) + ids = linspace(1, num_parts * replicate_factor, num_parts * replicate_factor) + mass = zeros(num_parts * replicate_factor) + mass[:] = mass_orig + u = smoothing_length = zeros(num_parts * replicate_factor) + + start = time.time() + # Replicate particles + parallel_replicate(pos, pos_orig) + + print("Replicating particles took: %.3ss." 
+    start = time.time()
+
+    # Create output file
+    base_filename = os.path.basename(inputFile)
+    filename, file_extension = os.path.splitext(base_filename)
+    outputFile = filename + "_x" + str(replicate) + ".hdf5"
+
+    out_file = h.File(outputFile, "w")
+
+    # Copy Header and set new values
+    ics_file.copy("/Header", out_file)
+    grp = out_file["/Header"]
+    grp.attrs["BoxSize"] = box_size * replicate
+    grp.attrs["NumPart_Total"] = [0, num_parts * replicate_factor, 0, 0, 0, 0]
+    grp.attrs["NumPart_ThisFile"] = [0, num_parts * replicate_factor, 0, 0, 0, 0]
+
+    # Copy Units
+    ics_file.copy("/Units", out_file)
+
+    # Particle group
+    grp = out_file.create_group("/PartType1")
+    grp.create_dataset("Coordinates", data=pos, dtype="d")
+    grp.create_dataset("Velocities", data=vel, dtype="f")
+    grp.create_dataset("Masses", data=mass, dtype="f")
+    grp.create_dataset("SmoothingLength", data=smoothing_length, dtype="f")
+    grp.create_dataset("InternalEnergy", data=u, dtype="f")
+    grp.create_dataset("ParticleIDs", data=ids, dtype="L")
+
+    print("Writing output file took: %.3fs." % (time.time() - start))
+
+
+if __name__ == "__main__":
+    main()
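A quick way to sanity-check the replicated output, assuming the example invocation from the docstring (the file names are illustrative):

```python
import h5py
import numpy as np

# After: python parallel_replicate_ICs.py EAGLE_ICs_50.hdf5 4
with h5py.File("EAGLE_ICs_50_x4.hdf5", "r") as f:
    header = f["/Header"].attrs
    pos = f["/PartType1/Coordinates"][:]
    # 4^3 = 64 copies of the original particle load
    assert header["NumPart_Total"][1] == len(pos)
    # All replicated positions must lie inside the enlarged box
    assert np.all(pos >= 0.0) and np.all(pos <= header["BoxSize"])
```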