Skip to content
Snippets Groups Projects
Commit 303578d9 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

MPI parallel mesh gravity - hashmap free

parent d4837921
Branches
Tags
1 merge request!1408MPI parallel mesh gravity - hashmap free
Showing with 2699 additions and 91 deletions
......@@ -686,6 +686,15 @@ if test "$enable_ubsan" = "yes"; then
fi
fi
# MPI mesh gravity
AC_ARG_ENABLE([mpi-mesh-gravity],
[AS_HELP_STRING([--enable-mpi-mesh-gravity],
[enable parallel mesh gravity (requires FFTW MPI library) @<:@no/yes@:>@]
)],
[with_mpi_mesh_gravity="${enableval}"],
[with_mpi_mesh_gravity="no"]
)
# Autoconf stuff.
AC_PROG_INSTALL
AC_PROG_MAKE_SET
......@@ -858,6 +867,8 @@ AH_VERBATIM([__STDC_FORMAT_MACROS],
# is given FFTW must be found.
# If FFTW is found, we check whether this is the threaded version.
have_fftw="no"
have_mpi_fftw="no"
have_threaded_fftw="no"
AC_ARG_WITH([fftw],
[AS_HELP_STRING([--with-fftw=PATH],
[root directory where fftw is installed @<:@yes/no@:>@]
......@@ -917,7 +928,33 @@ if test "x$with_fftw" != "xno"; then
AC_DEFINE([HAVE_THREADED_FFTW],1,[The threaded FFTW library appears to be present.])
FFTW_LIBS=$FFTW_THREADED_LIBS
FFTW_INCS=$FFTW_THREADED_INCS
have_fftw="yes - threaded"
fi
fi
# If MPI mesh gravity is not disabled, check whether we have the MPI version of FFTW
if test "x$enable_mpi" = "xyes" -a "x$with_mpi_mesh_gravity" != "xno"; then
# Was FFTW's location specifically given?
if test "x$with_fftw" != "xyes" -a "x$with_fftw" != "xtest" -a "x$with_fftw" != "x"; then
FFTW_MPI_LIBS="-L$with_fftw/lib -lfftw3_mpi -lfftw3"
FFTW_MPI_INCS="-I$with_fftw/include"
else
FFTW_MPI_LIBS="-lfftw3_mpi -lfftw3"
FFTW_MPI_INCS=""
fi
# Verify that the library has MPI support
AC_CHECK_LIB([fftw3],[fftw_mpi_init],[have_mpi_fftw="yes"],
[have_mpi_fftw="no"], $FFTW_MPI_LIBS)
# If found, update things. Don't add MPI flags to FFTW_*_LIBS etc because
# we don't want to link the MPI library into the non-MPI swift executable.
if test "x$have_mpi_fftw" = "xyes"; then
AC_DEFINE([HAVE_MPI_FFTW],1,[The MPI FFTW library appears to be present.])
else
if test "x$with_mpi_mesh_gravity" = "xyes" ; then
AC_MSG_ERROR("Unable to find FFTW MPI library for MPI mesh gravity")
fi
fi
fi
fi
......@@ -929,6 +966,7 @@ AC_ARG_WITH([arm-fftw],
[with_arm_fftw="$withval"],
[with_arm_fftw=no]
)
have_arm_fftw="no"
if test "x$with_arm_fftw" != "xno"; then
# Was FFTW's location specifically given?
......@@ -945,14 +983,12 @@ if test "x$with_arm_fftw" != "xno"; then
AC_CHECK_LIB([armpl_lp64],[fftw_malloc],[have_fftw="yes"],[have_fftw="no"],$FFTW_LIBS)
if test "x$have_arm_fftw" != "xno"; then
AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.])
have_fftw="yes - ARM"
fi
# FFTW was specified, check that it was a valid location.
else
AC_CHECK_LIB([armpl_lp64],[fftw_malloc],
AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.]),
AC_MSG_ERROR(something is wrong with the FFTW library!), $FFTW_LIBS)
have_fftw="yes - ARM"
fi
# FFTW was requested not to be used.
......@@ -990,6 +1026,10 @@ AC_SUBST([FFTW_LIBS])
AC_SUBST([FFTW_INCS])
AM_CONDITIONAL([HAVEFFTW],[test -n "$FFTW_LIBS"])
AC_SUBST([FFTW_MPI_LIBS])
AC_SUBST([FFTW_MPI_INCS])
AM_CONDITIONAL([HAVEMPIFFTW],[test -n "$FFTW_MPI_LIBS"])
# Check for -lprofiler usually part of the gperftools along with tcmalloc.
have_profiler="no"
AC_ARG_WITH([profiler],
......@@ -2582,7 +2622,10 @@ AC_MSG_RESULT([
HDF5 enabled : $with_hdf5
- parallel : $have_parallel_hdf5
METIS/ParMETIS : $have_metis / $have_parmetis
FFTW3 enabled : $have_fftw
FFTW3 enabled : $have_fftw
- threaded : $have_threaded_fftw
- MPI : $have_mpi_fftw
- ARM : $have_arm_fftw
GSL enabled : $have_gsl
libNUMA enabled : $have_numa
GRACKLE enabled : $have_grackle
......
......@@ -292,6 +292,7 @@ Simulations using periodic boundary conditions use additional parameters for the
Particle-Mesh part of the calculation. The last five are optional:
* The number cells along each axis of the mesh :math:`N`: ``mesh_side_length``,
* Whether or not to use a distributed mesh when running over MPI: ``distributed_mesh`` (default: ``0``),
* The mesh smoothing scale in units of the mesh cell-size :math:`a_{\rm
smooth}`: ``a_smooth`` (default: ``1.25``),
* The scale above which the short-range forces are assumed to be 0 (in units of
......@@ -305,6 +306,18 @@ For most runs, the default values can be used. Only the number of cells along
each axis needs to be specified. The remaining three values are best described
in the context of the full set of equations in the theory documents.
By default, SWIFT will replicate the mesh on each MPI rank. This means that a
single MPI reduction is used to ensure all ranks have a full copy of the density
field. Each node then solves for the potential in Fourier space independently of
the others. This is a fast option for small meshes. This technique is limited to
mesh with sizes :math:`N<1291` due to the limitations of MPI. Larger meshes need
to use the distributed version of the algorithm. The code then also needs to be
compiled with ``--enable-mpi-mesh-gravity``. That algorithm is slower for small
meshes but has no limits on the size of the mesh and truly huge Fourier
transforms can be performed without any problems. The only limitation is the
amount of memory on each node. The algorithm will use ``N^3 * 8 * 2 / M`` bytes
on each of the ``M`` MPI ranks.
As a summary, here are the values used for the EAGLE :math:`100^3~{\rm Mpc}^3`
simulation:
......@@ -316,7 +329,8 @@ simulation:
MAC: adaptive
theta_cr: 0.6
epsilon_fmm: 0.001
mesh_side_length: 512
mesh_side_length: 2048
distributed_mesh: 0
comoving_DM_softening: 0.0026994 # 0.7 proper kpc at z=2.8.
max_physical_DM_softening: 0.0007 # 0.7 proper kpc
comoving_baryon_softening: 0.0026994 # 0.7 proper kpc at z=2.8.
......
......@@ -30,8 +30,8 @@ EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(NUMA_LIBS) $(PROFILER_LIBS) \
$(VELOCIRAPTOR_LIBS) $(GSL_LIBS)
# MPI libraries.
MPI_LIBS = $(PARMETIS_LIBS) $(METIS_LIBS) $(MPI_THREAD_LIBS)
MPI_FLAGS = -DWITH_MPI $(PARMETIS_INCS) $(METIS_INCS)
MPI_LIBS = $(PARMETIS_LIBS) $(METIS_LIBS) $(MPI_THREAD_LIBS) $(FFTW_MPI_LIBS)
MPI_FLAGS = -DWITH_MPI $(PARMETIS_INCS) $(METIS_INCS) $(FFTW_MPI_INCS)
# Programs.
bin_PROGRAMS = swift
......
......@@ -77,7 +77,8 @@ Stars:
# Parameters for the self-gravity scheme
Gravity:
mesh_side_length: 128 # Number of cells along each axis for the periodic gravity mesh.
mesh_side_length: 128 # Number of cells along each axis for the periodic gravity mesh (must be even).
distributed_mesh: 0 # (Optional) Are we using a distributed mesh when running over MPI (necessary for meshes > 1290^3)
eta: 0.025 # Constant dimensionless multiplier for time integration.
MAC: adaptive # Choice of mulitpole acceptance criterion: 'adaptive' OR 'geometric'.
epsilon_fmm: 0.001 # Tolerance parameter for the adaptive multipole acceptance criterion.
......
......@@ -51,7 +51,7 @@ include_HEADERS += statistics.h memswap.h cache.h runner_doiact_hydro_vec.h prof
include_HEADERS += csds.h active.h timeline.h xmf.h gravity_properties.h gravity_derivatives.h
include_HEADERS += gravity_softened_derivatives.h vector_power.h collectgroup.h hydro_space.h sort_part.h
include_HEADERS += chemistry.h chemistry_io.h chemistry_struct.h cosmology.h restart.h space_getsid.h utilities.h
include_HEADERS += mesh_gravity.h cbrt.h exp10.h velociraptor_interface.h swift_velociraptor_part.h output_list.h
include_HEADERS += cbrt.h exp10.h velociraptor_interface.h swift_velociraptor_part.h output_list.h
include_HEADERS += csds_io.h tracers_io.h tracers.h tracers_struct.h star_formation_io.h
include_HEADERS += fof.h fof_struct.h fof_io.h fof_catalogue_io.h
include_HEADERS += multipole.h multipole_accept.h multipole_struct.h binomial.h integer_power.h sincos.h
......@@ -65,6 +65,7 @@ include_HEADERS += space_unique_id.h line_of_sight.h io_compression.h
include_HEADERS += rays.h rays_struct.h
include_HEADERS += particle_splitting.h particle_splitting_struct.h
include_HEADERS += chemistry_csds.h star_formation_csds.h
include_HEADERS += mesh_gravity.h mesh_gravity_mpi.h mesh_gravity_patch.h mesh_gravity_sort.h
# source files for EAGLE cooling
QLA_COOLING_SOURCES =
......@@ -132,10 +133,11 @@ AM_SOURCES += hydro.c stars.c
AM_SOURCES += statistics.c profiler.c csds.c part_type.c
AM_SOURCES += gravity_properties.c gravity.c multipole.c
AM_SOURCES += collectgroup.c hydro_space.c equation_of_state.c io_compression.c
AM_SOURCES += chemistry.c cosmology.c mesh_gravity.c velociraptor_interface.c
AM_SOURCES += chemistry.c cosmology.c velociraptor_interface.c
AM_SOURCES += output_list.c velociraptor_dummy.c csds_io.c memuse.c mpiuse.c memuse_rnodes.c
AM_SOURCES += fof.c fof_catalogue_io.c
AM_SOURCES += hashmap.c pressure_floor.c
AM_SOURCES += mesh_gravity.c mesh_gravity_mpi.c mesh_gravity_patch.c mesh_gravity_sort.c
AM_SOURCES += runner_neutrino.c
AM_SOURCES += neutrino/Default/fermi_dirac.c neutrino/Default/neutrino.c
AM_SOURCES += rt_parameters.c
......
......@@ -39,6 +39,7 @@
#define gravity_props_default_r_cut_max 4.5f
#define gravity_props_default_r_cut_min 0.1f
#define gravity_props_default_rebuild_frequency 0.01f
#define gravity_props_default_distributed_mesh 0
void gravity_props_init(struct gravity_props *p, struct swift_params *params,
const struct phys_const *phys_const,
......@@ -59,6 +60,9 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params,
/* Tree-PM parameters */
if (periodic) {
p->mesh_size = parser_get_param_int(params, "Gravity:mesh_side_length");
p->distributed_mesh =
parser_get_opt_param_int(params, "Gravity:distributed_mesh",
gravity_props_default_distributed_mesh);
p->a_smooth = parser_get_opt_param_float(params, "Gravity:a_smooth",
gravity_props_default_a_smooth);
p->r_cut_max_ratio = parser_get_opt_param_float(
......@@ -76,11 +80,19 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params,
if (p->a_smooth <= 0.)
error("The mesh smoothing scale 'a_smooth' must be > 0.");
#if !defined(WITH_MPI) || !defined(HAVE_MPI_FFTW)
if (p->distributed_mesh)
error(
"Need to use MPI and FFTW MPI library to run with "
"distributed_mesh=1.");
#endif
if (2. * p->a_smooth * p->r_cut_max_ratio > p->mesh_size)
error("Mesh too small given r_cut_max. Should be at least %d cells wide.",
(int)(2. * p->a_smooth * p->r_cut_max_ratio) + 1);
} else {
p->mesh_size = 0;
p->distributed_mesh = 0;
p->a_smooth = 0.f;
p->r_s = FLT_MAX;
p->r_s_inv = 0.f;
......@@ -320,6 +332,7 @@ void gravity_props_print(const struct gravity_props *p) {
message("Self-gravity mesh side-length: N=%d", p->mesh_size);
message("Self-gravity mesh smoothing-scale: a_smooth=%f", p->a_smooth);
message("Self-gravity distributed mesh enabled: %d", p->distributed_mesh);
message("Self-gravity tree cut-off ratio: r_cut_max=%f", p->r_cut_max_ratio);
message("Self-gravity truncation cut-off ratio: r_cut_min=%f",
......
......@@ -118,6 +118,9 @@ struct gravity_props {
/*! Periodic long-range mesh side-length */
int mesh_size;
/*! Whether mesh is distributed between MPI ranks when we use MPI */
int distributed_mesh;
/*! Mesh smoothing scale in units of top-level cell size */
float a_smooth;
......
This diff is collapsed.
......@@ -31,6 +31,7 @@ struct engine;
struct space;
struct gpart;
struct threadpool;
struct cell;
/**
* @brief Data structure for the long-range periodic forces using a mesh
......@@ -46,6 +47,9 @@ struct pm_mesh {
/*! Side-length of the mesh */
int N;
/*! Whether mesh is distributed between MPI ranks */
int distributed_mesh;
/*! Integer time-step end of the mesh force for the last step */
integertime_t ti_end_mesh_last;
......@@ -76,8 +80,8 @@ struct pm_mesh {
/*! Distance below which tree forces are Newtonian */
double r_cut_min;
/*! Potential field */
double *potential;
/*! Full N*N*N potential field */
double *potential_global;
};
void pm_mesh_init(struct pm_mesh *mesh, const struct gravity_props *props,
......
This diff is collapsed.
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef SWIFT_MESH_GRAVITY_MPI_H
#define SWIFT_MESH_GRAVITY_MPI_H
/* Config parameters. */
#include "../config.h"
/* Forward declarations */
struct space;
struct cell;
struct threadpool;
struct pm_mesh;
struct pm_mesh_patch;
void mpi_mesh_accumulate_gparts_to_local_patches(
struct threadpool *tp, const int N, const double fac, const struct space *s,
struct pm_mesh_patch *local_patches);
void mpi_mesh_local_patches_to_slices(const int N, const int local_n0,
struct pm_mesh_patch *local_patches,
const int nr_patches, double *mesh,
struct threadpool *tp, const int verbose);
void mpi_mesh_fetch_potential(const int N, const double fac,
const struct space *s, int local_0_start,
int local_n0, double *potential_slice,
struct pm_mesh_patch *local_patches,
struct threadpool *tp, const int verbose);
void mpi_mesh_update_gparts(struct pm_mesh_patch *local_patches,
const struct space *s, struct threadpool *tp,
const int N, const double cell_fac);
#endif
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/* Config parameters. */
#include "../config.h"
#include <math.h>
/* This object's header. */
#include "mesh_gravity_patch.h"
/* Local includes. */
#include "cell.h"
#include "error.h"
#include "row_major_id.h"
/**
* @brief Initialize a mesh patch to cover a cell
*
* @param patch A pointer to the mesh patch
* @param cell The cell which the mesh should cover
* @param fac Inverse of the FFT mesh size
* @param dim Size of the full volume in each dimension
* @param boundary_size Size of the boundary layer to include
*/
void pm_mesh_patch_init(struct pm_mesh_patch *patch, const struct cell *cell,
const int N, const double fac, const double dim[3],
const int boundary_size) {
const int gcount = cell->grav.count;
const struct gpart *gparts = cell->grav.parts;
patch->N = N;
patch->fac = fac;
/* Will need to wrap particles to position nearest the cell centre */
double wrap_min[3];
double wrap_max[3];
for (int i = 0; i < 3; i++) {
wrap_min[i] = cell->loc[i] + 0.5 * cell->width[i] - 0.5 * dim[i];
wrap_max[i] = cell->loc[i] + 0.5 * cell->width[i] + 0.5 * dim[i];
}
/* Store the wraps */
for (int i = 0; i < 3; i++) {
patch->wrap_min[i] = wrap_min[i];
patch->wrap_max[i] = wrap_max[i];
}
/* Find the extent of the particle distribution in the cell */
double pos_min[3];
double pos_max[3];
for (int i = 0; i < 3; i++) {
pos_min[i] = patch->wrap_max[i];
pos_max[i] = patch->wrap_min[i];
}
for (int ipart = 0; ipart < gcount; ipart++) {
const struct gpart *gp = &gparts[ipart];
if (gp->time_bin == time_bin_inhibited) continue;
for (int i = 0; i < 3; i++) {
const double pos_wrap = box_wrap(gp->x[i], wrap_min[i], wrap_max[i]);
if (pos_wrap < pos_min[i]) pos_min[i] = pos_wrap;
if (pos_wrap > pos_max[i]) pos_max[i] = pos_wrap;
}
}
/* Determine the integer size and coordinates of the mesh */
int num_cells = 1;
for (int i = 0; i < 3; i++) {
patch->mesh_min[i] = floor(pos_min[i] * fac) - boundary_size;
/* CIC interpolation requires one extra element in the positive direction */
patch->mesh_max[i] = floor(pos_max[i] * fac) + boundary_size + 1;
patch->mesh_size[i] = patch->mesh_max[i] - patch->mesh_min[i] + 1;
num_cells *= patch->mesh_size[i];
}
/* Allocate the mesh */
if (swift_memalign("mesh_patch", (void **)&patch->mesh, SWIFT_CACHE_ALIGNMENT,
num_cells * sizeof(double)) != 0)
error("Failed to allocate array for mesh patch!");
}
/**
* @brief Set all values in a mesh patch to zero
*
* @param patch A pointer to the mesh patch
*/
void pm_mesh_patch_zero(struct pm_mesh_patch *patch) {
const int num =
patch->mesh_size[0] * patch->mesh_size[1] * patch->mesh_size[2];
memset(patch->mesh, 0, num * sizeof(double));
}
/**
* @brief Free the memory associated with a mesh patch.
*
* @param patch A pointer to the mesh patch
*/
void pm_mesh_patch_clean(struct pm_mesh_patch *patch) {
swift_free("mesh_patch", patch->mesh);
/* Zero everything and give a silly mesh size to help debugging */
memset(patch, 0, sizeof(struct pm_mesh_patch));
patch->N = -1;
}
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef SWIFT_MESH_GRAVITY_PATCH_H
#define SWIFT_MESH_GRAVITY_PATCH_H
/* Config parameters. */
#include "../config.h"
/* Includes. */
#include "align.h"
#include "error.h"
#include "inline.h"
/* Forward declarations */
struct cell;
/**
* @brief Data structure for a patch of mesh covering a cell
*/
struct pm_mesh_patch {
/*! Size of the full mesh */
int N;
/*! Inverse of the mesh cell size */
double fac;
/*! Minimum of coordinate range into which particles should be wrapped */
double wrap_min[3];
/*! Maximum of coordinate range into which particles should be wrapped */
double wrap_max[3];
/*! Size of the mesh in each dimension */
int mesh_size[3];
/*! Minimum integer coordinate of the mesh in each dimension */
int mesh_min[3];
/*! Maximum integer coordinate of the mesh in each dimension */
int mesh_max[3];
/*! Pointer to the mesh data */
double *mesh;
};
void pm_mesh_patch_init(struct pm_mesh_patch *patch, const struct cell *cell,
const int N, const double fac, const double dim[3],
const int boundary_size);
void pm_mesh_patch_zero(struct pm_mesh_patch *patch);
void pm_mesh_patch_clean(struct pm_mesh_patch *patch);
/**
* @brief Return the array index in the patch corresponding to
* coordinates in the full mesh
*
* @param patch Pointer to the patch
* @param i Integer x coordinate in the full mesh
* @param j Integer y coordinate in the full mesh
* @param k Integer z coordinate in the full mesh
*
*/
__attribute__((always_inline)) INLINE static int pm_mesh_patch_index(
const struct pm_mesh_patch *patch, const int i, const int j, const int k) {
#ifdef SWIFT_DEBUG_CHECKS
if (i < 0 || i >= patch->mesh_size[0])
error("Coordinate in local mesh out of range!");
if (j < 0 || j >= patch->mesh_size[1])
error("Coordinate in local mesh out of range!");
if (k < 0 || k >= patch->mesh_size[2])
error("Coordinate in local mesh out of range!");
#endif
return (i * patch->mesh_size[1] * patch->mesh_size[2]) +
(j * patch->mesh_size[2]) + k;
}
/**
* @brief Cloud in cell evaluation of the mesh patch
*
* @param patch Pointer to the patch
* @param i Integer x coordinate in the mesh patch
* @param j Integer y coordinate in the mesh patch
* @param k Integer z coordinate in the mesh patch
* @param tx CIC parameter in the x direction
* @param ty CIC parameter in the y direction
* @param tz CIC parameter in the z direction
* @param dx CIC parameter in the x direction
* @param dy CIC parameter in the y direction
* @param dz CIC parameter in the z direction
*
*/
__attribute__((always_inline)) INLINE static double pm_mesh_patch_CIC_get(
const struct pm_mesh_patch *patch, const int i, const int j, const int k,
const double tx, const double ty, const double tz, const double dx,
const double dy, const double dz) {
/* Remind the compiler that the arrays are nicely aligned */
swift_declare_aligned_ptr(const double, mesh, patch->mesh,
SWIFT_CACHE_ALIGNMENT);
double temp;
/* Classic 3D CIC */
temp = mesh[pm_mesh_patch_index(patch, i + 0, j + 0, k + 0)] * tx * ty * tz;
temp += mesh[pm_mesh_patch_index(patch, i + 0, j + 0, k + 1)] * tx * ty * dz;
temp += mesh[pm_mesh_patch_index(patch, i + 0, j + 1, k + 0)] * tx * dy * tz;
temp += mesh[pm_mesh_patch_index(patch, i + 0, j + 1, k + 1)] * tx * dy * dz;
temp += mesh[pm_mesh_patch_index(patch, i + 1, j + 0, k + 0)] * dx * ty * tz;
temp += mesh[pm_mesh_patch_index(patch, i + 1, j + 0, k + 1)] * dx * ty * dz;
temp += mesh[pm_mesh_patch_index(patch, i + 1, j + 1, k + 0)] * dx * dy * tz;
temp += mesh[pm_mesh_patch_index(patch, i + 1, j + 1, k + 1)] * dx * dy * dz;
return temp;
}
/**
* @brief Cloud in cell assignment to the mesh patch
*
* @param patch Pointer to the patch
* @param i Integer x coordinate in the mesh patch
* @param j Integer y coordinate in the mesh patch
* @param k Integer z coordinate in the mesh patch
* @param tx CIC parameter in the x direction
* @param ty CIC parameter in the y direction
* @param tz CIC parameter in the z direction
* @param dx CIC parameter in the x direction
* @param dy CIC parameter in the y direction
* @param dz CIC parameter in the z direction
* @param value The value to set
*/
__attribute__((always_inline)) INLINE static void pm_mesh_patch_CIC_set(
const struct pm_mesh_patch *patch, const int i, const int j, const int k,
const double tx, const double ty, const double tz, const double dx,
const double dy, const double dz, const double value) {
/* Remind the compiler that the arrays are nicely aligned */
swift_declare_aligned_ptr(double, mesh, patch->mesh, SWIFT_CACHE_ALIGNMENT);
/* Classic 3D CIC */
mesh[pm_mesh_patch_index(patch, i + 0, j + 0, k + 0)] += value * tx * ty * tz;
mesh[pm_mesh_patch_index(patch, i + 0, j + 0, k + 1)] += value * tx * ty * dz;
mesh[pm_mesh_patch_index(patch, i + 0, j + 1, k + 0)] += value * tx * dy * tz;
mesh[pm_mesh_patch_index(patch, i + 0, j + 1, k + 1)] += value * tx * dy * dz;
mesh[pm_mesh_patch_index(patch, i + 1, j + 0, k + 0)] += value * dx * ty * tz;
mesh[pm_mesh_patch_index(patch, i + 1, j + 0, k + 1)] += value * dx * ty * dz;
mesh[pm_mesh_patch_index(patch, i + 1, j + 1, k + 0)] += value * dx * dy * tz;
mesh[pm_mesh_patch_index(patch, i + 1, j + 1, k + 1)] += value * dx * dy * dz;
}
#endif
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/* Config parameters. */
#include "../config.h"
/* This object's header. */
#include "mesh_gravity_sort.h"
/* Local includes. */
#include "align.h"
#include "atomic.h"
#include "error.h"
#include "row_major_id.h"
#include "threadpool.h"
struct mapper_extra_data {
/* Mesh size */
int N;
/* Buckets */
size_t *bucket_counts;
};
/**
* @param Count how may mesh cells will end up in each x-coord bucket.
*/
void bucket_sort_mesh_key_value_rho_count_mapper(void *map_data, int nr_parts,
void *extra_data) {
/* Unpack the data */
const struct mesh_key_value_rho *array_in =
(const struct mesh_key_value_rho *)map_data;
struct mapper_extra_data *data = (struct mapper_extra_data *)extra_data;
const int N = data->N;
size_t *global_bucket_counts = data->bucket_counts;
/* Local buckets */
size_t *local_bucket_counts = (size_t *)calloc(N, sizeof(size_t));
/* Count how many items will land in each bucket. */
for (int i = 0; i < nr_parts; ++i) {
const size_t key = array_in[i].key;
/* Get the x coordinate of this mesh cell in the global mesh
* Note: we don't need to sort more precisely than just
* the x coordinate */
const int mesh_x = get_xcoord_from_padded_row_major_id(key, N);
#ifdef SWIFT_DEBUG_CHECKS
if (mesh_x < 0) error("Invalid mesh cell x-coordinate (too small)");
if (mesh_x >= N) error("Invalid mesh cell x-coordinate (too large)");
#endif
/* Add a contribution to the bucket count */
local_bucket_counts[mesh_x]++;
}
/* Now write back to memory */
for (int i = 0; i < N; ++i) {
atomic_add(&global_bucket_counts[i], local_bucket_counts[i]);
}
/* Clean up */
free(local_bucket_counts);
}
/**
* @param Count how may mesh cells will end up in each x-coord bucket.
*/
void bucket_sort_mesh_key_value_pot_count_mapper(void *map_data, int nr_parts,
void *extra_data) {
/* Unpack the data */
const struct mesh_key_value_pot *array_in =
(const struct mesh_key_value_pot *)map_data;
struct mapper_extra_data *data = (struct mapper_extra_data *)extra_data;
const int N = data->N;
size_t *global_bucket_counts = data->bucket_counts;
/* Local buckets */
size_t *local_bucket_counts = (size_t *)calloc(N, sizeof(size_t));
/* Count how many items will land in each bucket. */
for (int i = 0; i < nr_parts; ++i) {
const size_t key = array_in[i].key;
/* Get the x coordinate of this mesh cell in the global mesh
* Note: we don't need to sort more precisely than just
* the x coordinate */
const int mesh_x = get_xcoord_from_padded_row_major_id(key, N);
#ifdef SWIFT_DEBUG_CHECKS
if (mesh_x < 0) error("Invalid mesh cell x-coordinate (too small)");
if (mesh_x >= N) error("Invalid mesh cell x-coordinate (too large)");
#endif
/* Add a contribution to the bucket count */
local_bucket_counts[mesh_x]++;
}
/* Now write back to memory */
for (int i = 0; i < N; ++i) {
atomic_add(&global_bucket_counts[i], local_bucket_counts[i]);
}
/* Clean up */
free(local_bucket_counts);
}
/**
* @param Count how may mesh cells will end up in each cell index
*/
void bucket_sort_mesh_key_value_pot_index_count_mapper(void *map_data,
int nr_parts,
void *extra_data) {
/* Unpack the data */
const struct mesh_key_value_pot *array_in =
(const struct mesh_key_value_pot *)map_data;
struct mapper_extra_data *data = (struct mapper_extra_data *)extra_data;
const int N = data->N;
size_t *global_bucket_counts = data->bucket_counts;
/* Local buckets */
size_t *local_bucket_counts = (size_t *)calloc(N, sizeof(size_t));
/* Count how many items will land in each bucket. */
for (int i = 0; i < nr_parts; ++i) {
const size_t key = array_in[i].cell_index;
/* Get the cell_index */
const int cell_id = cell_index_extract_patch_index(key);
#ifdef SWIFT_DEBUG_CHECKS
if (cell_id < 0) error("Invalid mesh cell x-coordinate (too small)");
if (cell_id >= N) error("Invalid mesh cell x-coordinate (too large)");
#endif
/* Add a contribution to the bucket count */
local_bucket_counts[cell_id]++;
}
/* Now write back to memory */
for (int i = 0; i < N; ++i) {
atomic_add(&global_bucket_counts[i], local_bucket_counts[i]);
}
/* Clean up */
free(local_bucket_counts);
}
/**
* @brief Bucket sort of the array of mesh cells based on their x-coord.
*
* Note the two mesh_key_value_rho arrays must be aligned on
* SWIFT_CACHE_ALIGNMENT.
*
* @param array_in The unsorted array of mesh-key value pairs.
* @param count The number of elements in the mesh-key value pair arrays.
* @param N The size of the mesh. (i.e. the number of possible x-axis values)
* @param tp The #threadpool object.
* @param array_out The sorted array of mesh-key value pairs (to be filled).
* @param bucket_offsets The offsets in the sorted array where we change x-coord
* (to be filled).
*/
void bucket_sort_mesh_key_value_rho(const struct mesh_key_value_rho *array_in,
const size_t count, const int N,
struct threadpool *tp,
struct mesh_key_value_rho *array_out,
size_t *bucket_offsets) {
/* Create an array of bucket counts and one of offsets */
size_t *bucket_counts = (size_t *)malloc(N * sizeof(size_t));
memset(bucket_counts, 0, N * sizeof(size_t));
struct mapper_extra_data extra_data;
extra_data.N = N;
extra_data.bucket_counts = bucket_counts;
/* Collect the number of items that will end up in each bucket */
threadpool_map(tp, bucket_sort_mesh_key_value_rho_count_mapper,
(void *)array_in, count, sizeof(struct mesh_key_value_rho),
threadpool_auto_chunk_size, &extra_data);
#ifdef SWIFT_DEBUG_CHECKS
size_t count_check = 0;
for (int i = 0; i < N; ++i) count_check += bucket_counts[i];
if (count_check != count) error("Bucket count is not matching");
#endif
/* Now we can build the array of offsets (cumsum of the counts) */
bucket_offsets[0] = 0;
for (int i = 1; i < N; ++i) {
bucket_offsets[i] = bucket_offsets[i - 1] + bucket_counts[i - 1];
}
/* Remind the compiler that the array is nicely aligned */
swift_declare_aligned_ptr(struct mesh_key_value_rho, array_out_aligned,
array_out, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(const struct mesh_key_value_rho, array_in_aligned,
array_in, SWIFT_CACHE_ALIGNMENT);
/* Now, we can do the actual sorting */
for (size_t i = 0; i < count; ++i) {
const size_t key = array_in_aligned[i].key;
const int mesh_x = get_xcoord_from_padded_row_major_id(key, N);
/* Where does this element land? */
const size_t index = bucket_offsets[mesh_x];
/* Copy the element to its correct position */
memcpy(&array_out_aligned[index], &array_in_aligned[i],
sizeof(struct mesh_key_value_rho));
/* Move the start of this bucket by one */
bucket_offsets[mesh_x]++;
}
#ifdef SWIFT_DEBUG_CHECKS
/* Verify that things have indeed been sorted */
int last_mesh_x =
get_xcoord_from_padded_row_major_id(array_out_aligned[0].key, N);
;
for (size_t i = 1; i < count; ++i) {
const size_t key = array_out_aligned[i].key;
const int mesh_x = get_xcoord_from_padded_row_major_id(key, N);
if (mesh_x < last_mesh_x) error("Unsorted array!");
last_mesh_x = mesh_x;
}
#endif
/* Restore the bucket offsets to their former glory */
for (int i = 0; i < N; ++i) {
bucket_offsets[i] -= bucket_counts[i];
}
/* Clean up! */
free(bucket_counts);
}
/**
* @brief Bucket sort of the array of mesh cells based on their x-coord.
*
* Note the two mesh_key_value_pot arrays must be aligned on
* SWIFT_CACHE_ALIGNMENT.
*
* @param array_in The unsorted array of mesh-key value pairs.
* @param count The number of elements in the mesh-key value pair arrays.
* @param N The size of the mesh. (i.e. the number of possible x-axis values)
* @param tp The #threadpool object.
* @param array_out The sorted array of mesh-key value pairs (to be filled).
* @param bucket_offsets The offsets in the sorted array where we change x-coord
* (to be filled).
*/
void bucket_sort_mesh_key_value_pot(const struct mesh_key_value_pot *array_in,
const size_t count, const int N,
struct threadpool *tp,
struct mesh_key_value_pot *array_out,
size_t *bucket_offsets) {
/* Create an array of bucket counts and one of offsets */
size_t *bucket_counts = (size_t *)malloc(N * sizeof(size_t));
memset(bucket_counts, 0, N * sizeof(size_t));
struct mapper_extra_data extra_data;
extra_data.N = N;
extra_data.bucket_counts = bucket_counts;
/* Collect the number of items that will end up in each bucket */
threadpool_map(tp, bucket_sort_mesh_key_value_pot_count_mapper,
(void *)array_in, count, sizeof(struct mesh_key_value_pot),
threadpool_auto_chunk_size, &extra_data);
#ifdef SWIFT_DEBUG_CHECKS
size_t count_check = 0;
for (int i = 0; i < N; ++i) count_check += bucket_counts[i];
if (count_check != count) error("Bucket count is not matching");
#endif
/* Now we can build the array of offsets (cumsum of the counts) */
bucket_offsets[0] = 0;
for (int i = 1; i < N; ++i) {
bucket_offsets[i] = bucket_offsets[i - 1] + bucket_counts[i - 1];
}
/* Remind the compiler that the array is nicely aligned */
swift_declare_aligned_ptr(struct mesh_key_value_pot, array_out_aligned,
array_out, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(const struct mesh_key_value_pot, array_in_aligned,
array_in, SWIFT_CACHE_ALIGNMENT);
/* Now, we can do the actual sorting */
for (size_t i = 0; i < count; ++i) {
const size_t key = array_in_aligned[i].key;
const int mesh_x = get_xcoord_from_padded_row_major_id(key, N);
/* Where does this element land? */
const size_t index = bucket_offsets[mesh_x];
/* Copy the element to its correct position */
memcpy(&array_out_aligned[index], &array_in_aligned[i],
sizeof(struct mesh_key_value_pot));
/* Move the start of this bucket by one */
bucket_offsets[mesh_x]++;
}
#ifdef SWIFT_DEBUG_CHECKS
/* Verify that things have indeed been sorted */
int last_mesh_x =
get_xcoord_from_padded_row_major_id(array_out_aligned[0].key, N);
for (size_t i = 1; i < count; ++i) {
const size_t key = array_out_aligned[i].key;
const int mesh_x = get_xcoord_from_padded_row_major_id(key, N);
if (mesh_x < last_mesh_x) error("Unsorted array!");
last_mesh_x = mesh_x;
}
#endif
/* Restore the bucket offsets to their former glory */
for (int i = 0; i < N; ++i) {
bucket_offsets[i] -= bucket_counts[i];
}
/* Clean up! */
free(bucket_counts);
}
/**
* @brief Bucket sort of the array of mesh cells based on their cell index.
*
* Note the two mesh_key_value_pot arrays must be aligned on
* SWIFT_CACHE_ALIGNMENT.
*
* @param array_in The unsorted array of mesh-key value pairs.
* @param count The number of elements in the mesh-key value pair arrays.
* @param N The number of buckets (i.e. the nr. of local top-level cells).
* @param tp The #threadpool object.
* @param array_out The sorted array of mesh-key value pairs (to be filled).
*/
void bucket_sort_mesh_key_value_pot_index(
const struct mesh_key_value_pot *array_in, const size_t count, const int N,
struct threadpool *tp, struct mesh_key_value_pot *array_out) {
/* Create an array of bucket counts and one of offsets */
size_t *bucket_counts = (size_t *)malloc(N * sizeof(size_t));
size_t *bucket_offsets = (size_t *)malloc(N * sizeof(size_t));
memset(bucket_counts, 0, N * sizeof(size_t));
struct mapper_extra_data extra_data;
extra_data.N = N;
extra_data.bucket_counts = bucket_counts;
/* Collect the number of items that will end up in each bucket */
threadpool_map(tp, bucket_sort_mesh_key_value_pot_index_count_mapper,
(void *)array_in, count, sizeof(struct mesh_key_value_pot),
threadpool_auto_chunk_size, &extra_data);
#ifdef SWIFT_DEBUG_CHECKS
size_t count_check = 0;
for (int i = 0; i < N; ++i) count_check += bucket_counts[i];
if (count_check != count) error("Bucket count is not matching");
#endif
/* Now we can build the array of offsets (cumsum of the counts) */
bucket_offsets[0] = 0;
for (int i = 1; i < N; ++i) {
bucket_offsets[i] = bucket_offsets[i - 1] + bucket_counts[i - 1];
}
/* Remind the compiler that the array is nicely aligned */
swift_declare_aligned_ptr(const struct mesh_key_value_pot, array_in_aligned,
array_in, SWIFT_CACHE_ALIGNMENT);
swift_declare_aligned_ptr(struct mesh_key_value_pot, array_out_aligned,
array_out, SWIFT_CACHE_ALIGNMENT);
/* Now, we can do the actual sorting */
for (size_t i = 0; i < count; ++i) {
const size_t key = array_in_aligned[i].cell_index;
const int cell_id = cell_index_extract_patch_index(key);
/* Where does this element land? */
const size_t index = bucket_offsets[cell_id];
/* Copy the element to its correct position */
memcpy(&array_out_aligned[index], &array_in_aligned[i],
sizeof(struct mesh_key_value_pot));
/* Move the start of this bucket by one */
bucket_offsets[cell_id]++;
}
/* Clean up! */
free(bucket_offsets);
free(bucket_counts);
}
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef SWIFT_MESH_GRAVITY_SORT_H
#define SWIFT_MESH_GRAVITY_SORT_H
/* Config parameters. */
#include "../config.h"
/* Standard includes */
#include <stdlib.h>
#include <string.h>
struct threadpool;
/**
* @brief Store contributions to the mesh as (index, mass) pairs
*/
struct mesh_key_value_rho {
size_t key;
double value;
};
/**
* @brief Store contributions to the mesh as (cell, index, mass) tuples
*/
struct mesh_key_value_pot {
size_t cell_index;
size_t key;
double value;
};
void bucket_sort_mesh_key_value_rho(const struct mesh_key_value_rho *array_in,
const size_t count, const int N,
struct threadpool *tp,
struct mesh_key_value_rho *array_out,
size_t *bucket_offsets);
void bucket_sort_mesh_key_value_pot(const struct mesh_key_value_pot *array_in,
const size_t count, const int N,
struct threadpool *tp,
struct mesh_key_value_pot *array_out,
size_t *bucket_offsets);
void bucket_sort_mesh_key_value_pot_index(
const struct mesh_key_value_pot *array_in, const size_t count, const int N,
struct threadpool *tp, struct mesh_key_value_pot *array_out);
#endif /* SWIFT_MESH_GRAVITY_SORT_H */
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef SWIFT_ROW_MAJOR_ID_H
#define SWIFT_ROW_MAJOR_ID_H
/* Config parameters. */
#include "../config.h"
/* Includes. */
#include "inline.h"
/**
* @brief Returns 1D index of a 3D NxNxN array using row-major style.
*
* Wraps around in the corresponding dimension if any of the 3 indices is >= N
* or < 0.
*
* @param i Index along x.
* @param j Index along y.
* @param k Index along z.
* @param N Size of the array along one axis.
*/
__attribute__((always_inline, const)) INLINE static int row_major_id_periodic(
const int i, const int j, const int k, const int N) {
return (((i + N) % N) * N * N + ((j + N) % N) * N + ((k + N) % N));
}
/**
* @brief Returns 1D index of a FFTW-padded 3D NxNxN array using row-major
* style.
*
* Wraps around in the corresponding dimension if any of the 3 indices is >= N
* or < 0. Note that indexes are of type size_t in this version and the array
* is assumed to be padded to size 2*(N/2+1) in the last dimension as required
* by FFTW MPI routines.
*
* @param i Index along x.
* @param j Index along y.
* @param k Index along z.
* @param N Size of the array along one axis.
*/
__attribute__((always_inline, const)) INLINE static size_t
row_major_id_periodic_size_t_padded(const int i, const int j, const int k,
const int N) {
/* Find last two dimensions of the padded array */
const size_t Nj = N;
const size_t Nk = 2 * (N / 2 + 1);
/* Get box-wrapped coordinates (note that we don't use the padded size here)
*/
const size_t i_wrap = (size_t)((i + N) % N);
const size_t j_wrap = (size_t)((j + N) % N);
const size_t k_wrap = (size_t)((k + N) % N);
/* Compute index in the padded array */
return (i_wrap * Nj * Nk) + (j_wrap * Nk) + k_wrap;
}
/**
* @brief Return a unique ID for a mesh cell in a local patch.
*
* We use the first 28 bits for the patch id then 3 lots
* of 12 bits for each of i, j, and k.
*
* @param patch_id The local ID of patch.
* @param i The i-index of the mesh cell in the patch.
* @param j The j-index of the mesh cell in the patch.
* @param k The k-index of the mesh cell in the patch.
*/
__attribute__((always_inline, const)) INLINE static size_t
cell_index_from_patch_index(const int patch_id, const int i, const int j,
const int k) {
size_t ret = patch_id;
ret <<= 12;
ret += (size_t)i;
ret <<= 12;
ret += (size_t)j;
ret <<= 12;
ret += (size_t)k;
return ret;
}
/**
* @brief Return the patch index from the mesh cell unique ID.
*/
__attribute__((always_inline, const)) INLINE static int
cell_index_extract_patch_index(const size_t index) {
return (int)(index >> 36);
}
/**
* @brief Returns a size_t containing the last n bits of a give size_t
*/
__attribute__((always_inline, const)) INLINE static size_t get_last_n_bits(
const size_t x, const int n) {
return x & ~(~((size_t)0) << n);
}
/**
* @brief Extract the patch index, i, j and k from a cell_index.
*
* Performs the opposite operation to cell_index_from_patch_index().
*/
__attribute__((always_inline)) INLINE static void patch_index_from_cell_index(
size_t cell_index, int *restrict patch_index, int *restrict i,
int *restrict j, int *restrict k) {
const size_t kk = get_last_n_bits(cell_index, 12);
cell_index >>= 12;
const size_t jj = get_last_n_bits(cell_index, 12);
cell_index >>= 12;
const size_t ii = get_last_n_bits(cell_index, 12);
cell_index >>= 12;
*k = (int)kk;
*j = (int)jj;
*i = (int)ii;
*patch_index = (int)cell_index;
}
/**
* @brief Return i coordinate from an id returned by
* row_major_id_periodic_size_t_padded
*
* This extracts the index in the first dimension from a row major id
* returned by row_major_id_periodic_size_t_padded. I.e. it finds the
* 'i' input parameter that was used to generate the id.
*
* @param id The padded row major ID.
* @param N Size of the array along one axis.
*/
__attribute__((always_inline, const)) INLINE static int
get_xcoord_from_padded_row_major_id(const size_t id, const int N) {
const size_t Nj = N;
const size_t Nk = 2 * (N / 2 + 1);
return (int)(id / (Nj * Nk));
}
/**
* @brief Convert a global mesh array index to local slice index
*
* Given an index into the padded N*N*2*(N/2+1) array, compute
* the corresponding index in the slice of the array stored
* on the local MPI rank.
*
* @param id The padded row major ID.
* @param N Size of the array along one axis.
* @param slice_offset Index of the first slice on this rank
*/
__attribute__((always_inline, const)) INLINE static size_t
get_index_in_local_slice(const size_t id, const int N, const int slice_offset) {
const size_t Nj = N;
const size_t Nk = 2 * (N / 2 + 1);
return id - ((size_t)slice_offset) * Nj * Nk;
}
#endif /* SWIFT_ROW_MAJOR_ID_H */
......@@ -2,11 +2,14 @@
labels = [
["engine_split_gas_particles:", 1],
["Gpart assignment", 4],
["Mesh communication", 4],
["Mesh MPI-reduction", 4],
["Forward Fourier transform", 4],
["Green function", 4],
["Backwards Fourier transform", 4],
["Gpart mesh forces", 4],
["Reverse Fourier transform", 4],
["Computing mesh accelerations", 4],
["Accumulating mass to local patches", 4],
["Assembling mesh slices", 4],
["Fetching local potential", 4],
["engine_recompute_displacement_constraint:", 1],
["engine_exchange_top_multipoles:", 1],
["updating particle counts", 1],
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment