diff --git a/doc/RTD/source/ParameterFiles/parameter_description.rst b/doc/RTD/source/ParameterFiles/parameter_description.rst
index 28f8b908de911f5054fb0b46a43977d0cb1eb739..87961c443f232b07b74164f103cd99000f9d7150 100644
--- a/doc/RTD/source/ParameterFiles/parameter_description.rst
+++ b/doc/RTD/source/ParameterFiles/parameter_description.rst
@@ -293,6 +293,9 @@ Particle-Mesh part of the calculation. The last five are optional:
 * The number cells along each axis of the mesh :math:`N`: ``mesh_side_length``,
 * Whether or not to use a distributed mesh when running over MPI:
   ``distributed_mesh`` (default: ``0``),
+* Whether or not to use local patches instead of direct atomic operations to
+  write to the mesh in the non-MPI case (this is a performance tuning
+  parameter): ``mesh_uses_local_patches`` (default: ``1``),
 * The mesh smoothing scale in units of the mesh cell-size
   :math:`a_{\rm smooth}`: ``a_smooth`` (default: ``1.25``),
 * The scale above which the short-range forces are assumed to be 0 (in units of
diff --git a/examples/parameter_example.yml b/examples/parameter_example.yml
index 7fc39617f4aa85a1bfa06dbbdff47fc61f872429..ff472d4eba03c90134ef285c88283c7c827f43da 100644
--- a/examples/parameter_example.yml
+++ b/examples/parameter_example.yml
@@ -79,6 +79,7 @@ Stars:
 Gravity:
   mesh_side_length: 128 # Number of cells along each axis for the periodic gravity mesh (must be even).
   distributed_mesh: 0 # (Optional) Are we using a distributed mesh when running over MPI (necessary for meshes > 1290^3)
+  mesh_uses_local_patches: 1 # (Optional) Are we using thread-local patches (1) or direct atomic writes to the global mesh (0) in the non-MPI case?
   eta: 0.025 # Constant dimensionless multiplier for time integration.
   MAC: adaptive # Choice of mulitpole acceptance criterion: 'adaptive' OR 'geometric'.
   epsilon_fmm: 0.001 # Tolerance parameter for the adaptive multipole acceptance criterion.
diff --git a/src/gravity_properties.c b/src/gravity_properties.c
index 575d21b14b1cb822ccf17143daf491f4d2e3103c..6cffb1f395730369470266267354d6edc818735c 100644
--- a/src/gravity_properties.c
+++ b/src/gravity_properties.c
@@ -64,6 +64,8 @@ void gravity_props_init(struct gravity_props *p, struct swift_params *params,
   p->distributed_mesh =
       parser_get_opt_param_int(params, "Gravity:distributed_mesh",
                                gravity_props_default_distributed_mesh);
+  p->mesh_uses_local_patches =
+      parser_get_opt_param_int(params, "Gravity:mesh_uses_local_patches", 1);
   p->a_smooth = parser_get_opt_param_float(params, "Gravity:a_smooth",
                                            gravity_props_default_a_smooth);
   p->r_cut_max_ratio = parser_get_opt_param_float(
diff --git a/src/gravity_properties.h b/src/gravity_properties.h
index cc2ccc8d835f76c587e0ff04da06102633067c12..016c318f46e34b5e68ae7d3aa1ebbb1aab928b46 100644
--- a/src/gravity_properties.h
+++ b/src/gravity_properties.h
@@ -121,6 +121,10 @@ struct gravity_props {
   /*! Whether mesh is distributed between MPI ranks when we use MPI */
   int distributed_mesh;
 
+  /*! Whether or not to use local patches rather than
+   * direct atomic writes to the mesh when running without MPI */
+  int mesh_uses_local_patches;
+
   /*! Mesh smoothing scale in units of top-level cell size */
   float a_smooth;
 
diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c
index 7af0224519f0bc1cc1970e9fc7f489c6edb7fac2..5560b9c12987e2f3075f93cf958221f854bcb0e2 100644
--- a/src/mesh_gravity.c
+++ b/src/mesh_gravity.c
@@ -215,6 +215,7 @@ struct cic_mapper_data {
   double* rho;
   double* potential;
   int N;
+  int use_local_patches;
   double fac;
   double dim[3];
   float const_G;
@@ -260,10 +261,8 @@ void cell_gpart_to_mesh_CIC_mapper(void* map_data, int num, void* extra) {
   /* Pointer to the chunk to be processed */
   int* local_cells = (int*)map_data;
 
-  // MATTHIEU: This could in principle be improved by creating a local mesh
-  //           with just the extent required for the cell. Assignment can
-  //           then be done without atomics. That local mesh is then added
-  //           atomically to the global one.
+  /* A temporary patch of the global mesh */
+  struct pm_mesh_patch patch;
 
   /* Loop over the elements assigned to this thread */
   for (int i = 0; i < num; ++i) {
@@ -271,8 +270,26 @@ void cell_gpart_to_mesh_CIC_mapper(void* map_data, int num, void* extra) {
 
     /* Pointer to local cell */
     const struct cell* c = &cells[local_cells[i]];
 
-    /* Assign this cell's content to the mesh */
-    cell_gpart_to_mesh_CIC(c, rho, N, fac, dim, nu_model);
+    /* Skip empty cells */
+    if (c->grav.count == 0) continue;
+
+    if (data->use_local_patches) {
+
+      /* Do a CIC interpolation of all the particles in this cell onto
+         the local patch (allocates memory in the patch) */
+      accumulate_cell_to_local_patch(N, fac, dim, c, &patch, nu_model);
+
+      /* Copy the local patch values back onto the global mesh */
+      pm_add_patch_to_global_mesh(rho, &patch);
+
+      /* Free the allocated memory */
+      pm_mesh_patch_clean(&patch);
+
+    } else {
+
+      /* Assign this cell's content to the mesh directly with atomic writes */
+      cell_gpart_to_mesh_CIC(c, rho, N, fac, dim, nu_model);
+    }
   }
 }
@@ -880,6 +897,7 @@ void compute_potential_global(struct pm_mesh* mesh, const struct space* s,
   data.rho = rho;
   data.potential = NULL;
   data.N = N;
+  data.use_local_patches = mesh->use_local_patches;
   data.fac = cell_fac;
   data.dim[0] = dim[0];
   data.dim[1] = dim[1];
@@ -1136,6 +1154,7 @@ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props,
   mesh->periodic = 1;
   mesh->N = N;
   mesh->distributed_mesh = props->distributed_mesh;
+  mesh->use_local_patches = props->mesh_uses_local_patches;
   mesh->dim[0] = dim[0];
   mesh->dim[1] = dim[1];
   mesh->dim[2] = dim[2];
diff --git a/src/mesh_gravity.h b/src/mesh_gravity.h
index 4f968d09dd8c53bab5823f01cddfcc30f4666325..5b92d6e1c0bf10455a182253788cdd3aae638ba5 100644
--- a/src/mesh_gravity.h
+++ b/src/mesh_gravity.h
@@ -50,6 +50,10 @@ struct pm_mesh {
   /*! Whether mesh is distributed between MPI ranks */
   int distributed_mesh;
 
+  /*! Whether or not to use local patches rather than
+   * direct atomic writes to the mesh when running without MPI */
+  int use_local_patches;
+
   /*! Integer time-step end of the mesh force for the last step */
   integertime_t ti_end_mesh_last;
 
diff --git a/src/mesh_gravity_mpi.c b/src/mesh_gravity_mpi.c
index f493eae2ac081825accbe19d122e1e31c3e34664..2401b5ac4dab2030864a36305c3f9b21205012f8 100644
--- a/src/mesh_gravity_mpi.c
+++ b/src/mesh_gravity_mpi.c
@@ -161,6 +161,9 @@ void accumulate_cell_to_local_patches_mapper(void *map_data, int num,
     /* Pointer to local cell */
     const struct cell *c = &cells[local_cells[i]];
 
+    /* Skip empty cells */
+    if (c->grav.count == 0) continue;
+
     /* Assign this cell's content to the mesh */
     accumulate_cell_to_local_patch(N, fac, dim, c, &local_patches[i], nu_model);
   }
diff --git a/src/mesh_gravity_mpi.h b/src/mesh_gravity_mpi.h
index 4840a6d379f71ee91a12c8738e04401265e16806..11e2f9a3a9ad9c19d6defec939a7b0aee9a39f88 100644
--- a/src/mesh_gravity_mpi.h
+++ b/src/mesh_gravity_mpi.h
@@ -28,6 +28,12 @@ struct cell;
 struct threadpool;
 struct pm_mesh;
 struct pm_mesh_patch;
+struct neutrino_model;
+
+void accumulate_cell_to_local_patch(const int N, const double fac,
+                                    const double *dim, const struct cell *cell,
+                                    struct pm_mesh_patch *patch,
+                                    const struct neutrino_model *nu_model);
 
 void mpi_mesh_accumulate_gparts_to_local_patches(
     struct threadpool *tp, const int N, const double fac, const struct space *s,
diff --git a/src/mesh_gravity_patch.c b/src/mesh_gravity_patch.c
index c26153f7f4f06e30f9354d235ab5a8085f78d1b1..53036581ca35d4a631904bb6e25f0c4161ee6936 100644
--- a/src/mesh_gravity_patch.c
+++ b/src/mesh_gravity_patch.c
@@ -26,6 +26,7 @@
 #include "mesh_gravity_patch.h"
 
 /* Local includes. */
+#include "atomic.h"
 #include "cell.h"
 #include "error.h"
 #include "row_major_id.h"
@@ -99,6 +100,45 @@ void pm_mesh_patch_init(struct pm_mesh_patch *patch, const struct cell *cell,
     error("Failed to allocate array for mesh patch!");
 }
 
+/**
+ * @brief Write the content of a mesh patch back to the global mesh
+ * using atomic operations.
+ *
+ * @param global_mesh The global mesh to write to.
+ * @param patch The #pm_mesh_patch object to write from.
+ */
+void pm_add_patch_to_global_mesh(double *const global_mesh,
+                                 const struct pm_mesh_patch *patch) {
+
+  const int N = patch->N;
+  const int size_i = patch->mesh_size[0];
+  const int size_j = patch->mesh_size[1];
+  const int size_k = patch->mesh_size[2];
+  const int mesh_min_i = patch->mesh_min[0];
+  const int mesh_min_j = patch->mesh_min[1];
+  const int mesh_min_k = patch->mesh_min[2];
+
+  /* Remind the compiler that the arrays are nicely aligned */
+  swift_declare_aligned_ptr(const double, mesh, patch->mesh,
+                            SWIFT_CACHE_ALIGNMENT);
+
+  for (int i = 0; i < size_i; ++i) {
+    for (int j = 0; j < size_j; ++j) {
+      for (int k = 0; k < size_k; ++k) {
+
+        const int ii = i + mesh_min_i;
+        const int jj = j + mesh_min_j;
+        const int kk = k + mesh_min_k;
+
+        const int patch_index = pm_mesh_patch_index(patch, i, j, k);
+        const int mesh_index = row_major_id_periodic(ii, jj, kk, N);
+
+        atomic_add_d(&global_mesh[mesh_index], mesh[patch_index]);
+      }
+    }
+  }
+}
+
 /**
  * @brief Set all values in a mesh patch to zero
  *
@@ -106,9 +146,12 @@ void pm_mesh_patch_init(struct pm_mesh_patch *patch, const struct cell *cell,
  */
 void pm_mesh_patch_zero(struct pm_mesh_patch *patch) {
 
+  /* Remind the compiler that the arrays are nicely aligned */
+  swift_declare_aligned_ptr(double, mesh, patch->mesh, SWIFT_CACHE_ALIGNMENT);
+
   const int num =
       patch->mesh_size[0] * patch->mesh_size[1] * patch->mesh_size[2];
-  memset(patch->mesh, 0, num * sizeof(double));
+  memset(mesh, 0, num * sizeof(double));
 }
 
 /**
@@ -118,7 +161,7 @@ void pm_mesh_patch_zero(struct pm_mesh_patch *patch) {
  */
 void pm_mesh_patch_clean(struct pm_mesh_patch *patch) {
 
-  swift_free("mesh_patch", patch->mesh);
+  if (patch->mesh) swift_free("mesh_patch", patch->mesh);
 
   /* Zero everything and give a silly mesh size to help debugging */
   memset(patch, 0, sizeof(struct pm_mesh_patch));
diff --git a/src/mesh_gravity_patch.h b/src/mesh_gravity_patch.h
index 067dc2cf5e0fbd289534c5fe1afeb65a5f413e00..cc82175882159e3a29fc523a008da44c27fc2d3c 100644
--- a/src/mesh_gravity_patch.h
+++ b/src/mesh_gravity_patch.h
@@ -167,4 +167,7 @@ __attribute__((always_inline)) INLINE static void pm_mesh_patch_CIC_set(
   mesh[pm_mesh_patch_index(patch, i + 1, j + 1, k + 1)] += value * dx * dy * dz;
 }
 
+void pm_add_patch_to_global_mesh(double *const global_mesh,
+                                 const struct pm_mesh_patch *patch);
+
 #endif
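
Note for reviewers: the non-MPI path above now follows the same pattern as the MPI code, namely each thread deposits its particles into a private pm_mesh_patch and the result is flushed to the shared density grid with one atomic_add_d() per patch cell instead of one atomic operation per particle contribution. The toy program below is a minimal, self-contained sketch of that pattern, not SWIFT code: the names atomic_add_double, flush_patch, N_MESH and N_PATCH are hypothetical, the mesh is 1-D for brevity, and the compare-and-swap loop assumes GCC/Clang __sync builtins (in the spirit of what atomic_add_d() does).

/* Sketch of the "accumulate locally, flush atomically" strategy. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define N_MESH 16 /* size of the toy 1-D global mesh      */
#define N_PATCH 4 /* extent of the thread-local patch     */

/* Atomically add 'val' to the double at 'addr' via a compare-and-swap
 * loop on its 64-bit representation (GCC/Clang builtin assumed). */
static void atomic_add_double(volatile double *const addr, const double val) {
  union {
    double d;
    uint64_t i;
  } old_val, new_val;
  do {
    old_val.d = *addr;
    new_val.d = old_val.d + val;
  } while (!__sync_bool_compare_and_swap((volatile uint64_t *)addr, old_val.i,
                                         new_val.i));
}

/* Flush a local patch starting at 'offset' into the global mesh, wrapping
 * periodically, with one atomic operation per patch cell. */
static void flush_patch(double *const global_mesh, const double *const patch,
                        const int offset) {
  for (int i = 0; i < N_PATCH; ++i)
    atomic_add_double(&global_mesh[(offset + i) % N_MESH], patch[i]);
}

int main(void) {
  double global_mesh[N_MESH] = {0.};
  double patch[N_PATCH];

  /* Accumulate many "particle" contributions into the private patch: no
   * synchronisation is needed here because the patch is thread-local. */
  memset(patch, 0, sizeof(patch));
  for (int p = 0; p < 1000; ++p) patch[p % N_PATCH] += 1.;

  /* A single sweep of atomics writes the result back to the shared mesh. */
  flush_patch(global_mesh, patch, /*offset=*/14);

  for (int i = 0; i < N_MESH; ++i) printf("%.0f ", global_mesh[i]);
  printf("\n");
  return 0;
}

The design trade-off is the same one the mesh_uses_local_patches parameter exposes: the patch route costs a small allocation and an extra copy per cell, but it removes per-particle contention on the shared array, which is why it is the default (1) and why direct atomic writes (0) remain available purely as a tuning fallback.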