diff --git a/.gitignore b/.gitignore index f843002a1c03af25e89a601c75ac14f1203cca84..99aa018c9d8f826076cb26ae7cdf57fc314b8852 100644 --- a/.gitignore +++ b/.gitignore @@ -122,6 +122,7 @@ tests/testInteractions.sh tests/testSymmetry tests/testHydroMPIrules tests/testMaths +tests/testAtomic tests/testRandom tests/testRandomSpacing tests/testThreadpool diff --git a/configure.ac b/configure.ac index 6edd9586b9b8e92c29b0dabb8c45cd81a7dbb516..8495dca80d11f61470136bbd95eafbd78dccad02 100644 --- a/configure.ac +++ b/configure.ac @@ -239,6 +239,23 @@ if test "x$enable_debug" = "xyes"; then fi fi +# Are we using the regular tasks (with atomics and no task conflicts) or +# are we using the atomic-free task-conflicting (slower) version? +AC_ARG_ENABLE([atomics-within-tasks], + [AS_HELP_STRING([--disable-atomics-within-tasks], + [Disable the use of atomic operations within tasks. This creates more task conflicts + but allows for atomic-free and lock-free code within the tasks. This likely slows down + the code @<:@yes/no@:>@] + )], + [enable_atomics_within_tasks="$enableval"], + [enable_atomics_within_tasks="yes"] +) +AS_IF([test "x$enable_atomics_within_tasks" != "xno"], + , # Note: atomics are allowed by default, we define the macro if we don't want them. + [AC_DEFINE([SWIFT_TASKS_WITHOUT_ATOMICS],1,[Makes SWIFT use atomic-free and lock-free tasks.]) +]) + + # Check if task debugging is on. AC_ARG_ENABLE([task-debugging], [AS_HELP_STRING([--enable-task-debugging], @@ -2301,6 +2318,7 @@ AC_MSG_RESULT([ Black holes model : $with_black_holes Task dependencies : $with_task_order + Atomic operations in tasks : $enable_atomics_within_tasks Individual timers : $enable_timers Task debugging : $enable_task_debugging Threadpool debugging : $enable_threadpool_debugging diff --git a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml index 2770f9e27a5a50646575ccc0a9a2567bc548fc0e..8234b4b3e2d463b33d03f744386479723a29a9b4 100644 --- a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml @@ -68,17 +68,19 @@ FOF: min_group_size: 256 # The minimum no. of particles required for a group. linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). - scale_factor_first: 0.01 # Scale-factor of first FoF black hole seeding calls. - delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls. + scale_factor_first: 0.05 # Scale-factor of first FoF black hole seeding calls. + delta_time: 1.00751 # Scale-factor ratio between consecutive FoF black hole seeding calls. Scheduler: max_top_level_cells: 8 - tasks_per_cell: 5 cell_split_size: 200 Restarts: onexit: 1 - delta_hours: 1.0 + delta_hours: 6.0 + max_run_time: 71.5 # Three days minus fergie time + resubmit_on_exit: 1 + resubmit_command: ./resub.sh # Parameters related to the initial conditions InitialConditions: diff --git a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml index f175e2c6a3eef20371f6efa1d8f8f952393e4cd4..2de9a58ed5fe04f2061fd29d12bcce955293c447 100644 --- a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml +++ b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml @@ -68,17 +68,19 @@ FOF: min_group_size: 256 # The minimum no. of particles required for a group. linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. 
black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). - scale_factor_first: 0.01 # Scale-factor of first FoF black hole seeding calls. - delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls. + scale_factor_first: 0.05 # Scale-factor of first FoF black hole seeding calls. + delta_time: 1.00751 # Scale-factor ratio between consecutive FoF black hole seeding calls. Scheduler: max_top_level_cells: 16 - tasks_per_cell: 5 cell_split_size: 200 Restarts: onexit: 1 - delta_hours: 1.0 + delta_hours: 6.0 + max_run_time: 71.5 # Three days minus fergie time + resubmit_on_exit: 1 + resubmit_command: ./resub.sh # Parameters related to the initial conditions InitialConditions: diff --git a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml index b65f1061524516f3a5a9bba7c79b59c58a973c42..49070d3b315fcfd06c90fb779a060a38a28bd072 100644 --- a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml +++ b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml @@ -68,8 +68,8 @@ FOF: min_group_size: 256 # The minimum no. of particles required for a group. linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). - scale_factor_first: 0.01 # Scale-factor of first FoF black hole seeding calls. - delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls. + scale_factor_first: 0.05 # Scale-factor of first FoF black hole seeding calls. + delta_time: 1.00751 # Scale-factor ratio between consecutive FoF black hole seeding calls. Scheduler: max_top_level_cells: 32 @@ -78,7 +78,7 @@ Scheduler: Restarts: onexit: 1 - delta_hours: 1.0 + delta_hours: 6.0 # Parameters related to the initial conditions InitialConditions: diff --git a/src/Makefile.am b/src/Makefile.am index 65dbae7bba5afc31db07c459fb0de60bd85df390..cf6c71644d7161001828c22260c5b56efce9ac75 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -114,7 +114,7 @@ AM_SOURCES = space.c runner_main.c runner_doiact_hydro.c runner_doiact_limiter.c # Include files for distribution, not installation. nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ - gravity_iact.h kernel_long_gravity.h vector.h cache.h \ + gravity_iact.h kernel_long_gravity.h vector.h accumulate.h cache.h \ runner_doiact_nosort.h runner_doiact_hydro.h runner_doiact_stars.h runner_doiact_black_holes.h runner_doiact_grav.h \ runner_doiact_functions_hydro.h runner_doiact_functions_stars.h runner_doiact_functions_black_holes.h \ runner_doiact_functions_limiter.h runner_doiact_limiter.h units.h intrinsics.h minmax.h \ diff --git a/src/accumulate.h b/src/accumulate.h new file mode 100644 index 0000000000000000000000000000000000000000..877ac23baa1e9041c45a8d3dcc10a1d660ea6c68 --- /dev/null +++ b/src/accumulate.h @@ -0,0 +1,208 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_ACCUMULATE_H +#define SWIFT_ACCUMULATE_H + +/* Config parameters. */ +#include "../config.h" + +/* Local includes */ +#include "atomic.h" +#include "minmax.h" + +/** + * @file accumulate.h + * @brief Defines a series of functions used to update the fields of a particle + * atomically. These functions should be used in any task that can run in + * parallel to another one. + */ + +/** + * @brief Add x to the value stored at the location address (int version) + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to add to *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_add_i( + volatile int *const address, const int x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address += x; +#else + atomic_add(address, x); +#endif +} + +/** + * @brief Add x to the value stored at the location address (long long version) + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to add to *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_add_ll( + volatile long long *const address, const long long x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address += x; +#else + atomic_add(address, x); +#endif +} + +/** + * @brief Add x to the value stored at the location address (float version) + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to add to *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_add_f( + volatile float *const address, const float x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address += x; +#else + atomic_add_f(address, x); +#endif +} + +/** + * @brief Add x to the value stored at the location address (double version) + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to add to *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_add_d( + volatile double *const address, const double x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address += x; +#else + atomic_add_d(address, x); +#endif +} + +/** + * @brief Add 1 to the value stored at the location address (int version) + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + */ +__attribute__((always_inline)) INLINE static void accumulate_inc_i( + volatile int *const address) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + (*address)++; +#else + atomic_inc(address); +#endif +} + +/** + * @brief Add 1 to the value stored at the location address (long long version) + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. 
+ * + * @param address The address to update. + */ +__attribute__((always_inline)) INLINE static void accumulate_inc_ll( + volatile long long *const address) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + (*address)++; +#else + atomic_inc(address); +#endif +} + +/** + * @brief Compute the max of x and the value stored at the location address + * and store the value at the address (int8_t version). + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to max against *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_max_c( + volatile int8_t *const address, const int8_t x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address = max(*address, x); +#else + atomic_max_c(address, x); +#endif +} + +/** + * @brief Compute the max of x and the value stored at the location address + * and store the value at the address (int version). + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to max against *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_max_i( + volatile int *const address, const int x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address = max(*address, x); +#else + atomic_max(address, x); +#endif +} + +/** + * @brief Compute the max of x and the value stored at the location address + * and store the value at the address (float version). + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to max against *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_max_f( + volatile float *const address, const float x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address = max(*address, x); +#else + atomic_max_f(address, x); +#endif +} + +#endif /* SWIFT_ACCUMULATE_H */ diff --git a/src/atomic.h b/src/atomic.h index 8a790a7f6cbbf190c3d87d590f10c6f1676c2d24..8ba1b0e692b08806d18eb453d9211a4829a8943a 100644 --- a/src/atomic.h +++ b/src/atomic.h @@ -26,6 +26,9 @@ #include "inline.h" #include "minmax.h" +/* Standard includes */ +#include <stdint.h> + #define atomic_add(v, i) __sync_fetch_and_add(v, i) #define atomic_sub(v, i) __sync_fetch_and_sub(v, i) #define atomic_or(v, i) __sync_fetch_and_or(v, i) @@ -35,6 +38,29 @@ #define atomic_cas(v, o, n) __sync_val_compare_and_swap(v, o, n) #define atomic_swap(v, n) __sync_lock_test_and_set(v, n) +/** + * @brief Atomic min operation on ints. + * + * This is a text-book implementation based on an atomic CAS. + * + * @param address The address to update. + * @param y The value to update the address with. + */ +__attribute__((always_inline)) INLINE static void atomic_min( + volatile int *const address, const int y) { + + int *int_ptr = (int *)address; + + int test_val, old_val, new_val; + old_val = *address; + + do { + test_val = old_val; + new_val = min(old_val, y); + old_val = atomic_cas(int_ptr, test_val, new_val); + } while (test_val != old_val); +} + /** * @brief Atomic min operation on floats. * @@ -66,29 +92,6 @@ __attribute__((always_inline)) INLINE static void atomic_min_f( } while (test_val.as_int != old_val.as_int); } -/** - * @brief Atomic min operation on ints. - * - * This is a text-book implementation based on an atomic CAS. - * - * @param address The address to update. - * @param y The value to update the address with.
- */ -__attribute__((always_inline)) INLINE static void atomic_min( - volatile int *address, int y) { - - int *int_ptr = (int *)address; - - int test_val, old_val, new_val; - old_val = *address; - - do { - test_val = old_val; - new_val = min(old_val, y); - old_val = atomic_cas(int_ptr, test_val, new_val); - } while (test_val != old_val); -} - /** * @brief Atomic min operation on doubles. * @@ -121,6 +124,50 @@ __attribute__((always_inline)) INLINE static void atomic_min_d( } while (test_val.as_long_long != old_val.as_long_long); } +/** + * @brief Atomic max operation on int8_t. + * + * This is a text-book implementation based on an atomic CAS. + * + * @param address The address to update. + * @param y The value to update the address with. + */ +__attribute__((always_inline)) INLINE static void atomic_max_c( + volatile int8_t *const address, const int8_t y) { + + int8_t test_val, old_val, new_val; + old_val = *address; + + do { + test_val = old_val; + new_val = max(old_val, y); + old_val = atomic_cas(address, test_val, new_val); + } while (test_val != old_val); +} + +/** + * @brief Atomic max operation on ints. + * + * This is a text-book implementation based on an atomic CAS. + * + * @param address The address to update. + * @param y The value to update the address with. + */ +__attribute__((always_inline)) INLINE static void atomic_max( + volatile int *const address, const int y) { + + int *int_ptr = (int *)address; + + int test_val, old_val, new_val; + old_val = *address; + + do { + test_val = old_val; + new_val = max(old_val, y); + old_val = atomic_cas(int_ptr, test_val, new_val); + } while (test_val != old_val); +} + /** * @brief Atomic max operation on floats. * diff --git a/src/cell.c b/src/cell.c index 6eef7d32d94ab6edbb418d6eb1732da1062ce538..056c0aee013e1321be61a3542906b98a97809b83 100644 --- a/src/cell.c +++ b/src/cell.c @@ -1228,7 +1228,7 @@ int cell_unpack_sf_counts(struct cell *restrict c, * @return 0 on success, 1 on failure */ int cell_locktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to lock this cell. */ if (c->hydro.hold || lock_trylock(&c->hydro.lock) != 0) { @@ -1288,7 +1288,7 @@ int cell_locktree(struct cell *c) { * @return 0 on success, 1 on failure */ int cell_glocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to lock this cell. */ if (c->grav.phold || lock_trylock(&c->grav.plock) != 0) { @@ -1348,7 +1348,7 @@ int cell_glocktree(struct cell *c) { * @return 0 on success, 1 on failure */ int cell_mlocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to lock this cell. */ if (c->grav.mhold || lock_trylock(&c->grav.mlock) != 0) { @@ -1408,7 +1408,7 @@ int cell_mlocktree(struct cell *c) { * @return 0 on success, 1 on failure */ int cell_slocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to lock this cell. */ if (c->stars.hold || lock_trylock(&c->stars.lock) != 0) { @@ -1468,7 +1468,7 @@ int cell_slocktree(struct cell *c) { * @return 0 on success, 1 on failure */ int cell_blocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to lock this cell. */ if (c->black_holes.hold || lock_trylock(&c->black_holes.lock) != 0) { @@ -1528,7 +1528,7 @@ int cell_blocktree(struct cell *c) { * @param c The #cell. */ void cell_unlocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to unlock this cell. */ if (lock_unlock(&c->hydro.lock) != 0) error("Failed to unlock cell."); @@ -1546,7 +1546,7 @@ void cell_unlocktree(struct cell *c) { * @param c The #cell. 
*/ void cell_gunlocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to unlock this cell. */ if (lock_unlock(&c->grav.plock) != 0) error("Failed to unlock cell."); @@ -1564,7 +1564,7 @@ void cell_gunlocktree(struct cell *c) { * @param c The #cell. */ void cell_munlocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to unlock this cell. */ if (lock_unlock(&c->grav.mlock) != 0) error("Failed to unlock cell."); @@ -1582,7 +1582,7 @@ void cell_munlocktree(struct cell *c) { * @param c The #cell. */ void cell_sunlocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to unlock this cell. */ if (lock_unlock(&c->stars.lock) != 0) error("Failed to unlock cell."); @@ -1600,7 +1600,7 @@ void cell_sunlocktree(struct cell *c) { * @param c The #cell. */ void cell_bunlocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to unlock this cell. */ if (lock_unlock(&c->black_holes.lock) != 0) error("Failed to unlock cell."); @@ -5358,16 +5358,31 @@ void cell_check_timesteps(const struct cell *c, const integertime_t ti_current, /* Only check cells that have at least one non-inhibited particle */ if (count > 0) { - if (ti_end_min != c->hydro.ti_end_min) - error( - "Non-matching ti_end_min. Cell=%lld true=%lld ti_current=%lld " - "depth=%d", - c->hydro.ti_end_min, ti_end_min, ti_current, c->depth); + if (count != c->hydro.count) { + + /* Note that we use a < as the particle with the smallest time-bin + might have been swallowed. This means we will run this cell with + 0 active particles but that's not wrong */ + if (ti_end_min < c->hydro.ti_end_min) + error( + "Non-matching ti_end_min. Cell=%lld true=%lld ti_current=%lld " + "depth=%d", + c->hydro.ti_end_min, ti_end_min, ti_current, c->depth); + + } else /* Normal case: nothing was swallowed/converted */ { + if (ti_end_min != c->hydro.ti_end_min) + error( + "Non-matching ti_end_min. Cell=%lld true=%lld ti_current=%lld " + "depth=%d", + c->hydro.ti_end_min, ti_end_min, ti_current, c->depth); + } + if (ti_end_max > c->hydro.ti_end_max) error( "Non-matching ti_end_max. Cell=%lld true=%lld ti_current=%lld " "depth=%d", c->hydro.ti_end_max, ti_end_max, ti_current, c->depth); + if (ti_beg_max != c->hydro.ti_beg_max) error( "Non-matching ti_beg_max. Cell=%lld true=%lld ti_current=%lld " diff --git a/src/engine.c b/src/engine.c index 7f87bdc6463df32ead4397b0c8c9ceffbcfaa7de..e5e7b78dcafd9c14a032dd3a4249e6b0a52fab4d 100644 --- a/src/engine.c +++ b/src/engine.c @@ -2579,8 +2579,10 @@ void engine_step(struct engine *e) { /* Prepare the tasks to be launched, rebuild or repartition if needed. */ engine_prepare(e); - /* Print the number of active tasks ? */ +#ifdef SWIFT_DEBUG_CHECKS + /* Print the number of active tasks */ if (e->verbose) engine_print_task_counts(e); +#endif /* Dump local cells and active particle counts. */ // dumpCells("cells", 1, 0, 0, 0, e->s, e->nodeID, e->step); diff --git a/src/gravity/MultiSoftening/gravity.h b/src/gravity/MultiSoftening/gravity.h index 07ed0e78b164e5ee3f01b8c9a62b940201b31aa4..d7716cf712254054e3cf364a807f3cb1c12355f3 100644 --- a/src/gravity/MultiSoftening/gravity.h +++ b/src/gravity/MultiSoftening/gravity.h @@ -23,6 +23,7 @@ #include <float.h> /* Local includes. 
*/ +#include "accumulate.h" #include "cosmology.h" #include "error.h" #include "gravity_properties.h" @@ -77,7 +78,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening( __attribute__((always_inline)) INLINE static void gravity_add_comoving_potential(struct gpart* restrict gp, float pot) { - gp->potential += pot; + accumulate_add_f(&gp->potential, pot); } /** diff --git a/src/gravity/Potential/gravity.h b/src/gravity/Potential/gravity.h index 3a3e9680016ef51c554691c6f43f8279cde8caf9..f9a9502a528c161fbc82b3028f303b7f9cad49f8 100644 --- a/src/gravity/Potential/gravity.h +++ b/src/gravity/Potential/gravity.h @@ -23,6 +23,7 @@ #include <float.h> /* Local includes. */ +#include "accumulate.h" #include "cosmology.h" #include "gravity_properties.h" #include "kernel_gravity.h" @@ -63,7 +64,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening( __attribute__((always_inline)) INLINE static void gravity_add_comoving_potential(struct gpart* restrict gp, float pot) { - gp->potential += pot; + accumulate_add_f(&gp->potential, pot); } /** diff --git a/src/gravity_cache.h b/src/gravity_cache.h index 912961d2e06dc748039f54c72bd9b33fd2547df2..ba3432d7e732b783547ddd2a597dbdf5723bf606 100644 --- a/src/gravity_cache.h +++ b/src/gravity_cache.h @@ -23,6 +23,7 @@ #include "../config.h" /* Local headers */ +#include "accumulate.h" #include "align.h" #include "error.h" #include "gravity.h" @@ -491,9 +492,9 @@ __attribute__((always_inline)) INLINE static void gravity_cache_write_back( /* Write stuff back to the particles */ for (int i = 0; i < gcount; ++i) { if (active[i]) { - gparts[i].a_grav[0] += a_x[i]; - gparts[i].a_grav[1] += a_y[i]; - gparts[i].a_grav[2] += a_z[i]; + accumulate_add_f(&gparts[i].a_grav[0], a_x[i]); + accumulate_add_f(&gparts[i].a_grav[1], a_y[i]); + accumulate_add_f(&gparts[i].a_grav[2], a_z[i]); gravity_add_comoving_potential(&gparts[i], pot[i]); } } diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c index 0efec0bc80fc381d9bd6ff733cc546e1885c1d9e..cbfb9ef7a2970b222bf67a915bd5044bedaaaae3 100644 --- a/src/mesh_gravity.c +++ b/src/mesh_gravity.c @@ -28,6 +28,7 @@ #include "mesh_gravity.h" /* Local includes. */ +#include "accumulate.h" #include "active.h" #include "debug.h" #include "engine.h" @@ -334,10 +335,10 @@ void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, const int N, /* ---- */ /* Store things back */ + accumulate_add_f(&gp->a_grav[0], fac * a[0]); + accumulate_add_f(&gp->a_grav[1], fac * a[1]); + accumulate_add_f(&gp->a_grav[2], fac * a[2]); gravity_add_comoving_potential(gp, p); - gp->a_grav[0] += fac * a[0]; - gp->a_grav[1] += fac * a[1]; - gp->a_grav[2] += fac * a[2]; #ifdef SWIFT_GRAVITY_FORCE_CHECKS gp->potential_PM = p; gp->a_grav_PM[0] = fac * a[0]; diff --git a/src/multipole.h b/src/multipole.h index e03e302c4186a54442518a687c444ff4a7216b1e..55de9c0baebe0371e4e61860e2362907874cba6b 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -1608,12 +1608,19 @@ INLINE static void gravity_M2L_apply( const struct potential_derivatives_M2L *pot) { #ifdef SWIFT_DEBUG_CHECKS - /* Count interactions */ - l_b->num_interacted += m_a->num_gpart; + /* Count all interactions + * Note that despite being in a section of the code protected by locks, + * we must use atomics here as the long-range task may update this + * counter in a lock-free section of code. 
*/ + accumulate_add_ll(&l_b->num_interacted, m_a->num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS - l_b->num_interacted_tree += m_a->num_gpart; + /* Count tree interactions + * Note that despite being in a section of the code protected by locks, + * we must use atomics here as the long-range task may update this + * counter in a lock-free section of code. */ + accumulate_add_ll(&l_b->num_interacted_tree, m_a->num_gpart); #endif /* Record that this tensor has received contributions */ @@ -2467,12 +2474,12 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, #ifdef SWIFT_DEBUG_CHECKS if (lb->num_interacted == 0) error("Interacting with empty field tensor"); - gp->num_interacted += lb->num_interacted; + accumulate_add_ll(&gp->num_interacted, lb->num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS - gp->num_interacted_m2l += lb->num_interacted_tree; - gp->num_interacted_pm += lb->num_interacted_pm; + accumulate_add_ll(&gp->num_interacted_m2l, lb->num_interacted_tree); + accumulate_add_ll(&gp->num_interacted_pm, lb->num_interacted_pm); #endif /* Local accumulator */ @@ -2589,15 +2596,15 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, #endif /* Update the particle */ - gp->a_grav[0] += a_grav[0]; - gp->a_grav[1] += a_grav[1]; - gp->a_grav[2] += a_grav[2]; + accumulate_add_f(&gp->a_grav[0], a_grav[0]); + accumulate_add_f(&gp->a_grav[1], a_grav[1]); + accumulate_add_f(&gp->a_grav[2], a_grav[2]); gravity_add_comoving_potential(gp, pot); #ifdef SWIFT_GRAVITY_FORCE_CHECKS - gp->a_grav_m2l[0] += a_grav[0]; - gp->a_grav_m2l[1] += a_grav[1]; - gp->a_grav_m2l[2] += a_grav[2]; + accumulate_add_f(&gp->a_grav_m2l[0], a_grav[0]); + accumulate_add_f(&gp->a_grav_m2l[1], a_grav[1]); + accumulate_add_f(&gp->a_grav_m2l[2], a_grav[2]); #endif } diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c index 043b662b101d99698bbc46cf39ae2096e2725c91..91abe9e91060b19fa6df1bbbb29e0de6e6442cad 100644 --- a/src/runner_doiact_grav.c +++ b/src/runner_doiact_grav.c @@ -264,13 +264,13 @@ static INLINE void runner_dopair_grav_pp_full( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - gparts_i[pid].num_interacted++; + accumulate_inc_ll(&gparts_i[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the p2p interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - gparts_i[pid].num_interacted_p2p++; + accumulate_inc_ll(&gparts_i[pid].num_interacted_p2p); #endif } @@ -281,9 +281,9 @@ static INLINE void runner_dopair_grav_pp_full( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - gparts_i[pid].a_grav_p2p[0] += a_x; - gparts_i[pid].a_grav_p2p[1] += a_y; - gparts_i[pid].a_grav_p2p[2] += a_z; + accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x); + accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y); + accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z); #endif } } @@ -420,13 +420,13 @@ static INLINE void runner_dopair_grav_pp_truncated( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - gparts_i[pid].num_interacted++; + accumulate_inc_ll(&gparts_i[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the p2p interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - gparts_i[pid].num_interacted_p2p++; + 
accumulate_inc_ll(&gparts_i[pid].num_interacted_p2p); #endif } @@ -437,9 +437,9 @@ static INLINE void runner_dopair_grav_pp_truncated( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - gparts_i[pid].a_grav_p2p[0] += a_x; - gparts_i[pid].a_grav_p2p[1] += a_y; - gparts_i[pid].a_grav_p2p[2] += a_z; + accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x); + accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y); + accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z); #endif } } @@ -565,16 +565,18 @@ static INLINE void runner_dopair_grav_pm_full( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ if (pid < gcount_i) - gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart; + accumulate_add_ll(&gparts_i[pid].num_interacted, + cj->grav.multipole->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the M2P interaction counter and forces. */ if (pid < gcount_i) { - gparts_i[pid].num_interacted_m2p += cj->grav.multipole->m_pole.num_gpart; - gparts_i[pid].a_grav_m2p[0] += f_x; - gparts_i[pid].a_grav_m2p[1] += f_y; - gparts_i[pid].a_grav_m2p[2] += f_z; + accumulate_add_ll(&gparts_i[pid].num_interacted_m2p, + cj->grav.multipole->m_pole.num_gpart); + accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x); + accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y); + accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z); } #endif } @@ -706,16 +708,18 @@ static INLINE void runner_dopair_grav_pm_truncated( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ if (pid < gcount_i) - gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart; + accumulate_add_ll(&gparts_i[pid].num_interacted, + cj->grav.multipole->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the M2P interaction counter and forces. 
*/ if (pid < gcount_i) { - gparts_i[pid].num_interacted_m2p += cj->grav.multipole->m_pole.num_gpart; - gparts_i[pid].a_grav_m2p[0] += f_x; - gparts_i[pid].a_grav_m2p[1] += f_y; - gparts_i[pid].a_grav_m2p[2] += f_z; + accumulate_add_ll(&gparts_i[pid].num_interacted_m2p, + cj->grav.multipole->m_pole.num_gpart); + accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x); + accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y); + accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z); } #endif } @@ -1041,13 +1045,13 @@ static INLINE void runner_doself_grav_pp_full( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - gparts[pid].num_interacted++; + accumulate_inc_ll(&gparts[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the P2P interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - gparts[pid].num_interacted_p2p++; + accumulate_inc_ll(&gparts[pid].num_interacted_p2p); #endif } @@ -1058,9 +1062,9 @@ static INLINE void runner_doself_grav_pp_full( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - gparts[pid].a_grav_p2p[0] += a_x; - gparts[pid].a_grav_p2p[1] += a_y; - gparts[pid].a_grav_p2p[2] += a_z; + accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x); + accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y); + accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z); #endif } } @@ -1180,13 +1184,13 @@ static INLINE void runner_doself_grav_pp_truncated( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - gparts[pid].num_interacted++; + accumulate_inc_ll(&gparts[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the P2P interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - gparts[pid].num_interacted_p2p++; + accumulate_inc_ll(&gparts[pid].num_interacted_p2p); #endif } @@ -1197,9 +1201,9 @@ static INLINE void runner_doself_grav_pp_truncated( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - gparts[pid].a_grav_p2p[0] += a_x; - gparts[pid].a_grav_p2p[1] += a_y; - gparts[pid].a_grav_p2p[2] += a_z; + accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x); + accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y); + accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z); #endif } } @@ -1355,11 +1359,29 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); #endif +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + /* Lock the multipoles + * Note we impose a hierarchy to solve the dining philosopher problem */ + if (ci < cj) { + lock_lock(&ci->grav.mlock); + lock_lock(&cj->grav.mlock); + } else { + lock_lock(&cj->grav.mlock); + lock_lock(&ci->grav.mlock); + } +#endif + /* Let's interact at this level */ gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot, multi_i, multi_j, ci->grav.multipole->CoM, cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + /* Unlock the multipoles */ + if (lock_unlock(&ci->grav.mlock) != 0) error("Failed to unlock multipole"); + if (lock_unlock(&cj->grav.mlock) != 0) error("Failed to unlock multipole"); +#endif + TIMER_TOC(timer_dopair_grav_mm); } @@ -1371,9 +1393,9 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, * @param ci The #cell with field tensor to interact. 
* @param cj The #cell with the multipole. */ -static INLINE void runner_dopair_grav_mm_nonsym( - struct runner *r, struct cell *restrict ci, - const struct cell *restrict cj) { +static INLINE void runner_dopair_grav_mm_nonsym(struct runner *r, + struct cell *restrict ci, + struct cell *restrict cj) { /* Some constants */ const struct engine *e = r->e; @@ -1406,10 +1428,28 @@ static INLINE void runner_dopair_grav_mm_nonsym( cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); #endif +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + /* Lock the multipoles + * Note we impose a hierarchy to solve the dining philosopher problem */ + if (ci < cj) { + lock_lock(&ci->grav.mlock); + lock_lock(&cj->grav.mlock); + } else { + lock_lock(&cj->grav.mlock); + lock_lock(&ci->grav.mlock); + } +#endif + /* Let's interact at this level */ gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM, cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS + /* Unlock the multipoles */ + if (lock_unlock(&ci->grav.mlock) != 0) error("Failed to unlock multipole"); + if (lock_unlock(&cj->grav.mlock) != 0) error("Failed to unlock multipole"); +#endif + TIMER_TOC(timer_dopair_grav_mm); } @@ -1638,17 +1678,21 @@ void runner_dopair_recursive_grav(struct runner *r, struct cell *ci, #ifdef SWIFT_DEBUG_CHECKS if (cell_is_active_gravity(ci, e)) - multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; + accumulate_add_ll(&multi_i->pot.num_interacted, + multi_j->m_pole.num_gpart); if (cell_is_active_gravity(cj, e)) - multi_j->pot.num_interacted += multi_i->m_pole.num_gpart; + accumulate_add_ll(&multi_j->pot.num_interacted, + multi_i->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Need to account for the interactions we missed */ if (cell_is_active_gravity(ci, e)) - multi_i->pot.num_interacted_pm += multi_j->m_pole.num_gpart; + accumulate_add_ll(&multi_i->pot.num_interacted_pm, + multi_j->m_pole.num_gpart); if (cell_is_active_gravity(cj, e)) - multi_j->pot.num_interacted_pm += multi_i->m_pole.num_gpart; + accumulate_add_ll(&multi_j->pot.num_interacted_pm, + multi_i->m_pole.num_gpart); #endif return; } @@ -1833,8 +1877,8 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { for (int n = 0; n < nr_cells_with_particles; ++n) { /* Handle on the top-level cell and it's gravity business*/ - const struct cell *cj = &cells[cells_with_particles[n]]; - const struct gravity_tensors *const multi_j = cj->grav.multipole; + struct cell *cj = &cells[cells_with_particles[n]]; + struct gravity_tensors *const multi_j = cj->grav.multipole; /* Avoid self contributions */ if (top == cj) continue; @@ -1854,12 +1898,14 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { #ifdef SWIFT_DEBUG_CHECKS /* Need to account for the interactions we missed */ - multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; + accumulate_add_ll(&multi_i->pot.num_interacted, + multi_j->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Need to account for the interactions we missed */ - multi_i->pot.num_interacted_pm += multi_j->m_pole.num_gpart; + accumulate_add_ll(&multi_i->pot.num_interacted_pm, + multi_j->m_pole.num_gpart); #endif /* Record that this multipole received a contribution */ diff --git a/src/task.c b/src/task.c index 19ba01580e6ca519a48fd934d0bb8c21b0670343..f821eed5e6b0fd6d29ebc00882584abb4548ef7c 100644 --- a/src/task.c +++ b/src/task.c @@ -456,8 +456,10 @@ void task_unlock(struct task *t) { case 
task_type_self: case task_type_sub_self: if (subtype == task_subtype_grav) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS cell_gunlocktree(ci); cell_munlocktree(ci); +#endif } else if ((subtype == task_subtype_stars_density) || (subtype == task_subtype_stars_feedback)) { cell_sunlocktree(ci); @@ -470,7 +472,11 @@ void task_unlock(struct task *t) { cell_unlocktree(ci); } else if (subtype == task_subtype_do_bh_swallow) { cell_bunlocktree(ci); - } else { + } else if (subtype == task_subtype_limiter) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + cell_unlocktree(ci); +#endif + } else { /* hydro */ cell_unlocktree(ci); } break; @@ -478,10 +484,12 @@ void task_unlock(struct task *t) { case task_type_pair: case task_type_sub_pair: if (subtype == task_subtype_grav) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS cell_gunlocktree(ci); cell_gunlocktree(cj); cell_munlocktree(ci); cell_munlocktree(cj); +#endif } else if ((subtype == task_subtype_stars_density) || (subtype == task_subtype_stars_feedback)) { cell_sunlocktree(ci); @@ -499,24 +507,35 @@ void task_unlock(struct task *t) { } else if (subtype == task_subtype_do_bh_swallow) { cell_bunlocktree(ci); cell_bunlocktree(cj); - } else { + } else if (subtype == task_subtype_limiter) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + cell_unlocktree(ci); + cell_unlocktree(cj); +#endif + } else { /* hydro */ cell_unlocktree(ci); cell_unlocktree(cj); } break; case task_type_grav_down: +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS cell_gunlocktree(ci); cell_munlocktree(ci); +#endif break; case task_type_grav_long_range: +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS cell_munlocktree(ci); +#endif break; case task_type_grav_mm: +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS cell_munlocktree(ci); cell_munlocktree(cj); +#endif break; case task_type_star_formation: @@ -612,6 +631,7 @@ int task_lock(struct task *t) { case task_type_self: case task_type_sub_self: if (subtype == task_subtype_grav) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS /* Lock the gparts and the m-pole */ if (ci->grav.phold || ci->grav.mhold) return 0; if (cell_glocktree(ci) != 0) @@ -620,6 +640,7 @@ int task_lock(struct task *t) { cell_gunlocktree(ci); return 0; } +#endif } else if ((subtype == task_subtype_stars_density) || (subtype == task_subtype_stars_feedback)) { if (ci->stars.hold) return 0; @@ -643,6 +664,11 @@ int task_lock(struct task *t) { } else if (subtype == task_subtype_do_bh_swallow) { if (ci->black_holes.hold) return 0; if (cell_blocktree(ci) != 0) return 0; + } else if (subtype == task_subtype_limiter) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + if (ci->hydro.hold) return 0; + if (cell_locktree(ci) != 0) return 0; +#endif } else { /* subtype == hydro */ if (ci->hydro.hold) return 0; if (cell_locktree(ci) != 0) return 0; @@ -652,6 +678,7 @@ int task_lock(struct task *t) { case task_type_pair: case task_type_sub_pair: if (subtype == task_subtype_grav) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS /* Lock the gparts and the m-pole in both cells */ if (ci->grav.phold || cj->grav.phold) return 0; if (cell_glocktree(ci) != 0) return 0; @@ -668,6 +695,7 @@ int task_lock(struct task *t) { cell_munlocktree(ci); return 0; } +#endif } else if ((subtype == task_subtype_stars_density) || (subtype == task_subtype_stars_feedback)) { /* Lock the stars and the gas particles in both cells */ @@ -719,6 +747,15 @@ int task_lock(struct task *t) { cell_bunlocktree(ci); return 0; } + } else if (subtype == task_subtype_limiter) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + if (ci->hydro.hold || cj->hydro.hold) return 0; + if (cell_locktree(ci) != 0) return 0; + if (cell_locktree(cj) != 
0) { + cell_unlocktree(ci); + return 0; + } +#endif } else { /* subtype == hydro */ /* Lock the parts in both cells */ if (ci->hydro.hold || cj->hydro.hold) return 0; @@ -731,6 +768,7 @@ int task_lock(struct task *t) { break; case task_type_grav_down: +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS /* Lock the gparts and the m-poles */ if (ci->grav.phold || ci->grav.mhold) return 0; if (cell_glocktree(ci) != 0) @@ -739,15 +777,19 @@ int task_lock(struct task *t) { cell_gunlocktree(ci); return 0; } +#endif break; case task_type_grav_long_range: +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS /* Lock the m-poles */ if (ci->grav.mhold) return 0; if (cell_mlocktree(ci) != 0) return 0; +#endif break; case task_type_grav_mm: +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS /* Lock both m-poles */ if (ci->grav.mhold || cj->grav.mhold) return 0; if (cell_mlocktree(ci) != 0) return 0; @@ -755,6 +797,7 @@ int task_lock(struct task *t) { cell_munlocktree(ci); return 0; } +#endif break; case task_type_star_formation: diff --git a/src/timestep_limiter_iact.h b/src/timestep_limiter_iact.h index a4f5c7c1af48db7f6926f0fac1c09a4db2a0c492..839b1a3dc450ca12b8b0c8824fd75cd87063a7b8 100644 --- a/src/timestep_limiter_iact.h +++ b/src/timestep_limiter_iact.h @@ -88,7 +88,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( if (pj->time_bin > pi->time_bin + time_bin_neighbour_max_delta_bin) { /* Store the smallest time bin that woke up this particle */ - pj->limiter_data.wakeup = max(pj->limiter_data.wakeup, -pi->time_bin); + accumulate_max_c(&pj->limiter_data.wakeup, -pi->time_bin); } } diff --git a/tests/Makefile.am b/tests/Makefile.am index 11426ac89970917cf121351e266aaa67cfd99847..e24a2a69b07d9ebf8ff10f367f7eaf0724817f7c 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -29,7 +29,8 @@ TESTS = testGreetings testMaths testReading.sh testKernel \ testPeriodicBC.sh testPeriodicBCPerturbed.sh testPotentialSelf \ testPotentialPair testEOS testUtilities testSelectOutput.sh \ testCbrt testCosmology testOutputList testFormat.sh \ - test27cellsStars.sh test27cellsStarsPerturbed.sh testHydroMPIrules + test27cellsStars.sh test27cellsStarsPerturbed.sh testHydroMPIrules \ + testAtomic # List of test programs to compile check_PROGRAMS = testGreetings testReading testTimeIntegration \ @@ -41,7 +42,8 @@ check_PROGRAMS = testGreetings testReading testTimeIntegration \ testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC \ testGravityDerivatives testPotentialSelf testPotentialPair testEOS testUtilities \ testSelectOutput testCbrt testCosmology testOutputList test27cellsStars \ - test27cellsStars_subset testCooling testComovingCooling testFeedback testHashmap testHydroMPIrules + test27cellsStars_subset testCooling testComovingCooling testFeedback testHashmap \ + testAtomic testHydroMPIrules # Rebuild tests when SWIFT is updated. $(check_PROGRAMS): ../src/.libs/libswiftsim.a @@ -51,6 +53,8 @@ testGreetings_SOURCES = testGreetings.c testMaths_SOURCES = testMaths.c +testAtomic_SOURCES = testAtomic.c + testRandom_SOURCES = testRandom.c testRandomSpacing_SOURCES = testRandomSpacing.c diff --git a/tests/testAtomic.c b/tests/testAtomic.c new file mode 100644 index 0000000000000000000000000000000000000000..47eca7b736d1687654bb1d2c5b40366a05e8af73 --- /dev/null +++ b/tests/testAtomic.c @@ -0,0 +1,146 @@ +/******************************************************************************* + * This file is part of SWIFT. 
+ * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Standard includes. */ +#include <fenv.h> + +/* Local includes */ +#include "swift.h" + +const int array_size = 2048 * 2048; +const int num_threads = 64; +const int chunk_size = 64; + +void map_function_sum_f(void *data, int num_elements, void *extra_data) { + + float *array = (float *)data; + float *sum = (float *)extra_data; + + for (int i = 0; i < num_elements; ++i) atomic_add_f(sum, array[i]); +} + +void map_function_sum_ll(void *data, int num_elements, void *extra_data) { + + long long *array = (long long *)data; + long long *sum = (long long *)extra_data; + + for (int i = 0; i < num_elements; ++i) atomic_add(sum, array[i]); +} + +void map_function_inc_ll(void *data, int num_elements, void *extra_data) { + + long long *sum = (long long *)extra_data; + + for (int i = 0; i < num_elements; ++i) atomic_inc(sum); +} + +int main(int argc, char *argv[]) { + + /* Initialize CPU frequency, this also starts time. 
*/ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + +/* Choke on FPEs */ +#ifdef HAVE_FE_ENABLE_EXCEPT + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + + /* Get some randomness going */ + const int seed = time(NULL); + message("Seed = %d", seed); + srand(seed); + + /* Start a bunch of threads */ + printf("# Creating threadpool with %d threads\n", num_threads); + struct threadpool tp; + threadpool_init(&tp, num_threads); + + /* Create some random data */ + float *array_f = malloc(array_size * sizeof(float)); + long long *array_ll = malloc(array_size * sizeof(long long)); + + for (int i = 0; i < array_size; ++i) { + array_f[i] = rand() / ((float)RAND_MAX); + array_ll[i] = rand(); + } + + /*** Test the addition atomic ops *******************************/ + + /* float case */ + + /* Compute the real answer */ + float real_sum_f = 0.f; + for (int i = 0; i < array_size; ++i) { + real_sum_f += array_f[i]; + } + + /* Compute the answer via threads and atomic */ + float atomic_sum_f = 0.f; + threadpool_map(&tp, map_function_sum_f, array_f, array_size, sizeof(float), + chunk_size, &atomic_sum_f); + + const double diff_sum_f = (double)real_sum_f - (double)atomic_sum_f; + const double sum_sum_f = (double)real_sum_f + (double)atomic_sum_f; + const double rel_sum_f = 0.5 * fabs(diff_sum_f) / sum_sum_f; + message("Real sum = %.7e -- atomic sum = %.7e rel=%e", real_sum_f, + atomic_sum_f, rel_sum_f); + + /* long long case */ + + /* Compute the real answer */ + long long real_sum_ll = 0LL; + for (int i = 0; i < array_size; ++i) { + real_sum_ll += array_ll[i]; + } + + /* Compute the answer via threads and atomic */ + long long atomic_sum_ll = 0LL; + threadpool_map(&tp, map_function_sum_ll, array_ll, array_size, + sizeof(long long), chunk_size, &atomic_sum_ll); + + const double diff_sum_ll = (double)real_sum_ll - (double)atomic_sum_ll; + const double sum_sum_ll = (double)real_sum_ll + (double)atomic_sum_ll; + const double rel_sum_ll = 0.5 * fabs(diff_sum_ll) / sum_sum_ll; + message("Real sum = %lld -- atomic sum = %lld rel=%e", real_sum_ll, + atomic_sum_ll, rel_sum_ll); + + /*** Test the inc atomic ops *******************************/ + + long long real_inc_ll = array_size; + + /* Compute the answer via threads and atomic */ + long long atomic_inc_ll = 0LL; + threadpool_map(&tp, map_function_inc_ll, array_ll, array_size, + sizeof(long long), chunk_size, &atomic_inc_ll); + + const double diff_inc_ll = (double)real_inc_ll - (double)atomic_inc_ll; + const double sum_inc_ll = (double)real_inc_ll + (double)atomic_inc_ll; + const double rel_inc_ll = 0.5 * fabs(diff_inc_ll) / sum_inc_ll; + message("Real inc = %lld -- atomic inc = %lld rel=%e", real_inc_ll, + atomic_inc_ll, rel_inc_ll); + + /* Be clean */ + threadpool_clean(&tp); + free(array_f); + free(array_ll); + return 0; +}
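For reference, the accumulate_*() helpers added in src/accumulate.h above are meant to replace plain "+=" updates on fields that several tasks may touch concurrently: they compile to an atomic operation by default and to a plain update when the code is configured with --disable-atomics-within-tasks (i.e. when SWIFT_TASKS_WITHOUT_ATOMICS is defined and the scheduler instead makes such tasks conflict). A minimal, self-contained sketch of that pattern follows; the toy_part type and toy_accumulate_add_ll() name are illustrative only and not part of this patch, with only the #ifdef structure and the GCC/Clang __sync_fetch_and_add builtin mirroring the code above.

/* accumulate_demo.c: standalone sketch of the accumulate pattern.
 * Build with: gcc -O2 accumulate_demo.c -o accumulate_demo
 * Add -DSWIFT_TASKS_WITHOUT_ATOMICS to select the atomic-free variant,
 * which is only safe if the two updates below never run concurrently. */
#include <stdio.h>

/* Toy stand-in for a particle field updated from several tasks. */
struct toy_part {
  volatile long long num_interacted;
};

/* Same shape as accumulate_add_ll(): plain += in atomic-free mode,
 * an atomic fetch-and-add otherwise. */
static inline void toy_accumulate_add_ll(volatile long long *const address,
                                         const long long x) {
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
  *address += x; /* exclusive access guaranteed by task conflicts */
#else
  __sync_fetch_and_add(address, x); /* safe under concurrent updates */
#endif
}

int main(void) {
  struct toy_part p = {.num_interacted = 0LL};

  /* Two updates that could come from two different tasks. */
  toy_accumulate_add_ll(&p.num_interacted, 10LL);
  toy_accumulate_add_ll(&p.num_interacted, 32LL);

  printf("num_interacted = %lld\n", p.num_interacted);
  return 0;
}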