diff --git a/.gitignore b/.gitignore
index f843002a1c03af25e89a601c75ac14f1203cca84..99aa018c9d8f826076cb26ae7cdf57fc314b8852 100644
--- a/.gitignore
+++ b/.gitignore
@@ -122,6 +122,7 @@ tests/testInteractions.sh
tests/testSymmetry
tests/testHydroMPIrules
tests/testMaths
+tests/testAtomic
tests/testRandom
tests/testRandomSpacing
tests/testThreadpool
diff --git a/configure.ac b/configure.ac
index 6edd9586b9b8e92c29b0dabb8c45cd81a7dbb516..8495dca80d11f61470136bbd95eafbd78dccad02 100644
--- a/configure.ac
+++ b/configure.ac
@@ -239,6 +239,23 @@ if test "x$enable_debug" = "xyes"; then
fi
fi
+# Are we using the regular tasks (with atomics and no task conflicts) or
+# are we using the atomic-free task-conflicting (slower) version?
+AC_ARG_ENABLE([atomics-within-tasks],
+ [AS_HELP_STRING([--disable-atomics-within-tasks],
+ [Disable the use of atomic operations within tasks. This creates more task conflicts
+ but allows for atomic-free and lock-free code within the tasks. This likely slows down
+ the code @<:@yes/no@:>@]
+ )],
+ [enable_atomics_within_tasks="$enableval"],
+ [enable_atomics_within_tasks="yes"]
+)
+AS_IF([test "x$enable_atomics_within_tasks" != "xno"],
+ , # Note: atomics are allowed by default, we define the macro if we don't want them.
+ [AC_DEFINE([SWIFT_TASKS_WITHOUT_ATOMICS],1,[Makes SWIFT use atomic-free and lock-free tasks.])
+])
+
+
# Check if task debugging is on.
AC_ARG_ENABLE([task-debugging],
[AS_HELP_STRING([--enable-task-debugging],
@@ -2301,6 +2318,7 @@ AC_MSG_RESULT([
Black holes model : $with_black_holes
Task dependencies : $with_task_order
+ Atomic operations in tasks : $enable_atomics_within_tasks
Individual timers : $enable_timers
Task debugging : $enable_task_debugging
Threadpool debugging : $enable_threadpool_debugging
diff --git a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml
index 2770f9e27a5a50646575ccc0a9a2567bc548fc0e..8234b4b3e2d463b33d03f744386479723a29a9b4 100644
--- a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml
+++ b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml
@@ -68,17 +68,19 @@ FOF:
min_group_size: 256 # The minimum no. of particles required for a group.
linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation.
black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses).
- scale_factor_first: 0.01 # Scale-factor of first FoF black hole seeding calls.
- delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls.
+ scale_factor_first: 0.05 # Scale-factor of first FoF black hole seeding calls.
+ delta_time: 1.00751 # Scale-factor ratio between consecutive FoF black hole seeding calls.
Scheduler:
max_top_level_cells: 8
- tasks_per_cell: 5
cell_split_size: 200
Restarts:
onexit: 1
- delta_hours: 1.0
+ delta_hours: 6.0
+ max_run_time: 71.5 # Three days minus fergie time
+ resubmit_on_exit: 1
+ resubmit_command: ./resub.sh
# Parameters related to the initial conditions
InitialConditions:
diff --git a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml
index f175e2c6a3eef20371f6efa1d8f8f952393e4cd4..2de9a58ed5fe04f2061fd29d12bcce955293c447 100644
--- a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml
+++ b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml
@@ -68,17 +68,19 @@ FOF:
min_group_size: 256 # The minimum no. of particles required for a group.
linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation.
black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses).
- scale_factor_first: 0.01 # Scale-factor of first FoF black hole seeding calls.
- delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls.
+ scale_factor_first: 0.05 # Scale-factor of first FoF black hole seeding calls.
+ delta_time: 1.00751 # Scale-factor ratio between consecutive FoF black hole seeding calls.
Scheduler:
max_top_level_cells: 16
- tasks_per_cell: 5
cell_split_size: 200
Restarts:
onexit: 1
- delta_hours: 1.0
+ delta_hours: 6.0
+ max_run_time: 71.5 # Three days minus fergie time
+ resubmit_on_exit: 1
+ resubmit_command: ./resub.sh
# Parameters related to the initial conditions
InitialConditions:
diff --git a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml
index b65f1061524516f3a5a9bba7c79b59c58a973c42..49070d3b315fcfd06c90fb779a060a38a28bd072 100644
--- a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml
+++ b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml
@@ -68,8 +68,8 @@ FOF:
min_group_size: 256 # The minimum no. of particles required for a group.
linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation.
black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses).
- scale_factor_first: 0.01 # Scale-factor of first FoF black hole seeding calls.
- delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls.
+ scale_factor_first: 0.05 # Scale-factor of first FoF black hole seeding calls.
+ delta_time: 1.00751 # Scale-factor ratio between consecutive FoF black hole seeding calls.
Scheduler:
max_top_level_cells: 32
@@ -78,7 +78,7 @@ Scheduler:
Restarts:
onexit: 1
- delta_hours: 1.0
+ delta_hours: 6.0
# Parameters related to the initial conditions
InitialConditions:
diff --git a/src/Makefile.am b/src/Makefile.am
index f1f9157b1881a673456f7dbf394aa32e09b35379..db86d524cecd7a263e2ddf32b2545ad00397b678 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -114,7 +114,7 @@ AM_SOURCES = space.c runner_main.c runner_doiact_hydro.c runner_doiact_limiter.c
# Include files for distribution, not installation.
nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
- gravity_iact.h kernel_long_gravity.h vector.h cache.h \
+ gravity_iact.h kernel_long_gravity.h vector.h accumulate.h cache.h \
runner_doiact_nosort.h runner_doiact_hydro.h runner_doiact_stars.h runner_doiact_black_holes.h runner_doiact_grav.h \
runner_doiact_functions_hydro.h runner_doiact_functions_stars.h runner_doiact_functions_black_holes.h \
runner_doiact_functions_limiter.h runner_doiact_limiter.h units.h intrinsics.h minmax.h \
diff --git a/src/accumulate.h b/src/accumulate.h
new file mode 100644
index 0000000000000000000000000000000000000000..877ac23baa1e9041c45a8d3dcc10a1d660ea6c68
--- /dev/null
+++ b/src/accumulate.h
@@ -0,0 +1,208 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_ACCUMULATE_H
+#define SWIFT_ACCUMULATE_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local includes */
+#include "atomic.h"
+#include "minmax.h"
+
+/**
+ * @file accumulate.h
+ * @brief Defines a series of functions used to update the fields of a particle
+ * atomically. These functions should be used in any task that can run in
+ * parallel to another one.
+ */
+
+/**
+ * @brief Add x to the value stored at the location address (int version)
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to add to *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_add_i(
+ volatile int *const address, const int x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+ *address += x;
+#else
+ atomic_add(address, x);
+#endif
+}
+
+/**
+ * @brief Add x to the value stored at the location address (long long version)
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to add to *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_add_ll(
+ volatile long long *const address, const long long x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+ *address += x;
+#else
+ atomic_add(address, x);
+#endif
+}
+
+/**
+ * @brief Add x to the value stored at the location address (float version)
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to add to *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_add_f(
+ volatile float *const address, const float x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+ *address += x;
+#else
+ atomic_add_f(address, x);
+#endif
+}
+
+/**
+ * @brief Add x to the value stored at the location address (double version)
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to add to *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_add_d(
+ volatile double *const address, const double x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+ *address += x;
+#else
+ atomic_add_d(address, x);
+#endif
+}
+
+/**
+ * @brief Add 1 to the value stored at the location address (int version)
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_inc_i(
+ volatile int *const address) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+ (*address)++;
+#else
+ atomic_inc(address);
+#endif
+}
+
+/**
+ * @brief Add 1 to the value stored at the location address (long long version)
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_inc_ll(
+ volatile long long *const address) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+ (*address)++;
+#else
+ atomic_inc(address);
+#endif
+}
+
+/**
+ * @brief Compute the max of x and the value stored at the location address
+ * and store the value at the address (int8_t version).
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to max against *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_max_c(
+ volatile int8_t *const address, const int8_t x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+ *address = max(*address, x);
+#else
+ atomic_max_c(address, x);
+#endif
+}
+
+/**
+ * @brief Compute the max of x and the value stored at the location address
+ * and store the value at the address (int version).
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to max against *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_max_i(
+ volatile int *const address, const int x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+ *address = max(*address, x);
+#else
+ atomic_max(address, x);
+#endif
+}
+
+/**
+ * @brief Compute the max of x and the value stored at the location address
+ * and store the value at the address (float version).
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to max against *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_max_f(
+ volatile float *const address, const float x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+ *address = max(*address, x);
+#else
+ atomic_max_f(address, x);
+#endif
+}
+
+#endif /* SWIFT_ACCUMULATE_H */
diff --git a/src/atomic.h b/src/atomic.h
index 8a790a7f6cbbf190c3d87d590f10c6f1676c2d24..8ba1b0e692b08806d18eb453d9211a4829a8943a 100644
--- a/src/atomic.h
+++ b/src/atomic.h
@@ -26,6 +26,9 @@
#include "inline.h"
#include "minmax.h"
+/* Standard includes */
+#include <stdint.h>
+
#define atomic_add(v, i) __sync_fetch_and_add(v, i)
#define atomic_sub(v, i) __sync_fetch_and_sub(v, i)
#define atomic_or(v, i) __sync_fetch_and_or(v, i)
@@ -35,6 +38,29 @@
#define atomic_cas(v, o, n) __sync_val_compare_and_swap(v, o, n)
#define atomic_swap(v, n) __sync_lock_test_and_set(v, n)
+/**
+ * @brief Atomic min operation on ints.
+ *
+ * This is a text-book implementation based on an atomic CAS.
+ *
+ * @param address The address to update.
+ * @param y The value to update the address with.
+ */
+__attribute__((always_inline)) INLINE static void atomic_min(
+ volatile int *const address, const int y) {
+
+ int *int_ptr = (int *)address;
+
+ int test_val, old_val, new_val;
+ old_val = *address;
+
+ do {
+ test_val = old_val;
+ new_val = min(old_val, y);
+ old_val = atomic_cas(int_ptr, test_val, new_val);
+ } while (test_val != old_val);
+}
+
/**
* @brief Atomic min operation on floats.
*
@@ -66,29 +92,6 @@ __attribute__((always_inline)) INLINE static void atomic_min_f(
} while (test_val.as_int != old_val.as_int);
}
-/**
- * @brief Atomic min operation on ints.
- *
- * This is a text-book implementation based on an atomic CAS.
- *
- * @param address The address to update.
- * @param y The value to update the address with.
- */
-__attribute__((always_inline)) INLINE static void atomic_min(
- volatile int *address, int y) {
-
- int *int_ptr = (int *)address;
-
- int test_val, old_val, new_val;
- old_val = *address;
-
- do {
- test_val = old_val;
- new_val = min(old_val, y);
- old_val = atomic_cas(int_ptr, test_val, new_val);
- } while (test_val != old_val);
-}
-
/**
* @brief Atomic min operation on doubles.
*
@@ -121,6 +124,50 @@ __attribute__((always_inline)) INLINE static void atomic_min_d(
} while (test_val.as_long_long != old_val.as_long_long);
}
+/**
+ * @brief Atomic max operation on int8_t.
+ *
+ * This is a text-book implementation based on an atomic CAS.
+ *
+ * @param address The address to update.
+ * @param y The value to update the address with.
+ */
+__attribute__((always_inline)) INLINE static void atomic_max_c(
+ volatile int8_t *const address, const int8_t y) {
+
+ int8_t test_val, old_val, new_val;
+ old_val = *address;
+
+ do {
+ test_val = old_val;
+ new_val = max(old_val, y);
+ old_val = atomic_cas(address, test_val, new_val);
+ } while (test_val != old_val);
+}
+
+/**
+ * @brief Atomic max operation on ints.
+ *
+ * This is a text-book implementation based on an atomic CAS.
+ *
+ * @param address The address to update.
+ * @param y The value to update the address with.
+ */
+__attribute__((always_inline)) INLINE static void atomic_max(
+ volatile int *const address, const int y) {
+
+ int *int_ptr = (int *)address;
+
+ int test_val, old_val, new_val;
+ old_val = *address;
+
+ do {
+ test_val = old_val;
+ new_val = max(old_val, y);
+ old_val = atomic_cas(int_ptr, test_val, new_val);
+ } while (test_val != old_val);
+}
+
/**
* @brief Atomic max operation on floats.
*
diff --git a/src/cell.c b/src/cell.c
index c4a4ecc1872934da79f98ac2fd8e30d85cb988ea..056c0aee013e1321be61a3542906b98a97809b83 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -1228,7 +1228,7 @@ int cell_unpack_sf_counts(struct cell *restrict c,
* @return 0 on success, 1 on failure
*/
int cell_locktree(struct cell *c) {
- TIMER_TIC
+ TIMER_TIC;
/* First of all, try to lock this cell. */
if (c->hydro.hold || lock_trylock(&c->hydro.lock) != 0) {
@@ -1288,7 +1288,7 @@ int cell_locktree(struct cell *c) {
* @return 0 on success, 1 on failure
*/
int cell_glocktree(struct cell *c) {
- TIMER_TIC
+ TIMER_TIC;
/* First of all, try to lock this cell. */
if (c->grav.phold || lock_trylock(&c->grav.plock) != 0) {
@@ -1348,7 +1348,7 @@ int cell_glocktree(struct cell *c) {
* @return 0 on success, 1 on failure
*/
int cell_mlocktree(struct cell *c) {
- TIMER_TIC
+ TIMER_TIC;
/* First of all, try to lock this cell. */
if (c->grav.mhold || lock_trylock(&c->grav.mlock) != 0) {
@@ -1408,7 +1408,7 @@ int cell_mlocktree(struct cell *c) {
* @return 0 on success, 1 on failure
*/
int cell_slocktree(struct cell *c) {
- TIMER_TIC
+ TIMER_TIC;
/* First of all, try to lock this cell. */
if (c->stars.hold || lock_trylock(&c->stars.lock) != 0) {
@@ -1468,7 +1468,7 @@ int cell_slocktree(struct cell *c) {
* @return 0 on success, 1 on failure
*/
int cell_blocktree(struct cell *c) {
- TIMER_TIC
+ TIMER_TIC;
/* First of all, try to lock this cell. */
if (c->black_holes.hold || lock_trylock(&c->black_holes.lock) != 0) {
@@ -1528,7 +1528,7 @@ int cell_blocktree(struct cell *c) {
* @param c The #cell.
*/
void cell_unlocktree(struct cell *c) {
- TIMER_TIC
+ TIMER_TIC;
/* First of all, try to unlock this cell. */
if (lock_unlock(&c->hydro.lock) != 0) error("Failed to unlock cell.");
@@ -1546,7 +1546,7 @@ void cell_unlocktree(struct cell *c) {
* @param c The #cell.
*/
void cell_gunlocktree(struct cell *c) {
- TIMER_TIC
+ TIMER_TIC;
/* First of all, try to unlock this cell. */
if (lock_unlock(&c->grav.plock) != 0) error("Failed to unlock cell.");
@@ -1564,7 +1564,7 @@ void cell_gunlocktree(struct cell *c) {
* @param c The #cell.
*/
void cell_munlocktree(struct cell *c) {
- TIMER_TIC
+ TIMER_TIC;
/* First of all, try to unlock this cell. */
if (lock_unlock(&c->grav.mlock) != 0) error("Failed to unlock cell.");
@@ -1582,7 +1582,7 @@ void cell_munlocktree(struct cell *c) {
* @param c The #cell.
*/
void cell_sunlocktree(struct cell *c) {
- TIMER_TIC
+ TIMER_TIC;
/* First of all, try to unlock this cell. */
if (lock_unlock(&c->stars.lock) != 0) error("Failed to unlock cell.");
@@ -1600,7 +1600,7 @@ void cell_sunlocktree(struct cell *c) {
* @param c The #cell.
*/
void cell_bunlocktree(struct cell *c) {
- TIMER_TIC
+ TIMER_TIC;
/* First of all, try to unlock this cell. */
if (lock_unlock(&c->black_holes.lock) != 0) error("Failed to unlock cell.");
@@ -3175,9 +3175,11 @@ void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj,
* @param ci The first #cell we recurse in.
* @param cj The second #cell we recurse in.
* @param s The task #scheduler.
+ * @param with_timestep_sync Are we running with time-step synchronization on?
*/
void cell_activate_subcell_black_holes_tasks(struct cell *ci, struct cell *cj,
- struct scheduler *s) {
+ struct scheduler *s,
+ const int with_timestep_sync) {
const struct engine *e = s->space->e;
/* Store the current dx_max and h_max values. */
@@ -3205,11 +3207,12 @@ void cell_activate_subcell_black_holes_tasks(struct cell *ci, struct cell *cj,
/* Loop over all progenies and pairs of progenies */
for (int j = 0; j < 8; j++) {
if (ci->progeny[j] != NULL) {
- cell_activate_subcell_black_holes_tasks(ci->progeny[j], NULL, s);
+ cell_activate_subcell_black_holes_tasks(ci->progeny[j], NULL, s,
+ with_timestep_sync);
for (int k = j + 1; k < 8; k++)
if (ci->progeny[k] != NULL)
- cell_activate_subcell_black_holes_tasks(ci->progeny[j],
- ci->progeny[k], s);
+ cell_activate_subcell_black_holes_tasks(
+ ci->progeny[j], ci->progeny[k], s, with_timestep_sync);
}
}
} else {
@@ -3238,8 +3241,8 @@ void cell_activate_subcell_black_holes_tasks(struct cell *ci, struct cell *cj,
const int pid = csp->pairs[k].pid;
const int pjd = csp->pairs[k].pjd;
if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL)
- cell_activate_subcell_black_holes_tasks(ci->progeny[pid],
- cj->progeny[pjd], s);
+ cell_activate_subcell_black_holes_tasks(
+ ci->progeny[pid], cj->progeny[pjd], s, with_timestep_sync);
}
}
@@ -3250,10 +3253,14 @@ void cell_activate_subcell_black_holes_tasks(struct cell *ci, struct cell *cj,
/* Activate the drifts if the cells are local. */
if (ci->nodeID == engine_rank) cell_activate_drift_bpart(ci, s);
if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s);
+ if (cj->nodeID == engine_rank && with_timestep_sync)
+ cell_activate_sync_part(cj, s);
/* Activate the drifts if the cells are local. */
if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s);
if (cj->nodeID == engine_rank) cell_activate_drift_bpart(cj, s);
+ if (ci->nodeID == engine_rank && with_timestep_sync)
+ cell_activate_sync_part(ci, s);
}
} /* Otherwise, pair interation */
}
@@ -4090,6 +4097,7 @@ int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s,
int cell_unskip_black_holes_tasks(struct cell *c, struct scheduler *s) {
struct engine *e = s->space->e;
+ const int with_timestep_sync = (e->policy & engine_policy_timestep_sync);
const int nodeID = e->nodeID;
int rebuild = 0;
@@ -4137,12 +4145,13 @@ int cell_unskip_black_holes_tasks(struct cell *c, struct scheduler *s) {
/* Store current values of dx_max and h_max. */
else if (t->type == task_type_sub_self) {
- cell_activate_subcell_black_holes_tasks(ci, NULL, s);
+ cell_activate_subcell_black_holes_tasks(ci, NULL, s,
+ with_timestep_sync);
}
/* Store current values of dx_max and h_max. */
else if (t->type == task_type_sub_pair) {
- cell_activate_subcell_black_holes_tasks(ci, cj, s);
+ cell_activate_subcell_black_holes_tasks(ci, cj, s, with_timestep_sync);
}
}
@@ -5349,16 +5358,31 @@ void cell_check_timesteps(const struct cell *c, const integertime_t ti_current,
/* Only check cells that have at least one non-inhibited particle */
if (count > 0) {
- if (ti_end_min != c->hydro.ti_end_min)
- error(
- "Non-matching ti_end_min. Cell=%lld true=%lld ti_current=%lld "
- "depth=%d",
- c->hydro.ti_end_min, ti_end_min, ti_current, c->depth);
+ if (count != c->hydro.count) {
+
+ /* Note that we use a < as the particle with the smallest time-bin
+ might have been swallowed. This means we will run this cell with
+ 0 active particles but that's not wrong */
+ if (ti_end_min < c->hydro.ti_end_min)
+ error(
+ "Non-matching ti_end_min. Cell=%lld true=%lld ti_current=%lld "
+ "depth=%d",
+ c->hydro.ti_end_min, ti_end_min, ti_current, c->depth);
+
+ } else /* Normal case: nothing was swallowed/converted */ {
+ if (ti_end_min != c->hydro.ti_end_min)
+ error(
+ "Non-matching ti_end_min. Cell=%lld true=%lld ti_current=%lld "
+ "depth=%d",
+ c->hydro.ti_end_min, ti_end_min, ti_current, c->depth);
+ }
+
if (ti_end_max > c->hydro.ti_end_max)
error(
"Non-matching ti_end_max. Cell=%lld true=%lld ti_current=%lld "
"depth=%d",
c->hydro.ti_end_max, ti_end_max, ti_current, c->depth);
+
if (ti_beg_max != c->hydro.ti_beg_max)
error(
"Non-matching ti_beg_max. Cell=%lld true=%lld ti_current=%lld "
diff --git a/src/cell.h b/src/cell.h
index 44b802f078391599ae361281c9b1c48ec6d87a11..4506e206a869c08be867a8c98d1940da5602f3f2 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -911,7 +911,8 @@ void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj,
const int with_star_formation,
const int with_timestep_sync);
void cell_activate_subcell_black_holes_tasks(struct cell *ci, struct cell *cj,
- struct scheduler *s);
+ struct scheduler *s,
+ const int with_timestep_sync);
void cell_activate_subcell_external_grav_tasks(struct cell *ci,
struct scheduler *s);
void cell_activate_super_spart_drifts(struct cell *c, struct scheduler *s);
diff --git a/src/engine.c b/src/engine.c
index 95507065daf595d44f7909617885cf82a2309e22..e5e7b78dcafd9c14a032dd3a4249e6b0a52fab4d 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -210,6 +210,9 @@ void engine_repartition(struct engine *e) {
/* Sorting indices. */
if (e->s->cells_top != NULL) space_free_cells(e->s);
+ /* Report the time spent in the different task categories */
+ if (e->verbose) scheduler_report_task_times(&e->sched, e->nr_threads);
+
/* Task arrays. */
scheduler_free_tasks(&e->sched);
@@ -1671,7 +1674,8 @@ void engine_rebuild(struct engine *e, int repartitioned,
e->restarting = 0;
/* Report the time spent in the different task categories */
- if (e->verbose) scheduler_report_task_times(&e->sched, e->nr_threads);
+ if (e->verbose && !repartitioned)
+ scheduler_report_task_times(&e->sched, e->nr_threads);
/* Give some breathing space */
scheduler_free_tasks(&e->sched);
@@ -2575,8 +2579,10 @@ void engine_step(struct engine *e) {
/* Prepare the tasks to be launched, rebuild or repartition if needed. */
engine_prepare(e);
- /* Print the number of active tasks ? */
+#ifdef SWIFT_DEBUG_CHECKS
+ /* Print the number of active tasks */
if (e->verbose) engine_print_task_counts(e);
+#endif
/* Dump local cells and active particle counts. */
// dumpCells("cells", 1, 0, 0, 0, e->s, e->nodeID, e->step);
diff --git a/src/engine_marktasks.c b/src/engine_marktasks.c
index d3d17bcdd6dbf09fb6586cd584c7fdfd6ebe21fd..532297a02699f9d74690102e30347c5f0d776d83 100644
--- a/src/engine_marktasks.c
+++ b/src/engine_marktasks.c
@@ -192,7 +192,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
t_subtype == task_subtype_bh_density) {
if (ci_active_black_holes) {
scheduler_activate(s, t);
- cell_activate_subcell_black_holes_tasks(ci, NULL, s);
+ cell_activate_subcell_black_holes_tasks(ci, NULL, s,
+ with_timestep_sync);
}
}
@@ -446,7 +447,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
/* Store current values of dx_max and h_max. */
else if (t_type == task_type_sub_pair &&
t_subtype == task_subtype_bh_density) {
- cell_activate_subcell_black_holes_tasks(ci, cj, s);
+ cell_activate_subcell_black_holes_tasks(ci, cj, s,
+ with_timestep_sync);
}
}
diff --git a/src/gravity/MultiSoftening/gravity.h b/src/gravity/MultiSoftening/gravity.h
index 07ed0e78b164e5ee3f01b8c9a62b940201b31aa4..d7716cf712254054e3cf364a807f3cb1c12355f3 100644
--- a/src/gravity/MultiSoftening/gravity.h
+++ b/src/gravity/MultiSoftening/gravity.h
@@ -23,6 +23,7 @@
#include
/* Local includes. */
+#include "accumulate.h"
#include "cosmology.h"
#include "error.h"
#include "gravity_properties.h"
@@ -77,7 +78,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening(
__attribute__((always_inline)) INLINE static void
gravity_add_comoving_potential(struct gpart* restrict gp, float pot) {
- gp->potential += pot;
+ accumulate_add_f(&gp->potential, pot);
}
/**
diff --git a/src/gravity/Potential/gravity.h b/src/gravity/Potential/gravity.h
index 3a3e9680016ef51c554691c6f43f8279cde8caf9..f9a9502a528c161fbc82b3028f303b7f9cad49f8 100644
--- a/src/gravity/Potential/gravity.h
+++ b/src/gravity/Potential/gravity.h
@@ -23,6 +23,7 @@
#include
/* Local includes. */
+#include "accumulate.h"
#include "cosmology.h"
#include "gravity_properties.h"
#include "kernel_gravity.h"
@@ -63,7 +64,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening(
__attribute__((always_inline)) INLINE static void
gravity_add_comoving_potential(struct gpart* restrict gp, float pot) {
- gp->potential += pot;
+ accumulate_add_f(&gp->potential, pot);
}
/**
diff --git a/src/gravity_cache.h b/src/gravity_cache.h
index 912961d2e06dc748039f54c72bd9b33fd2547df2..ba3432d7e732b783547ddd2a597dbdf5723bf606 100644
--- a/src/gravity_cache.h
+++ b/src/gravity_cache.h
@@ -23,6 +23,7 @@
#include "../config.h"
/* Local headers */
+#include "accumulate.h"
#include "align.h"
#include "error.h"
#include "gravity.h"
@@ -491,9 +492,9 @@ __attribute__((always_inline)) INLINE static void gravity_cache_write_back(
/* Write stuff back to the particles */
for (int i = 0; i < gcount; ++i) {
if (active[i]) {
- gparts[i].a_grav[0] += a_x[i];
- gparts[i].a_grav[1] += a_y[i];
- gparts[i].a_grav[2] += a_z[i];
+ accumulate_add_f(&gparts[i].a_grav[0], a_x[i]);
+ accumulate_add_f(&gparts[i].a_grav[1], a_y[i]);
+ accumulate_add_f(&gparts[i].a_grav[2], a_z[i]);
gravity_add_comoving_potential(&gparts[i], pot[i]);
}
}
diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c
index 0efec0bc80fc381d9bd6ff733cc546e1885c1d9e..cbfb9ef7a2970b222bf67a915bd5044bedaaaae3 100644
--- a/src/mesh_gravity.c
+++ b/src/mesh_gravity.c
@@ -28,6 +28,7 @@
#include "mesh_gravity.h"
/* Local includes. */
+#include "accumulate.h"
#include "active.h"
#include "debug.h"
#include "engine.h"
@@ -334,10 +335,10 @@ void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, const int N,
/* ---- */
/* Store things back */
+ accumulate_add_f(&gp->a_grav[0], fac * a[0]);
+ accumulate_add_f(&gp->a_grav[1], fac * a[1]);
+ accumulate_add_f(&gp->a_grav[2], fac * a[2]);
gravity_add_comoving_potential(gp, p);
- gp->a_grav[0] += fac * a[0];
- gp->a_grav[1] += fac * a[1];
- gp->a_grav[2] += fac * a[2];
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
gp->potential_PM = p;
gp->a_grav_PM[0] = fac * a[0];
diff --git a/src/multipole.h b/src/multipole.h
index e03e302c4186a54442518a687c444ff4a7216b1e..55de9c0baebe0371e4e61860e2362907874cba6b 100644
--- a/src/multipole.h
+++ b/src/multipole.h
@@ -1608,12 +1608,19 @@ INLINE static void gravity_M2L_apply(
const struct potential_derivatives_M2L *pot) {
#ifdef SWIFT_DEBUG_CHECKS
- /* Count interactions */
- l_b->num_interacted += m_a->num_gpart;
+ /* Count all interactions
+ * Note that despite being in a section of the code protected by locks,
+ * we must use atomics here as the long-range task may update this
+ * counter in a lock-free section of code. */
+ accumulate_add_ll(&l_b->num_interacted, m_a->num_gpart);
#endif
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
- l_b->num_interacted_tree += m_a->num_gpart;
+ /* Count tree interactions
+ * Note that despite being in a section of the code protected by locks,
+ * we must use atomics here as the long-range task may update this
+ * counter in a lock-free section of code. */
+ accumulate_add_ll(&l_b->num_interacted_tree, m_a->num_gpart);
#endif
/* Record that this tensor has received contributions */
@@ -2467,12 +2474,12 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb,
#ifdef SWIFT_DEBUG_CHECKS
if (lb->num_interacted == 0) error("Interacting with empty field tensor");
- gp->num_interacted += lb->num_interacted;
+ accumulate_add_ll(&gp->num_interacted, lb->num_interacted);
#endif
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
- gp->num_interacted_m2l += lb->num_interacted_tree;
- gp->num_interacted_pm += lb->num_interacted_pm;
+ accumulate_add_ll(&gp->num_interacted_m2l, lb->num_interacted_tree);
+ accumulate_add_ll(&gp->num_interacted_pm, lb->num_interacted_pm);
#endif
/* Local accumulator */
@@ -2589,15 +2596,15 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb,
#endif
/* Update the particle */
- gp->a_grav[0] += a_grav[0];
- gp->a_grav[1] += a_grav[1];
- gp->a_grav[2] += a_grav[2];
+ accumulate_add_f(&gp->a_grav[0], a_grav[0]);
+ accumulate_add_f(&gp->a_grav[1], a_grav[1]);
+ accumulate_add_f(&gp->a_grav[2], a_grav[2]);
gravity_add_comoving_potential(gp, pot);
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
- gp->a_grav_m2l[0] += a_grav[0];
- gp->a_grav_m2l[1] += a_grav[1];
- gp->a_grav_m2l[2] += a_grav[2];
+ accumulate_add_f(&gp->a_grav_m2l[0], a_grav[0]);
+ accumulate_add_f(&gp->a_grav_m2l[1], a_grav[1]);
+ accumulate_add_f(&gp->a_grav_m2l[2], a_grav[2]);
#endif
}
diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c
index 043b662b101d99698bbc46cf39ae2096e2725c91..91abe9e91060b19fa6df1bbbb29e0de6e6442cad 100644
--- a/src/runner_doiact_grav.c
+++ b/src/runner_doiact_grav.c
@@ -264,13 +264,13 @@ static INLINE void runner_dopair_grav_pp_full(
#ifdef SWIFT_DEBUG_CHECKS
/* Update the interaction counter if it's not a padded gpart */
if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
- gparts_i[pid].num_interacted++;
+ accumulate_inc_ll(&gparts_i[pid].num_interacted);
#endif
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
/* Update the p2p interaction counter if it's not a padded gpart */
if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
- gparts_i[pid].num_interacted_p2p++;
+ accumulate_inc_ll(&gparts_i[pid].num_interacted_p2p);
#endif
}
@@ -281,9 +281,9 @@ static INLINE void runner_dopair_grav_pp_full(
ci_cache->pot[pid] += pot;
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
- gparts_i[pid].a_grav_p2p[0] += a_x;
- gparts_i[pid].a_grav_p2p[1] += a_y;
- gparts_i[pid].a_grav_p2p[2] += a_z;
+ accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x);
+ accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y);
+ accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z);
#endif
}
}
@@ -420,13 +420,13 @@ static INLINE void runner_dopair_grav_pp_truncated(
#ifdef SWIFT_DEBUG_CHECKS
/* Update the interaction counter if it's not a padded gpart */
if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
- gparts_i[pid].num_interacted++;
+ accumulate_inc_ll(&gparts_i[pid].num_interacted);
#endif
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
/* Update the p2p interaction counter if it's not a padded gpart */
if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
- gparts_i[pid].num_interacted_p2p++;
+ accumulate_inc_ll(&gparts_i[pid].num_interacted_p2p);
#endif
}
@@ -437,9 +437,9 @@ static INLINE void runner_dopair_grav_pp_truncated(
ci_cache->pot[pid] += pot;
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
- gparts_i[pid].a_grav_p2p[0] += a_x;
- gparts_i[pid].a_grav_p2p[1] += a_y;
- gparts_i[pid].a_grav_p2p[2] += a_z;
+ accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x);
+ accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y);
+ accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z);
#endif
}
}
@@ -565,16 +565,18 @@ static INLINE void runner_dopair_grav_pm_full(
#ifdef SWIFT_DEBUG_CHECKS
/* Update the interaction counter */
if (pid < gcount_i)
- gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
+ accumulate_add_ll(&gparts_i[pid].num_interacted,
+ cj->grav.multipole->m_pole.num_gpart);
#endif
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
/* Update the M2P interaction counter and forces. */
if (pid < gcount_i) {
- gparts_i[pid].num_interacted_m2p += cj->grav.multipole->m_pole.num_gpart;
- gparts_i[pid].a_grav_m2p[0] += f_x;
- gparts_i[pid].a_grav_m2p[1] += f_y;
- gparts_i[pid].a_grav_m2p[2] += f_z;
+ accumulate_add_ll(&gparts_i[pid].num_interacted_m2p,
+ cj->grav.multipole->m_pole.num_gpart);
+ accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x);
+ accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y);
+ accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z);
}
#endif
}
@@ -706,16 +708,18 @@ static INLINE void runner_dopair_grav_pm_truncated(
#ifdef SWIFT_DEBUG_CHECKS
/* Update the interaction counter */
if (pid < gcount_i)
- gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
+ accumulate_add_ll(&gparts_i[pid].num_interacted,
+ cj->grav.multipole->m_pole.num_gpart);
#endif
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
/* Update the M2P interaction counter and forces. */
if (pid < gcount_i) {
- gparts_i[pid].num_interacted_m2p += cj->grav.multipole->m_pole.num_gpart;
- gparts_i[pid].a_grav_m2p[0] += f_x;
- gparts_i[pid].a_grav_m2p[1] += f_y;
- gparts_i[pid].a_grav_m2p[2] += f_z;
+ accumulate_add_ll(&gparts_i[pid].num_interacted_m2p,
+ cj->grav.multipole->m_pole.num_gpart);
+ accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x);
+ accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y);
+ accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z);
}
#endif
}
@@ -1041,13 +1045,13 @@ static INLINE void runner_doself_grav_pp_full(
#ifdef SWIFT_DEBUG_CHECKS
/* Update the interaction counter if it's not a padded gpart */
if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
- gparts[pid].num_interacted++;
+ accumulate_inc_ll(&gparts[pid].num_interacted);
#endif
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
/* Update the P2P interaction counter if it's not a padded gpart */
if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
- gparts[pid].num_interacted_p2p++;
+ accumulate_inc_ll(&gparts[pid].num_interacted_p2p);
#endif
}
@@ -1058,9 +1062,9 @@ static INLINE void runner_doself_grav_pp_full(
ci_cache->pot[pid] += pot;
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
- gparts[pid].a_grav_p2p[0] += a_x;
- gparts[pid].a_grav_p2p[1] += a_y;
- gparts[pid].a_grav_p2p[2] += a_z;
+ accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x);
+ accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y);
+ accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z);
#endif
}
}
@@ -1180,13 +1184,13 @@ static INLINE void runner_doself_grav_pp_truncated(
#ifdef SWIFT_DEBUG_CHECKS
/* Update the interaction counter if it's not a padded gpart */
if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
- gparts[pid].num_interacted++;
+ accumulate_inc_ll(&gparts[pid].num_interacted);
#endif
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
/* Update the P2P interaction counter if it's not a padded gpart */
if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
- gparts[pid].num_interacted_p2p++;
+ accumulate_inc_ll(&gparts[pid].num_interacted_p2p);
#endif
}
@@ -1197,9 +1201,9 @@ static INLINE void runner_doself_grav_pp_truncated(
ci_cache->pot[pid] += pot;
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
- gparts[pid].a_grav_p2p[0] += a_x;
- gparts[pid].a_grav_p2p[1] += a_y;
- gparts[pid].a_grav_p2p[2] += a_z;
+ accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x);
+ accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y);
+ accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z);
#endif
}
}
@@ -1355,11 +1359,29 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r,
cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
#endif
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+ /* Lock the multipoles
+ * Note we impose a hierarchy to solve the dining philosopher problem */
+ if (ci < cj) {
+ lock_lock(&ci->grav.mlock);
+ lock_lock(&cj->grav.mlock);
+ } else {
+ lock_lock(&cj->grav.mlock);
+ lock_lock(&ci->grav.mlock);
+ }
+#endif
+
/* Let's interact at this level */
gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot,
multi_i, multi_j, ci->grav.multipole->CoM,
cj->grav.multipole->CoM, props, periodic, dim, r_s_inv);
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+ /* Unlock the multipoles */
+ if (lock_unlock(&ci->grav.mlock) != 0) error("Failed to unlock multipole");
+ if (lock_unlock(&cj->grav.mlock) != 0) error("Failed to unlock multipole");
+#endif
+
TIMER_TOC(timer_dopair_grav_mm);
}
@@ -1371,9 +1393,9 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r,
* @param ci The #cell with field tensor to interact.
* @param cj The #cell with the multipole.
*/
-static INLINE void runner_dopair_grav_mm_nonsym(
- struct runner *r, struct cell *restrict ci,
- const struct cell *restrict cj) {
+static INLINE void runner_dopair_grav_mm_nonsym(struct runner *r,
+ struct cell *restrict ci,
+ struct cell *restrict cj) {
/* Some constants */
const struct engine *e = r->e;
@@ -1406,10 +1428,28 @@ static INLINE void runner_dopair_grav_mm_nonsym(
cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
#endif
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+ /* Lock the multipoles
+ * Note we impose a hierarchy to solve the dining philosopher problem */
+ if (ci < cj) {
+ lock_lock(&ci->grav.mlock);
+ lock_lock(&cj->grav.mlock);
+ } else {
+ lock_lock(&cj->grav.mlock);
+ lock_lock(&ci->grav.mlock);
+ }
+#endif
+
/* Let's interact at this level */
gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM,
cj->grav.multipole->CoM, props, periodic, dim, r_s_inv);
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+ /* Unlock the multipoles */
+ if (lock_unlock(&ci->grav.mlock) != 0) error("Failed to unlock multipole");
+ if (lock_unlock(&cj->grav.mlock) != 0) error("Failed to unlock multipole");
+#endif
+
TIMER_TOC(timer_dopair_grav_mm);
}
@@ -1638,17 +1678,21 @@ void runner_dopair_recursive_grav(struct runner *r, struct cell *ci,
#ifdef SWIFT_DEBUG_CHECKS
if (cell_is_active_gravity(ci, e))
- multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
+ accumulate_add_ll(&multi_i->pot.num_interacted,
+ multi_j->m_pole.num_gpart);
if (cell_is_active_gravity(cj, e))
- multi_j->pot.num_interacted += multi_i->m_pole.num_gpart;
+ accumulate_add_ll(&multi_j->pot.num_interacted,
+ multi_i->m_pole.num_gpart);
#endif
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
/* Need to account for the interactions we missed */
if (cell_is_active_gravity(ci, e))
- multi_i->pot.num_interacted_pm += multi_j->m_pole.num_gpart;
+ accumulate_add_ll(&multi_i->pot.num_interacted_pm,
+ multi_j->m_pole.num_gpart);
if (cell_is_active_gravity(cj, e))
- multi_j->pot.num_interacted_pm += multi_i->m_pole.num_gpart;
+ accumulate_add_ll(&multi_j->pot.num_interacted_pm,
+ multi_i->m_pole.num_gpart);
#endif
return;
}
@@ -1833,8 +1877,8 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
for (int n = 0; n < nr_cells_with_particles; ++n) {
/* Handle on the top-level cell and it's gravity business*/
- const struct cell *cj = &cells[cells_with_particles[n]];
- const struct gravity_tensors *const multi_j = cj->grav.multipole;
+ struct cell *cj = &cells[cells_with_particles[n]];
+ struct gravity_tensors *const multi_j = cj->grav.multipole;
/* Avoid self contributions */
if (top == cj) continue;
@@ -1854,12 +1898,14 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
#ifdef SWIFT_DEBUG_CHECKS
/* Need to account for the interactions we missed */
- multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
+ accumulate_add_ll(&multi_i->pot.num_interacted,
+ multi_j->m_pole.num_gpart);
#endif
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
/* Need to account for the interactions we missed */
- multi_i->pot.num_interacted_pm += multi_j->m_pole.num_gpart;
+ accumulate_add_ll(&multi_i->pot.num_interacted_pm,
+ multi_j->m_pole.num_gpart);
#endif
/* Record that this multipole received a contribution */
diff --git a/src/scheduler.c b/src/scheduler.c
index d7fe3fb46583b92b9b505cf4f9313aabebf01032..14d3627f0e199a288b0a38fc28a366d855c181be 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -2286,6 +2286,7 @@ void scheduler_free_tasks(struct scheduler *s) {
s->tid_active = NULL;
}
s->size = 0;
+ s->nr_tasks = 0;
}
/**
diff --git a/src/task.c b/src/task.c
index 19ba01580e6ca519a48fd934d0bb8c21b0670343..f821eed5e6b0fd6d29ebc00882584abb4548ef7c 100644
--- a/src/task.c
+++ b/src/task.c
@@ -456,8 +456,10 @@ void task_unlock(struct task *t) {
case task_type_self:
case task_type_sub_self:
if (subtype == task_subtype_grav) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
cell_gunlocktree(ci);
cell_munlocktree(ci);
+#endif
} else if ((subtype == task_subtype_stars_density) ||
(subtype == task_subtype_stars_feedback)) {
cell_sunlocktree(ci);
@@ -470,7 +472,11 @@ void task_unlock(struct task *t) {
cell_unlocktree(ci);
} else if (subtype == task_subtype_do_bh_swallow) {
cell_bunlocktree(ci);
- } else {
+ } else if (subtype == task_subtype_limiter) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+ cell_unlocktree(ci);
+#endif
+ } else { /* hydro */
cell_unlocktree(ci);
}
break;
@@ -478,10 +484,12 @@ void task_unlock(struct task *t) {
case task_type_pair:
case task_type_sub_pair:
if (subtype == task_subtype_grav) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
cell_gunlocktree(ci);
cell_gunlocktree(cj);
cell_munlocktree(ci);
cell_munlocktree(cj);
+#endif
} else if ((subtype == task_subtype_stars_density) ||
(subtype == task_subtype_stars_feedback)) {
cell_sunlocktree(ci);
@@ -499,24 +507,35 @@ void task_unlock(struct task *t) {
} else if (subtype == task_subtype_do_bh_swallow) {
cell_bunlocktree(ci);
cell_bunlocktree(cj);
- } else {
+ } else if (subtype == task_subtype_limiter) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+ cell_unlocktree(ci);
+ cell_unlocktree(cj);
+#endif
+ } else { /* hydro */
cell_unlocktree(ci);
cell_unlocktree(cj);
}
break;
case task_type_grav_down:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
cell_gunlocktree(ci);
cell_munlocktree(ci);
+#endif
break;
case task_type_grav_long_range:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
cell_munlocktree(ci);
+#endif
break;
case task_type_grav_mm:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
cell_munlocktree(ci);
cell_munlocktree(cj);
+#endif
break;
case task_type_star_formation:
@@ -612,6 +631,7 @@ int task_lock(struct task *t) {
case task_type_self:
case task_type_sub_self:
if (subtype == task_subtype_grav) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
/* Lock the gparts and the m-pole */
if (ci->grav.phold || ci->grav.mhold) return 0;
if (cell_glocktree(ci) != 0)
@@ -620,6 +640,7 @@ int task_lock(struct task *t) {
cell_gunlocktree(ci);
return 0;
}
+#endif
} else if ((subtype == task_subtype_stars_density) ||
(subtype == task_subtype_stars_feedback)) {
if (ci->stars.hold) return 0;
@@ -643,6 +664,11 @@ int task_lock(struct task *t) {
} else if (subtype == task_subtype_do_bh_swallow) {
if (ci->black_holes.hold) return 0;
if (cell_blocktree(ci) != 0) return 0;
+ } else if (subtype == task_subtype_limiter) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+ if (ci->hydro.hold) return 0;
+ if (cell_locktree(ci) != 0) return 0;
+#endif
} else { /* subtype == hydro */
if (ci->hydro.hold) return 0;
if (cell_locktree(ci) != 0) return 0;
@@ -652,6 +678,7 @@ int task_lock(struct task *t) {
case task_type_pair:
case task_type_sub_pair:
if (subtype == task_subtype_grav) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
/* Lock the gparts and the m-pole in both cells */
if (ci->grav.phold || cj->grav.phold) return 0;
if (cell_glocktree(ci) != 0) return 0;
@@ -668,6 +695,7 @@ int task_lock(struct task *t) {
cell_munlocktree(ci);
return 0;
}
+#endif
} else if ((subtype == task_subtype_stars_density) ||
(subtype == task_subtype_stars_feedback)) {
/* Lock the stars and the gas particles in both cells */
@@ -719,6 +747,15 @@ int task_lock(struct task *t) {
cell_bunlocktree(ci);
return 0;
}
+ } else if (subtype == task_subtype_limiter) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+ if (ci->hydro.hold || cj->hydro.hold) return 0;
+ if (cell_locktree(ci) != 0) return 0;
+ if (cell_locktree(cj) != 0) {
+ cell_unlocktree(ci);
+ return 0;
+ }
+#endif
} else { /* subtype == hydro */
/* Lock the parts in both cells */
if (ci->hydro.hold || cj->hydro.hold) return 0;
@@ -731,6 +768,7 @@ int task_lock(struct task *t) {
break;
case task_type_grav_down:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
/* Lock the gparts and the m-poles */
if (ci->grav.phold || ci->grav.mhold) return 0;
if (cell_glocktree(ci) != 0)
@@ -739,15 +777,19 @@ int task_lock(struct task *t) {
cell_gunlocktree(ci);
return 0;
}
+#endif
break;
case task_type_grav_long_range:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
/* Lock the m-poles */
if (ci->grav.mhold) return 0;
if (cell_mlocktree(ci) != 0) return 0;
+#endif
break;
case task_type_grav_mm:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
/* Lock both m-poles */
if (ci->grav.mhold || cj->grav.mhold) return 0;
if (cell_mlocktree(ci) != 0) return 0;
@@ -755,6 +797,7 @@ int task_lock(struct task *t) {
cell_munlocktree(ci);
return 0;
}
+#endif
break;
case task_type_star_formation:
diff --git a/src/timestep_limiter_iact.h b/src/timestep_limiter_iact.h
index a4f5c7c1af48db7f6926f0fac1c09a4db2a0c492..839b1a3dc450ca12b8b0c8824fd75cd87063a7b8 100644
--- a/src/timestep_limiter_iact.h
+++ b/src/timestep_limiter_iact.h
@@ -88,7 +88,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter(
if (pj->time_bin > pi->time_bin + time_bin_neighbour_max_delta_bin) {
/* Store the smallest time bin that woke up this particle */
- pj->limiter_data.wakeup = max(pj->limiter_data.wakeup, -pi->time_bin);
+ accumulate_max_c(&pj->limiter_data.wakeup, -pi->time_bin);
}
}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 11426ac89970917cf121351e266aaa67cfd99847..e24a2a69b07d9ebf8ff10f367f7eaf0724817f7c 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -29,7 +29,8 @@ TESTS = testGreetings testMaths testReading.sh testKernel \
testPeriodicBC.sh testPeriodicBCPerturbed.sh testPotentialSelf \
testPotentialPair testEOS testUtilities testSelectOutput.sh \
testCbrt testCosmology testOutputList testFormat.sh \
- test27cellsStars.sh test27cellsStarsPerturbed.sh testHydroMPIrules
+ test27cellsStars.sh test27cellsStarsPerturbed.sh testHydroMPIrules \
+ testAtomic
# List of test programs to compile
check_PROGRAMS = testGreetings testReading testTimeIntegration \
@@ -41,7 +42,8 @@ check_PROGRAMS = testGreetings testReading testTimeIntegration \
testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC \
testGravityDerivatives testPotentialSelf testPotentialPair testEOS testUtilities \
testSelectOutput testCbrt testCosmology testOutputList test27cellsStars \
- test27cellsStars_subset testCooling testComovingCooling testFeedback testHashmap testHydroMPIrules
+ test27cellsStars_subset testCooling testComovingCooling testFeedback testHashmap \
+ testAtomic testHydroMPIrules
# Rebuild tests when SWIFT is updated.
$(check_PROGRAMS): ../src/.libs/libswiftsim.a
@@ -51,6 +53,8 @@ testGreetings_SOURCES = testGreetings.c
testMaths_SOURCES = testMaths.c
+testAtomic_SOURCES = testAtomic.c
+
testRandom_SOURCES = testRandom.c
testRandomSpacing_SOURCES = testRandomSpacing.c
diff --git a/tests/testAtomic.c b/tests/testAtomic.c
new file mode 100644
index 0000000000000000000000000000000000000000..47eca7b736d1687654bb1d2c5b40366a05e8af73
--- /dev/null
+++ b/tests/testAtomic.c
@@ -0,0 +1,146 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Standard includes. */
+#include <fenv.h>
+
+/* Local includes */
+#include "swift.h"
+
+const int array_size = 2048 * 2048;
+const int num_threads = 64;
+const int chunk_size = 64;
+
+void map_function_sum_f(void *data, int num_elements, void *extra_data) {
+
+ float *array = (float *)data;
+ float *sum = (float *)extra_data;
+
+ for (int i = 0; i < num_elements; ++i) atomic_add_f(sum, array[i]);
+}
+
+void map_function_sum_ll(void *data, int num_elements, void *extra_data) {
+
+ long long *array = (long long *)data;
+ long long *sum = (long long *)extra_data;
+
+ for (int i = 0; i < num_elements; ++i) atomic_add(sum, array[i]);
+}
+
+void map_function_inc_ll(void *data, int num_elements, void *extra_data) {
+
+ long long *sum = (long long *)extra_data;
+
+ for (int i = 0; i < num_elements; ++i) atomic_inc(sum);
+}
+
+int main(int argc, char *argv[]) {
+
+ /* Initialize CPU frequency, this also starts time. */
+ unsigned long long cpufreq = 0;
+ clocks_set_cpufreq(cpufreq);
+
+/* Choke on FPEs */
+#ifdef HAVE_FE_ENABLE_EXCEPT
+ feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+#endif
+
+ /* Get some randomness going */
+ const int seed = time(NULL);
+ message("Seed = %d", seed);
+ srand(seed);
+
+ /* Start a bunch of threads */
+ printf("# Creating threadpool with %d threads\n", num_threads);
+ struct threadpool tp;
+ threadpool_init(&tp, num_threads);
+
+ /* Create some random data */
+ float *array_f = malloc(array_size * sizeof(float));
+ long long *array_ll = malloc(array_size * sizeof(long long));
+
+ for (int i = 0; i < array_size; ++i) {
+ array_f[i] = rand() / ((float)RAND_MAX);
+ array_ll[i] = rand();
+ }
+
+ /*** Test the addition atomic ops *******************************/
+
+ /* float case */
+
+ /* Compute the real answer */
+ float real_sum_f = 0.f;
+ for (int i = 0; i < array_size; ++i) {
+ real_sum_f += array_f[i];
+ }
+
+ /* Compute the answer via threads and atomic */
+ float atomic_sum_f = 0.f;
+ threadpool_map(&tp, map_function_sum_f, array_f, array_size, sizeof(float),
+ chunk_size, &atomic_sum_f);
+
+ const double diff_sum_f = (double)real_sum_f - (double)atomic_sum_f;
+ const double sum_sum_f = (double)real_sum_f + (double)atomic_sum_f;
+ const double rel_sum_f = 0.5 * fabs(diff_sum_f) / sum_sum_f;
+ message("Real sum = %.7e -- atomic sum = %.7e rel=%e", real_sum_f,
+ atomic_sum_f, rel_sum_f);
+
+ /* long long case */
+
+ /* Compute the real answer */
+ long long real_sum_ll = 0.f;
+ for (int i = 0; i < array_size; ++i) {
+ real_sum_ll += array_ll[i];
+ }
+
+ /* Compute the answer via threads and atomic */
+ long long atomic_sum_ll = 0LL;
+ threadpool_map(&tp, map_function_sum_ll, array_ll, array_size,
+ sizeof(long long), chunk_size, &atomic_sum_ll);
+
+ const double diff_sum_ll = (double)real_sum_ll - (double)atomic_sum_ll;
+ const double sum_sum_ll = (double)real_sum_ll + (double)atomic_sum_ll;
+ const double rel_sum_ll = 0.5 * fabs(diff_sum_ll) / sum_sum_ll;
+ message("Real sum = %lld -- atomic sum = %lld rel=%e", real_sum_ll,
+ atomic_sum_ll, rel_sum_ll);
+
+ /*** Test the inc atomic ops *******************************/
+
+ long long real_inc_ll = array_size;
+
+ /* Compute the answer via threads and atomic */
+ long long atomic_inc_ll = 0LL;
+ threadpool_map(&tp, map_function_inc_ll, array_ll, array_size,
+ sizeof(long long), chunk_size, &atomic_inc_ll);
+
+ const double diff_inc_ll = (double)real_inc_ll - (double)atomic_inc_ll;
+ const double sum_inc_ll = (double)real_inc_ll + (double)atomic_inc_ll;
+ const double rel_inc_ll = 0.5 * fabs(diff_inc_ll) / sum_inc_ll;
+ message("Real inc = %lld -- atomic inc = %lld rel=%e", real_inc_ll,
+ atomic_inc_ll, rel_inc_ll);
+
+ /* Be clean */
+ threadpool_clean(&tp);
+ free(array_f);
+ free(array_ll);
+ return 0;
+}