diff --git a/.gitignore b/.gitignore
index f843002a1c03af25e89a601c75ac14f1203cca84..99aa018c9d8f826076cb26ae7cdf57fc314b8852 100644
--- a/.gitignore
+++ b/.gitignore
@@ -122,6 +122,7 @@ tests/testInteractions.sh
 tests/testSymmetry
 tests/testHydroMPIrules
 tests/testMaths
+tests/testAtomic
 tests/testRandom
 tests/testRandomSpacing
 tests/testThreadpool
diff --git a/configure.ac b/configure.ac
index 6edd9586b9b8e92c29b0dabb8c45cd81a7dbb516..8495dca80d11f61470136bbd95eafbd78dccad02 100644
--- a/configure.ac
+++ b/configure.ac
@@ -239,6 +239,23 @@ if test "x$enable_debug" = "xyes"; then
    fi
 fi
 
+# Are we using the regular tasks (with atomics and no task conflicts) or
+# are we using the atomic-free task-conflicting (slower) version?
+AC_ARG_ENABLE([atomics-within-tasks],
+   [AS_HELP_STRING([--disable-atomics-within-tasks],
+     [Disable the use of atomic operations within tasks. This creates more task conflicts
+      but allows for atomic-free and lock-free code within the tasks. This likely slows down
+      the code @<:@yes/no@:>@]
+   )],
+   [enable_atomics_within_tasks="$enableval"],
+   [enable_atomics_within_tasks="yes"]
+)
+AS_IF([test "x$enable_atomics_within_tasks" != "xno"],
+   , # Note: atomics are allowed by default; we only define the macro when they are disabled.
+   [AC_DEFINE([SWIFT_TASKS_WITHOUT_ATOMICS],1,[Makes SWIFT use atomic-free and lock-free tasks.])
+])
+
+
 # Check if task debugging is on.
 AC_ARG_ENABLE([task-debugging],
    [AS_HELP_STRING([--enable-task-debugging],
@@ -2301,6 +2318,7 @@ AC_MSG_RESULT([
    Black holes model    : $with_black_holes
    Task dependencies    : $with_task_order
 
+   Atomic operations in tasks  : $enable_atomics_within_tasks
    Individual timers           : $enable_timers
    Task debugging              : $enable_task_debugging
    Threadpool debugging        : $enable_threadpool_debugging
diff --git a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml
index 2770f9e27a5a50646575ccc0a9a2567bc548fc0e..8234b4b3e2d463b33d03f744386479723a29a9b4 100644
--- a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml
+++ b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml
@@ -68,17 +68,19 @@ FOF:
   min_group_size:                  256         # The minimum no. of particles required for a group.
   linking_length_ratio:            0.2         # Linking length in units of the main inter-particle separation.
   black_hole_seed_halo_mass_Msun:  1.5e10      # Minimal halo mass in which to seed a black hole (in solar masses).
-  scale_factor_first:              0.01        # Scale-factor of first FoF black hole seeding calls.
-  delta_time:                      1.005       # Scale-factor ratio between consecutive FoF black hole seeding calls.
+  scale_factor_first:              0.05        # Scale-factor of first FoF black hole seeding calls.
+  delta_time:                      1.00751     # Scale-factor ratio between consecutive FoF black hole seeding calls.
 
 Scheduler:
   max_top_level_cells:   8
-  tasks_per_cell:        5
   cell_split_size:       200
   
 Restarts:
   onexit:       1
-  delta_hours:  1.0
+  delta_hours:  6.0
+  max_run_time: 71.5                 # Three-day (72h) queue limit minus a safety margin for the final dump
+  resubmit_on_exit:   1
+  resubmit_command:   ./resub.sh
 
 # Parameters related to the initial conditions
 InitialConditions:
diff --git a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml
index f175e2c6a3eef20371f6efa1d8f8f952393e4cd4..2de9a58ed5fe04f2061fd29d12bcce955293c447 100644
--- a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml
+++ b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml
@@ -68,17 +68,19 @@ FOF:
   min_group_size:                  256         # The minimum no. of particles required for a group.
   linking_length_ratio:            0.2         # Linking length in units of the main inter-particle separation.
   black_hole_seed_halo_mass_Msun:  1.5e10      # Minimal halo mass in which to seed a black hole (in solar masses).
-  scale_factor_first:              0.01        # Scale-factor of first FoF black hole seeding calls.
-  delta_time:                      1.005       # Scale-factor ratio between consecutive FoF black hole seeding calls.
+  scale_factor_first:              0.05        # Scale-factor of first FoF black hole seeding calls.
+  delta_time:                      1.00751     # Scale-factor ratio between consecutive FoF black hole seeding calls.
 
 Scheduler:
   max_top_level_cells:   16
-  tasks_per_cell:        5
   cell_split_size:       200
   
 Restarts:
   onexit:       1
-  delta_hours:  1.0
+  delta_hours:  6.0
+  max_run_time: 71.5                 # Three-day (72h) queue limit minus a safety margin for the final dump
+  resubmit_on_exit:   1
+  resubmit_command:   ./resub.sh
 
 # Parameters related to the initial conditions
 InitialConditions:
diff --git a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml
index b65f1061524516f3a5a9bba7c79b59c58a973c42..49070d3b315fcfd06c90fb779a060a38a28bd072 100644
--- a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml
+++ b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml
@@ -68,8 +68,8 @@ FOF:
   min_group_size:                  256         # The minimum no. of particles required for a group.
   linking_length_ratio:            0.2         # Linking length in units of the main inter-particle separation.
   black_hole_seed_halo_mass_Msun:  1.5e10      # Minimal halo mass in which to seed a black hole (in solar masses).
-  scale_factor_first:              0.01        # Scale-factor of first FoF black hole seeding calls.
-  delta_time:                      1.005       # Scale-factor ratio between consecutive FoF black hole seeding calls.
+  scale_factor_first:              0.05        # Scale-factor of first FoF black hole seeding calls.
+  delta_time:                      1.00751     # Scale-factor ratio between consecutive FoF black hole seeding calls.
 
 Scheduler:
   max_top_level_cells:   32
@@ -78,7 +78,7 @@ Scheduler:
   
 Restarts:
   onexit:       1
-  delta_hours:  1.0
+  delta_hours:  6.0
 
 # Parameters related to the initial conditions
 InitialConditions:
diff --git a/src/Makefile.am b/src/Makefile.am
index 65dbae7bba5afc31db07c459fb0de60bd85df390..cf6c71644d7161001828c22260c5b56efce9ac75 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -114,7 +114,7 @@ AM_SOURCES = space.c runner_main.c runner_doiact_hydro.c runner_doiact_limiter.c
 
 # Include files for distribution, not installation.
 nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
-		 gravity_iact.h kernel_long_gravity.h vector.h cache.h \
+		 gravity_iact.h kernel_long_gravity.h vector.h accumulate.h cache.h \
 	         runner_doiact_nosort.h runner_doiact_hydro.h runner_doiact_stars.h runner_doiact_black_holes.h runner_doiact_grav.h \
                  runner_doiact_functions_hydro.h runner_doiact_functions_stars.h runner_doiact_functions_black_holes.h \
 		 runner_doiact_functions_limiter.h runner_doiact_limiter.h units.h intrinsics.h minmax.h \
diff --git a/src/accumulate.h b/src/accumulate.h
new file mode 100644
index 0000000000000000000000000000000000000000..877ac23baa1e9041c45a8d3dcc10a1d660ea6c68
--- /dev/null
+++ b/src/accumulate.h
@@ -0,0 +1,208 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_ACCUMULATE_H
+#define SWIFT_ACCUMULATE_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Local includes */
+#include "atomic.h"
+#include "minmax.h"
+
+/**
+ * @file accumulate.h
+ * @brief Defines a series of functions used to update the fields of a particle
+ * atomically. These functions should be used in any task that can run in
+ * parallel to another one.
+ */
+
+/**
+ * @brief Add x to the value stored at the location address (int version)
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to add to *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_add_i(
+    volatile int *const address, const int x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+  *address += x;
+#else
+  atomic_add(address, x);
+#endif
+}
+
+/**
+ * @brief Add x to the value stored at the location address (long long version)
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to add to *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_add_ll(
+    volatile long long *const address, const long long x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+  *address += x;
+#else
+  atomic_add(address, x);
+#endif
+}
+
+/**
+ * @brief Add x to the value stored at the location address (float version)
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to add to *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_add_f(
+    volatile float *const address, const float x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+  *address += x;
+#else
+  atomic_add_f(address, x);
+#endif
+}
+
+/**
+ * @brief Add x to the value stored at the location address (double version)
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to add to *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_add_d(
+    volatile double *const address, const double x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+  *address += x;
+#else
+  atomic_add_d(address, x);
+#endif
+}
+
+/**
+ * @brief Add 1 to the value stored at the location address (int version)
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_inc_i(
+    volatile int *const address) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+  (*address)++;
+#else
+  atomic_inc(address);
+#endif
+}
+
+/**
+ * @brief Add 1 to the value stored at the location address (long long version)
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_inc_ll(
+    volatile long long *const address) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+  (*address)++;
+#else
+  atomic_inc(address);
+#endif
+}
+
+/**
+ * @brief Compute the max of x and the value stored at the location address
+ * and store the value at the address (int8_t version).
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to max against *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_max_c(
+    volatile int8_t *const address, const int8_t x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+  *address = max(*address, x);
+#else
+  atomic_max_c(address, x);
+#endif
+}
+
+/**
+ * @brief Compute the max of x and the value stored at the location address
+ * and store the value at the address (int version).
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to max against *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_max_i(
+    volatile int *const address, const int x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+  *address = max(*address, x);
+#else
+  atomic_max(address, x);
+#endif
+}
+
+/**
+ * @brief Compute the max of x and the value stored at the location address
+ * and store the value at the address (float version).
+ *
+ * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an
+ * atomic operation.
+ *
+ * @param address The address to update.
+ * @param x The value to max against *address.
+ */
+__attribute__((always_inline)) INLINE static void accumulate_max_f(
+    volatile float *const address, const float x) {
+
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+  *address = max(*address, x);
+#else
+  atomic_max_f(address, x);
+#endif
+}
+
+#endif /* SWIFT_ACCUMULATE_H */
diff --git a/src/atomic.h b/src/atomic.h
index 8a790a7f6cbbf190c3d87d590f10c6f1676c2d24..8ba1b0e692b08806d18eb453d9211a4829a8943a 100644
--- a/src/atomic.h
+++ b/src/atomic.h
@@ -26,6 +26,9 @@
 #include "inline.h"
 #include "minmax.h"
 
+/* Standard includes */
+#include <stdint.h>
+
 #define atomic_add(v, i) __sync_fetch_and_add(v, i)
 #define atomic_sub(v, i) __sync_fetch_and_sub(v, i)
 #define atomic_or(v, i) __sync_fetch_and_or(v, i)
@@ -35,6 +38,29 @@
 #define atomic_cas(v, o, n) __sync_val_compare_and_swap(v, o, n)
 #define atomic_swap(v, n) __sync_lock_test_and_set(v, n)
 
+/**
+ * @brief Atomic min operation on ints.
+ *
+ * This is a text-book implementation based on an atomic CAS.
+ *
+ * @param address The address to update.
+ * @param y The value to update the address with.
+ */
+__attribute__((always_inline)) INLINE static void atomic_min(
+    volatile int *const address, const int y) {
+
+  int *int_ptr = (int *)address;
+
+  int test_val, old_val, new_val;
+  old_val = *address;
+
+  do {
+    test_val = old_val;
+    new_val = min(old_val, y);
+    old_val = atomic_cas(int_ptr, test_val, new_val);
+  } while (test_val != old_val);
+}
+
 /**
  * @brief Atomic min operation on floats.
  *
@@ -66,29 +92,6 @@ __attribute__((always_inline)) INLINE static void atomic_min_f(
   } while (test_val.as_int != old_val.as_int);
 }
 
-/**
- * @brief Atomic min operation on ints.
- *
- * This is a text-book implementation based on an atomic CAS.
- *
- * @param address The address to update.
- * @param y The value to update the address with.
- */
-__attribute__((always_inline)) INLINE static void atomic_min(
-    volatile int *address, int y) {
-
-  int *int_ptr = (int *)address;
-
-  int test_val, old_val, new_val;
-  old_val = *address;
-
-  do {
-    test_val = old_val;
-    new_val = min(old_val, y);
-    old_val = atomic_cas(int_ptr, test_val, new_val);
-  } while (test_val != old_val);
-}
-
 /**
  * @brief Atomic min operation on doubles.
  *
@@ -121,6 +124,50 @@ __attribute__((always_inline)) INLINE static void atomic_min_d(
   } while (test_val.as_long_long != old_val.as_long_long);
 }
 
+/**
+ * @brief Atomic max operation on int8_t.
+ *
+ * This is a text-book implementation based on an atomic CAS.
+ *
+ * @param address The address to update.
+ * @param y The value to update the address with.
+ */
+__attribute__((always_inline)) INLINE static void atomic_max_c(
+    volatile int8_t *const address, const int8_t y) {
+
+  int8_t test_val, old_val, new_val;
+  old_val = *address;
+
+  do {
+    test_val = old_val;
+    new_val = max(old_val, y);
+    old_val = atomic_cas(address, test_val, new_val);
+  } while (test_val != old_val);
+}
+
+/**
+ * @brief Atomic max operation on ints.
+ *
+ * This is a text-book implementation based on an atomic CAS.
+ *
+ * @param address The address to update.
+ * @param y The value to update the address with.
+ */
+__attribute__((always_inline)) INLINE static void atomic_max(
+    volatile int *const address, const int y) {
+
+  int *int_ptr = (int *)address;
+
+  int test_val, old_val, new_val;
+  old_val = *address;
+
+  do {
+    test_val = old_val;
+    new_val = max(old_val, y);
+    old_val = atomic_cas(int_ptr, test_val, new_val);
+  } while (test_val != old_val);
+}
+
 /**
  * @brief Atomic max operation on floats.
  *
diff --git a/src/cell.c b/src/cell.c
index 6eef7d32d94ab6edbb418d6eb1732da1062ce538..056c0aee013e1321be61a3542906b98a97809b83 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -1228,7 +1228,7 @@ int cell_unpack_sf_counts(struct cell *restrict c,
  * @return 0 on success, 1 on failure
  */
 int cell_locktree(struct cell *c) {
-  TIMER_TIC
+  TIMER_TIC;
 
   /* First of all, try to lock this cell. */
   if (c->hydro.hold || lock_trylock(&c->hydro.lock) != 0) {
@@ -1288,7 +1288,7 @@ int cell_locktree(struct cell *c) {
  * @return 0 on success, 1 on failure
  */
 int cell_glocktree(struct cell *c) {
-  TIMER_TIC
+  TIMER_TIC;
 
   /* First of all, try to lock this cell. */
   if (c->grav.phold || lock_trylock(&c->grav.plock) != 0) {
@@ -1348,7 +1348,7 @@ int cell_glocktree(struct cell *c) {
  * @return 0 on success, 1 on failure
  */
 int cell_mlocktree(struct cell *c) {
-  TIMER_TIC
+  TIMER_TIC;
 
   /* First of all, try to lock this cell. */
   if (c->grav.mhold || lock_trylock(&c->grav.mlock) != 0) {
@@ -1408,7 +1408,7 @@ int cell_mlocktree(struct cell *c) {
  * @return 0 on success, 1 on failure
  */
 int cell_slocktree(struct cell *c) {
-  TIMER_TIC
+  TIMER_TIC;
 
   /* First of all, try to lock this cell. */
   if (c->stars.hold || lock_trylock(&c->stars.lock) != 0) {
@@ -1468,7 +1468,7 @@ int cell_slocktree(struct cell *c) {
  * @return 0 on success, 1 on failure
  */
 int cell_blocktree(struct cell *c) {
-  TIMER_TIC
+  TIMER_TIC;
 
   /* First of all, try to lock this cell. */
   if (c->black_holes.hold || lock_trylock(&c->black_holes.lock) != 0) {
@@ -1528,7 +1528,7 @@ int cell_blocktree(struct cell *c) {
  * @param c The #cell.
  */
 void cell_unlocktree(struct cell *c) {
-  TIMER_TIC
+  TIMER_TIC;
 
   /* First of all, try to unlock this cell. */
   if (lock_unlock(&c->hydro.lock) != 0) error("Failed to unlock cell.");
@@ -1546,7 +1546,7 @@ void cell_unlocktree(struct cell *c) {
  * @param c The #cell.
  */
 void cell_gunlocktree(struct cell *c) {
-  TIMER_TIC
+  TIMER_TIC;
 
   /* First of all, try to unlock this cell. */
   if (lock_unlock(&c->grav.plock) != 0) error("Failed to unlock cell.");
@@ -1564,7 +1564,7 @@ void cell_gunlocktree(struct cell *c) {
  * @param c The #cell.
  */
 void cell_munlocktree(struct cell *c) {
-  TIMER_TIC
+  TIMER_TIC;
 
   /* First of all, try to unlock this cell. */
   if (lock_unlock(&c->grav.mlock) != 0) error("Failed to unlock cell.");
@@ -1582,7 +1582,7 @@ void cell_munlocktree(struct cell *c) {
  * @param c The #cell.
  */
 void cell_sunlocktree(struct cell *c) {
-  TIMER_TIC
+  TIMER_TIC;
 
   /* First of all, try to unlock this cell. */
   if (lock_unlock(&c->stars.lock) != 0) error("Failed to unlock cell.");
@@ -1600,7 +1600,7 @@ void cell_sunlocktree(struct cell *c) {
  * @param c The #cell.
  */
 void cell_bunlocktree(struct cell *c) {
-  TIMER_TIC
+  TIMER_TIC;
 
   /* First of all, try to unlock this cell. */
   if (lock_unlock(&c->black_holes.lock) != 0) error("Failed to unlock cell.");
@@ -5358,16 +5358,31 @@ void cell_check_timesteps(const struct cell *c, const integertime_t ti_current,
   /* Only check cells that have at least one non-inhibited particle */
   if (count > 0) {
 
-    if (ti_end_min != c->hydro.ti_end_min)
-      error(
-          "Non-matching ti_end_min. Cell=%lld true=%lld ti_current=%lld "
-          "depth=%d",
-          c->hydro.ti_end_min, ti_end_min, ti_current, c->depth);
+    if (count != c->hydro.count) {
+
+      /* Note that we use a < as the particle with the smallest time-bin
+         might have been swallowed. This means we will run this cell with
+         0 active particles but that's not wrong */
+      if (ti_end_min < c->hydro.ti_end_min)
+        error(
+            "Non-matching ti_end_min. Cell=%lld true=%lld ti_current=%lld "
+            "depth=%d",
+            c->hydro.ti_end_min, ti_end_min, ti_current, c->depth);
+
+    } else /* Normal case: nothing was swallowed/converted */ {
+      if (ti_end_min != c->hydro.ti_end_min)
+        error(
+            "Non-matching ti_end_min. Cell=%lld true=%lld ti_current=%lld "
+            "depth=%d",
+            c->hydro.ti_end_min, ti_end_min, ti_current, c->depth);
+    }
+
     if (ti_end_max > c->hydro.ti_end_max)
       error(
           "Non-matching ti_end_max. Cell=%lld true=%lld ti_current=%lld "
           "depth=%d",
           c->hydro.ti_end_max, ti_end_max, ti_current, c->depth);
+
     if (ti_beg_max != c->hydro.ti_beg_max)
       error(
           "Non-matching ti_beg_max. Cell=%lld true=%lld ti_current=%lld "
diff --git a/src/engine.c b/src/engine.c
index 7f87bdc6463df32ead4397b0c8c9ceffbcfaa7de..e5e7b78dcafd9c14a032dd3a4249e6b0a52fab4d 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -2579,8 +2579,10 @@ void engine_step(struct engine *e) {
   /* Prepare the tasks to be launched, rebuild or repartition if needed. */
   engine_prepare(e);
 
-  /* Print the number of active tasks ? */
+#ifdef SWIFT_DEBUG_CHECKS
+  /* Print the number of active tasks */
   if (e->verbose) engine_print_task_counts(e);
+#endif
 
     /* Dump local cells and active particle counts. */
     // dumpCells("cells", 1, 0, 0, 0, e->s, e->nodeID, e->step);
diff --git a/src/gravity/MultiSoftening/gravity.h b/src/gravity/MultiSoftening/gravity.h
index 07ed0e78b164e5ee3f01b8c9a62b940201b31aa4..d7716cf712254054e3cf364a807f3cb1c12355f3 100644
--- a/src/gravity/MultiSoftening/gravity.h
+++ b/src/gravity/MultiSoftening/gravity.h
@@ -23,6 +23,7 @@
 #include <float.h>
 
 /* Local includes. */
+#include "accumulate.h"
 #include "cosmology.h"
 #include "error.h"
 #include "gravity_properties.h"
@@ -77,7 +78,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening(
 __attribute__((always_inline)) INLINE static void
 gravity_add_comoving_potential(struct gpart* restrict gp, float pot) {
 
-  gp->potential += pot;
+  accumulate_add_f(&gp->potential, pot);
 }
 
 /**
diff --git a/src/gravity/Potential/gravity.h b/src/gravity/Potential/gravity.h
index 3a3e9680016ef51c554691c6f43f8279cde8caf9..f9a9502a528c161fbc82b3028f303b7f9cad49f8 100644
--- a/src/gravity/Potential/gravity.h
+++ b/src/gravity/Potential/gravity.h
@@ -23,6 +23,7 @@
 #include <float.h>
 
 /* Local includes. */
+#include "accumulate.h"
 #include "cosmology.h"
 #include "gravity_properties.h"
 #include "kernel_gravity.h"
@@ -63,7 +64,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening(
 __attribute__((always_inline)) INLINE static void
 gravity_add_comoving_potential(struct gpart* restrict gp, float pot) {
 
-  gp->potential += pot;
+  accumulate_add_f(&gp->potential, pot);
 }
 
 /**
diff --git a/src/gravity_cache.h b/src/gravity_cache.h
index 912961d2e06dc748039f54c72bd9b33fd2547df2..ba3432d7e732b783547ddd2a597dbdf5723bf606 100644
--- a/src/gravity_cache.h
+++ b/src/gravity_cache.h
@@ -23,6 +23,7 @@
 #include "../config.h"
 
 /* Local headers */
+#include "accumulate.h"
 #include "align.h"
 #include "error.h"
 #include "gravity.h"
@@ -491,9 +492,9 @@ __attribute__((always_inline)) INLINE static void gravity_cache_write_back(
   /* Write stuff back to the particles */
   for (int i = 0; i < gcount; ++i) {
     if (active[i]) {
-      gparts[i].a_grav[0] += a_x[i];
-      gparts[i].a_grav[1] += a_y[i];
-      gparts[i].a_grav[2] += a_z[i];
+      accumulate_add_f(&gparts[i].a_grav[0], a_x[i]);
+      accumulate_add_f(&gparts[i].a_grav[1], a_y[i]);
+      accumulate_add_f(&gparts[i].a_grav[2], a_z[i]);
       gravity_add_comoving_potential(&gparts[i], pot[i]);
     }
   }
diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c
index 0efec0bc80fc381d9bd6ff733cc546e1885c1d9e..cbfb9ef7a2970b222bf67a915bd5044bedaaaae3 100644
--- a/src/mesh_gravity.c
+++ b/src/mesh_gravity.c
@@ -28,6 +28,7 @@
 #include "mesh_gravity.h"
 
 /* Local includes. */
+#include "accumulate.h"
 #include "active.h"
 #include "debug.h"
 #include "engine.h"
@@ -334,10 +335,10 @@ void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, const int N,
   /* ---- */
 
   /* Store things back */
+  accumulate_add_f(&gp->a_grav[0], fac * a[0]);
+  accumulate_add_f(&gp->a_grav[1], fac * a[1]);
+  accumulate_add_f(&gp->a_grav[2], fac * a[2]);
   gravity_add_comoving_potential(gp, p);
-  gp->a_grav[0] += fac * a[0];
-  gp->a_grav[1] += fac * a[1];
-  gp->a_grav[2] += fac * a[2];
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
   gp->potential_PM = p;
   gp->a_grav_PM[0] = fac * a[0];
diff --git a/src/multipole.h b/src/multipole.h
index e03e302c4186a54442518a687c444ff4a7216b1e..55de9c0baebe0371e4e61860e2362907874cba6b 100644
--- a/src/multipole.h
+++ b/src/multipole.h
@@ -1608,12 +1608,19 @@ INLINE static void gravity_M2L_apply(
     const struct potential_derivatives_M2L *pot) {
 
 #ifdef SWIFT_DEBUG_CHECKS
-  /* Count interactions */
-  l_b->num_interacted += m_a->num_gpart;
+  /* Count all interactions
+   * Note that despite being in a section of the code protected by locks,
+   * we must use atomics here as the long-range task may update this
+   * counter in a lock-free section of code. */
+  accumulate_add_ll(&l_b->num_interacted, m_a->num_gpart);
 #endif
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
-  l_b->num_interacted_tree += m_a->num_gpart;
+  /* Count tree interactions
+   * Note that despite being in a section of the code protected by locks,
+   * we must use atomics here as the long-range task may update this
+   * counter in a lock-free section of code. */
+  accumulate_add_ll(&l_b->num_interacted_tree, m_a->num_gpart);
 #endif
 
   /* Record that this tensor has received contributions */
@@ -2467,12 +2474,12 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb,
 #ifdef SWIFT_DEBUG_CHECKS
   if (lb->num_interacted == 0) error("Interacting with empty field tensor");
 
-  gp->num_interacted += lb->num_interacted;
+  accumulate_add_ll(&gp->num_interacted, lb->num_interacted);
 #endif
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
-  gp->num_interacted_m2l += lb->num_interacted_tree;
-  gp->num_interacted_pm += lb->num_interacted_pm;
+  accumulate_add_ll(&gp->num_interacted_m2l, lb->num_interacted_tree);
+  accumulate_add_ll(&gp->num_interacted_pm, lb->num_interacted_pm);
 #endif
 
   /* Local accumulator */
@@ -2589,15 +2596,15 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb,
 #endif
 
   /* Update the particle */
-  gp->a_grav[0] += a_grav[0];
-  gp->a_grav[1] += a_grav[1];
-  gp->a_grav[2] += a_grav[2];
+  accumulate_add_f(&gp->a_grav[0], a_grav[0]);
+  accumulate_add_f(&gp->a_grav[1], a_grav[1]);
+  accumulate_add_f(&gp->a_grav[2], a_grav[2]);
   gravity_add_comoving_potential(gp, pot);
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
-  gp->a_grav_m2l[0] += a_grav[0];
-  gp->a_grav_m2l[1] += a_grav[1];
-  gp->a_grav_m2l[2] += a_grav[2];
+  accumulate_add_f(&gp->a_grav_m2l[0], a_grav[0]);
+  accumulate_add_f(&gp->a_grav_m2l[1], a_grav[1]);
+  accumulate_add_f(&gp->a_grav_m2l[2], a_grav[2]);
 #endif
 }
 
diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c
index 043b662b101d99698bbc46cf39ae2096e2725c91..91abe9e91060b19fa6df1bbbb29e0de6e6442cad 100644
--- a/src/runner_doiact_grav.c
+++ b/src/runner_doiact_grav.c
@@ -264,13 +264,13 @@ static INLINE void runner_dopair_grav_pp_full(
 #ifdef SWIFT_DEBUG_CHECKS
       /* Update the interaction counter if it's not a padded gpart */
       if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
-        gparts_i[pid].num_interacted++;
+        accumulate_inc_ll(&gparts_i[pid].num_interacted);
 #endif
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
       /* Update the p2p interaction counter if it's not a padded gpart */
       if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
-        gparts_i[pid].num_interacted_p2p++;
+        accumulate_inc_ll(&gparts_i[pid].num_interacted_p2p);
 #endif
     }
 
@@ -281,9 +281,9 @@ static INLINE void runner_dopair_grav_pp_full(
     ci_cache->pot[pid] += pot;
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
-    gparts_i[pid].a_grav_p2p[0] += a_x;
-    gparts_i[pid].a_grav_p2p[1] += a_y;
-    gparts_i[pid].a_grav_p2p[2] += a_z;
+    accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x);
+    accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y);
+    accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z);
 #endif
   }
 }
@@ -420,13 +420,13 @@ static INLINE void runner_dopair_grav_pp_truncated(
 #ifdef SWIFT_DEBUG_CHECKS
       /* Update the interaction counter if it's not a padded gpart */
       if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
-        gparts_i[pid].num_interacted++;
+        accumulate_inc_ll(&gparts_i[pid].num_interacted);
 #endif
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
       /* Update the p2p interaction counter if it's not a padded gpart */
       if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e))
-        gparts_i[pid].num_interacted_p2p++;
+        accumulate_inc_ll(&gparts_i[pid].num_interacted_p2p);
 #endif
     }
 
@@ -437,9 +437,9 @@ static INLINE void runner_dopair_grav_pp_truncated(
     ci_cache->pot[pid] += pot;
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
-    gparts_i[pid].a_grav_p2p[0] += a_x;
-    gparts_i[pid].a_grav_p2p[1] += a_y;
-    gparts_i[pid].a_grav_p2p[2] += a_z;
+    accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x);
+    accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y);
+    accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z);
 #endif
   }
 }
@@ -565,16 +565,18 @@ static INLINE void runner_dopair_grav_pm_full(
 #ifdef SWIFT_DEBUG_CHECKS
     /* Update the interaction counter */
     if (pid < gcount_i)
-      gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
+      accumulate_add_ll(&gparts_i[pid].num_interacted,
+                        cj->grav.multipole->m_pole.num_gpart);
 #endif
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
     /* Update the M2P interaction counter and forces. */
     if (pid < gcount_i) {
-      gparts_i[pid].num_interacted_m2p += cj->grav.multipole->m_pole.num_gpart;
-      gparts_i[pid].a_grav_m2p[0] += f_x;
-      gparts_i[pid].a_grav_m2p[1] += f_y;
-      gparts_i[pid].a_grav_m2p[2] += f_z;
+      accumulate_add_ll(&gparts_i[pid].num_interacted_m2p,
+                        cj->grav.multipole->m_pole.num_gpart);
+      accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x);
+      accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y);
+      accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z);
     }
 #endif
   }
@@ -706,16 +708,18 @@ static INLINE void runner_dopair_grav_pm_truncated(
 #ifdef SWIFT_DEBUG_CHECKS
     /* Update the interaction counter */
     if (pid < gcount_i)
-      gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart;
+      accumulate_add_ll(&gparts_i[pid].num_interacted,
+                        cj->grav.multipole->m_pole.num_gpart);
 #endif
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
     /* Update the M2P interaction counter and forces. */
     if (pid < gcount_i) {
-      gparts_i[pid].num_interacted_m2p += cj->grav.multipole->m_pole.num_gpart;
-      gparts_i[pid].a_grav_m2p[0] += f_x;
-      gparts_i[pid].a_grav_m2p[1] += f_y;
-      gparts_i[pid].a_grav_m2p[2] += f_z;
+      accumulate_add_ll(&gparts_i[pid].num_interacted_m2p,
+                        cj->grav.multipole->m_pole.num_gpart);
+      accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x);
+      accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y);
+      accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z);
     }
 #endif
   }
@@ -1041,13 +1045,13 @@ static INLINE void runner_doself_grav_pp_full(
 #ifdef SWIFT_DEBUG_CHECKS
       /* Update the interaction counter if it's not a padded gpart */
       if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
-        gparts[pid].num_interacted++;
+        accumulate_inc_ll(&gparts[pid].num_interacted);
 #endif
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
       /* Update the P2P interaction counter if it's not a padded gpart */
       if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
-        gparts[pid].num_interacted_p2p++;
+        accumulate_inc_ll(&gparts[pid].num_interacted_p2p);
 #endif
     }
 
@@ -1058,9 +1062,9 @@ static INLINE void runner_doself_grav_pp_full(
     ci_cache->pot[pid] += pot;
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
-    gparts[pid].a_grav_p2p[0] += a_x;
-    gparts[pid].a_grav_p2p[1] += a_y;
-    gparts[pid].a_grav_p2p[2] += a_z;
+    accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x);
+    accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y);
+    accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z);
 #endif
   }
 }
@@ -1180,13 +1184,13 @@ static INLINE void runner_doself_grav_pp_truncated(
 #ifdef SWIFT_DEBUG_CHECKS
       /* Update the interaction counter if it's not a padded gpart */
       if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
-        gparts[pid].num_interacted++;
+        accumulate_inc_ll(&gparts[pid].num_interacted);
 #endif
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
       /* Update the P2P interaction counter if it's not a padded gpart */
       if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e))
-        gparts[pid].num_interacted_p2p++;
+        accumulate_inc_ll(&gparts[pid].num_interacted_p2p);
 #endif
     }
 
@@ -1197,9 +1201,9 @@ static INLINE void runner_doself_grav_pp_truncated(
     ci_cache->pot[pid] += pot;
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
-    gparts[pid].a_grav_p2p[0] += a_x;
-    gparts[pid].a_grav_p2p[1] += a_y;
-    gparts[pid].a_grav_p2p[2] += a_z;
+    accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x);
+    accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y);
+    accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z);
 #endif
   }
 }
@@ -1355,11 +1359,29 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r,
         cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
 #endif
 
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  /* Lock the multipoles
+   * Note we impose a hierarchy to solve the dining philosophers problem */
+  if (ci < cj) {
+    lock_lock(&ci->grav.mlock);
+    lock_lock(&cj->grav.mlock);
+  } else {
+    lock_lock(&cj->grav.mlock);
+    lock_lock(&ci->grav.mlock);
+  }
+#endif
+
   /* Let's interact at this level */
   gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot,
                         multi_i, multi_j, ci->grav.multipole->CoM,
                         cj->grav.multipole->CoM, props, periodic, dim, r_s_inv);
 
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  /* Unlock the multipoles */
+  if (lock_unlock(&ci->grav.mlock) != 0) error("Failed to unlock multipole");
+  if (lock_unlock(&cj->grav.mlock) != 0) error("Failed to unlock multipole");
+#endif
+
   TIMER_TOC(timer_dopair_grav_mm);
 }
 
@@ -1371,9 +1393,9 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r,
  * @param ci The #cell with field tensor to interact.
  * @param cj The #cell with the multipole.
  */
-static INLINE void runner_dopair_grav_mm_nonsym(
-    struct runner *r, struct cell *restrict ci,
-    const struct cell *restrict cj) {
+static INLINE void runner_dopair_grav_mm_nonsym(struct runner *r,
+                                                struct cell *restrict ci,
+                                                struct cell *restrict cj) {
 
   /* Some constants */
   const struct engine *e = r->e;
@@ -1406,10 +1428,28 @@ static INLINE void runner_dopair_grav_mm_nonsym(
         cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
 #endif
 
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  /* Lock the multipoles
+   * Note we impose a hierarchy to solve the dining philosophers problem */
+  if (ci < cj) {
+    lock_lock(&ci->grav.mlock);
+    lock_lock(&cj->grav.mlock);
+  } else {
+    lock_lock(&cj->grav.mlock);
+    lock_lock(&ci->grav.mlock);
+  }
+#endif
+
   /* Let's interact at this level */
   gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM,
                      cj->grav.multipole->CoM, props, periodic, dim, r_s_inv);
 
+#ifndef SWIFT_TASKS_WITHOUT_ATOMICS
+  /* Unlock the multipoles */
+  if (lock_unlock(&ci->grav.mlock) != 0) error("Failed to unlock multipole");
+  if (lock_unlock(&cj->grav.mlock) != 0) error("Failed to unlock multipole");
+#endif
+
   TIMER_TOC(timer_dopair_grav_mm);
 }
 
@@ -1638,17 +1678,21 @@ void runner_dopair_recursive_grav(struct runner *r, struct cell *ci,
 
 #ifdef SWIFT_DEBUG_CHECKS
     if (cell_is_active_gravity(ci, e))
-      multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
+      accumulate_add_ll(&multi_i->pot.num_interacted,
+                        multi_j->m_pole.num_gpart);
     if (cell_is_active_gravity(cj, e))
-      multi_j->pot.num_interacted += multi_i->m_pole.num_gpart;
+      accumulate_add_ll(&multi_j->pot.num_interacted,
+                        multi_i->m_pole.num_gpart);
 #endif
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
     /* Need to account for the interactions we missed */
     if (cell_is_active_gravity(ci, e))
-      multi_i->pot.num_interacted_pm += multi_j->m_pole.num_gpart;
+      accumulate_add_ll(&multi_i->pot.num_interacted_pm,
+                        multi_j->m_pole.num_gpart);
     if (cell_is_active_gravity(cj, e))
-      multi_j->pot.num_interacted_pm += multi_i->m_pole.num_gpart;
+      accumulate_add_ll(&multi_j->pot.num_interacted_pm,
+                        multi_i->m_pole.num_gpart);
 #endif
     return;
   }
@@ -1833,8 +1877,8 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
   for (int n = 0; n < nr_cells_with_particles; ++n) {
 
     /* Handle on the top-level cell and it's gravity business*/
-    const struct cell *cj = &cells[cells_with_particles[n]];
-    const struct gravity_tensors *const multi_j = cj->grav.multipole;
+    struct cell *cj = &cells[cells_with_particles[n]];
+    struct gravity_tensors *const multi_j = cj->grav.multipole;
 
     /* Avoid self contributions */
     if (top == cj) continue;
@@ -1854,12 +1898,14 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) {
 
 #ifdef SWIFT_DEBUG_CHECKS
         /* Need to account for the interactions we missed */
-        multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
+        accumulate_add_ll(&multi_i->pot.num_interacted,
+                          multi_j->m_pole.num_gpart);
 #endif
 
 #ifdef SWIFT_GRAVITY_FORCE_CHECKS
         /* Need to account for the interactions we missed */
-        multi_i->pot.num_interacted_pm += multi_j->m_pole.num_gpart;
+        accumulate_add_ll(&multi_i->pot.num_interacted_pm,
+                          multi_j->m_pole.num_gpart);
 #endif
 
         /* Record that this multipole received a contribution */
diff --git a/src/task.c b/src/task.c
index 19ba01580e6ca519a48fd934d0bb8c21b0670343..f821eed5e6b0fd6d29ebc00882584abb4548ef7c 100644
--- a/src/task.c
+++ b/src/task.c
@@ -456,8 +456,10 @@ void task_unlock(struct task *t) {
     case task_type_self:
     case task_type_sub_self:
       if (subtype == task_subtype_grav) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
         cell_gunlocktree(ci);
         cell_munlocktree(ci);
+#endif
       } else if ((subtype == task_subtype_stars_density) ||
                  (subtype == task_subtype_stars_feedback)) {
         cell_sunlocktree(ci);
@@ -470,7 +472,11 @@ void task_unlock(struct task *t) {
         cell_unlocktree(ci);
       } else if (subtype == task_subtype_do_bh_swallow) {
         cell_bunlocktree(ci);
-      } else {
+      } else if (subtype == task_subtype_limiter) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+        cell_unlocktree(ci);
+#endif
+      } else { /* hydro */
         cell_unlocktree(ci);
       }
       break;
@@ -478,10 +484,12 @@ void task_unlock(struct task *t) {
     case task_type_pair:
     case task_type_sub_pair:
       if (subtype == task_subtype_grav) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
         cell_gunlocktree(ci);
         cell_gunlocktree(cj);
         cell_munlocktree(ci);
         cell_munlocktree(cj);
+#endif
       } else if ((subtype == task_subtype_stars_density) ||
                  (subtype == task_subtype_stars_feedback)) {
         cell_sunlocktree(ci);
@@ -499,24 +507,35 @@ void task_unlock(struct task *t) {
       } else if (subtype == task_subtype_do_bh_swallow) {
         cell_bunlocktree(ci);
         cell_bunlocktree(cj);
-      } else {
+      } else if (subtype == task_subtype_limiter) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+        cell_unlocktree(ci);
+        cell_unlocktree(cj);
+#endif
+      } else { /* hydro */
         cell_unlocktree(ci);
         cell_unlocktree(cj);
       }
       break;
 
     case task_type_grav_down:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
       cell_gunlocktree(ci);
       cell_munlocktree(ci);
+#endif
       break;
 
     case task_type_grav_long_range:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
       cell_munlocktree(ci);
+#endif
       break;
 
     case task_type_grav_mm:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
       cell_munlocktree(ci);
       cell_munlocktree(cj);
+#endif
       break;
 
     case task_type_star_formation:
@@ -612,6 +631,7 @@ int task_lock(struct task *t) {
     case task_type_self:
     case task_type_sub_self:
       if (subtype == task_subtype_grav) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
         /* Lock the gparts and the m-pole */
         if (ci->grav.phold || ci->grav.mhold) return 0;
         if (cell_glocktree(ci) != 0)
@@ -620,6 +640,7 @@ int task_lock(struct task *t) {
           cell_gunlocktree(ci);
           return 0;
         }
+#endif
       } else if ((subtype == task_subtype_stars_density) ||
                  (subtype == task_subtype_stars_feedback)) {
         if (ci->stars.hold) return 0;
@@ -643,6 +664,11 @@ int task_lock(struct task *t) {
       } else if (subtype == task_subtype_do_bh_swallow) {
         if (ci->black_holes.hold) return 0;
         if (cell_blocktree(ci) != 0) return 0;
+      } else if (subtype == task_subtype_limiter) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+        if (ci->hydro.hold) return 0;
+        if (cell_locktree(ci) != 0) return 0;
+#endif
       } else { /* subtype == hydro */
         if (ci->hydro.hold) return 0;
         if (cell_locktree(ci) != 0) return 0;
@@ -652,6 +678,7 @@ int task_lock(struct task *t) {
     case task_type_pair:
     case task_type_sub_pair:
       if (subtype == task_subtype_grav) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
         /* Lock the gparts and the m-pole in both cells */
         if (ci->grav.phold || cj->grav.phold) return 0;
         if (cell_glocktree(ci) != 0) return 0;
@@ -668,6 +695,7 @@ int task_lock(struct task *t) {
           cell_munlocktree(ci);
           return 0;
         }
+#endif
       } else if ((subtype == task_subtype_stars_density) ||
                  (subtype == task_subtype_stars_feedback)) {
         /* Lock the stars and the gas particles in both cells */
@@ -719,6 +747,15 @@ int task_lock(struct task *t) {
           cell_bunlocktree(ci);
           return 0;
         }
+      } else if (subtype == task_subtype_limiter) {
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
+        if (ci->hydro.hold || cj->hydro.hold) return 0;
+        if (cell_locktree(ci) != 0) return 0;
+        if (cell_locktree(cj) != 0) {
+          cell_unlocktree(ci);
+          return 0;
+        }
+#endif
       } else { /* subtype == hydro */
         /* Lock the parts in both cells */
         if (ci->hydro.hold || cj->hydro.hold) return 0;
@@ -731,6 +768,7 @@ int task_lock(struct task *t) {
       break;
 
     case task_type_grav_down:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
       /* Lock the gparts and the m-poles */
       if (ci->grav.phold || ci->grav.mhold) return 0;
       if (cell_glocktree(ci) != 0)
@@ -739,15 +777,19 @@ int task_lock(struct task *t) {
         cell_gunlocktree(ci);
         return 0;
       }
+#endif
       break;
 
     case task_type_grav_long_range:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
       /* Lock the m-poles */
       if (ci->grav.mhold) return 0;
       if (cell_mlocktree(ci) != 0) return 0;
+#endif
       break;
 
     case task_type_grav_mm:
+#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
       /* Lock both m-poles */
       if (ci->grav.mhold || cj->grav.mhold) return 0;
       if (cell_mlocktree(ci) != 0) return 0;
@@ -755,6 +797,7 @@ int task_lock(struct task *t) {
         cell_munlocktree(ci);
         return 0;
       }
+#endif
       break;
 
     case task_type_star_formation:
diff --git a/src/timestep_limiter_iact.h b/src/timestep_limiter_iact.h
index a4f5c7c1af48db7f6926f0fac1c09a4db2a0c492..839b1a3dc450ca12b8b0c8824fd75cd87063a7b8 100644
--- a/src/timestep_limiter_iact.h
+++ b/src/timestep_limiter_iact.h
@@ -88,7 +88,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter(
   if (pj->time_bin > pi->time_bin + time_bin_neighbour_max_delta_bin) {
 
     /* Store the smallest time bin that woke up this particle */
-    pj->limiter_data.wakeup = max(pj->limiter_data.wakeup, -pi->time_bin);
+    accumulate_max_c(&pj->limiter_data.wakeup, -pi->time_bin);
   }
 }
 
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 11426ac89970917cf121351e266aaa67cfd99847..e24a2a69b07d9ebf8ff10f367f7eaf0724817f7c 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -29,7 +29,8 @@ TESTS = testGreetings testMaths testReading.sh testKernel \
 	testPeriodicBC.sh testPeriodicBCPerturbed.sh testPotentialSelf \
 	testPotentialPair testEOS testUtilities testSelectOutput.sh \
 	testCbrt testCosmology testOutputList testFormat.sh \
-	test27cellsStars.sh test27cellsStarsPerturbed.sh testHydroMPIrules
+	test27cellsStars.sh test27cellsStarsPerturbed.sh testHydroMPIrules \
+        testAtomic
 
 # List of test programs to compile
 check_PROGRAMS = testGreetings testReading testTimeIntegration \
@@ -41,7 +42,8 @@ check_PROGRAMS = testGreetings testReading testTimeIntegration \
 		 testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC \
 		 testGravityDerivatives testPotentialSelf testPotentialPair testEOS testUtilities \
 		 testSelectOutput testCbrt testCosmology testOutputList test27cellsStars \
-		 test27cellsStars_subset testCooling testComovingCooling testFeedback testHashmap testHydroMPIrules
+		 test27cellsStars_subset testCooling testComovingCooling testFeedback testHashmap \
+                 testAtomic testHydroMPIrules
 
 # Rebuild tests when SWIFT is updated.
 $(check_PROGRAMS): ../src/.libs/libswiftsim.a
@@ -51,6 +53,8 @@ testGreetings_SOURCES = testGreetings.c
 
 testMaths_SOURCES = testMaths.c
 
+testAtomic_SOURCES = testAtomic.c
+
 testRandom_SOURCES = testRandom.c
 
 testRandomSpacing_SOURCES = testRandomSpacing.c
diff --git a/tests/testAtomic.c b/tests/testAtomic.c
new file mode 100644
index 0000000000000000000000000000000000000000..47eca7b736d1687654bb1d2c5b40366a05e8af73
--- /dev/null
+++ b/tests/testAtomic.c
@@ -0,0 +1,146 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Standard includes. */
+#include <fenv.h>
+
+/* Local includes */
+#include "swift.h"
+
+const int array_size = 2048 * 2048;
+const int num_threads = 64;
+const int chunk_size = 64;
+
+void map_function_sum_f(void *data, int num_elements, void *extra_data) {
+
+  float *array = (float *)data;
+  float *sum = (float *)extra_data;
+
+  for (int i = 0; i < num_elements; ++i) atomic_add_f(sum, array[i]);
+}
+
+void map_function_sum_ll(void *data, int num_elements, void *extra_data) {
+
+  long long *array = (long long *)data;
+  long long *sum = (long long *)extra_data;
+
+  for (int i = 0; i < num_elements; ++i) atomic_add(sum, array[i]);
+}
+
+void map_function_inc_ll(void *data, int num_elements, void *extra_data) {
+
+  long long *sum = (long long *)extra_data;
+
+  for (int i = 0; i < num_elements; ++i) atomic_inc(sum);
+}
+
+int main(int argc, char *argv[]) {
+
+  /* Initialize CPU frequency, this also starts time. */
+  unsigned long long cpufreq = 0;
+  clocks_set_cpufreq(cpufreq);
+
+/* Choke on FPEs */
+#ifdef HAVE_FE_ENABLE_EXCEPT
+  feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+#endif
+
+  /* Get some randomness going */
+  const int seed = time(NULL);
+  message("Seed = %d", seed);
+  srand(seed);
+
+  /* Start a bunch of threads */
+  printf("# Creating threadpool with %d threads\n", num_threads);
+  struct threadpool tp;
+  threadpool_init(&tp, num_threads);
+
+  /* Create some random data */
+  float *array_f = malloc(array_size * sizeof(float));
+  long long *array_ll = malloc(array_size * sizeof(long long));
+
+  for (int i = 0; i < array_size; ++i) {
+    array_f[i] = rand() / ((float)RAND_MAX);
+    array_ll[i] = rand();
+  }
+
+  /*** Test the addition atomic ops *******************************/
+
+  /* float case */
+
+  /* Compute the real answer */
+  float real_sum_f = 0.f;
+  for (int i = 0; i < array_size; ++i) {
+    real_sum_f += array_f[i];
+  }
+
+  /* Compute the answer via threads and atomic */
+  float atomic_sum_f = 0.f;
+  threadpool_map(&tp, map_function_sum_f, array_f, array_size, sizeof(float),
+                 chunk_size, &atomic_sum_f);
+
+  const double diff_sum_f = (double)real_sum_f - (double)atomic_sum_f;
+  const double sum_sum_f = (double)real_sum_f + (double)atomic_sum_f;
+  const double rel_sum_f = 0.5 * fabs(diff_sum_f) / sum_sum_f;
+  message("Real sum = %.7e -- atomic sum = %.7e rel=%e", real_sum_f,
+          atomic_sum_f, rel_sum_f);
+
+  /* long long case */
+
+  /* Compute the real answer */
+  long long real_sum_ll = 0LL;
+  for (int i = 0; i < array_size; ++i) {
+    real_sum_ll += array_ll[i];
+  }
+
+  /* Compute the answer via threads and atomic */
+  long long atomic_sum_ll = 0LL;
+  threadpool_map(&tp, map_function_sum_ll, array_ll, array_size,
+                 sizeof(long long), chunk_size, &atomic_sum_ll);
+
+  const double diff_sum_ll = (double)real_sum_ll - (double)atomic_sum_ll;
+  const double sum_sum_ll = (double)real_sum_ll + (double)atomic_sum_ll;
+  const double rel_sum_ll = 0.5 * fabs(diff_sum_ll) / sum_sum_ll;
+  message("Real sum = %lld -- atomic sum = %lld rel=%e", real_sum_ll,
+          atomic_sum_ll, rel_sum_ll);
+
+  /*** Test the inc atomic ops *******************************/
+
+  long long real_inc_ll = array_size;
+
+  /* Compute the answer via threads and atomic */
+  long long atomic_inc_ll = 0LL;
+  threadpool_map(&tp, map_function_inc_ll, array_ll, array_size,
+                 sizeof(long long), chunk_size, &atomic_inc_ll);
+
+  const double diff_inc_ll = (double)real_inc_ll - (double)atomic_inc_ll;
+  const double sum_inc_ll = (double)real_inc_ll + (double)atomic_inc_ll;
+  const double rel_inc_ll = 0.5 * fabs(diff_inc_ll) / sum_inc_ll;
+  message("Real inc = %lld -- atomic inc = %lld rel=%e", real_inc_ll,
+          atomic_inc_ll, rel_inc_ll);
+
+  /* Be clean */
+  threadpool_clean(&tp);
+  free(array_f);
+  free(array_ll);
+  return 0;
+}