From 6c0d5eb89902e70ce21b14a111ca55baccad08ce Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Sat, 21 Mar 2020 23:16:57 +0100 Subject: [PATCH 01/27] Make the gravity tasks only update the particles atomically --- src/gravity/MultiSoftening/gravity.h | 3 ++- src/gravity_cache.h | 6 +++--- src/mesh_gravity.c | 6 +++--- src/multipole.h | 6 +++--- src/runner_doiact_grav.c | 20 +++++++++++--------- 5 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/gravity/MultiSoftening/gravity.h b/src/gravity/MultiSoftening/gravity.h index 07ed0e78b..7cb2c5043 100644 --- a/src/gravity/MultiSoftening/gravity.h +++ b/src/gravity/MultiSoftening/gravity.h @@ -23,6 +23,7 @@ #include /* Local includes. */ +#include "atomic.h" #include "cosmology.h" #include "error.h" #include "gravity_properties.h" @@ -77,7 +78,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening( __attribute__((always_inline)) INLINE static void gravity_add_comoving_potential(struct gpart* restrict gp, float pot) { - gp->potential += pot; + atomic_add_f(&gp->potential, pot); } /** diff --git a/src/gravity_cache.h b/src/gravity_cache.h index 912961d2e..6e55e3f44 100644 --- a/src/gravity_cache.h +++ b/src/gravity_cache.h @@ -491,9 +491,9 @@ __attribute__((always_inline)) INLINE static void gravity_cache_write_back( /* Write stuff back to the particles */ for (int i = 0; i < gcount; ++i) { if (active[i]) { - gparts[i].a_grav[0] += a_x[i]; - gparts[i].a_grav[1] += a_y[i]; - gparts[i].a_grav[2] += a_z[i]; + atomic_add_f(&gparts[i].a_grav[0], a_x[i]); + atomic_add_f(&gparts[i].a_grav[1], a_y[i]); + atomic_add_f(&gparts[i].a_grav[2], a_z[i]); gravity_add_comoving_potential(&gparts[i], pot[i]); } } diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c index 0efec0bc8..7cabd7bba 100644 --- a/src/mesh_gravity.c +++ b/src/mesh_gravity.c @@ -335,9 +335,9 @@ void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, const int N, /* Store things back */ gravity_add_comoving_potential(gp, p); - gp->a_grav[0] += fac * a[0]; - gp->a_grav[1] += fac * a[1]; - gp->a_grav[2] += fac * a[2]; + atomic_add_f(&gp->a_grav[0], fac * a[0]); + atomic_add_f(&gp->a_grav[1], fac * a[1]); + atomic_add_f(&gp->a_grav[2], fac * a[2]); #ifdef SWIFT_GRAVITY_FORCE_CHECKS gp->potential_PM = p; gp->a_grav_PM[0] = fac * a[0]; diff --git a/src/multipole.h b/src/multipole.h index e03e302c4..178c8088a 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -2589,9 +2589,9 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, #endif /* Update the particle */ - gp->a_grav[0] += a_grav[0]; - gp->a_grav[1] += a_grav[1]; - gp->a_grav[2] += a_grav[2]; + atomic_add_f(&gp->a_grav[0], a_grav[0]); + atomic_add_f(&gp->a_grav[1], a_grav[1]); + atomic_add_f(&gp->a_grav[2], a_grav[2]); gravity_add_comoving_potential(gp, pot); #ifdef SWIFT_GRAVITY_FORCE_CHECKS diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c index 043b662b1..0d9b2c9d6 100644 --- a/src/runner_doiact_grav.c +++ b/src/runner_doiact_grav.c @@ -264,7 +264,7 @@ static INLINE void runner_dopair_grav_pp_full( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - gparts_i[pid].num_interacted++; + atomic_inc(&gparts_i[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS @@ -420,7 +420,7 @@ static INLINE void runner_dopair_grav_pp_truncated( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - gparts_i[pid].num_interacted++; + atomic_inc(&gparts_i[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS @@ -565,7 +565,8 @@ static INLINE void runner_dopair_grav_pm_full( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ if (pid < gcount_i) - gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart; + atomic_add(&gparts_i[pid].num_interacted, + cj->grav.multipole->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS @@ -706,7 +707,8 @@ static INLINE void runner_dopair_grav_pm_truncated( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ if (pid < gcount_i) - gparts_i[pid].num_interacted += cj->grav.multipole->m_pole.num_gpart; + atomic_add(&gparts_i[pid].num_interacted, + cj->grav.multipole->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS @@ -1041,7 +1043,7 @@ static INLINE void runner_doself_grav_pp_full( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - gparts[pid].num_interacted++; + atomic_inc(&gparts[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS @@ -1180,7 +1182,7 @@ static INLINE void runner_doself_grav_pp_truncated( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - gparts[pid].num_interacted++; + atomic_inc(&gparts[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS @@ -1638,9 +1640,9 @@ void runner_dopair_recursive_grav(struct runner *r, struct cell *ci, #ifdef SWIFT_DEBUG_CHECKS if (cell_is_active_gravity(ci, e)) - multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; + atomic_add(&multi_i->pot.num_interacted, multi_j->m_pole.num_gpart); if (cell_is_active_gravity(cj, e)) - multi_j->pot.num_interacted += multi_i->m_pole.num_gpart; + atomic_add(&multi_j->pot.num_interacted, multi_i->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS @@ -1854,7 +1856,7 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { #ifdef SWIFT_DEBUG_CHECKS /* Need to account for the interactions we missed */ - multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; + atomic_add(&multi_i->pot.num_interacted, multi_j->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS -- GitLab From 7997f92604137e4ed5383a1fca37c344f444e01d Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Sat, 21 Mar 2020 23:37:16 +0100 Subject: [PATCH 02/27] Do not lock the tree for gpart and mpoles for gravity tasks --- src/cell.c | 12 +++++++---- src/task.c | 58 +++++------------------------------------------------- 2 files changed, 13 insertions(+), 57 deletions(-) diff --git a/src/cell.c b/src/cell.c index c4a4ecc18..1aefbaa7d 100644 --- a/src/cell.c +++ b/src/cell.c @@ -1288,7 +1288,7 @@ int cell_locktree(struct cell *c) { * @return 0 on success, 1 on failure */ int cell_glocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to lock this cell. */ if (c->grav.phold || lock_trylock(&c->grav.plock) != 0) { @@ -1348,7 +1348,9 @@ int cell_glocktree(struct cell *c) { * @return 0 on success, 1 on failure */ int cell_mlocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; + + error("aa"); /* First of all, try to lock this cell. */ if (c->grav.mhold || lock_trylock(&c->grav.mlock) != 0) { @@ -1546,7 +1548,7 @@ void cell_unlocktree(struct cell *c) { * @param c The #cell. */ void cell_gunlocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to unlock this cell. */ if (lock_unlock(&c->grav.plock) != 0) error("Failed to unlock cell."); @@ -1564,7 +1566,9 @@ void cell_gunlocktree(struct cell *c) { * @param c The #cell. */ void cell_munlocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; + + error("aa"); /* First of all, try to unlock this cell. */ if (lock_unlock(&c->grav.mlock) != 0) error("Failed to unlock cell."); diff --git a/src/task.c b/src/task.c index 19ba01580..f99056fb4 100644 --- a/src/task.c +++ b/src/task.c @@ -456,8 +456,6 @@ void task_unlock(struct task *t) { case task_type_self: case task_type_sub_self: if (subtype == task_subtype_grav) { - cell_gunlocktree(ci); - cell_munlocktree(ci); } else if ((subtype == task_subtype_stars_density) || (subtype == task_subtype_stars_feedback)) { cell_sunlocktree(ci); @@ -478,10 +476,6 @@ void task_unlock(struct task *t) { case task_type_pair: case task_type_sub_pair: if (subtype == task_subtype_grav) { - cell_gunlocktree(ci); - cell_gunlocktree(cj); - cell_munlocktree(ci); - cell_munlocktree(cj); } else if ((subtype == task_subtype_stars_density) || (subtype == task_subtype_stars_feedback)) { cell_sunlocktree(ci); @@ -506,17 +500,12 @@ void task_unlock(struct task *t) { break; case task_type_grav_down: - cell_gunlocktree(ci); - cell_munlocktree(ci); break; case task_type_grav_long_range: - cell_munlocktree(ci); break; case task_type_grav_mm: - cell_munlocktree(ci); - cell_munlocktree(cj); break; case task_type_star_formation: @@ -612,14 +601,7 @@ int task_lock(struct task *t) { case task_type_self: case task_type_sub_self: if (subtype == task_subtype_grav) { - /* Lock the gparts and the m-pole */ - if (ci->grav.phold || ci->grav.mhold) return 0; - if (cell_glocktree(ci) != 0) - return 0; - else if (cell_mlocktree(ci) != 0) { - cell_gunlocktree(ci); - return 0; - } + return 1; } else if ((subtype == task_subtype_stars_density) || (subtype == task_subtype_stars_feedback)) { if (ci->stars.hold) return 0; @@ -652,22 +634,7 @@ int task_lock(struct task *t) { case task_type_pair: case task_type_sub_pair: if (subtype == task_subtype_grav) { - /* Lock the gparts and the m-pole in both cells */ - if (ci->grav.phold || cj->grav.phold) return 0; - if (cell_glocktree(ci) != 0) return 0; - if (cell_glocktree(cj) != 0) { - cell_gunlocktree(ci); - return 0; - } else if (cell_mlocktree(ci) != 0) { - cell_gunlocktree(ci); - cell_gunlocktree(cj); - return 0; - } else if (cell_mlocktree(cj) != 0) { - cell_gunlocktree(ci); - cell_gunlocktree(cj); - cell_munlocktree(ci); - return 0; - } + return 1; } else if ((subtype == task_subtype_stars_density) || (subtype == task_subtype_stars_feedback)) { /* Lock the stars and the gas particles in both cells */ @@ -731,30 +698,15 @@ int task_lock(struct task *t) { break; case task_type_grav_down: - /* Lock the gparts and the m-poles */ - if (ci->grav.phold || ci->grav.mhold) return 0; - if (cell_glocktree(ci) != 0) - return 0; - else if (cell_mlocktree(ci) != 0) { - cell_gunlocktree(ci); - return 0; - } + return 1; break; case task_type_grav_long_range: - /* Lock the m-poles */ - if (ci->grav.mhold) return 0; - if (cell_mlocktree(ci) != 0) return 0; + return 1; break; case task_type_grav_mm: - /* Lock both m-poles */ - if (ci->grav.mhold || cj->grav.mhold) return 0; - if (cell_mlocktree(ci) != 0) return 0; - if (cell_mlocktree(cj) != 0) { - cell_munlocktree(ci); - return 0; - } + return 1; break; case task_type_star_formation: -- GitLab From c0768afa6f4f82057acb70240eb5f4c5ef248311 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Sun, 22 Mar 2020 00:18:01 +0100 Subject: [PATCH 03/27] Lock the multipoles before adding to them --- src/runner_doiact_grav.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c index 0d9b2c9d6..3b8611adb 100644 --- a/src/runner_doiact_grav.c +++ b/src/runner_doiact_grav.c @@ -1357,11 +1357,19 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); #endif + /* Lock the multipoles */ + lock_lock(&ci->grav.mlock); + lock_lock(&cj->grav.mlock); + /* Let's interact at this level */ gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot, multi_i, multi_j, ci->grav.multipole->CoM, cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); + /* Unlock the multipoles */ + if (lock_unlock(&ci->grav.mlock) != 0) error("Failed to unlock multipole"); + if (lock_unlock(&cj->grav.mlock) != 0) error("Failed to unlock multipole"); + TIMER_TOC(timer_dopair_grav_mm); } @@ -1375,7 +1383,7 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, */ static INLINE void runner_dopair_grav_mm_nonsym( struct runner *r, struct cell *restrict ci, - const struct cell *restrict cj) { + struct cell *restrict cj) { /* Some constants */ const struct engine *e = r->e; @@ -1408,10 +1416,18 @@ static INLINE void runner_dopair_grav_mm_nonsym( cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); #endif + /* Lock the multipoles */ + lock_lock(&ci->grav.mlock); + lock_lock(&cj->grav.mlock); + /* Let's interact at this level */ gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM, cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); + /* Unlock the multipoles */ + if (lock_unlock(&ci->grav.mlock) != 0) error("Failed to unlock multipole"); + if (lock_unlock(&cj->grav.mlock) != 0) error("Failed to unlock multipole"); + TIMER_TOC(timer_dopair_grav_mm); } @@ -1835,8 +1851,8 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { for (int n = 0; n < nr_cells_with_particles; ++n) { /* Handle on the top-level cell and it's gravity business*/ - const struct cell *cj = &cells[cells_with_particles[n]]; - const struct gravity_tensors *const multi_j = cj->grav.multipole; + struct cell *cj = &cells[cells_with_particles[n]]; + struct gravity_tensors *const multi_j = cj->grav.multipole; /* Avoid self contributions */ if (top == cj) continue; -- GitLab From bc72b5d248842ff3e091bf4d168352f114bfee97 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Sun, 22 Mar 2020 00:27:48 +0100 Subject: [PATCH 04/27] Solve the dining philosopher problem in the locking of the cells' multipoles --- src/runner_doiact_grav.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c index 3b8611adb..9a998907d 100644 --- a/src/runner_doiact_grav.c +++ b/src/runner_doiact_grav.c @@ -1357,9 +1357,15 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); #endif - /* Lock the multipoles */ - lock_lock(&ci->grav.mlock); - lock_lock(&cj->grav.mlock); + /* Lock the multipoles + * Note we impose a hierarchy to solve the dining philosopher problem */ + if (ci < cj) { + lock_lock(&ci->grav.mlock); + lock_lock(&cj->grav.mlock); + } else { + lock_lock(&cj->grav.mlock); + lock_lock(&ci->grav.mlock); + } /* Let's interact at this level */ gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot, @@ -1381,9 +1387,9 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, * @param ci The #cell with field tensor to interact. * @param cj The #cell with the multipole. */ -static INLINE void runner_dopair_grav_mm_nonsym( - struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { +static INLINE void runner_dopair_grav_mm_nonsym(struct runner *r, + struct cell *restrict ci, + struct cell *restrict cj) { /* Some constants */ const struct engine *e = r->e; @@ -1416,9 +1422,15 @@ static INLINE void runner_dopair_grav_mm_nonsym( cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); #endif - /* Lock the multipoles */ - lock_lock(&ci->grav.mlock); - lock_lock(&cj->grav.mlock); + /* Lock the multipoles + * Note we impose a hierarchy to solve the dining philosopher problem */ + if (ci < cj) { + lock_lock(&ci->grav.mlock); + lock_lock(&cj->grav.mlock); + } else { + lock_lock(&cj->grav.mlock); + lock_lock(&ci->grav.mlock); + } /* Let's interact at this level */ gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM, -- GitLab From caaeb1e562c17fe8213bcfaf8e54a6b377c27498 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Mon, 23 Mar 2020 11:11:25 +0100 Subject: [PATCH 05/27] Also apply the change to the other gravity schemes --- src/gravity/Potential/gravity.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gravity/Potential/gravity.h b/src/gravity/Potential/gravity.h index 3a3e96800..ff85f29c0 100644 --- a/src/gravity/Potential/gravity.h +++ b/src/gravity/Potential/gravity.h @@ -23,6 +23,7 @@ #include /* Local includes. */ +#include "atomic.h" #include "cosmology.h" #include "gravity_properties.h" #include "kernel_gravity.h" @@ -63,7 +64,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening( __attribute__((always_inline)) INLINE static void gravity_add_comoving_potential(struct gpart* restrict gp, float pot) { - gp->potential += pot; + atomic_add_f(&gp->potential, pot); } /** -- GitLab From e89175a76a7d5999b8d197d67cb20e0c7ee7c496 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Mon, 23 Mar 2020 12:12:31 +0100 Subject: [PATCH 06/27] Do not print the list of tasks every time-step unless we are in debugging check mode --- src/engine.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/engine.c b/src/engine.c index 95507065d..34ba7f5b9 100644 --- a/src/engine.c +++ b/src/engine.c @@ -2575,11 +2575,13 @@ void engine_step(struct engine *e) { /* Prepare the tasks to be launched, rebuild or repartition if needed. */ engine_prepare(e); - /* Print the number of active tasks ? */ - if (e->verbose) engine_print_task_counts(e); +#ifdef SWIFT_DEBUG_CHECKS + /* Print the number of active tasks */ + engine_print_task_counts(e); +#endif - /* Dump local cells and active particle counts. */ - // dumpCells("cells", 1, 0, 0, 0, e->s, e->nodeID, e->step); + /* Dump local cells and active particle counts. */ + // dumpCells("cells", 1, 0, 0, 0, e->s, e->nodeID, e->step); #ifdef SWIFT_DEBUG_CHECKS /* Check that we have the correct total mass in the top-level multipoles */ -- GitLab From 11a1077cb041849bbb32a5e1b290ba55c44db697 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Mon, 23 Mar 2020 12:13:45 +0100 Subject: [PATCH 07/27] But only do that on the rank 0 when running with MPI --- src/engine.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/engine.c b/src/engine.c index 34ba7f5b9..daa28518c 100644 --- a/src/engine.c +++ b/src/engine.c @@ -2577,11 +2577,11 @@ void engine_step(struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS /* Print the number of active tasks */ - engine_print_task_counts(e); + if (e->verbose) engine_print_task_counts(e); #endif - /* Dump local cells and active particle counts. */ - // dumpCells("cells", 1, 0, 0, 0, e->s, e->nodeID, e->step); + /* Dump local cells and active particle counts. */ + // dumpCells("cells", 1, 0, 0, 0, e->s, e->nodeID, e->step); #ifdef SWIFT_DEBUG_CHECKS /* Check that we have the correct total mass in the top-level multipoles */ -- GitLab From cc95ff35a33b5da8280240c077817d4f2073ef71 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Mon, 23 Mar 2020 18:14:56 +0100 Subject: [PATCH 08/27] Time the FOF tasks in their own category --- src/task.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/task.c b/src/task.c index f99056fb4..964c77fea 100644 --- a/src/task.c +++ b/src/task.c @@ -1270,6 +1270,10 @@ enum task_categories task_get_category(const struct task *t) { case task_type_end_grav_force: return task_category_gravity; + case task_type_fof_self: + case task_type_fof_pair: + return task_category_fof; + case task_type_self: case task_type_pair: case task_type_sub_self: -- GitLab From 65391eddfe59933d569c731f80975e18bf13399e Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Tue, 24 Mar 2020 18:52:31 +0100 Subject: [PATCH 09/27] Also use atomics when running the gravity checks --- src/multipole.h | 12 ++++----- src/runner_doiact_grav.c | 56 +++++++++++++++++++++------------------- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/src/multipole.h b/src/multipole.h index 178c8088a..c4a51451d 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -2467,12 +2467,12 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, #ifdef SWIFT_DEBUG_CHECKS if (lb->num_interacted == 0) error("Interacting with empty field tensor"); - gp->num_interacted += lb->num_interacted; + atomic_add(&gp->num_interacted, lb->num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS - gp->num_interacted_m2l += lb->num_interacted_tree; - gp->num_interacted_pm += lb->num_interacted_pm; + atomic_add(&gp->num_interacted_m2l, lb->num_interacted_tree); + atomic_add(&gp->num_interacted_pm, lb->num_interacted_pm); #endif /* Local accumulator */ @@ -2595,9 +2595,9 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, gravity_add_comoving_potential(gp, pot); #ifdef SWIFT_GRAVITY_FORCE_CHECKS - gp->a_grav_m2l[0] += a_grav[0]; - gp->a_grav_m2l[1] += a_grav[1]; - gp->a_grav_m2l[2] += a_grav[2]; + atomic_add_f(&gp->a_grav_m2l[0], a_grav[0]); + atomic_add_f(&gp->a_grav_m2l[1], a_grav[1]); + atomic_add_f(&gp->a_grav_m2l[2], a_grav[2]); #endif } diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c index 9a998907d..6d7a63db7 100644 --- a/src/runner_doiact_grav.c +++ b/src/runner_doiact_grav.c @@ -270,7 +270,7 @@ static INLINE void runner_dopair_grav_pp_full( #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the p2p interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - gparts_i[pid].num_interacted_p2p++; + atomic_inc(&gparts_i[pid].num_interacted_p2p); #endif } @@ -281,9 +281,9 @@ static INLINE void runner_dopair_grav_pp_full( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - gparts_i[pid].a_grav_p2p[0] += a_x; - gparts_i[pid].a_grav_p2p[1] += a_y; - gparts_i[pid].a_grav_p2p[2] += a_z; + atomic_add_f(&gparts_i[pid].a_grav_p2p[0], a_x); + atomic_add_f(&gparts_i[pid].a_grav_p2p[1], a_y); + atomic_add_f(&gparts_i[pid].a_grav_p2p[2], a_z); #endif } } @@ -426,7 +426,7 @@ static INLINE void runner_dopair_grav_pp_truncated( #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the p2p interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - gparts_i[pid].num_interacted_p2p++; + atomic_inc(&gparts_i[pid].num_interacted_p2p); #endif } @@ -437,9 +437,9 @@ static INLINE void runner_dopair_grav_pp_truncated( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - gparts_i[pid].a_grav_p2p[0] += a_x; - gparts_i[pid].a_grav_p2p[1] += a_y; - gparts_i[pid].a_grav_p2p[2] += a_z; + atomic_add_f(&gparts_i[pid].a_grav_p2p[0], a_x); + atomic_add_f(&gparts_i[pid].a_grav_p2p[1], a_y); + atomic_add_f(&gparts_i[pid].a_grav_p2p[2], a_z); #endif } } @@ -572,10 +572,11 @@ static INLINE void runner_dopair_grav_pm_full( #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the M2P interaction counter and forces. */ if (pid < gcount_i) { - gparts_i[pid].num_interacted_m2p += cj->grav.multipole->m_pole.num_gpart; - gparts_i[pid].a_grav_m2p[0] += f_x; - gparts_i[pid].a_grav_m2p[1] += f_y; - gparts_i[pid].a_grav_m2p[2] += f_z; + atomic_add(&gparts_i[pid].num_interacted_m2p, + cj->grav.multipole->m_pole.num_gpart); + atomic_add_f(&gparts_i[pid].a_grav_m2p[0], f_x); + atomic_add_f(&gparts_i[pid].a_grav_m2p[1], f_y); + atomic_add_f(&gparts_i[pid].a_grav_m2p[2], f_z); } #endif } @@ -714,10 +715,11 @@ static INLINE void runner_dopair_grav_pm_truncated( #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the M2P interaction counter and forces. */ if (pid < gcount_i) { - gparts_i[pid].num_interacted_m2p += cj->grav.multipole->m_pole.num_gpart; - gparts_i[pid].a_grav_m2p[0] += f_x; - gparts_i[pid].a_grav_m2p[1] += f_y; - gparts_i[pid].a_grav_m2p[2] += f_z; + atomic_add(&gparts_i[pid].num_interacted_m2p, + cj->grav.multipole->m_pole.num_gpart); + atomic_add_f(&gparts_i[pid].a_grav_m2p[0], f_x); + atomic_add_f(&gparts_i[pid].a_grav_m2p[1], f_y); + atomic_add_f(&gparts_i[pid].a_grav_m2p[2], f_z); } #endif } @@ -1049,7 +1051,7 @@ static INLINE void runner_doself_grav_pp_full( #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the P2P interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - gparts[pid].num_interacted_p2p++; + atomic_inc(&gparts[pid].num_interacted_p2p); #endif } @@ -1060,9 +1062,9 @@ static INLINE void runner_doself_grav_pp_full( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - gparts[pid].a_grav_p2p[0] += a_x; - gparts[pid].a_grav_p2p[1] += a_y; - gparts[pid].a_grav_p2p[2] += a_z; + atomic_add_f(&gparts[pid].a_grav_p2p[0], a_x); + atomic_add_f(&gparts[pid].a_grav_p2p[1], a_y); + atomic_add_f(&gparts[pid].a_grav_p2p[2], a_z); #endif } } @@ -1188,7 +1190,7 @@ static INLINE void runner_doself_grav_pp_truncated( #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the P2P interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - gparts[pid].num_interacted_p2p++; + atomic_inc(&gparts[pid].num_interacted_p2p); #endif } @@ -1199,9 +1201,9 @@ static INLINE void runner_doself_grav_pp_truncated( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - gparts[pid].a_grav_p2p[0] += a_x; - gparts[pid].a_grav_p2p[1] += a_y; - gparts[pid].a_grav_p2p[2] += a_z; + atomic_add_f(&gparts[pid].a_grav_p2p[0], a_x); + atomic_add_f(&gparts[pid].a_grav_p2p[1], a_y); + atomic_add_f(&gparts[pid].a_grav_p2p[2], a_z); #endif } } @@ -1676,9 +1678,9 @@ void runner_dopair_recursive_grav(struct runner *r, struct cell *ci, #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Need to account for the interactions we missed */ if (cell_is_active_gravity(ci, e)) - multi_i->pot.num_interacted_pm += multi_j->m_pole.num_gpart; + atomic_add(&multi_i->pot.num_interacted_pm, multi_j->m_pole.num_gpart); if (cell_is_active_gravity(cj, e)) - multi_j->pot.num_interacted_pm += multi_i->m_pole.num_gpart; + atomic_add(&multi_j->pot.num_interacted_pm, multi_i->m_pole.num_gpart); #endif return; } @@ -1889,7 +1891,7 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Need to account for the interactions we missed */ - multi_i->pot.num_interacted_pm += multi_j->m_pole.num_gpart; + atomic_add(&multi_i->pot.num_interacted_pm, multi_j->m_pole.num_gpart); #endif /* Record that this multipole received a contribution */ -- GitLab From 64a8d3a3ab2f2c0e263506b43cec72a099639c73 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Wed, 25 Mar 2020 13:46:47 +0100 Subject: [PATCH 10/27] Removed the error message in the cell locking/unlocking --- src/cell.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/cell.c b/src/cell.c index 1aefbaa7d..cd4cb2418 100644 --- a/src/cell.c +++ b/src/cell.c @@ -1228,7 +1228,7 @@ int cell_unpack_sf_counts(struct cell *restrict c, * @return 0 on success, 1 on failure */ int cell_locktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to lock this cell. */ if (c->hydro.hold || lock_trylock(&c->hydro.lock) != 0) { @@ -1350,8 +1350,6 @@ int cell_glocktree(struct cell *c) { int cell_mlocktree(struct cell *c) { TIMER_TIC; - error("aa"); - /* First of all, try to lock this cell. */ if (c->grav.mhold || lock_trylock(&c->grav.mlock) != 0) { TIMER_TOC(timer_locktree); @@ -1410,7 +1408,7 @@ int cell_mlocktree(struct cell *c) { * @return 0 on success, 1 on failure */ int cell_slocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to lock this cell. */ if (c->stars.hold || lock_trylock(&c->stars.lock) != 0) { @@ -1470,7 +1468,7 @@ int cell_slocktree(struct cell *c) { * @return 0 on success, 1 on failure */ int cell_blocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to lock this cell. */ if (c->black_holes.hold || lock_trylock(&c->black_holes.lock) != 0) { @@ -1530,7 +1528,7 @@ int cell_blocktree(struct cell *c) { * @param c The #cell. */ void cell_unlocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to unlock this cell. */ if (lock_unlock(&c->hydro.lock) != 0) error("Failed to unlock cell."); @@ -1568,8 +1566,6 @@ void cell_gunlocktree(struct cell *c) { void cell_munlocktree(struct cell *c) { TIMER_TIC; - error("aa"); - /* First of all, try to unlock this cell. */ if (lock_unlock(&c->grav.mlock) != 0) error("Failed to unlock cell."); @@ -1586,7 +1582,7 @@ void cell_munlocktree(struct cell *c) { * @param c The #cell. */ void cell_sunlocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to unlock this cell. */ if (lock_unlock(&c->stars.lock) != 0) error("Failed to unlock cell."); @@ -1604,7 +1600,7 @@ void cell_sunlocktree(struct cell *c) { * @param c The #cell. */ void cell_bunlocktree(struct cell *c) { - TIMER_TIC + TIMER_TIC; /* First of all, try to unlock this cell. */ if (lock_unlock(&c->black_holes.lock) != 0) error("Failed to unlock cell."); -- GitLab From 001c818b94e155e64d73c8e8df7bde28a6fe20bd Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Wed, 25 Mar 2020 14:11:20 +0100 Subject: [PATCH 11/27] Added an atomic max for chars --- src/atomic.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/atomic.h b/src/atomic.h index 8a790a7f6..0916497ef 100644 --- a/src/atomic.h +++ b/src/atomic.h @@ -121,6 +121,27 @@ __attribute__((always_inline)) INLINE static void atomic_min_d( } while (test_val.as_long_long != old_val.as_long_long); } +/** + * @brief Atomic max operation on chars. + * + * This is a text-book implementation based on an atomic CAS. + * + * @param address The address to update. + * @param y The value to update the address with. + */ +__attribute__((always_inline)) INLINE static void atomic_max_c( + volatile char *const address, const char y) { + + char test_val, old_val, new_val; + old_val = *address; + + do { + test_val = old_val; + new_val = max(old_val, y); + old_val = atomic_cas(address, test_val, new_val); + } while (test_val != old_val); +} + /** * @brief Atomic max operation on floats. * -- GitLab From 15106d9ff200ac2250f553674e9912b2b61ea410 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Wed, 25 Mar 2020 14:22:48 +0100 Subject: [PATCH 12/27] Changed the char atomic max to a int8_t atomic max --- src/atomic.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/atomic.h b/src/atomic.h index 0916497ef..fc2d1265a 100644 --- a/src/atomic.h +++ b/src/atomic.h @@ -26,6 +26,9 @@ #include "inline.h" #include "minmax.h" +/* Standard includes */ +#include + #define atomic_add(v, i) __sync_fetch_and_add(v, i) #define atomic_sub(v, i) __sync_fetch_and_sub(v, i) #define atomic_or(v, i) __sync_fetch_and_or(v, i) @@ -122,7 +125,7 @@ __attribute__((always_inline)) INLINE static void atomic_min_d( } /** - * @brief Atomic max operation on chars. + * @brief Atomic max operation on int8_t. * * This is a text-book implementation based on an atomic CAS. * @@ -130,9 +133,9 @@ __attribute__((always_inline)) INLINE static void atomic_min_d( * @param y The value to update the address with. */ __attribute__((always_inline)) INLINE static void atomic_max_c( - volatile char *const address, const char y) { + volatile int8_t *const address, const int8_t y) { - char test_val, old_val, new_val; + int8_t test_val, old_val, new_val; old_val = *address; do { -- GitLab From 3742104d18f1349df03848f0260339e051726e37 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Wed, 25 Mar 2020 14:26:40 +0100 Subject: [PATCH 13/27] Make the time-step limiter tasks lock-free --- src/task.c | 10 ++++++++-- src/timestep_limiter_iact.h | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/task.c b/src/task.c index 964c77fea..4403f1029 100644 --- a/src/task.c +++ b/src/task.c @@ -468,7 +468,8 @@ void task_unlock(struct task *t) { cell_unlocktree(ci); } else if (subtype == task_subtype_do_bh_swallow) { cell_bunlocktree(ci); - } else { + } else if (subtype == task_subtype_limiter) { + } else { /* hydro */ cell_unlocktree(ci); } break; @@ -493,7 +494,8 @@ void task_unlock(struct task *t) { } else if (subtype == task_subtype_do_bh_swallow) { cell_bunlocktree(ci); cell_bunlocktree(cj); - } else { + } else if (subtype == task_subtype_limiter) { + } else { /* hydro */ cell_unlocktree(ci); cell_unlocktree(cj); } @@ -625,6 +627,8 @@ int task_lock(struct task *t) { } else if (subtype == task_subtype_do_bh_swallow) { if (ci->black_holes.hold) return 0; if (cell_blocktree(ci) != 0) return 0; + } else if (subtype == task_subtype_limiter) { + return 1; } else { /* subtype == hydro */ if (ci->hydro.hold) return 0; if (cell_locktree(ci) != 0) return 0; @@ -686,6 +690,8 @@ int task_lock(struct task *t) { cell_bunlocktree(ci); return 0; } + } else if (subtype == task_subtype_limiter) { + return 1; } else { /* subtype == hydro */ /* Lock the parts in both cells */ if (ci->hydro.hold || cj->hydro.hold) return 0; diff --git a/src/timestep_limiter_iact.h b/src/timestep_limiter_iact.h index a4f5c7c1a..8c8fa7cee 100644 --- a/src/timestep_limiter_iact.h +++ b/src/timestep_limiter_iact.h @@ -88,7 +88,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( if (pj->time_bin > pi->time_bin + time_bin_neighbour_max_delta_bin) { /* Store the smallest time bin that woke up this particle */ - pj->limiter_data.wakeup = max(pj->limiter_data.wakeup, -pi->time_bin); + atomic_max_c(&pj->limiter_data.wakeup, -pi->time_bin); } } -- GitLab From 73ebdb1ced42dde51b9e31968ab0bd8f5a11b368 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Wed, 25 Mar 2020 21:51:48 +0000 Subject: [PATCH 14/27] Better choice of FoF parameters in the EAGLE ICs model --- examples/EAGLE_ICs/EAGLE_12/eagle_12.yml | 10 ++++++---- examples/EAGLE_ICs/EAGLE_25/eagle_25.yml | 10 ++++++---- examples/EAGLE_ICs/EAGLE_50/eagle_50.yml | 6 +++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml index 2770f9e27..8234b4b3e 100644 --- a/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml +++ b/examples/EAGLE_ICs/EAGLE_12/eagle_12.yml @@ -68,17 +68,19 @@ FOF: min_group_size: 256 # The minimum no. of particles required for a group. linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). - scale_factor_first: 0.01 # Scale-factor of first FoF black hole seeding calls. - delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls. + scale_factor_first: 0.05 # Scale-factor of first FoF black hole seeding calls. + delta_time: 1.00751 # Scale-factor ratio between consecutive FoF black hole seeding calls. Scheduler: max_top_level_cells: 8 - tasks_per_cell: 5 cell_split_size: 200 Restarts: onexit: 1 - delta_hours: 1.0 + delta_hours: 6.0 + max_run_time: 71.5 # Three days minus fergie time + resubmit_on_exit: 1 + resubmit_command: ./resub.sh # Parameters related to the initial conditions InitialConditions: diff --git a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml index f175e2c6a..2de9a58ed 100644 --- a/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml +++ b/examples/EAGLE_ICs/EAGLE_25/eagle_25.yml @@ -68,17 +68,19 @@ FOF: min_group_size: 256 # The minimum no. of particles required for a group. linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). - scale_factor_first: 0.01 # Scale-factor of first FoF black hole seeding calls. - delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls. + scale_factor_first: 0.05 # Scale-factor of first FoF black hole seeding calls. + delta_time: 1.00751 # Scale-factor ratio between consecutive FoF black hole seeding calls. Scheduler: max_top_level_cells: 16 - tasks_per_cell: 5 cell_split_size: 200 Restarts: onexit: 1 - delta_hours: 1.0 + delta_hours: 6.0 + max_run_time: 71.5 # Three days minus fergie time + resubmit_on_exit: 1 + resubmit_command: ./resub.sh # Parameters related to the initial conditions InitialConditions: diff --git a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml index b65f10615..49070d3b3 100644 --- a/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml +++ b/examples/EAGLE_ICs/EAGLE_50/eagle_50.yml @@ -68,8 +68,8 @@ FOF: min_group_size: 256 # The minimum no. of particles required for a group. linking_length_ratio: 0.2 # Linking length in units of the main inter-particle separation. black_hole_seed_halo_mass_Msun: 1.5e10 # Minimal halo mass in which to seed a black hole (in solar masses). - scale_factor_first: 0.01 # Scale-factor of first FoF black hole seeding calls. - delta_time: 1.005 # Scale-factor ratio between consecutive FoF black hole seeding calls. + scale_factor_first: 0.05 # Scale-factor of first FoF black hole seeding calls. + delta_time: 1.00751 # Scale-factor ratio between consecutive FoF black hole seeding calls. Scheduler: max_top_level_cells: 32 @@ -78,7 +78,7 @@ Scheduler: Restarts: onexit: 1 - delta_hours: 1.0 + delta_hours: 6.0 # Parameters related to the initial conditions InitialConditions: -- GitLab From 7295cf3d49d60fdae10d55bebdcae2cefba0ee15 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Sun, 29 Mar 2020 15:32:34 +0200 Subject: [PATCH 15/27] Only use atomic operations when SWIFT_TASKS_WITHOUT_ATOMICS is not defined --- src/gravity/MultiSoftening/gravity.h | 4 ++++ src/gravity/Potential/gravity.h | 4 ++++ src/gravity_cache.h | 6 ++++++ src/mesh_gravity.c | 8 +++++++- src/multipole.h | 6 ++++++ src/runner_doiact_grav.c | 8 ++++++++ src/timestep_limiter_iact.h | 6 +++++- 7 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/gravity/MultiSoftening/gravity.h b/src/gravity/MultiSoftening/gravity.h index 7cb2c5043..ce47dbe5c 100644 --- a/src/gravity/MultiSoftening/gravity.h +++ b/src/gravity/MultiSoftening/gravity.h @@ -78,7 +78,11 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening( __attribute__((always_inline)) INLINE static void gravity_add_comoving_potential(struct gpart* restrict gp, float pot) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + gp->potential += pot; +#else atomic_add_f(&gp->potential, pot); +#endif } /** diff --git a/src/gravity/Potential/gravity.h b/src/gravity/Potential/gravity.h index ff85f29c0..e467e05e0 100644 --- a/src/gravity/Potential/gravity.h +++ b/src/gravity/Potential/gravity.h @@ -64,7 +64,11 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening( __attribute__((always_inline)) INLINE static void gravity_add_comoving_potential(struct gpart* restrict gp, float pot) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + gp->potential += pot; +#else atomic_add_f(&gp->potential, pot); +#endif } /** diff --git a/src/gravity_cache.h b/src/gravity_cache.h index 6e55e3f44..23103f050 100644 --- a/src/gravity_cache.h +++ b/src/gravity_cache.h @@ -491,9 +491,15 @@ __attribute__((always_inline)) INLINE static void gravity_cache_write_back( /* Write stuff back to the particles */ for (int i = 0; i < gcount; ++i) { if (active[i]) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + gparts[i].a_grav[0] += a_x[i]; + gparts[i].a_grav[1] += a_y[i]; + gparts[i].a_grav[2] += a_z[i]; +#else atomic_add_f(&gparts[i].a_grav[0], a_x[i]); atomic_add_f(&gparts[i].a_grav[1], a_y[i]); atomic_add_f(&gparts[i].a_grav[2], a_z[i]); +#endif gravity_add_comoving_potential(&gparts[i], pot[i]); } } diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c index 7cabd7bba..f36200718 100644 --- a/src/mesh_gravity.c +++ b/src/mesh_gravity.c @@ -334,10 +334,16 @@ void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, const int N, /* ---- */ /* Store things back */ - gravity_add_comoving_potential(gp, p); +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + gp->a_grav[0] += fac * a[0]; + gp->a_grav[1] += fac * a[1]; + gp->a_grav[2] += fac * a[2]; +#else atomic_add_f(&gp->a_grav[0], fac * a[0]); atomic_add_f(&gp->a_grav[1], fac * a[1]); atomic_add_f(&gp->a_grav[2], fac * a[2]); +#endif + gravity_add_comoving_potential(gp, p); #ifdef SWIFT_GRAVITY_FORCE_CHECKS gp->potential_PM = p; gp->a_grav_PM[0] = fac * a[0]; diff --git a/src/multipole.h b/src/multipole.h index c4a51451d..e7dcfbeb1 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -2589,9 +2589,15 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, #endif /* Update the particle */ +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + gp->a_grav[0] += a_grav[0]; + gp->a_grav[1] += a_grav[1]; + gp->a_grav[2] += a_grav[2]; +#else atomic_add_f(&gp->a_grav[0], a_grav[0]); atomic_add_f(&gp->a_grav[1], a_grav[1]); atomic_add_f(&gp->a_grav[2], a_grav[2]); +#endif gravity_add_comoving_potential(gp, pot); #ifdef SWIFT_GRAVITY_FORCE_CHECKS diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c index 6d7a63db7..bb914dc47 100644 --- a/src/runner_doiact_grav.c +++ b/src/runner_doiact_grav.c @@ -1359,6 +1359,7 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); #endif +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS /* Lock the multipoles * Note we impose a hierarchy to solve the dining philosopher problem */ if (ci < cj) { @@ -1368,15 +1369,18 @@ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, lock_lock(&cj->grav.mlock); lock_lock(&ci->grav.mlock); } +#endif /* Let's interact at this level */ gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot, multi_i, multi_j, ci->grav.multipole->CoM, cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS /* Unlock the multipoles */ if (lock_unlock(&ci->grav.mlock) != 0) error("Failed to unlock multipole"); if (lock_unlock(&cj->grav.mlock) != 0) error("Failed to unlock multipole"); +#endif TIMER_TOC(timer_dopair_grav_mm); } @@ -1424,6 +1428,7 @@ static INLINE void runner_dopair_grav_mm_nonsym(struct runner *r, cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); #endif +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS /* Lock the multipoles * Note we impose a hierarchy to solve the dining philosopher problem */ if (ci < cj) { @@ -1433,14 +1438,17 @@ static INLINE void runner_dopair_grav_mm_nonsym(struct runner *r, lock_lock(&cj->grav.mlock); lock_lock(&ci->grav.mlock); } +#endif /* Let's interact at this level */ gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM, cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); +#ifndef SWIFT_TASKS_WITHOUT_ATOMICS /* Unlock the multipoles */ if (lock_unlock(&ci->grav.mlock) != 0) error("Failed to unlock multipole"); if (lock_unlock(&cj->grav.mlock) != 0) error("Failed to unlock multipole"); +#endif TIMER_TOC(timer_dopair_grav_mm); } diff --git a/src/timestep_limiter_iact.h b/src/timestep_limiter_iact.h index 8c8fa7cee..accfd33d0 100644 --- a/src/timestep_limiter_iact.h +++ b/src/timestep_limiter_iact.h @@ -87,8 +87,12 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( /* Wake up the neighbour? */ if (pj->time_bin > pi->time_bin + time_bin_neighbour_max_delta_bin) { - /* Store the smallest time bin that woke up this particle */ + /* Store the smallest time bin that woke up this particle */ +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + pj->limiter_data.wakeup = max(pj->limiter_data.wakeup, -pi->time_bin); +#else atomic_max_c(&pj->limiter_data.wakeup, -pi->time_bin); +#endif } } -- GitLab From c83f6d0c1dcc5546286767a8894b9d48d8973761 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Sun, 29 Mar 2020 15:46:56 +0200 Subject: [PATCH 16/27] Lock the gravity and limiter tasks when SWIFT_TASKS_WITHOUT_ATOMICS is defined --- src/task.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 92 insertions(+), 11 deletions(-) diff --git a/src/task.c b/src/task.c index 4403f1029..f821eed5e 100644 --- a/src/task.c +++ b/src/task.c @@ -456,6 +456,10 @@ void task_unlock(struct task *t) { case task_type_self: case task_type_sub_self: if (subtype == task_subtype_grav) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + cell_gunlocktree(ci); + cell_munlocktree(ci); +#endif } else if ((subtype == task_subtype_stars_density) || (subtype == task_subtype_stars_feedback)) { cell_sunlocktree(ci); @@ -469,6 +473,9 @@ void task_unlock(struct task *t) { } else if (subtype == task_subtype_do_bh_swallow) { cell_bunlocktree(ci); } else if (subtype == task_subtype_limiter) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + cell_unlocktree(ci); +#endif } else { /* hydro */ cell_unlocktree(ci); } @@ -477,6 +484,12 @@ void task_unlock(struct task *t) { case task_type_pair: case task_type_sub_pair: if (subtype == task_subtype_grav) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + cell_gunlocktree(ci); + cell_gunlocktree(cj); + cell_munlocktree(ci); + cell_munlocktree(cj); +#endif } else if ((subtype == task_subtype_stars_density) || (subtype == task_subtype_stars_feedback)) { cell_sunlocktree(ci); @@ -495,6 +508,10 @@ void task_unlock(struct task *t) { cell_bunlocktree(ci); cell_bunlocktree(cj); } else if (subtype == task_subtype_limiter) { +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + cell_unlocktree(ci); + cell_unlocktree(cj); +#endif } else { /* hydro */ cell_unlocktree(ci); cell_unlocktree(cj); @@ -502,12 +519,23 @@ void task_unlock(struct task *t) { break; case task_type_grav_down: +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + cell_gunlocktree(ci); + cell_munlocktree(ci); +#endif break; case task_type_grav_long_range: +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + cell_munlocktree(ci); +#endif break; case task_type_grav_mm: +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + cell_munlocktree(ci); + cell_munlocktree(cj); +#endif break; case task_type_star_formation: @@ -603,7 +631,16 @@ int task_lock(struct task *t) { case task_type_self: case task_type_sub_self: if (subtype == task_subtype_grav) { - return 1; +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + /* Lock the gparts and the m-pole */ + if (ci->grav.phold || ci->grav.mhold) return 0; + if (cell_glocktree(ci) != 0) + return 0; + else if (cell_mlocktree(ci) != 0) { + cell_gunlocktree(ci); + return 0; + } +#endif } else if ((subtype == task_subtype_stars_density) || (subtype == task_subtype_stars_feedback)) { if (ci->stars.hold) return 0; @@ -628,7 +665,10 @@ int task_lock(struct task *t) { if (ci->black_holes.hold) return 0; if (cell_blocktree(ci) != 0) return 0; } else if (subtype == task_subtype_limiter) { - return 1; +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + if (ci->hydro.hold) return 0; + if (cell_locktree(ci) != 0) return 0; +#endif } else { /* subtype == hydro */ if (ci->hydro.hold) return 0; if (cell_locktree(ci) != 0) return 0; @@ -638,7 +678,24 @@ int task_lock(struct task *t) { case task_type_pair: case task_type_sub_pair: if (subtype == task_subtype_grav) { - return 1; +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + /* Lock the gparts and the m-pole in both cells */ + if (ci->grav.phold || cj->grav.phold) return 0; + if (cell_glocktree(ci) != 0) return 0; + if (cell_glocktree(cj) != 0) { + cell_gunlocktree(ci); + return 0; + } else if (cell_mlocktree(ci) != 0) { + cell_gunlocktree(ci); + cell_gunlocktree(cj); + return 0; + } else if (cell_mlocktree(cj) != 0) { + cell_gunlocktree(ci); + cell_gunlocktree(cj); + cell_munlocktree(ci); + return 0; + } +#endif } else if ((subtype == task_subtype_stars_density) || (subtype == task_subtype_stars_feedback)) { /* Lock the stars and the gas particles in both cells */ @@ -691,7 +748,14 @@ int task_lock(struct task *t) { return 0; } } else if (subtype == task_subtype_limiter) { - return 1; +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + if (ci->hydro.hold || cj->hydro.hold) return 0; + if (cell_locktree(ci) != 0) return 0; + if (cell_locktree(cj) != 0) { + cell_unlocktree(ci); + return 0; + } +#endif } else { /* subtype == hydro */ /* Lock the parts in both cells */ if (ci->hydro.hold || cj->hydro.hold) return 0; @@ -704,15 +768,36 @@ int task_lock(struct task *t) { break; case task_type_grav_down: - return 1; +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + /* Lock the gparts and the m-poles */ + if (ci->grav.phold || ci->grav.mhold) return 0; + if (cell_glocktree(ci) != 0) + return 0; + else if (cell_mlocktree(ci) != 0) { + cell_gunlocktree(ci); + return 0; + } +#endif break; case task_type_grav_long_range: - return 1; +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + /* Lock the m-poles */ + if (ci->grav.mhold) return 0; + if (cell_mlocktree(ci) != 0) return 0; +#endif break; case task_type_grav_mm: - return 1; +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + /* Lock both m-poles */ + if (ci->grav.mhold || cj->grav.mhold) return 0; + if (cell_mlocktree(ci) != 0) return 0; + if (cell_mlocktree(cj) != 0) { + cell_munlocktree(ci); + return 0; + } +#endif break; case task_type_star_formation: @@ -1276,10 +1361,6 @@ enum task_categories task_get_category(const struct task *t) { case task_type_end_grav_force: return task_category_gravity; - case task_type_fof_self: - case task_type_fof_pair: - return task_category_fof; - case task_type_self: case task_type_pair: case task_type_sub_self: -- GitLab From 74cb8972636867b2e629237fb4ad41d32a73b82b Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Sun, 29 Mar 2020 17:12:40 +0200 Subject: [PATCH 17/27] Add an option to disable the use of atomic operations within the tasks and return to the old behaviour. By default atomic operations are allowed in tasks --- configure.ac | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/configure.ac b/configure.ac index 6edd9586b..8495dca80 100644 --- a/configure.ac +++ b/configure.ac @@ -239,6 +239,23 @@ if test "x$enable_debug" = "xyes"; then fi fi +# Are we using the regular tasks (with atomics and no task conflicts) or +# are we using the atomic-free task-conflicting (slower) version? +AC_ARG_ENABLE([atomics-within-tasks], + [AS_HELP_STRING([--disable-atomics-within-tasks], + [Disable the use of atomic operations within tasks. This creates more task conflicts + but allows for atomic-free and lock-free code within the tasks. This likely slows down + the code @<:@yes/no@:>@] + )], + [enable_atomics_within_tasks="$enableval"], + [enable_atomics_within_tasks="yes"] +) +AS_IF([test "x$enable_atomics_within_tasks" != "xno"], + , # Note: atomics are allowed by default, we define the macro if we don't want them. + [AC_DEFINE([SWIFT_TASKS_WITHOUT_ATOMICS],1,[Makes SWIFT use atomic-free and lock-free tasks.]) +]) + + # Check if task debugging is on. AC_ARG_ENABLE([task-debugging], [AS_HELP_STRING([--enable-task-debugging], @@ -2301,6 +2318,7 @@ AC_MSG_RESULT([ Black holes model : $with_black_holes Task dependencies : $with_task_order + Atomic operations in tasks : $enable_atomics_within_tasks Individual timers : $enable_timers Task debugging : $enable_task_debugging Threadpool debugging : $enable_threadpool_debugging -- GitLab From 5ee57ba2c0fe934de0a8861c18ce70abb8514eea Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Tue, 31 Mar 2020 11:21:09 +0200 Subject: [PATCH 18/27] Hide the atomics into a layer of functions that can be used to update the particles atomically or not in a transparent way --- src/Makefile.am | 2 +- src/accumulate.h | 92 ++++++++++++++++++++++++++++ src/gravity/MultiSoftening/gravity.h | 8 +-- src/gravity/Potential/gravity.h | 8 +-- src/gravity_cache.h | 13 ++-- src/mesh_gravity.c | 13 ++-- src/multipole.h | 19 ++---- 7 files changed, 111 insertions(+), 44 deletions(-) create mode 100644 src/accumulate.h diff --git a/src/Makefile.am b/src/Makefile.am index f1f9157b1..db86d524c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -114,7 +114,7 @@ AM_SOURCES = space.c runner_main.c runner_doiact_hydro.c runner_doiact_limiter.c # Include files for distribution, not installation. nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ - gravity_iact.h kernel_long_gravity.h vector.h cache.h \ + gravity_iact.h kernel_long_gravity.h vector.h accumulate.h cache.h \ runner_doiact_nosort.h runner_doiact_hydro.h runner_doiact_stars.h runner_doiact_black_holes.h runner_doiact_grav.h \ runner_doiact_functions_hydro.h runner_doiact_functions_stars.h runner_doiact_functions_black_holes.h \ runner_doiact_functions_limiter.h runner_doiact_limiter.h units.h intrinsics.h minmax.h \ diff --git a/src/accumulate.h b/src/accumulate.h new file mode 100644 index 000000000..4968b331e --- /dev/null +++ b/src/accumulate.h @@ -0,0 +1,92 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + ******************************************************************************/ +#ifndef SWIFT_ACCUMULATE_H +#define SWIFT_ACCUMULATE_H + +/* Config parameters. */ +#include "../config.h" + +/* Local includes */ +#include "atomic.h" + +/** + * @file accumulate.h + * @brief Defines a series of functions used to update the fields of a particle + * atomically. These functions should be used in any task that can run in + * parallel to another one. + */ + +/** + * @brief Add x to the value stored at the location address (int version) + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to add to *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_add_i( + volatile int *const address, const int x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address += x; +#else + atomic_add(address, x); +#endif +} + +/** + * @brief Add x to the value stored at the location address (float version) + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to add to *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_add_f( + volatile float *const address, const float x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address += x; +#else + atomic_add_f(address, x); +#endif +} + +/** + * @brief Add x to the value stored at the location address (double version) + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to add to *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_add_d( + volatile double *const address, const double x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address += x; +#else + atomic_add_d(address, x); +#endif +} + +#endif /* SWIFT_ACCUMULATE_H */ diff --git a/src/gravity/MultiSoftening/gravity.h b/src/gravity/MultiSoftening/gravity.h index ce47dbe5c..d7716cf71 100644 --- a/src/gravity/MultiSoftening/gravity.h +++ b/src/gravity/MultiSoftening/gravity.h @@ -23,7 +23,7 @@ #include /* Local includes. */ -#include "atomic.h" +#include "accumulate.h" #include "cosmology.h" #include "error.h" #include "gravity_properties.h" @@ -78,11 +78,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening( __attribute__((always_inline)) INLINE static void gravity_add_comoving_potential(struct gpart* restrict gp, float pot) { -#ifdef SWIFT_TASKS_WITHOUT_ATOMICS - gp->potential += pot; -#else - atomic_add_f(&gp->potential, pot); -#endif + accumulate_add_f(&gp->potential, pot); } /** diff --git a/src/gravity/Potential/gravity.h b/src/gravity/Potential/gravity.h index e467e05e0..f9a9502a5 100644 --- a/src/gravity/Potential/gravity.h +++ b/src/gravity/Potential/gravity.h @@ -23,7 +23,7 @@ #include /* Local includes. */ -#include "atomic.h" +#include "accumulate.h" #include "cosmology.h" #include "gravity_properties.h" #include "kernel_gravity.h" @@ -64,11 +64,7 @@ __attribute__((always_inline)) INLINE static float gravity_get_softening( __attribute__((always_inline)) INLINE static void gravity_add_comoving_potential(struct gpart* restrict gp, float pot) { -#ifdef SWIFT_TASKS_WITHOUT_ATOMICS - gp->potential += pot; -#else - atomic_add_f(&gp->potential, pot); -#endif + accumulate_add_f(&gp->potential, pot); } /** diff --git a/src/gravity_cache.h b/src/gravity_cache.h index 23103f050..ba3432d7e 100644 --- a/src/gravity_cache.h +++ b/src/gravity_cache.h @@ -23,6 +23,7 @@ #include "../config.h" /* Local headers */ +#include "accumulate.h" #include "align.h" #include "error.h" #include "gravity.h" @@ -491,15 +492,9 @@ __attribute__((always_inline)) INLINE static void gravity_cache_write_back( /* Write stuff back to the particles */ for (int i = 0; i < gcount; ++i) { if (active[i]) { -#ifdef SWIFT_TASKS_WITHOUT_ATOMICS - gparts[i].a_grav[0] += a_x[i]; - gparts[i].a_grav[1] += a_y[i]; - gparts[i].a_grav[2] += a_z[i]; -#else - atomic_add_f(&gparts[i].a_grav[0], a_x[i]); - atomic_add_f(&gparts[i].a_grav[1], a_y[i]); - atomic_add_f(&gparts[i].a_grav[2], a_z[i]); -#endif + accumulate_add_f(&gparts[i].a_grav[0], a_x[i]); + accumulate_add_f(&gparts[i].a_grav[1], a_y[i]); + accumulate_add_f(&gparts[i].a_grav[2], a_z[i]); gravity_add_comoving_potential(&gparts[i], pot[i]); } } diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c index f36200718..cbfb9ef7a 100644 --- a/src/mesh_gravity.c +++ b/src/mesh_gravity.c @@ -28,6 +28,7 @@ #include "mesh_gravity.h" /* Local includes. */ +#include "accumulate.h" #include "active.h" #include "debug.h" #include "engine.h" @@ -334,15 +335,9 @@ void mesh_to_gparts_CIC(struct gpart* gp, const double* pot, const int N, /* ---- */ /* Store things back */ -#ifdef SWIFT_TASKS_WITHOUT_ATOMICS - gp->a_grav[0] += fac * a[0]; - gp->a_grav[1] += fac * a[1]; - gp->a_grav[2] += fac * a[2]; -#else - atomic_add_f(&gp->a_grav[0], fac * a[0]); - atomic_add_f(&gp->a_grav[1], fac * a[1]); - atomic_add_f(&gp->a_grav[2], fac * a[2]); -#endif + accumulate_add_f(&gp->a_grav[0], fac * a[0]); + accumulate_add_f(&gp->a_grav[1], fac * a[1]); + accumulate_add_f(&gp->a_grav[2], fac * a[2]); gravity_add_comoving_potential(gp, p); #ifdef SWIFT_GRAVITY_FORCE_CHECKS gp->potential_PM = p; diff --git a/src/multipole.h b/src/multipole.h index e7dcfbeb1..fede08c09 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -2589,21 +2589,14 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, #endif /* Update the particle */ -#ifdef SWIFT_TASKS_WITHOUT_ATOMICS - gp->a_grav[0] += a_grav[0]; - gp->a_grav[1] += a_grav[1]; - gp->a_grav[2] += a_grav[2]; -#else - atomic_add_f(&gp->a_grav[0], a_grav[0]); - atomic_add_f(&gp->a_grav[1], a_grav[1]); - atomic_add_f(&gp->a_grav[2], a_grav[2]); -#endif - gravity_add_comoving_potential(gp, pot); + accumulate_add_f(&gp->a_grav[0], a_grav[0]); + accumulate_add_f(&gp->a_grav[1], a_grav[1]); + accumulate_add_f(&gp->a_grav[2], a_grav[2]); #ifdef SWIFT_GRAVITY_FORCE_CHECKS - atomic_add_f(&gp->a_grav_m2l[0], a_grav[0]); - atomic_add_f(&gp->a_grav_m2l[1], a_grav[1]); - atomic_add_f(&gp->a_grav_m2l[2], a_grav[2]); + accumulate_add_f(&gp->a_grav_m2l[0], a_grav[0]); + accumulate_add_f(&gp->a_grav_m2l[1], a_grav[1]); + accumulate_add_f(&gp->a_grav_m2l[2], a_grav[2]); #endif } -- GitLab From 85af978e039dfe154abe90aecb4379fa07b11e7c Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Tue, 31 Mar 2020 11:40:48 +0200 Subject: [PATCH 19/27] Apply the same behaviour to the accumulators used to check the accuracy of the gravity calculation. --- src/accumulate.h | 55 +++++++++++++++++++++++++ src/multipole.h | 6 +-- src/runner_doiact_grav.c | 86 +++++++++++++++++++++------------------- 3 files changed, 104 insertions(+), 43 deletions(-) diff --git a/src/accumulate.h b/src/accumulate.h index 4968b331e..a7e8e6c1b 100644 --- a/src/accumulate.h +++ b/src/accumulate.h @@ -51,6 +51,25 @@ __attribute__((always_inline)) INLINE static void accumulate_add_i( #endif } +/** + * @brief Add x to the value stored at the location address (long long version) + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to add to *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_add_ll( + volatile long long *const address, const long long x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address += x; +#else + atomic_add(address, x); +#endif +} + /** * @brief Add x to the value stored at the location address (float version) * @@ -89,4 +108,40 @@ __attribute__((always_inline)) INLINE static void accumulate_add_d( #endif } +/** + * @brief Add 1 to the value stored at the location address (int version) + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + */ +__attribute__((always_inline)) INLINE static void accumulate_inc_i( + volatile int *const address) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + (*address)++; +#else + atomic_inc(address); +#endif +} + +/** + * @brief Add 1 to the value stored at the location address (long long version) + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + */ +__attribute__((always_inline)) INLINE static void accumulate_inc_ll( + volatile long long *const address) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + (*address)++; +#else + atomic_inc(address); +#endif +} + #endif /* SWIFT_ACCUMULATE_H */ diff --git a/src/multipole.h b/src/multipole.h index fede08c09..867244d28 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -2467,12 +2467,12 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, #ifdef SWIFT_DEBUG_CHECKS if (lb->num_interacted == 0) error("Interacting with empty field tensor"); - atomic_add(&gp->num_interacted, lb->num_interacted); + accumulate_add_ll(&gp->num_interacted, lb->num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS - atomic_add(&gp->num_interacted_m2l, lb->num_interacted_tree); - atomic_add(&gp->num_interacted_pm, lb->num_interacted_pm); + accumulate_add_ll(&gp->num_interacted_m2l, lb->num_interacted_tree); + accumulate_add_ll(&gp->num_interacted_pm, lb->num_interacted_pm); #endif /* Local accumulator */ diff --git a/src/runner_doiact_grav.c b/src/runner_doiact_grav.c index bb914dc47..91abe9e91 100644 --- a/src/runner_doiact_grav.c +++ b/src/runner_doiact_grav.c @@ -264,13 +264,13 @@ static INLINE void runner_dopair_grav_pp_full( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - atomic_inc(&gparts_i[pid].num_interacted); + accumulate_inc_ll(&gparts_i[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the p2p interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - atomic_inc(&gparts_i[pid].num_interacted_p2p); + accumulate_inc_ll(&gparts_i[pid].num_interacted_p2p); #endif } @@ -281,9 +281,9 @@ static INLINE void runner_dopair_grav_pp_full( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - atomic_add_f(&gparts_i[pid].a_grav_p2p[0], a_x); - atomic_add_f(&gparts_i[pid].a_grav_p2p[1], a_y); - atomic_add_f(&gparts_i[pid].a_grav_p2p[2], a_z); + accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x); + accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y); + accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z); #endif } } @@ -420,13 +420,13 @@ static INLINE void runner_dopair_grav_pp_truncated( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - atomic_inc(&gparts_i[pid].num_interacted); + accumulate_inc_ll(&gparts_i[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the p2p interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) - atomic_inc(&gparts_i[pid].num_interacted_p2p); + accumulate_inc_ll(&gparts_i[pid].num_interacted_p2p); #endif } @@ -437,9 +437,9 @@ static INLINE void runner_dopair_grav_pp_truncated( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - atomic_add_f(&gparts_i[pid].a_grav_p2p[0], a_x); - atomic_add_f(&gparts_i[pid].a_grav_p2p[1], a_y); - atomic_add_f(&gparts_i[pid].a_grav_p2p[2], a_z); + accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x); + accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y); + accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z); #endif } } @@ -565,18 +565,18 @@ static INLINE void runner_dopair_grav_pm_full( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ if (pid < gcount_i) - atomic_add(&gparts_i[pid].num_interacted, - cj->grav.multipole->m_pole.num_gpart); + accumulate_add_ll(&gparts_i[pid].num_interacted, + cj->grav.multipole->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the M2P interaction counter and forces. */ if (pid < gcount_i) { - atomic_add(&gparts_i[pid].num_interacted_m2p, - cj->grav.multipole->m_pole.num_gpart); - atomic_add_f(&gparts_i[pid].a_grav_m2p[0], f_x); - atomic_add_f(&gparts_i[pid].a_grav_m2p[1], f_y); - atomic_add_f(&gparts_i[pid].a_grav_m2p[2], f_z); + accumulate_add_ll(&gparts_i[pid].num_interacted_m2p, + cj->grav.multipole->m_pole.num_gpart); + accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x); + accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y); + accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z); } #endif } @@ -708,18 +708,18 @@ static INLINE void runner_dopair_grav_pm_truncated( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ if (pid < gcount_i) - atomic_add(&gparts_i[pid].num_interacted, - cj->grav.multipole->m_pole.num_gpart); + accumulate_add_ll(&gparts_i[pid].num_interacted, + cj->grav.multipole->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the M2P interaction counter and forces. */ if (pid < gcount_i) { - atomic_add(&gparts_i[pid].num_interacted_m2p, - cj->grav.multipole->m_pole.num_gpart); - atomic_add_f(&gparts_i[pid].a_grav_m2p[0], f_x); - atomic_add_f(&gparts_i[pid].a_grav_m2p[1], f_y); - atomic_add_f(&gparts_i[pid].a_grav_m2p[2], f_z); + accumulate_add_ll(&gparts_i[pid].num_interacted_m2p, + cj->grav.multipole->m_pole.num_gpart); + accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x); + accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y); + accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z); } #endif } @@ -1045,13 +1045,13 @@ static INLINE void runner_doself_grav_pp_full( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - atomic_inc(&gparts[pid].num_interacted); + accumulate_inc_ll(&gparts[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the P2P interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - atomic_inc(&gparts[pid].num_interacted_p2p); + accumulate_inc_ll(&gparts[pid].num_interacted_p2p); #endif } @@ -1062,9 +1062,9 @@ static INLINE void runner_doself_grav_pp_full( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - atomic_add_f(&gparts[pid].a_grav_p2p[0], a_x); - atomic_add_f(&gparts[pid].a_grav_p2p[1], a_y); - atomic_add_f(&gparts[pid].a_grav_p2p[2], a_z); + accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x); + accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y); + accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z); #endif } } @@ -1184,13 +1184,13 @@ static INLINE void runner_doself_grav_pp_truncated( #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - atomic_inc(&gparts[pid].num_interacted); + accumulate_inc_ll(&gparts[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the P2P interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) - atomic_inc(&gparts[pid].num_interacted_p2p); + accumulate_inc_ll(&gparts[pid].num_interacted_p2p); #endif } @@ -1201,9 +1201,9 @@ static INLINE void runner_doself_grav_pp_truncated( ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS - atomic_add_f(&gparts[pid].a_grav_p2p[0], a_x); - atomic_add_f(&gparts[pid].a_grav_p2p[1], a_y); - atomic_add_f(&gparts[pid].a_grav_p2p[2], a_z); + accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x); + accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y); + accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z); #endif } } @@ -1678,17 +1678,21 @@ void runner_dopair_recursive_grav(struct runner *r, struct cell *ci, #ifdef SWIFT_DEBUG_CHECKS if (cell_is_active_gravity(ci, e)) - atomic_add(&multi_i->pot.num_interacted, multi_j->m_pole.num_gpart); + accumulate_add_ll(&multi_i->pot.num_interacted, + multi_j->m_pole.num_gpart); if (cell_is_active_gravity(cj, e)) - atomic_add(&multi_j->pot.num_interacted, multi_i->m_pole.num_gpart); + accumulate_add_ll(&multi_j->pot.num_interacted, + multi_i->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Need to account for the interactions we missed */ if (cell_is_active_gravity(ci, e)) - atomic_add(&multi_i->pot.num_interacted_pm, multi_j->m_pole.num_gpart); + accumulate_add_ll(&multi_i->pot.num_interacted_pm, + multi_j->m_pole.num_gpart); if (cell_is_active_gravity(cj, e)) - atomic_add(&multi_j->pot.num_interacted_pm, multi_i->m_pole.num_gpart); + accumulate_add_ll(&multi_j->pot.num_interacted_pm, + multi_i->m_pole.num_gpart); #endif return; } @@ -1894,12 +1898,14 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { #ifdef SWIFT_DEBUG_CHECKS /* Need to account for the interactions we missed */ - atomic_add(&multi_i->pot.num_interacted, multi_j->m_pole.num_gpart); + accumulate_add_ll(&multi_i->pot.num_interacted, + multi_j->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Need to account for the interactions we missed */ - atomic_add(&multi_i->pot.num_interacted_pm, multi_j->m_pole.num_gpart); + accumulate_add_ll(&multi_i->pot.num_interacted_pm, + multi_j->m_pole.num_gpart); #endif /* Record that this multipole received a contribution */ -- GitLab From 449346aa4cdfbc2784ee01d1fb3c99c2e3badf6d Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Tue, 31 Mar 2020 11:58:18 +0200 Subject: [PATCH 20/27] Added an atomic max for int --- src/atomic.h | 69 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/src/atomic.h b/src/atomic.h index fc2d1265a..8ba1b0e69 100644 --- a/src/atomic.h +++ b/src/atomic.h @@ -38,6 +38,29 @@ #define atomic_cas(v, o, n) __sync_val_compare_and_swap(v, o, n) #define atomic_swap(v, n) __sync_lock_test_and_set(v, n) +/** + * @brief Atomic min operation on ints. + * + * This is a text-book implementation based on an atomic CAS. + * + * @param address The address to update. + * @param y The value to update the address with. + */ +__attribute__((always_inline)) INLINE static void atomic_min( + volatile int *const address, const int y) { + + int *int_ptr = (int *)address; + + int test_val, old_val, new_val; + old_val = *address; + + do { + test_val = old_val; + new_val = min(old_val, y); + old_val = atomic_cas(int_ptr, test_val, new_val); + } while (test_val != old_val); +} + /** * @brief Atomic min operation on floats. * @@ -69,29 +92,6 @@ __attribute__((always_inline)) INLINE static void atomic_min_f( } while (test_val.as_int != old_val.as_int); } -/** - * @brief Atomic min operation on ints. - * - * This is a text-book implementation based on an atomic CAS. - * - * @param address The address to update. - * @param y The value to update the address with. - */ -__attribute__((always_inline)) INLINE static void atomic_min( - volatile int *address, int y) { - - int *int_ptr = (int *)address; - - int test_val, old_val, new_val; - old_val = *address; - - do { - test_val = old_val; - new_val = min(old_val, y); - old_val = atomic_cas(int_ptr, test_val, new_val); - } while (test_val != old_val); -} - /** * @brief Atomic min operation on doubles. * @@ -145,6 +145,29 @@ __attribute__((always_inline)) INLINE static void atomic_max_c( } while (test_val != old_val); } +/** + * @brief Atomic max operation on ints. + * + * This is a text-book implementation based on an atomic CAS. + * + * @param address The address to update. + * @param y The value to update the address with. + */ +__attribute__((always_inline)) INLINE static void atomic_max( + volatile int *const address, const int y) { + + int *int_ptr = (int *)address; + + int test_val, old_val, new_val; + old_val = *address; + + do { + test_val = old_val; + new_val = max(old_val, y); + old_val = atomic_cas(int_ptr, test_val, new_val); + } while (test_val != old_val); +} + /** * @brief Atomic max operation on floats. * -- GitLab From ac2448b184e1df0144d5cae0988005caaa220b7e Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Tue, 31 Mar 2020 12:04:37 +0200 Subject: [PATCH 21/27] Apply the same change to the max operation taking place in the timestep limiter task --- src/accumulate.h | 61 +++++++++++++++++++++++++++++++++++++ src/timestep_limiter_iact.h | 8 ++--- 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/src/accumulate.h b/src/accumulate.h index a7e8e6c1b..877ac23ba 100644 --- a/src/accumulate.h +++ b/src/accumulate.h @@ -24,6 +24,7 @@ /* Local includes */ #include "atomic.h" +#include "minmax.h" /** * @file accumulate.h @@ -144,4 +145,64 @@ __attribute__((always_inline)) INLINE static void accumulate_inc_ll( #endif } +/** + * @brief Compute the max of x and the value storedd at the location address + * and store the value at the address (int8_t version). + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to max against *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_max_c( + volatile int8_t *const address, const int8_t x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address = max(*address, x); +#else + atomic_max_c(address, x); +#endif +} + +/** + * @brief Compute the max of x and the value storedd at the location address + * and store the value at the address (int version). + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to max against *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_max_i( + volatile int *const address, const int x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address = max(*address, x); +#else + atomic_max(address, x); +#endif +} + +/** + * @brief Compute the max of x and the value storedd at the location address + * and store the value at the address (float version). + * + * When SWIFT_TASKS_WITHOUT_ATOMICS is *not* defined this function uses an + * atomic operation. + * + * @param address The address to update. + * @param x The value to max against *address. + */ +__attribute__((always_inline)) INLINE static void accumulate_max_f( + volatile float *const address, const float x) { + +#ifdef SWIFT_TASKS_WITHOUT_ATOMICS + *address = max(*address, x); +#else + atomic_max_f(address, x); +#endif +} + #endif /* SWIFT_ACCUMULATE_H */ diff --git a/src/timestep_limiter_iact.h b/src/timestep_limiter_iact.h index accfd33d0..839b1a3dc 100644 --- a/src/timestep_limiter_iact.h +++ b/src/timestep_limiter_iact.h @@ -87,12 +87,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_limiter( /* Wake up the neighbour? */ if (pj->time_bin > pi->time_bin + time_bin_neighbour_max_delta_bin) { - /* Store the smallest time bin that woke up this particle */ -#ifdef SWIFT_TASKS_WITHOUT_ATOMICS - pj->limiter_data.wakeup = max(pj->limiter_data.wakeup, -pi->time_bin); -#else - atomic_max_c(&pj->limiter_data.wakeup, -pi->time_bin); -#endif + /* Store the smallest time bin that woke up this particle */ + accumulate_max_c(&pj->limiter_data.wakeup, -pi->time_bin); } } -- GitLab From e73aafb31f55572c7cedca34f407069e5fddd44d Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Mon, 30 Mar 2020 09:42:34 +0200 Subject: [PATCH 22/27] Added missing activation of the time-step sync task in black-hole only cells --- src/cell.c | 25 +++++++++++++++++-------- src/cell.h | 3 ++- src/engine_marktasks.c | 6 ++++-- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/cell.c b/src/cell.c index cd4cb2418..3eda165e2 100644 --- a/src/cell.c +++ b/src/cell.c @@ -3175,9 +3175,11 @@ void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, * @param ci The first #cell we recurse in. * @param cj The second #cell we recurse in. * @param s The task #scheduler. + * @param with_timestep_sync Are we running with time-step synchronization on? */ void cell_activate_subcell_black_holes_tasks(struct cell *ci, struct cell *cj, - struct scheduler *s) { + struct scheduler *s, + const int with_timestep_sync) { const struct engine *e = s->space->e; /* Store the current dx_max and h_max values. */ @@ -3205,11 +3207,12 @@ void cell_activate_subcell_black_holes_tasks(struct cell *ci, struct cell *cj, /* Loop over all progenies and pairs of progenies */ for (int j = 0; j < 8; j++) { if (ci->progeny[j] != NULL) { - cell_activate_subcell_black_holes_tasks(ci->progeny[j], NULL, s); + cell_activate_subcell_black_holes_tasks(ci->progeny[j], NULL, s, + with_timestep_sync); for (int k = j + 1; k < 8; k++) if (ci->progeny[k] != NULL) - cell_activate_subcell_black_holes_tasks(ci->progeny[j], - ci->progeny[k], s); + cell_activate_subcell_black_holes_tasks( + ci->progeny[j], ci->progeny[k], s, with_timestep_sync); } } } else { @@ -3238,8 +3241,8 @@ void cell_activate_subcell_black_holes_tasks(struct cell *ci, struct cell *cj, const int pid = csp->pairs[k].pid; const int pjd = csp->pairs[k].pjd; if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) - cell_activate_subcell_black_holes_tasks(ci->progeny[pid], - cj->progeny[pjd], s); + cell_activate_subcell_black_holes_tasks( + ci->progeny[pid], cj->progeny[pjd], s, with_timestep_sync); } } @@ -3250,10 +3253,14 @@ void cell_activate_subcell_black_holes_tasks(struct cell *ci, struct cell *cj, /* Activate the drifts if the cells are local. */ if (ci->nodeID == engine_rank) cell_activate_drift_bpart(ci, s); if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); + if (cj->nodeID == engine_rank && with_timestep_sync) + cell_activate_sync_part(cj, s); /* Activate the drifts if the cells are local. */ if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); if (cj->nodeID == engine_rank) cell_activate_drift_bpart(cj, s); + if (ci->nodeID == engine_rank && with_timestep_sync) + cell_activate_sync_part(ci, s); } } /* Otherwise, pair interation */ } @@ -4090,6 +4097,7 @@ int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s, int cell_unskip_black_holes_tasks(struct cell *c, struct scheduler *s) { struct engine *e = s->space->e; + const int with_timestep_sync = (e->policy & engine_policy_timestep_sync); const int nodeID = e->nodeID; int rebuild = 0; @@ -4137,12 +4145,13 @@ int cell_unskip_black_holes_tasks(struct cell *c, struct scheduler *s) { /* Store current values of dx_max and h_max. */ else if (t->type == task_type_sub_self) { - cell_activate_subcell_black_holes_tasks(ci, NULL, s); + cell_activate_subcell_black_holes_tasks(ci, NULL, s, + with_timestep_sync); } /* Store current values of dx_max and h_max. */ else if (t->type == task_type_sub_pair) { - cell_activate_subcell_black_holes_tasks(ci, cj, s); + cell_activate_subcell_black_holes_tasks(ci, cj, s, with_timestep_sync); } } diff --git a/src/cell.h b/src/cell.h index 44b802f07..4506e206a 100644 --- a/src/cell.h +++ b/src/cell.h @@ -911,7 +911,8 @@ void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, const int with_star_formation, const int with_timestep_sync); void cell_activate_subcell_black_holes_tasks(struct cell *ci, struct cell *cj, - struct scheduler *s); + struct scheduler *s, + const int with_timestep_sync); void cell_activate_subcell_external_grav_tasks(struct cell *ci, struct scheduler *s); void cell_activate_super_spart_drifts(struct cell *c, struct scheduler *s); diff --git a/src/engine_marktasks.c b/src/engine_marktasks.c index d3d17bcdd..532297a02 100644 --- a/src/engine_marktasks.c +++ b/src/engine_marktasks.c @@ -192,7 +192,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements, t_subtype == task_subtype_bh_density) { if (ci_active_black_holes) { scheduler_activate(s, t); - cell_activate_subcell_black_holes_tasks(ci, NULL, s); + cell_activate_subcell_black_holes_tasks(ci, NULL, s, + with_timestep_sync); } } @@ -446,7 +447,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Store current values of dx_max and h_max. */ else if (t_type == task_type_sub_pair && t_subtype == task_subtype_bh_density) { - cell_activate_subcell_black_holes_tasks(ci, cj, s); + cell_activate_subcell_black_holes_tasks(ci, cj, s, + with_timestep_sync); } } -- GitLab From 8c8934c5b7a3d50d55f3af4f70ae11223a1be320 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Tue, 31 Mar 2020 17:27:58 +0200 Subject: [PATCH 23/27] Fix the time-step debugging check for the rare case where a BH has swallowed the particle with the smallest time-bin in a cell. --- src/cell.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/cell.c b/src/cell.c index 3eda165e2..056c0aee0 100644 --- a/src/cell.c +++ b/src/cell.c @@ -5358,16 +5358,31 @@ void cell_check_timesteps(const struct cell *c, const integertime_t ti_current, /* Only check cells that have at least one non-inhibited particle */ if (count > 0) { - if (ti_end_min != c->hydro.ti_end_min) - error( - "Non-matching ti_end_min. Cell=%lld true=%lld ti_current=%lld " - "depth=%d", - c->hydro.ti_end_min, ti_end_min, ti_current, c->depth); + if (count != c->hydro.count) { + + /* Note that we use a < as the particle with the smallest time-bin + might have been swallowed. This means we will run this cell with + 0 active particles but that's not wrong */ + if (ti_end_min < c->hydro.ti_end_min) + error( + "Non-matching ti_end_min. Cell=%lld true=%lld ti_current=%lld " + "depth=%d", + c->hydro.ti_end_min, ti_end_min, ti_current, c->depth); + + } else /* Normal case: nothing was swallowed/converted */ { + if (ti_end_min != c->hydro.ti_end_min) + error( + "Non-matching ti_end_min. Cell=%lld true=%lld ti_current=%lld " + "depth=%d", + c->hydro.ti_end_min, ti_end_min, ti_current, c->depth); + } + if (ti_end_max > c->hydro.ti_end_max) error( "Non-matching ti_end_max. Cell=%lld true=%lld ti_current=%lld " "depth=%d", c->hydro.ti_end_max, ti_end_max, ti_current, c->depth); + if (ti_beg_max != c->hydro.ti_beg_max) error( "Non-matching ti_beg_max. Cell=%lld true=%lld ti_current=%lld " -- GitLab From 271b1b151b221607c4d36bc5de8d24a6897559e9 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Tue, 31 Mar 2020 20:05:56 +0200 Subject: [PATCH 24/27] Do not report the task times when rebuilding after a repartiton. Do so before dumping the task array in engine_repartition() --- src/engine.c | 6 +++++- src/scheduler.c | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/engine.c b/src/engine.c index daa28518c..e5e7b78dc 100644 --- a/src/engine.c +++ b/src/engine.c @@ -210,6 +210,9 @@ void engine_repartition(struct engine *e) { /* Sorting indices. */ if (e->s->cells_top != NULL) space_free_cells(e->s); + /* Report the time spent in the different task categories */ + if (e->verbose) scheduler_report_task_times(&e->sched, e->nr_threads); + /* Task arrays. */ scheduler_free_tasks(&e->sched); @@ -1671,7 +1674,8 @@ void engine_rebuild(struct engine *e, int repartitioned, e->restarting = 0; /* Report the time spent in the different task categories */ - if (e->verbose) scheduler_report_task_times(&e->sched, e->nr_threads); + if (e->verbose && !repartitioned) + scheduler_report_task_times(&e->sched, e->nr_threads); /* Give some breathing space */ scheduler_free_tasks(&e->sched); diff --git a/src/scheduler.c b/src/scheduler.c index d7fe3fb46..14d3627f0 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -2286,6 +2286,7 @@ void scheduler_free_tasks(struct scheduler *s) { s->tid_active = NULL; } s->size = 0; + s->nr_tasks = 0; } /** -- GitLab From bd2e837bf4e5e0e00a80f6ba4472828b838e607a Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Wed, 1 Apr 2020 21:53:14 +0200 Subject: [PATCH 25/27] Reinstated incorrectly dropped update of the potential in gravity_L2P() --- src/multipole.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/multipole.h b/src/multipole.h index 867244d28..347dd59ba 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -2592,6 +2592,7 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, accumulate_add_f(&gp->a_grav[0], a_grav[0]); accumulate_add_f(&gp->a_grav[1], a_grav[1]); accumulate_add_f(&gp->a_grav[2], a_grav[2]); + gravity_add_comoving_potential(gp, pot); #ifdef SWIFT_GRAVITY_FORCE_CHECKS accumulate_add_f(&gp->a_grav_m2l[0], a_grav[0]); -- GitLab From 5ed9ecedf6c1a614513c1157d0a41396f91bb7c0 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Thu, 2 Apr 2020 14:04:39 +0200 Subject: [PATCH 26/27] Added a simple unit test checking the correctness of the atomic implementation --- .gitignore | 1 + tests/Makefile.am | 8 ++- tests/testAtomic.c | 146 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 2 deletions(-) create mode 100644 tests/testAtomic.c diff --git a/.gitignore b/.gitignore index f843002a1..99aa018c9 100644 --- a/.gitignore +++ b/.gitignore @@ -122,6 +122,7 @@ tests/testInteractions.sh tests/testSymmetry tests/testHydroMPIrules tests/testMaths +tests/testAtomic tests/testRandom tests/testRandomSpacing tests/testThreadpool diff --git a/tests/Makefile.am b/tests/Makefile.am index 11426ac89..e24a2a69b 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -29,7 +29,8 @@ TESTS = testGreetings testMaths testReading.sh testKernel \ testPeriodicBC.sh testPeriodicBCPerturbed.sh testPotentialSelf \ testPotentialPair testEOS testUtilities testSelectOutput.sh \ testCbrt testCosmology testOutputList testFormat.sh \ - test27cellsStars.sh test27cellsStarsPerturbed.sh testHydroMPIrules + test27cellsStars.sh test27cellsStarsPerturbed.sh testHydroMPIrules \ + testAtomic # List of test programs to compile check_PROGRAMS = testGreetings testReading testTimeIntegration \ @@ -41,7 +42,8 @@ check_PROGRAMS = testGreetings testReading testTimeIntegration \ testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC \ testGravityDerivatives testPotentialSelf testPotentialPair testEOS testUtilities \ testSelectOutput testCbrt testCosmology testOutputList test27cellsStars \ - test27cellsStars_subset testCooling testComovingCooling testFeedback testHashmap testHydroMPIrules + test27cellsStars_subset testCooling testComovingCooling testFeedback testHashmap \ + testAtomic testHydroMPIrules # Rebuild tests when SWIFT is updated. $(check_PROGRAMS): ../src/.libs/libswiftsim.a @@ -51,6 +53,8 @@ testGreetings_SOURCES = testGreetings.c testMaths_SOURCES = testMaths.c +testAtomic_SOURCES = testAtomic.c + testRandom_SOURCES = testRandom.c testRandomSpacing_SOURCES = testRandomSpacing.c diff --git a/tests/testAtomic.c b/tests/testAtomic.c new file mode 100644 index 000000000..47eca7b73 --- /dev/null +++ b/tests/testAtomic.c @@ -0,0 +1,146 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2020 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Standard includes. */ +#include + +/* Local includes */ +#include "swift.h" + +const int array_size = 2048 * 2048; +const int num_threads = 64; +const int chunk_size = 64; + +void map_function_sum_f(void *data, int num_elements, void *extra_data) { + + float *array = (float *)data; + float *sum = (float *)extra_data; + + for (int i = 0; i < num_elements; ++i) atomic_add_f(sum, array[i]); +} + +void map_function_sum_ll(void *data, int num_elements, void *extra_data) { + + long long *array = (long long *)data; + long long *sum = (long long *)extra_data; + + for (int i = 0; i < num_elements; ++i) atomic_add(sum, array[i]); +} + +void map_function_inc_ll(void *data, int num_elements, void *extra_data) { + + long long *sum = (long long *)extra_data; + + for (int i = 0; i < num_elements; ++i) atomic_inc(sum); +} + +int main(int argc, char *argv[]) { + + /* Initialize CPU frequency, this also starts time. */ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + +/* Choke on FPEs */ +#ifdef HAVE_FE_ENABLE_EXCEPT + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + + /* Get some randomness going */ + const int seed = time(NULL); + message("Seed = %d", seed); + srand(seed); + + /* Start a bunch of threads */ + printf("# Creating threadpool with %d threads\n", num_threads); + struct threadpool tp; + threadpool_init(&tp, num_threads); + + /* Create some random data */ + float *array_f = malloc(array_size * sizeof(float)); + long long *array_ll = malloc(array_size * sizeof(long long)); + + for (int i = 0; i < array_size; ++i) { + array_f[i] = rand() / ((float)RAND_MAX); + array_ll[i] = rand(); + } + + /*** Test the addition atomic ops *******************************/ + + /* float case */ + + /* Compute the real answer */ + float real_sum_f = 0.f; + for (int i = 0; i < array_size; ++i) { + real_sum_f += array_f[i]; + } + + /* Compute the answer via threads and atomic */ + float atomic_sum_f = 0.f; + threadpool_map(&tp, map_function_sum_f, array_f, array_size, sizeof(float), + chunk_size, &atomic_sum_f); + + const double diff_sum_f = (double)real_sum_f - (double)atomic_sum_f; + const double sum_sum_f = (double)real_sum_f + (double)atomic_sum_f; + const double rel_sum_f = 0.5 * fabs(diff_sum_f) / sum_sum_f; + message("Real sum = %.7e -- atomic sum = %.7e rel=%e", real_sum_f, + atomic_sum_f, rel_sum_f); + + /* long long case */ + + /* Compute the real answer */ + long long real_sum_ll = 0.f; + for (int i = 0; i < array_size; ++i) { + real_sum_ll += array_ll[i]; + } + + /* Compute the answer via threads and atomic */ + long long atomic_sum_ll = 0LL; + threadpool_map(&tp, map_function_sum_ll, array_ll, array_size, + sizeof(long long), chunk_size, &atomic_sum_ll); + + const double diff_sum_ll = (double)real_sum_ll - (double)atomic_sum_ll; + const double sum_sum_ll = (double)real_sum_ll + (double)atomic_sum_ll; + const double rel_sum_ll = 0.5 * fabs(diff_sum_ll) / sum_sum_ll; + message("Real sum = %lld -- atomic sum = %lld rel=%e", real_sum_ll, + atomic_sum_ll, rel_sum_ll); + + /*** Test the inc atomic ops *******************************/ + + long long real_inc_ll = array_size; + + /* Compute the answer via threads and atomic */ + long long atomic_inc_ll = 0LL; + threadpool_map(&tp, map_function_inc_ll, array_ll, array_size, + sizeof(long long), chunk_size, &atomic_inc_ll); + + const double diff_inc_ll = (double)real_inc_ll - (double)atomic_inc_ll; + const double sum_inc_ll = (double)real_inc_ll + (double)atomic_inc_ll; + const double rel_inc_ll = 0.5 * fabs(diff_inc_ll) / sum_inc_ll; + message("Real inc = %lld -- atomic inc = %lld rel=%e", real_inc_ll, + atomic_inc_ll, rel_inc_ll); + + /* Be clean */ + threadpool_clean(&tp); + free(array_f); + free(array_ll); + return 0; +} -- GitLab From fb75b0bd7c218769931a27d3d748c442421bce6b Mon Sep 17 00:00:00 2001 From: Matthieu Schaller Date: Thu, 2 Apr 2020 16:39:28 +0200 Subject: [PATCH 27/27] Use atomics also on the counters of interactions in the M2L function --- src/multipole.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/multipole.h b/src/multipole.h index 347dd59ba..55de9c0ba 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -1608,12 +1608,19 @@ INLINE static void gravity_M2L_apply( const struct potential_derivatives_M2L *pot) { #ifdef SWIFT_DEBUG_CHECKS - /* Count interactions */ - l_b->num_interacted += m_a->num_gpart; + /* Count all interactions + * Note that despite being in a section of the code protected by locks, + * we must use atomics here as the long-range task may update this + * counter in a lock-free section of code. */ + accumulate_add_ll(&l_b->num_interacted, m_a->num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS - l_b->num_interacted_tree += m_a->num_gpart; + /* Count tree interactions + * Note that despite being in a section of the code protected by locks, + * we must use atomics here as the long-range task may update this + * counter in a lock-free section of code. */ + accumulate_add_ll(&l_b->num_interacted_tree, m_a->num_gpart); #endif /* Record that this tensor has received contributions */ -- GitLab