diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py index e070bdb565d6d8305b7974137292910dd720afe2..f2d0aa95d1f35f30476e1989349a07be8d9e5b0a 100755 --- a/examples/plot_tasks.py +++ b/examples/plot_tasks.py @@ -55,8 +55,8 @@ PLOT_PARAMS = {"axes.labelsize": 10, pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. -TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift", - "kick", "kick_fixdt", "send", "recv", "grav_pp", "grav_mm", +TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", "ghost", + "drift", "kick", "kick_fixdt", "send", "recv", "grav_pp", "grav_mm", "grav_up", "grav_down", "grav_external", "part_sort", "gpart_sort", "split_cell", "rewait", "count"] @@ -64,7 +64,8 @@ TASKCOLOURS = {"none": "black", "sort": "lightblue", "self": "greenyellow", "pair": "navy", - "sub": "hotpink", + "sub_self": "greenyellow", + "sub_pair": "navy", "init": "indigo", "ghost": "cyan", "drift": "maroon", diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py index 44705a0164034f8a22f1f3aa860a64d3e7656d2e..9a92faf9417c9a302831eb8cb2f4471eb672d59c 100755 --- a/examples/plot_tasks_MPI.py +++ b/examples/plot_tasks_MPI.py @@ -61,8 +61,8 @@ PLOT_PARAMS = {"axes.labelsize": 10, pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. -TASKTYPES = ["none", "sort", "self", "pair", "sub", "init", "ghost", "drift", - "kick", "kick_fixdt", "send", "recv", "grav_pp", "grav_mm", +TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", "init", "ghost", + "drift", "kick", "kick_fixdt", "send", "recv", "grav_pp", "grav_mm", "grav_up", "grav_down", "grav_external", "part_sort", "gpart_sort", "split_cell", "rewait", "count"] @@ -70,7 +70,8 @@ TASKCOLOURS = {"none": "black", "sort": "lightblue", "self": "greenyellow", "pair": "navy", - "sub": "hotpink", + "sub_self": "greenyellow", + "sub_pair": "navy", "init": "indigo", "ghost": "cyan", "drift": "maroon", diff --git a/src/Makefile.am b/src/Makefile.am index 26eb7f636912e69b369476ecb30eda565b1362b8..21b5ef25ca21202caaa9107bfbf09e62aa66011d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,3 @@ - # This file is part of SWIFT. # Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), # Matthieu Schaller (matthieu.schaller@durham.ac.uk). @@ -17,10 +16,10 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # Add the debug flag to the whole thing -AM_CFLAGS = -DTIMER -DCOUNTER $(HDF5_CPPFLAGS) +AM_CFLAGS = -DTIMER $(HDF5_CPPFLAGS) # Assign a "safe" version number -AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0 # -fsanitize=address +AM_LDFLAGS = $(HDF5_LDFLAGS) -version-info 0:0:0 # The git command, if available. GIT_CMD = @GIT_CMD@ @@ -70,6 +69,7 @@ libswiftsim_la_SOURCES = $(AM_SOURCES) # Sources and flags for MPI library libswiftsim_mpi_la_SOURCES = $(AM_SOURCES) libswiftsim_mpi_la_CFLAGS = $(AM_CFLAGS) -DWITH_MPI $(METIS_INCS) +libswiftsim_mpi_la_LDFLAGS = $(AM_LDFLAGS) -DWITH_MPI $(METIS_LIBS) libswiftsim_mpi_la_SHORTNAME = mpi diff --git a/src/engine.c b/src/engine.c index 3d33d7400e6f1529f43d9c654f8b947cafb9ee48..aa65f45678fae1e21276422b69f1c76cd7e9eaef 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1200,30 +1200,22 @@ void engine_count_and_link_tasks(struct engine *e) { t->cj->density = engine_addlink(e, t->cj->density, t); atomic_inc(&t->cj->nr_density); } - } else if (t->type == task_type_sub) { + } else if (t->type == task_type_sub_self) { atomic_inc(&t->ci->nr_tasks); - if (t->cj != NULL) atomic_inc(&t->cj->nr_tasks); if (t->subtype == task_subtype_density) { t->ci->density = engine_addlink(e, t->ci->density, t); atomic_inc(&t->ci->nr_density); - if (t->cj != NULL) { - t->cj->density = engine_addlink(e, t->cj->density, t); - atomic_inc(&t->cj->nr_density); - } + } + } else if (t->type == task_type_sub_pair) { + atomic_inc(&t->ci->nr_tasks); + atomic_inc(&t->cj->nr_tasks); + if (t->subtype == task_subtype_density) { + t->ci->density = engine_addlink(e, t->ci->density, t); + atomic_inc(&t->ci->nr_density); + t->cj->density = engine_addlink(e, t->cj->density, t); + atomic_inc(&t->cj->nr_density); } } - - /* /\* Link gravity multipole tasks to the up/down tasks. *\/ */ - /* if (t->type == task_type_grav_mm || */ - /* (t->type == task_type_sub && t->subtype == task_subtype_grav)) { */ - /* atomic_inc(&t->ci->nr_tasks); */ - /* scheduler_addunlock(sched, t->ci->grav_up, t); */ - /* scheduler_addunlock(sched, t, t->ci->grav_down); */ - /* if (t->cj != NULL && t->ci->grav_up != t->cj->grav_up) { */ - /* scheduler_addunlock(sched, t->cj->grav_up, t); */ - /* scheduler_addunlock(sched, t, t->cj->grav_down); */ - /* } */ - /* } */ } } @@ -1311,37 +1303,51 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { } } - /* Otherwise, sub interaction? */ - else if (t->type == task_type_sub && t->subtype == task_subtype_density) { + /* Otherwise, sub-self interaction? */ + else if (t->type == task_type_sub_self && + t->subtype == task_subtype_density) { /* Start by constructing the task for the second hydro loop */ struct task *t2 = - scheduler_addtask(sched, task_type_sub, task_subtype_force, t->flags, - 0, t->ci, t->cj, 0); + scheduler_addtask(sched, task_type_sub_self, task_subtype_force, + t->flags, 0, t->ci, t->cj, 0); - /* Add the link between the new loop and both cells */ + /* Add the link between the new loop and the cell */ t->ci->force = engine_addlink(e, t->ci->force, t2); atomic_inc(&t->ci->nr_force); - if (t->cj != NULL) { - t->cj->force = engine_addlink(e, t->cj->force, t2); - atomic_inc(&t->cj->nr_force); + + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not descendant of the same super-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); } + } + + /* Otherwise, sub-pair interaction? */ + else if (t->type == task_type_sub_pair && + t->subtype == task_subtype_density) { + + /* Start by constructing the task for the second hydro loop */ + struct task *t2 = + scheduler_addtask(sched, task_type_sub_pair, task_subtype_force, + t->flags, 0, t->ci, t->cj, 0); + + /* Add the link between the new loop and both cells */ + t->ci->force = engine_addlink(e, t->ci->force, t2); + atomic_inc(&t->ci->nr_force); + t->cj->force = engine_addlink(e, t->cj->force, t2); + atomic_inc(&t->cj->nr_force); /* Now, build all the dependencies for the hydro for the cells */ /* that are local and are not descendant of the same super-cells */ if (t->ci->nodeID == nodeID) { engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); } - if (t->cj != NULL && t->cj->nodeID == nodeID && - t->ci->super != t->cj->super) { + if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { engine_make_hydro_loops_dependencies(sched, t, t2, t->cj); } } - /* /\* Kick tasks should rely on the grav_down tasks of their cell. *\/ */ - /* else if (t->type == task_type_kick && t->ci->grav_down != NULL) */ - /* scheduler_addunlock(sched, t->ci->grav_down, t); */ - /* External gravity tasks should depend on init and unlock the kick */ else if (t->type == task_type_grav_external) { scheduler_addunlock(sched, t->ci->init, t); @@ -1544,8 +1550,7 @@ int engine_marktasks(struct engine *e) { struct task *t = &tasks[ind[k]]; /* Pair? */ - if (t->type == task_type_pair || - (t->type == task_type_sub && t->cj != NULL)) { + if (t->type == task_type_pair || t->type == task_type_sub_pair) { /* Local pointers. */ const struct cell *ci = t->ci; @@ -1589,15 +1594,14 @@ int engine_marktasks(struct engine *e) { /* Single-cell task? */ else if (t->type == task_type_self || t->type == task_type_ghost || - (t->type == task_type_sub && t->cj == NULL)) { + t->type == task_type_sub_self) { /* Set this task's skip. */ t->skip = (t->ci->ti_end_min > ti_end); } /* Pair? */ - else if (t->type == task_type_pair || - (t->type == task_type_sub && t->cj != NULL)) { + else if (t->type == task_type_pair || t->type == task_type_sub_pair) { /* Local pointers. */ const struct cell *ci = t->ci; @@ -2109,7 +2113,8 @@ void engine_init_particles(struct engine *e) { mask |= 1 << task_type_self; mask |= 1 << task_type_pair; - mask |= 1 << task_type_sub; + mask |= 1 << task_type_sub_self; + mask |= 1 << task_type_sub_pair; mask |= 1 << task_type_ghost; submask |= 1 << task_subtype_density; @@ -2238,7 +2243,8 @@ void engine_step(struct engine *e) { mask |= 1 << task_type_self; mask |= 1 << task_type_pair; - mask |= 1 << task_type_sub; + mask |= 1 << task_type_sub_self; + mask |= 1 << task_type_sub_pair; mask |= 1 << task_type_ghost; submask |= 1 << task_subtype_density; diff --git a/src/partition.c b/src/partition.c index b79dfd6b81d727b6e53f9e0906c7c59eee6607f1..dbec9f7ebd976a9779d4a54ee536e0e4eef331b2 100644 --- a/src/partition.c +++ b/src/partition.c @@ -453,9 +453,9 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, /* Skip un-interesting tasks. */ if (t->type != task_type_self && t->type != task_type_pair && - t->type != task_type_sub && t->type != task_type_ghost && - t->type != task_type_drift && t->type != task_type_kick && - t->type != task_type_init) + t->type != task_type_sub_self && t->type != task_type_sub_self && + t->type != task_type_ghost && t->type != task_type_drift && + t->type != task_type_kick && t->type != task_type_init) continue; /* Get the task weight. */ @@ -496,7 +496,8 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, /* Self interaction? */ else if ((t->type == task_type_self && ci->nodeID == nodeID) || - (t->type == task_type_sub && cj == NULL && ci->nodeID == nodeID)) { + (t->type == task_type_sub_self && cj == NULL && + ci->nodeID == nodeID)) { /* Self interactions add only to vertex weight. */ if (taskvweights) weights_v[cid] += w; @@ -504,7 +505,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, /* Pair? */ else if (t->type == task_type_pair || - (t->type == task_type_sub && cj != NULL)) { + (t->type == task_type_sub_pair)) { /* In-cell pair? */ if (ci == cj) { /* Add weight to vertex for ci. */ diff --git a/src/queue.c b/src/queue.c index a62ded2561fc683bf2f44abcf1110b0fb7eebd0c..ab414bba72b71edf76cd2c0a316ad52d6fee9044 100644 --- a/src/queue.c +++ b/src/queue.c @@ -38,16 +38,6 @@ #include "const.h" #include "error.h" -/* Counter macros. */ -#ifdef COUNTER -#define COUNT(c) (__sync_add_and_fetch(&queue_counter[c], 1)) -#else -#define COUNT(c) -#endif - -/* The counters. */ -int queue_counter[queue_counter_count]; - /** * @brief Enqueue all tasks in the incoming DEQ. * diff --git a/src/runner.c b/src/runner.c index 75c3723473cd391c8631435e5a320c356f119340..addeebda93a4d3f4b031f6fdc1f9eb856adc0a32 100644 --- a/src/runner.c +++ b/src/runner.c @@ -86,9 +86,6 @@ const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, #define FUNCTION force #include "runner_doiact.h" -/* Import the gravity loop functions. */ -#include "runner_doiact_grav.h" - /** * @brief Calculate gravity acceleration from external potential * @@ -98,8 +95,8 @@ const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, */ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { - struct gpart *g, *gparts = c->gparts; - int i, k, gcount = c->gcount; + struct gpart *restrict gparts = c->gparts; + const int gcount = c->gcount; const int ti_current = r->e->ti_current; const struct external_potential *potential = r->e->external_potential; const struct phys_const *constants = r->e->physical_constants; @@ -108,7 +105,7 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { /* Recurse? */ if (c->split) { - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) runner_do_grav_external(r, c->progeny[k], 0); return; } @@ -118,10 +115,10 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { #endif /* Loop over the parts in this cell. */ - for (i = 0; i < gcount; i++) { + for (int i = 0; i < gcount; i++) { /* Get a direct pointer on the part. */ - g = &gparts[i]; + struct gpart *const g = &gparts[i]; /* Is this part within the time step? */ if (g->ti_end <= ti_current) { @@ -553,8 +550,13 @@ void runner_do_ghost(struct runner *r, struct cell *c) { } - /* Otherwise, sub interaction? */ - else if (l->t->type == task_type_sub) { + /* Otherwise, sub-self interaction? */ + else if (l->t->type == task_type_sub_self) + runner_dosub_subset_density(r, finger, parts, pid, count, NULL, -1, + 1); + + /* Otherwise, sub-pair interaction? */ + else if (l->t->type == task_type_sub_pair) { /* Left or right? */ if (l->t->ci == finger) @@ -1066,13 +1068,19 @@ void *runner_main(void *data) { case task_type_sort: runner_do_sort(r, ci, t->flags, 1); break; - case task_type_sub: + case task_type_sub_self: + if (t->subtype == task_subtype_density) + runner_dosub_self1_density(r, ci, 1); + else if (t->subtype == task_subtype_force) + runner_dosub_self2_force(r, ci, 1); + else + error("Unknown task subtype."); + break; + case task_type_sub_pair: if (t->subtype == task_subtype_density) - runner_dosub1_density(r, ci, cj, t->flags, 1); + runner_dosub_pair1_density(r, ci, cj, t->flags, 1); else if (t->subtype == task_subtype_force) - runner_dosub2_force(r, ci, cj, t->flags, 1); - else if (t->subtype == task_subtype_grav) - runner_dosub_grav(r, ci, cj, 1); + runner_dosub_pair2_force(r, ci, cj, t->flags, 1); else error("Unknown task subtype."); break; @@ -1096,21 +1104,6 @@ void *runner_main(void *data) { case task_type_recv: runner_do_recv_cell(r, ci, 1); break; - case task_type_grav_pp: - if (t->cj == NULL) - runner_doself_grav(r, t->ci); - else - runner_dopair_grav(r, t->ci, t->cj); - break; - case task_type_grav_mm: - runner_dograv_mm(r, t->ci, t->cj); - break; - case task_type_grav_up: - runner_dograv_up(r, t->ci); - break; - case task_type_grav_down: - runner_dograv_down(r, t->ci); - break; case task_type_grav_external: runner_do_grav_external(r, t->ci, 1); break; diff --git a/src/runner_doiact.h b/src/runner_doiact.h index e0c13e4e0adb101d9a8975651f24225551277f43..7fe19af853601e49de553a55ee205c654167d67e 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -1,4 +1,3 @@ - /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk) @@ -57,11 +56,17 @@ #define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f) #define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION) -#define _DOSUB1(f) PASTE(runner_dosub1, f) -#define DOSUB1 _DOSUB1(FUNCTION) +#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f) +#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION) + +#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f) +#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION) + +#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f) +#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION) -#define _DOSUB2(f) PASTE(runner_dosub2, f) -#define DOSUB2 _DOSUB2(FUNCTION) +#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f) +#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION) #define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f) #define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION) @@ -78,8 +83,11 @@ #define _TIMER_DOPAIR(f) PASTE(timer_dopair, f) #define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION) -#define _TIMER_DOSUB(f) PASTE(timer_dosub, f) -#define TIMER_DOSUB _TIMER_DOSUB(FUNCTION) +#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f) +#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION) + +#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f) +#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION) #define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f) #define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION) @@ -1711,7 +1719,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { } /** - * @brief Compute grouped sub-cell interactions + * @brief Compute grouped sub-cell interactions for pairs * * @param r The #runner. * @param ci The first #cell. @@ -1722,543 +1730,561 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { * @todo Hard-code the sid on the recursive calls to avoid the * redundant computations to find the sid on-the-fly. */ +void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, + int gettimer) { -void DOSUB1(struct runner *r, struct cell *ci, struct cell *cj, int sid, - int gettimer) { - - int j = 0, k; - double shift[3]; - float h; struct space *s = r->e->s; const int ti_current = r->e->ti_current; TIMER_TIC - /* Is this a single cell? */ - if (cj == NULL) { - - /* Should we even bother? */ - if (ci->ti_end_min > ti_current) return; + /* Should we even bother? */ + if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return; - /* Recurse? */ - if (ci->split) { + /* Get the cell dimensions. */ + const float h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2])); - /* Loop over all progeny. */ - for (k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) { - DOSUB1(r, ci->progeny[k], NULL, -1, 0); - for (j = k + 1; j < 8; j++) - if (ci->progeny[j] != NULL) - DOSUB1(r, ci->progeny[k], ci->progeny[j], -1, 0); - } + /* Get the type of pair if not specified explicitly. */ + // if ( sid < 0 ) + double shift[3]; + sid = space_getsid(s, &ci, &cj, shift); - } + /* Recurse? */ + if (ci->split && cj->split && + fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < + h / 2) { - /* Otherwise, compute self-interaction. */ - else - DOSELF1(r, ci); + /* Different types of flags. */ + switch (sid) { - } /* self-interaction. */ + /* Regular sub-cell interactions of a single cell. */ + case 0: /* ( 1 , 1 , 1 ) */ + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0); + break; - /* Otherwise, it's a pair interaction. */ - else { + case 1: /* ( 1 , 1 , 0 ) */ + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[1], -1, 0); + break; - /* Should we even bother? */ - if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return; + case 2: /* ( 1 , 1 , -1 ) */ + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0); + break; - /* Get the cell dimensions. */ - h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2])); + case 3: /* ( 1 , 0 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[2], -1, 0); + break; - /* Get the type of pair if not specified explicitly. */ - // if ( sid < 0 ) - sid = space_getsid(s, &ci, &cj, shift); + case 4: /* ( 1 , 0 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[0], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[1], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[2], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[3], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[1], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[3], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[2], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[3], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[3], -1, 0); + break; - /* Recurse? */ - if (ci->split && cj->split && - fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < - h / 2) { + case 5: /* ( 1 , 0 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[1], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[3], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[3], -1, 0); + break; - /* Different types of flags. */ - switch (sid) { + case 6: /* ( 1 , -1 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0); + break; - /* Regular sub-cell interactions of a single cell. */ - case 0: /* ( 1 , 1 , 1 ) */ - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0); - break; + case 7: /* ( 1 , -1 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[2], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[3], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[3], -1, 0); + break; - case 1: /* ( 1 , 1 , 0 ) */ - if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[0], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[1], -1, 0); - break; + case 8: /* ( 1 , -1 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR1(r, ci->progeny[4], cj->progeny[3], -1, 0); + break; - case 2: /* ( 1 , 1 , -1 ) */ - if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0); - break; + case 9: /* ( 0 , 1 , 1 ) */ + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[4], -1, 0); + break; - case 3: /* ( 1 , 0 , 1 ) */ - if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[5], cj->progeny[0], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[2], -1, 0); - break; + case 10: /* ( 0 , 1 , 0 ) */ + if (ci->progeny[2] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[0], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[1], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[4], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[5], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[1], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[4], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[5], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[4], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[5], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[5], -1, 0); + break; - case 4: /* ( 1 , 0 , 0 ) */ - if (ci->progeny[4] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[4], cj->progeny[0], -1, 0); - if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[4], cj->progeny[1], -1, 0); - if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) - DOSUB1(r, ci->progeny[4], cj->progeny[2], -1, 0); - if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) - DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[5], cj->progeny[0], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[5], cj->progeny[1], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) - DOSUB1(r, ci->progeny[5], cj->progeny[3], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[0], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[2] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[2], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[3], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[1], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[2], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[3] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[3], -1, 0); - break; + case 11: /* ( 0 , 1 , -1 ) */ + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[1], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR1(r, ci->progeny[2], cj->progeny[5], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR1(r, ci->progeny[6], cj->progeny[5], -1, 0); + break; - case 5: /* ( 1 , 0 , -1 ) */ - if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[4], cj->progeny[1], -1, 0); - if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) - DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[3], -1, 0); - break; + case 12: /* ( 0 , 0 , 1 ) */ + if (ci->progeny[1] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[1], cj->progeny[0], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1(r, ci->progeny[1], cj->progeny[2], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1(r, ci->progeny[1], cj->progeny[4], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[6] != NULL) + DOSUB_PAIR1(r, ci->progeny[1], cj->progeny[6], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[2], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[4], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[6] != NULL) + DOSUB_PAIR1(r, ci->progeny[3], cj->progeny[6], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[4], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[6] != NULL) + DOSUB_PAIR1(r, ci->progeny[5], cj->progeny[6], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[6] != NULL) + DOSUB_PAIR1(r, ci->progeny[7], cj->progeny[6], -1, 0); + break; + } - case 6: /* ( 1 , -1 , 1 ) */ - if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0); - break; + } - case 7: /* ( 1 , -1 , 0 ) */ - if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) - DOSUB1(r, ci->progeny[4], cj->progeny[2], -1, 0); - if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) - DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) - DOSUB1(r, ci->progeny[5], cj->progeny[3], -1, 0); - break; + /* Otherwise, compute the pair directly. */ + else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) { - case 8: /* ( 1 , -1 , -1 ) */ - if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) - DOSUB1(r, ci->progeny[4], cj->progeny[3], -1, 0); - break; + /* Do any of the cells need to be sorted first? */ + if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1); - case 9: /* ( 0 , 1 , 1 ) */ - if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[3], cj->progeny[0], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) - DOSUB1(r, ci->progeny[3], cj->progeny[4], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[4], -1, 0); - break; + /* Compute the interactions. */ + DOPAIR1(r, ci, cj); + } - case 10: /* ( 0 , 1 , 0 ) */ - if (ci->progeny[2] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[2], cj->progeny[0], -1, 0); - if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[2], cj->progeny[1], -1, 0); - if (ci->progeny[2] != NULL && cj->progeny[4] != NULL) - DOSUB1(r, ci->progeny[2], cj->progeny[4], -1, 0); - if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) - DOSUB1(r, ci->progeny[2], cj->progeny[5], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[3], cj->progeny[0], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[3], cj->progeny[1], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) - DOSUB1(r, ci->progeny[3], cj->progeny[4], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[5] != NULL) - DOSUB1(r, ci->progeny[3], cj->progeny[5], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[0], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[4] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[4], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[5], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[1], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[4], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[5] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[5], -1, 0); - break; + if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); +} - case 11: /* ( 0 , 1 , -1 ) */ - if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[2], cj->progeny[1], -1, 0); - if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) - DOSUB1(r, ci->progeny[2], cj->progeny[5], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[1], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) - DOSUB1(r, ci->progeny[6], cj->progeny[5], -1, 0); - break; +/** + * @brief Compute grouped sub-cell interactions for self tasks + * + * @param r The #runner. + * @param ci The first #cell. + * @param gettimer Do we have a timer ? + */ +void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { - case 12: /* ( 0 , 0 , 1 ) */ - if (ci->progeny[1] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[1], cj->progeny[0], -1, 0); - if (ci->progeny[1] != NULL && cj->progeny[2] != NULL) - DOSUB1(r, ci->progeny[1], cj->progeny[2], -1, 0); - if (ci->progeny[1] != NULL && cj->progeny[4] != NULL) - DOSUB1(r, ci->progeny[1], cj->progeny[4], -1, 0); - if (ci->progeny[1] != NULL && cj->progeny[6] != NULL) - DOSUB1(r, ci->progeny[1], cj->progeny[6], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[3], cj->progeny[0], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[2] != NULL) - DOSUB1(r, ci->progeny[3], cj->progeny[2], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) - DOSUB1(r, ci->progeny[3], cj->progeny[4], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[6] != NULL) - DOSUB1(r, ci->progeny[3], cj->progeny[6], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[5], cj->progeny[0], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - DOSUB1(r, ci->progeny[5], cj->progeny[2], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[4] != NULL) - DOSUB1(r, ci->progeny[5], cj->progeny[4], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[6] != NULL) - DOSUB1(r, ci->progeny[5], cj->progeny[6], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[0], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[2], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[4], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[6] != NULL) - DOSUB1(r, ci->progeny[7], cj->progeny[6], -1, 0); - break; - } + const int ti_current = r->e->ti_current; - } + TIMER_TIC - /* Otherwise, compute the pair directly. */ - else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) { + /* Should we even bother? */ + if (ci->ti_end_min > ti_current) return; - /* Do any of the cells need to be sorted first? */ - if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1); + /* Recurse? */ + if (ci->split) { - /* Compute the interactions. */ - DOPAIR1(r, ci, cj); - } + /* Loop over all progeny. */ + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) { + DOSUB_SELF1(r, ci->progeny[k], 0); + for (int j = k + 1; j < 8; j++) + if (ci->progeny[j] != NULL) + DOSUB_PAIR1(r, ci->progeny[k], ci->progeny[j], -1, 0); + } + } - } /* otherwise, pair interaction. */ + /* Otherwise, compute self-interaction. */ + else + DOSELF1(r, ci); - if (gettimer) TIMER_TOC(TIMER_DOSUB); + if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF); } -void DOSUB2(struct runner *r, struct cell *ci, struct cell *cj, int sid, - int gettimer) { +/** + * @brief Compute grouped sub-cell interactions for pairs (symmetric case) + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + * @param sid The direction linking the cells + * @param gettimer Do we have a timer ? + * + * @todo Hard-code the sid on the recursive calls to avoid the + * redundant computations to find the sid on-the-fly. + */ +void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, + int gettimer) { - int j, k; - double shift[3]; - float h; struct space *s = r->e->s; const int ti_current = r->e->ti_current; TIMER_TIC - /* Is this a single cell? */ - if (cj == NULL) { + /* Should we even bother? */ + if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return; - /* Should we even bother? */ - if (ci->ti_end_min > ti_current) return; + /* Get the cell dimensions. */ + const float h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2])); - /* Recurse? */ - if (ci->split) { + /* Get the type of pair if not specified explicitly. */ + // if ( sid < 0 ) + double shift[3]; + sid = space_getsid(s, &ci, &cj, shift); - /* Loop over all progeny. */ - for (k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) { - DOSUB2(r, ci->progeny[k], NULL, -1, 0); - for (j = k + 1; j < 8; j++) - if (ci->progeny[j] != NULL) - DOSUB2(r, ci->progeny[k], ci->progeny[j], -1, 0); - } + /* Recurse? */ + if (ci->split && cj->split && + fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < + h / 2) { - } + /* Different types of flags. */ + switch (sid) { - /* Otherwise, compute self-interaction. */ - else - DOSELF2(r, ci); + /* Regular sub-cell interactions of a single cell. */ + case 0: /* ( 1 , 1 , 1 ) */ + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0); + break; - } /* self-interaction. */ + case 1: /* ( 1 , 1 , 0 ) */ + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[1], -1, 0); + break; - /* Otherwise, it's a pair interaction. */ - else { + case 2: /* ( 1 , 1 , -1 ) */ + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0); + break; - /* Should we even bother? */ - if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return; + case 3: /* ( 1 , 0 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[2], -1, 0); + break; - /* Get the cell dimensions. */ - h = fmin(ci->h[0], fmin(ci->h[1], ci->h[2])); + case 4: /* ( 1 , 0 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[0], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[1], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[2], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[3], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[1], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[3], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[2], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[3], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[3], -1, 0); + break; - /* Get the type of pair if not specified explicitly. */ - // if ( sid < 0 ) - sid = space_getsid(s, &ci, &cj, shift); + case 5: /* ( 1 , 0 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[1], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[3], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[3], -1, 0); + break; - /* Recurse? */ - if (ci->split && cj->split && - fmaxf(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < - h / 2) { + case 6: /* ( 1 , -1 , 1 ) */ + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0); + break; - /* Different types of flags. */ - switch (sid) { + case 7: /* ( 1 , -1 , 0 ) */ + if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[2], -1, 0); + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[3], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[3], -1, 0); + break; - /* Regular sub-cell interactions of a single cell. */ - case 0: /* ( 1 , 1 , 1 ) */ - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0); - break; + case 8: /* ( 1 , -1 , -1 ) */ + if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) + DOSUB_PAIR2(r, ci->progeny[4], cj->progeny[3], -1, 0); + break; - case 1: /* ( 1 , 1 , 0 ) */ - if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[0], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[1], -1, 0); - break; + case 9: /* ( 0 , 1 , 1 ) */ + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[4], -1, 0); + break; - case 2: /* ( 1 , 1 , -1 ) */ - if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0); - break; + case 10: /* ( 0 , 1 , 0 ) */ + if (ci->progeny[2] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[0], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[1], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[4], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[5], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[1], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[4], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[5], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[0], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[4], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[5], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[1], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[5], -1, 0); + break; - case 3: /* ( 1 , 0 , 1 ) */ - if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[5], cj->progeny[0], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[2], -1, 0); - break; + case 11: /* ( 0 , 1 , -1 ) */ + if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[1], -1, 0); + if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR2(r, ci->progeny[2], cj->progeny[5], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[1], -1, 0); + if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) + DOSUB_PAIR2(r, ci->progeny[6], cj->progeny[5], -1, 0); + break; - case 4: /* ( 1 , 0 , 0 ) */ - if (ci->progeny[4] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[4], cj->progeny[0], -1, 0); - if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[4], cj->progeny[1], -1, 0); - if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) - DOSUB2(r, ci->progeny[4], cj->progeny[2], -1, 0); - if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) - DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[5], cj->progeny[0], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[5], cj->progeny[1], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) - DOSUB2(r, ci->progeny[5], cj->progeny[3], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[0], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[2] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[2], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[3], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[1], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[2], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[3] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[3], -1, 0); - break; + case 12: /* ( 0 , 0 , 1 ) */ + if (ci->progeny[1] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[1], cj->progeny[0], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR2(r, ci->progeny[1], cj->progeny[2], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR2(r, ci->progeny[1], cj->progeny[4], -1, 0); + if (ci->progeny[1] != NULL && cj->progeny[6] != NULL) + DOSUB_PAIR2(r, ci->progeny[1], cj->progeny[6], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[0], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[2], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[4], -1, 0); + if (ci->progeny[3] != NULL && cj->progeny[6] != NULL) + DOSUB_PAIR2(r, ci->progeny[3], cj->progeny[6], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[0], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[2], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[4], -1, 0); + if (ci->progeny[5] != NULL && cj->progeny[6] != NULL) + DOSUB_PAIR2(r, ci->progeny[5], cj->progeny[6], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[0], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[2], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[4], -1, 0); + if (ci->progeny[7] != NULL && cj->progeny[6] != NULL) + DOSUB_PAIR2(r, ci->progeny[7], cj->progeny[6], -1, 0); + break; + } - case 5: /* ( 1 , 0 , -1 ) */ - if (ci->progeny[4] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[4], cj->progeny[1], -1, 0); - if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) - DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[3] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[3], -1, 0); - break; + } - case 6: /* ( 1 , -1 , 1 ) */ - if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0); - break; + /* Otherwise, compute the pair directly. */ + else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) { - case 7: /* ( 1 , -1 , 0 ) */ - if (ci->progeny[4] != NULL && cj->progeny[2] != NULL) - DOSUB2(r, ci->progeny[4], cj->progeny[2], -1, 0); - if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) - DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[3] != NULL) - DOSUB2(r, ci->progeny[5], cj->progeny[3], -1, 0); - break; + /* Do any of the cells need to be sorted first? */ + if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1); - case 8: /* ( 1 , -1 , -1 ) */ - if (ci->progeny[4] != NULL && cj->progeny[3] != NULL) - DOSUB2(r, ci->progeny[4], cj->progeny[3], -1, 0); - break; + /* Compute the interactions. */ + DOPAIR2(r, ci, cj); + } - case 9: /* ( 0 , 1 , 1 ) */ - if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[3], cj->progeny[0], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) - DOSUB2(r, ci->progeny[3], cj->progeny[4], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[4], -1, 0); - break; + if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); +} - case 10: /* ( 0 , 1 , 0 ) */ - if (ci->progeny[2] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[2], cj->progeny[0], -1, 0); - if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[2], cj->progeny[1], -1, 0); - if (ci->progeny[2] != NULL && cj->progeny[4] != NULL) - DOSUB2(r, ci->progeny[2], cj->progeny[4], -1, 0); - if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) - DOSUB2(r, ci->progeny[2], cj->progeny[5], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[3], cj->progeny[0], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[3], cj->progeny[1], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) - DOSUB2(r, ci->progeny[3], cj->progeny[4], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[5] != NULL) - DOSUB2(r, ci->progeny[3], cj->progeny[5], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[0], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[4] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[4], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[5], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[1], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[4], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[5] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[5], -1, 0); - break; +/** + * @brief Compute grouped sub-cell interactions for self tasks (symmetric case) + * + * @param r The #runner. + * @param ci The first #cell. + * @param gettimer Do we have a timer ? + */ +void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer) { - case 11: /* ( 0 , 1 , -1 ) */ - if (ci->progeny[2] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[2], cj->progeny[1], -1, 0); - if (ci->progeny[2] != NULL && cj->progeny[5] != NULL) - DOSUB2(r, ci->progeny[2], cj->progeny[5], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[1] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[1], -1, 0); - if (ci->progeny[6] != NULL && cj->progeny[5] != NULL) - DOSUB2(r, ci->progeny[6], cj->progeny[5], -1, 0); - break; + const int ti_current = r->e->ti_current; - case 12: /* ( 0 , 0 , 1 ) */ - if (ci->progeny[1] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[1], cj->progeny[0], -1, 0); - if (ci->progeny[1] != NULL && cj->progeny[2] != NULL) - DOSUB2(r, ci->progeny[1], cj->progeny[2], -1, 0); - if (ci->progeny[1] != NULL && cj->progeny[4] != NULL) - DOSUB2(r, ci->progeny[1], cj->progeny[4], -1, 0); - if (ci->progeny[1] != NULL && cj->progeny[6] != NULL) - DOSUB2(r, ci->progeny[1], cj->progeny[6], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[3], cj->progeny[0], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[2] != NULL) - DOSUB2(r, ci->progeny[3], cj->progeny[2], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[4] != NULL) - DOSUB2(r, ci->progeny[3], cj->progeny[4], -1, 0); - if (ci->progeny[3] != NULL && cj->progeny[6] != NULL) - DOSUB2(r, ci->progeny[3], cj->progeny[6], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[5], cj->progeny[0], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[2] != NULL) - DOSUB2(r, ci->progeny[5], cj->progeny[2], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[4] != NULL) - DOSUB2(r, ci->progeny[5], cj->progeny[4], -1, 0); - if (ci->progeny[5] != NULL && cj->progeny[6] != NULL) - DOSUB2(r, ci->progeny[5], cj->progeny[6], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[0] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[0], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[2] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[2], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[4] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[4], -1, 0); - if (ci->progeny[7] != NULL && cj->progeny[6] != NULL) - DOSUB2(r, ci->progeny[7], cj->progeny[6], -1, 0); - break; - } + TIMER_TIC - } + /* Should we even bother? */ + if (ci->ti_end_min > ti_current) return; - /* Otherwise, compute the pair directly. */ - else if (ci->ti_end_min <= ti_current || cj->ti_end_min <= ti_current) { + /* Recurse? */ + if (ci->split) { - /* Do any of the cells need to be sorted first? */ - if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1); + /* Loop over all progeny. */ + for (int k = 0; k < 8; k++) + if (ci->progeny[k] != NULL) { + DOSUB_SELF2(r, ci->progeny[k], 0); + for (int j = k + 1; j < 8; j++) + if (ci->progeny[j] != NULL) + DOSUB_PAIR2(r, ci->progeny[k], ci->progeny[j], -1, 0); + } - /* Compute the interactions. */ - DOPAIR2(r, ci, cj); - } + } - } /* otherwise, pair interaction. */ + /* Otherwise, compute self-interaction. */ + else + DOSELF2(r, ci); - if (gettimer) TIMER_TOC(TIMER_DOSUB); + if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF); } void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, @@ -2856,5 +2882,5 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, } /* otherwise, pair interaction. */ - if (gettimer) TIMER_TOC(TIMER_DOSUB); + if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); } diff --git a/src/scheduler.c b/src/scheduler.c index d58957f521f3b910e5360594aa1f0210c1dc4532..496df93adf5a656460f7b39904a3cc58ac3e5caa 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -173,7 +173,7 @@ void scheduler_splittasks(struct scheduler *s) { if (scheduler_dosub && ci->count < space_subsize / ci->count) { /* convert to a self-subtask. */ - t->type = task_type_sub; + t->type = task_type_sub_self; } @@ -235,7 +235,7 @@ void scheduler_splittasks(struct scheduler *s) { sid != 0 && sid != 2 && sid != 6 && sid != 8) { /* Make this task a sub task. */ - t->type = task_type_sub; + t->type = task_type_sub_pair; } @@ -515,132 +515,6 @@ void scheduler_splittasks(struct scheduler *s) { } /* pair interaction? */ - /* Gravity interaction? */ - else if (t->type == task_type_grav_mm) { - - /* Get a handle on the cells involved. */ - struct cell *ci = t->ci; - struct cell *cj = t->cj; - - /* Self-interaction? */ - if (cj == NULL) { - - /* Ignore this task if the cell has no gparts. */ - if (ci->gcount == 0) t->type = task_type_none; - - /* If the cell is split, recurse. */ - else if (ci->split) { - - /* Make a single sub-task? */ - if (scheduler_dosub && ci->gcount < space_subsize / ci->gcount) { - - t->type = task_type_sub; - t->subtype = task_subtype_grav; - - } - - /* Otherwise, just split the task. */ - else { - - /* Split this task into tasks on its progeny. */ - t->type = task_type_none; - for (int j = 0; j < 8; j++) - if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { - if (t->type == task_type_none) { - t->type = task_type_grav_mm; - t->ci = ci->progeny[j]; - t->cj = NULL; - } else - t = scheduler_addtask(s, task_type_grav_mm, task_subtype_none, - 0, 0, ci->progeny[j], NULL, 0); - for (int k = j + 1; k < 8; k++) - if (ci->progeny[k] != NULL && ci->progeny[k]->gcount > 0) { - if (t->type == task_type_none) { - t->type = task_type_grav_mm; - t->ci = ci->progeny[j]; - t->cj = ci->progeny[k]; - } else - t = scheduler_addtask(s, task_type_grav_mm, - task_subtype_none, 0, 0, - ci->progeny[j], ci->progeny[k], 0); - } - } - redo = (t->type != task_type_none); - } - - } - - /* Otherwise, just make a pp task out of it. */ - else - t->type = task_type_grav_pp; - - } - - /* Nope, pair. */ - else { - - /* Make a sub-task? */ - if (scheduler_dosub && ci->gcount < space_subsize / cj->gcount) { - - t->type = task_type_sub; - t->subtype = task_subtype_grav; - - } - - /* Otherwise, split the task. */ - else { - - /* Get the opening angle theta. */ - float dx[3], theta; - for (int k = 0; k < 3; k++) { - dx[k] = fabs(ci->loc[k] - cj->loc[k]); - if (s->space->periodic && dx[k] > 0.5 * s->space->dim[k]) - dx[k] = -dx[k] + s->space->dim[k]; - if (dx[k] > 0.0f) dx[k] -= ci->h[k]; - } - theta = - (dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) / - (ci->h[0] * ci->h[0] + ci->h[1] * ci->h[1] + ci->h[2] * ci->h[2]); - - /* Ignore this task if the cell has no gparts. */ - if (ci->gcount == 0 || cj->gcount == 0) t->type = task_type_none; - - /* Split the interaction? */ - else if (theta < const_theta_max * const_theta_max) { - - /* Are both ci and cj split? */ - if (ci->split && cj->split) { - - /* Split this task into tasks on its progeny. */ - t->type = task_type_none; - for (int j = 0; j < 8; j++) - if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { - for (int k = 0; k < 8; k++) - if (cj->progeny[k] != NULL && cj->progeny[k]->gcount > 0) { - if (t->type == task_type_none) { - t->type = task_type_grav_mm; - t->ci = ci->progeny[j]; - t->cj = cj->progeny[k]; - } else - t = scheduler_addtask( - s, task_type_grav_mm, task_subtype_none, 0, 0, - ci->progeny[j], cj->progeny[k], 0); - } - } - redo = (t->type != task_type_none); - - } - - /* Otherwise, make a pp task out of it. */ - else - t->type = task_type_grav_pp; - } - } - - } /* gravity pair interaction? */ - - } /* gravity interaction? */ - } /* loop over all tasks. */ } @@ -899,23 +773,23 @@ void scheduler_reweight(struct scheduler *s) { t->weight += 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags]; break; - case task_type_sub: - if (t->cj != NULL) { - if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) { - if (t->flags < 0) - t->weight += 3 * wscale * t->ci->count * t->cj->count; - else - t->weight += 3 * wscale * t->ci->count * t->cj->count * - sid_scale[t->flags]; - } else { - if (t->flags < 0) - t->weight += 2 * wscale * t->ci->count * t->cj->count; - else - t->weight += 2 * wscale * t->ci->count * t->cj->count * - sid_scale[t->flags]; - } - } else - t->weight += 1 * wscale * t->ci->count * t->ci->count; + case task_type_sub_pair: + if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) { + if (t->flags < 0) + t->weight += 3 * wscale * t->ci->count * t->cj->count; + else + t->weight += 3 * wscale * t->ci->count * t->cj->count * + sid_scale[t->flags]; + } else { + if (t->flags < 0) + t->weight += 2 * wscale * t->ci->count * t->cj->count; + else + t->weight += 2 * wscale * t->ci->count * t->cj->count * + sid_scale[t->flags]; + } + break; + case task_type_sub_self: + t->weight += 1 * wscale * t->ci->count * t->ci->count; break; case task_type_ghost: if (t->ci == t->ci->super) t->weight += wscale * t->ci->count; @@ -1082,6 +956,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { any pre-processing needed. */ switch (t->type) { case task_type_self: + case task_type_sub_self: case task_type_sort: case task_type_ghost: case task_type_kick: @@ -1090,11 +965,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { qid = t->ci->super->owner; break; case task_type_pair: - case task_type_sub: + case task_type_sub_pair: qid = t->ci->super->owner; - if (t->cj != NULL && - (qid < 0 || - s->queues[qid].count > s->queues[t->cj->super->owner].count)) + if (qid < 0 || + s->queues[qid].count > s->queues[t->cj->super->owner].count) qid = t->cj->super->owner; break; case task_type_recv: diff --git a/src/task.c b/src/task.c index 1aad53f103c0f1ca600c7b093575923284bce3b5..77c5a944c8ca23e870624770c1e0cde4bf6195be 100644 --- a/src/task.c +++ b/src/task.c @@ -47,11 +47,11 @@ /* Task type names. */ const char *taskID_names[task_type_count] = { - "none", "sort", "self", "pair", "sub", - "init", "ghost", "drift", "kick", "kick_fixdt", - "send", "recv", "grav_pp", "grav_mm", "grav_up", - "grav_down", "grav_external", "part_sort", "gpart_sort", "split_cell", - "rewait"}; + "none", "sort", "self", "pair", "sub_self", + "sub_pair", "init", "ghost", "drift", "kick", + "kick_fixdt", "send", "recv", "grav_pp", "grav_mm", + "grav_up", "grav_down", "grav_external", "part_sort", "gpart_sort", + "split_cell", "rewait"}; const char *subtaskID_names[task_type_count] = {"none", "density", "force", "grav"}; @@ -117,13 +117,14 @@ void task_unlock(struct task *t) { /* Act based on task type. */ switch (t->type) { case task_type_self: + case task_type_sub_self: case task_type_sort: cell_unlocktree(t->ci); break; case task_type_pair: - case task_type_sub: + case task_type_sub_pair: cell_unlocktree(t->ci); - if (t->cj != NULL) cell_unlocktree(t->cj); + cell_unlocktree(t->cj); break; case task_type_grav_pp: case task_type_grav_mm: @@ -144,55 +145,68 @@ void task_unlock(struct task *t) { int task_lock(struct task *t) { - int type = t->type; + const int type = t->type; + const int subtype = t->subtype; struct cell *ci = t->ci, *cj = t->cj; +#ifdef WITH_MPI + int res = 0, err = 0; + MPI_Status stat; +#endif - /* Communication task? */ - if (type == task_type_recv || type == task_type_send) { + switch (type) { + /* Communication task? */ + case task_type_recv: + case task_type_send: #ifdef WITH_MPI - /* Check the status of the MPI request. */ - int res = 0, err = 0; - MPI_Status stat; - if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) { - char buff[MPI_MAX_ERROR_STRING]; - int len; - MPI_Error_string(err, buff, &len); - error("Failed to test request on send/recv task (tag=%i, %s).", t->flags, - buff); - } - return res; + /* Check the status of the MPI request. */ + if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) { + char buff[MPI_MAX_ERROR_STRING]; + int len; + MPI_Error_string(err, buff, &len); + error("Failed to test request on send/recv task (tag=%i, %s).", + t->flags, buff); + } + return res; #else - error("SWIFT was not compiled with MPI support."); + error("SWIFT was not compiled with MPI support."); #endif + break; - } + case task_type_sort: + if (cell_locktree(ci) != 0) return 0; + break; - /* Unary lock? */ - else if (type == task_type_self || type == task_type_sort || - (type == task_type_sub && cj == NULL)) { - if (cell_locktree(ci) != 0) return 0; - } + case task_type_self: + case task_type_sub_self: + if (subtype == task_subtype_grav) { + if (cell_glocktree(ci) != 0) return 0; + } else { + if (cell_locktree(ci) != 0) return 0; + } + break; - /* Otherwise, binary lock. */ - else if (type == task_type_pair || (type == task_type_sub && cj != NULL)) { - if (ci->hold || cj->hold) return 0; - if (cell_locktree(ci) != 0) return 0; - if (cell_locktree(cj) != 0) { - cell_unlocktree(ci); - return 0; - } - } + case task_type_pair: + case task_type_sub_pair: + if (subtype == task_subtype_grav) { + if (ci->ghold || cj->ghold) return 0; + if (cell_glocktree(ci) != 0) return 0; + if (cell_glocktree(cj) != 0) { + cell_gunlocktree(ci); + return 0; + } + } else { + if (ci->hold || cj->hold) return 0; + if (cell_locktree(ci) != 0) return 0; + if (cell_locktree(cj) != 0) { + cell_unlocktree(ci); + return 0; + } + } + break; - /* Gravity tasks? */ - else if (type == task_type_grav_mm || type == task_type_grav_pp || - type == task_type_grav_down) { - if (ci->ghold || (cj != NULL && cj->ghold)) return 0; - if (cell_glocktree(ci) != 0) return 0; - if (cj != NULL && cell_glocktree(cj) != 0) { - cell_gunlocktree(ci); - return 0; - } + default: + break; } /* If we made it this far, we've got a lock. */ @@ -270,48 +284,6 @@ void task_rmunlock_blind(struct task *ta, struct task *tb) { lock_unlock_blind(&ta->lock); } -/** - * @brief Add an unlock_task to the given task. - * - * @param ta The unlocking #task. - * @param tb The #task that will be unlocked. - */ - -void task_addunlock(struct task *ta, struct task *tb) { - - error("Use sched_addunlock instead."); - - /* Add the lock atomically. */ - ta->unlock_tasks[atomic_inc(&ta->nr_unlock_tasks)] = tb; - - /* Check a posteriori if we did not overshoot. */ - if (ta->nr_unlock_tasks > task_maxunlock) - error("Too many unlock_tasks in task."); -} - -void task_addunlock_old(struct task *ta, struct task *tb) { - - int k; - - lock_lock(&ta->lock); - - /* Check if ta already unlocks tb. */ - for (k = 0; k < ta->nr_unlock_tasks; k++) - if (ta->unlock_tasks[k] == tb) { - error("Duplicate unlock."); - lock_unlock_blind(&ta->lock); - return; - } - - if (ta->nr_unlock_tasks == task_maxunlock) - error("Too many unlock_tasks in task."); - - ta->unlock_tasks[ta->nr_unlock_tasks] = tb; - ta->nr_unlock_tasks += 1; - - lock_unlock_blind(&ta->lock); -} - /** * @brief Prints the list of tasks contained in a given mask * diff --git a/src/task.h b/src/task.h index b110c6621741c22770f7e84b1f145432bdfa34d5..51a44b3127694a60da772ca4e728073b15ac1147 100644 --- a/src/task.h +++ b/src/task.h @@ -37,7 +37,8 @@ enum task_types { task_type_sort, task_type_self, task_type_pair, - task_type_sub, + task_type_sub_self, + task_type_sub_pair, task_type_init, task_type_ghost, task_type_drift, @@ -97,7 +98,6 @@ struct task { void task_rmunlock(struct task *ta, struct task *tb); void task_rmunlock_blind(struct task *ta, struct task *tb); void task_cleanunlock(struct task *t, int type); -void task_addunlock(struct task *ta, struct task *tb); void task_unlock(struct task *t); float task_overlap(const struct task *ta, const struct task *tb); int task_lock(struct task *t); diff --git a/src/timers.h b/src/timers.h index 342f46a9703501bfd4a05d25342b4dcff4bef052..c48961b39737f23eb936d7283f76651d33892991 100644 --- a/src/timers.h +++ b/src/timers.h @@ -42,9 +42,12 @@ enum { timer_dopair_force, timer_dopair_grav, timer_dograv_external, - timer_dosub_density, - timer_dosub_force, - timer_dosub_grav, + timer_dosub_self_density, + timer_dosub_self_force, + timer_dosub_self_grav, + timer_dosub_pair_density, + timer_dosub_pair_force, + timer_dosub_pair_grav, timer_dopair_subset, timer_do_ghost, timer_dorecv_cell,