diff --git a/src/engine.c b/src/engine.c index a435178acb833a06db7f8432dc38c990f247833b..bfee0f2e5a6e475a4726d65c38c0e1821210ba1a 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1165,48 +1165,54 @@ void engine_make_hydroloop_tasks(struct engine *e) { * * @param e The #engine. */ -void engine_count_and_link_tasks_mapper(void *map_data, void *extra_data) { +void engine_count_and_link_tasks_mapper(void *map_data, int num_elements, + void *extra_data) { struct engine *e = (struct engine *)extra_data; - struct task *t = (struct task *)map_data; + struct task *tasks = (struct task *)map_data; struct scheduler *sched = &e->sched; - if (t->skip) return; + for (int ind = 0; ind < num_elements; ind++) { - /* Link sort tasks together. */ - if (t->type == task_type_sort && t->ci->split) - for (int j = 0; j < 8; j++) - if (t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts != NULL) { - t->ci->progeny[j]->sorts->skip = 0; - scheduler_addunlock(sched, t->ci->progeny[j]->sorts, t); - } + struct task *t = &tasks[ind]; - /* Link density tasks to cells. */ - if (t->type == task_type_self) { - atomic_inc(&t->ci->nr_tasks); - if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); - } - } else if (t->type == task_type_pair) { - atomic_inc(&t->ci->nr_tasks); - atomic_inc(&t->cj->nr_tasks); - if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); - engine_addlink(e, &t->cj->density, t); - atomic_inc(&t->cj->nr_density); - } - } else if (t->type == task_type_sub) { - atomic_inc(&t->ci->nr_tasks); - if (t->cj != NULL) atomic_inc(&t->cj->nr_tasks); - if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); - if (t->cj != NULL) { + if (t->skip) continue; + + /* Link sort tasks together. */ + if (t->type == task_type_sort && t->ci->split) + for (int j = 0; j < 8; j++) + if (t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts != NULL) { + t->ci->progeny[j]->sorts->skip = 0; + scheduler_addunlock(sched, t->ci->progeny[j]->sorts, t); + } + + /* Link density tasks to cells. */ + if (t->type == task_type_self) { + atomic_inc(&t->ci->nr_tasks); + if (t->subtype == task_subtype_density) { + engine_addlink(e, &t->ci->density, t); + atomic_inc(&t->ci->nr_density); + } + } else if (t->type == task_type_pair) { + atomic_inc(&t->ci->nr_tasks); + atomic_inc(&t->cj->nr_tasks); + if (t->subtype == task_subtype_density) { + engine_addlink(e, &t->ci->density, t); + atomic_inc(&t->ci->nr_density); engine_addlink(e, &t->cj->density, t); atomic_inc(&t->cj->nr_density); } + } else if (t->type == task_type_sub) { + atomic_inc(&t->ci->nr_tasks); + if (t->cj != NULL) atomic_inc(&t->cj->nr_tasks); + if (t->subtype == task_subtype_density) { + engine_addlink(e, &t->ci->density, t); + atomic_inc(&t->ci->nr_density); + if (t->cj != NULL) { + engine_addlink(e, &t->cj->density, t); + atomic_inc(&t->cj->nr_density); + } + } } } } @@ -1246,89 +1252,94 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, * * @param e The #engine. */ -void engine_make_extra_hydroloop_tasks_mapper(void *map_data, +void engine_make_extra_hydroloop_tasks_mapper(void *map_data, int num_elements, void *extra_data) { struct engine *e = (struct engine *)extra_data; struct scheduler *sched = &e->sched; const int nodeID = e->nodeID; - struct task *t = (struct task *)map_data; + struct task *tasks = (struct task *)map_data; - /* Skip? 
*/ - if (t->skip) return; + for (int ind = 0; ind < num_elements; ind++) { + struct task *t = &tasks[ind]; - /* Self-interaction? */ - if (t->type == task_type_self && t->subtype == task_subtype_density) { + /* Skip? */ + if (t->skip) continue; - /* Start by constructing the task for the second hydro loop */ - struct task *t2 = scheduler_addtask( - sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL, 0); + /* Self-interaction? */ + if (t->type == task_type_self && t->subtype == task_subtype_density) { - /* Add the link between the new loop and the cell */ - engine_addlink(e, &t->ci->force, t2); - atomic_inc(&t->ci->nr_force); + /* Start by constructing the task for the second hydro loop */ + struct task *t2 = scheduler_addtask( + sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL, 0); - /* Now, build all the dependencies for the hydro */ - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); - } + /* Add the link between the new loop and the cell */ + engine_addlink(e, &t->ci->force, t2); + atomic_inc(&t->ci->nr_force); - /* Otherwise, pair interaction? */ - else if (t->type == task_type_pair && t->subtype == task_subtype_density) { - - /* Start by constructing the task for the second hydro loop */ - struct task *t2 = scheduler_addtask( - sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj, 0); - - /* Add the link between the new loop and both cells */ - engine_addlink(e, &t->ci->force, t2); - atomic_inc(&t->ci->nr_force); - engine_addlink(e, &t->cj->force, t2); - atomic_inc(&t->cj->nr_force); - - /* Now, build all the dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super-cells */ - if (t->ci->nodeID == nodeID) { + /* Now, build all the dependencies for the hydro */ engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); } - if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->cj); - } - } - /* Otherwise, sub interaction? */ - else if (t->type == task_type_sub && t->subtype == task_subtype_density) { + /* Otherwise, pair interaction? 
*/ + else if (t->type == task_type_pair && t->subtype == task_subtype_density) { - /* Start by constructing the task for the second hydro loop */ - struct task *t2 = scheduler_addtask( - sched, task_type_sub, task_subtype_force, t->flags, 0, t->ci, t->cj, 0); + /* Start by constructing the task for the second hydro loop */ + struct task *t2 = scheduler_addtask( + sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj, 0); - /* Add the link between the new loop and both cells */ - engine_addlink(e, &t->ci->force, t2); - atomic_inc(&t->ci->nr_force); - if (t->cj != NULL) { + /* Add the link between the new loop and both cells */ + engine_addlink(e, &t->ci->force, t2); + atomic_inc(&t->ci->nr_force); engine_addlink(e, &t->cj->force, t2); atomic_inc(&t->cj->nr_force); - } - /* Now, build all the dependencies for the hydro for the cells */ - /* that are local and are not descendant of the same super-cells */ - if (t->ci->nodeID == nodeID) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not descendant of the same super-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); + } + if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { + engine_make_hydro_loops_dependencies(sched, t, t2, t->cj); + } } - if (t->cj != NULL && t->cj->nodeID == nodeID && - t->ci->super != t->cj->super) { - engine_make_hydro_loops_dependencies(sched, t, t2, t->cj); + + /* Otherwise, sub interaction? */ + else if (t->type == task_type_sub && t->subtype == task_subtype_density) { + + /* Start by constructing the task for the second hydro loop */ + struct task *t2 = + scheduler_addtask(sched, task_type_sub, task_subtype_force, t->flags, + 0, t->ci, t->cj, 0); + + /* Add the link between the new loop and both cells */ + engine_addlink(e, &t->ci->force, t2); + atomic_inc(&t->ci->nr_force); + if (t->cj != NULL) { + engine_addlink(e, &t->cj->force, t2); + atomic_inc(&t->cj->nr_force); + } + + /* Now, build all the dependencies for the hydro for the cells */ + /* that are local and are not descendant of the same super-cells */ + if (t->ci->nodeID == nodeID) { + engine_make_hydro_loops_dependencies(sched, t, t2, t->ci); + } + if (t->cj != NULL && t->cj->nodeID == nodeID && + t->ci->super != t->cj->super) { + engine_make_hydro_loops_dependencies(sched, t, t2, t->cj); + } } - } - /* /\* Kick tasks should rely on the grav_down tasks of their cell. *\/ */ - /* else if (t->type == task_type_kick && t->ci->grav_down != NULL) */ - /* scheduler_addunlock(sched, t->ci->grav_down, t); */ + /* /\* Kick tasks should rely on the grav_down tasks of their cell. *\/ */ + /* else if (t->type == task_type_kick && t->ci->grav_down != NULL) */ + /* scheduler_addunlock(sched, t->ci->grav_down, t); */ - /* External gravity tasks should depend on init and unlock the kick */ - else if (t->type == task_type_grav_external) { - scheduler_addunlock(sched, t->ci->init, t); - scheduler_addunlock(sched, t, t->ci->kick); + /* External gravity tasks should depend on init and unlock the kick */ + else if (t->type == task_type_grav_external) { + scheduler_addunlock(sched, t->ci->init, t); + scheduler_addunlock(sched, t, t->ci->kick); + } } } @@ -1445,7 +1456,7 @@ void engine_maketasks(struct engine *e) { store the density tasks in each cell, and make each sort depend on the sorts of its progeny. 
*/ threadpool_map(&e->threadpool, engine_count_and_link_tasks_mapper, - sched->tasks, sched->nr_tasks, sizeof(struct task), e); + sched->tasks, sched->nr_tasks, sizeof(struct task), 1000, e); /* Append hierarchical tasks to each cells */ for (int k = 0; k < nr_cells; k++) @@ -1455,7 +1466,7 @@ void engine_maketasks(struct engine *e) { Each force task depends on the cell ghosts and unlocks the kick task of its super-cell. */ threadpool_map(&e->threadpool, engine_make_extra_hydroloop_tasks_mapper, - sched->tasks, sched->nr_tasks, sizeof(struct task), e); + sched->tasks, sched->nr_tasks, sizeof(struct task), 1000, e); /* Add the communication tasks if MPI is being used. */ if (e->policy & engine_policy_mpi) { @@ -1507,140 +1518,67 @@ void engine_maketasks(struct engine *e) { * @return 1 if the space has to be rebuilt, 0 otherwise. */ -void engine_marktasks_fixdt_mapper(void *map_data, void *extra_data) { +void engine_marktasks_fixdt_mapper(void *map_data, int num_elements, + void *extra_data) { /* Unpack the arguments. */ - struct task *t = (struct task *)map_data; + struct task *tasks = (struct task *)map_data; int *rebuild_space = (int *)extra_data; - /* Pair? */ - if (t->type == task_type_pair || - (t->type == task_type_sub && t->cj != NULL)) { + for (int ind = 0; ind < num_elements; ind++) { + struct task *t = &tasks[ind]; - /* Local pointers. */ - const struct cell *ci = t->ci; - const struct cell *cj = t->cj; + /* Pair? */ + if (t->type == task_type_pair || + (t->type == task_type_sub && t->cj != NULL)) { - /* Too much particle movement? */ - if (t->tight && - (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || - ci->dx_max > space_maxreldx * ci->h_max || - cj->dx_max > space_maxreldx * cj->h_max)) - *rebuild_space = 1; + /* Local pointers. */ + const struct cell *ci = t->ci; + const struct cell *cj = t->cj; - } + /* Too much particle movement? */ + if (t->tight && + (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || + ci->dx_max > space_maxreldx * ci->h_max || + cj->dx_max > space_maxreldx * cj->h_max)) + *rebuild_space = 1; - /* Sort? */ - else if (t->type == task_type_sort) { + } - /* If all the sorts have been done, make this task implicit. */ - if (!(t->flags & (t->flags ^ t->ci->sorted))) t->implicit = 1; + /* Sort? */ + else if (t->type == task_type_sort) { + + /* If all the sorts have been done, make this task implicit. */ + if (!(t->flags & (t->flags ^ t->ci->sorted))) t->implicit = 1; + } } } -void engine_marktasks_sorts_mapper(void *map_data, void *extra_data) { +void engine_marktasks_sorts_mapper(void *map_data, int num_elements, + void *extra_data) { /* Unpack the arguments. */ - struct task *t = (struct task *)map_data; - if (t->type == task_type_sort) { - t->flags = 0; - t->skip = 1; + struct task *tasks = (struct task *)map_data; + for (int ind = 0; ind < num_elements; ind++) { + struct task *t = &tasks[ind]; + if (t->type == task_type_sort) { + t->flags = 0; + t->skip = 1; + } } } -void engine_marktasks_mapper(void *map_data, void *extra_data) { +void engine_marktasks_mapper(void *map_data, int num_elements, + void *extra_data) { /* Unpack the arguments. */ - struct task *t = (struct task *)map_data; + struct task *tasks = (struct task *)map_data; const int ti_end = ((int *)extra_data)[0]; int *rebuild_space = &((int *)extra_data)[1]; - /* Single-cell task? */ - if (t->type == task_type_self || t->type == task_type_ghost || - (t->type == task_type_sub && t->cj == NULL)) { - - /* Set this task's skip. 
*/ - t->skip = (t->ci->ti_end_min > ti_end); - } - - /* Pair? */ - else if (t->type == task_type_pair || - (t->type == task_type_sub && t->cj != NULL)) { - - /* Local pointers. */ - const struct cell *ci = t->ci; - const struct cell *cj = t->cj; - - /* Set this task's skip. */ - t->skip = (ci->ti_end_min > ti_end && cj->ti_end_min > ti_end); - - /* Too much particle movement? */ - if (t->tight && - (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || - ci->dx_max > space_maxreldx * ci->h_max || - cj->dx_max > space_maxreldx * cj->h_max)) - *rebuild_space = 1; - - /* Set the sort flags. */ - if (!t->skip && t->type == task_type_pair) { - if (!(ci->sorted & (1 << t->flags))) { - atomic_or(&ci->sorts->flags, (1 << t->flags)); - ci->sorts->skip = 0; - } - if (!(cj->sorted & (1 << t->flags))) { - atomic_or(&cj->sorts->flags, (1 << t->flags)); - cj->sorts->skip = 0; - } - } - - } - - /* Kick? */ - else if (t->type == task_type_kick) { - t->skip = (t->ci->ti_end_min > ti_end); - t->ci->updated = 0; - t->ci->g_updated = 0; - } - - /* Drift? */ - else if (t->type == task_type_drift) - t->skip = 0; - - /* Init? */ - else if (t->type == task_type_init) { - /* Set this task's skip. */ - t->skip = (t->ci->ti_end_min > ti_end); - } - - /* None? */ - else if (t->type == task_type_none) - t->skip = 1; -} - -int engine_marktasks_serial(struct engine *e) { - - struct scheduler *s = &e->sched; - const int ti_end = e->ti_current; - const int nr_tasks = s->nr_tasks; - const int *const ind = s->tasks_ind; - struct task *tasks = s->tasks; - - /* Run through the tasks and mark as skip or not. */ - for (int k = 0; k < nr_tasks; k++) { - - /* Get a handle on the kth task. */ - struct task *t = &tasks[ind[k]]; - - /* Sort-task? Note that due to the task ranking, the sorts - will all come before the pairs. */ - if (t->type == task_type_sort) { - - /* Re-set the flags. */ - t->flags = 0; - t->skip = 1; - - } + for (int ind = 0; ind < num_elements; ind++) { + struct task *t = &tasks[ind]; /* Single-cell task? */ - else if (t->type == task_type_self || t->type == task_type_ghost || - (t->type == task_type_sub && t->cj == NULL)) { + if (t->type == task_type_self || t->type == task_type_ghost || + (t->type == task_type_sub && t->cj == NULL)) { /* Set this task's skip. */ t->skip = (t->ci->ti_end_min > ti_end); @@ -1662,16 +1600,16 @@ int engine_marktasks_serial(struct engine *e) { (fmaxf(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || ci->dx_max > space_maxreldx * ci->h_max || cj->dx_max > space_maxreldx * cj->h_max)) - return 1; + *rebuild_space = 1; /* Set the sort flags. */ if (!t->skip && t->type == task_type_pair) { if (!(ci->sorted & (1 << t->flags))) { - ci->sorts->flags |= (1 << t->flags); + atomic_or(&ci->sorts->flags, (1 << t->flags)); ci->sorts->skip = 0; } if (!(cj->sorted & (1 << t->flags))) { - cj->sorts->flags |= (1 << t->flags); + atomic_or(&cj->sorts->flags, (1 << t->flags)); cj->sorts->skip = 0; } } @@ -1699,9 +1637,6 @@ int engine_marktasks_serial(struct engine *e) { else if (t->type == task_type_none) t->skip = 1; } - - /* All is well... */ - return 0; } int engine_marktasks(struct engine *e) { @@ -1715,20 +1650,19 @@ int engine_marktasks(struct engine *e) { /* Run through the tasks and mark as skip or not. 
*/ threadpool_map(&e->threadpool, engine_marktasks_fixdt_mapper, s->tasks, - s->nr_tasks, sizeof(struct task), &rebuild_space); + s->nr_tasks, sizeof(struct task), 1000, &rebuild_space); return rebuild_space; /* Multiple-timestep case */ } else { /* Run through the tasks and mark as skip or not. */ - /* int extra_data[2] = {e->ti_current, rebuild_space}; + int extra_data[2] = {e->ti_current, rebuild_space}; threadpool_map(&e->threadpool, engine_marktasks_sorts_mapper, s->tasks, - s->nr_tasks, sizeof(struct task), NULL); + s->nr_tasks, sizeof(struct task), 1000, NULL); threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks, - s->nr_tasks, sizeof(struct task), extra_data); - rebuild_space = extra_data[1]; */ - rebuild_space = engine_marktasks_serial(e); + s->nr_tasks, sizeof(struct task), 1000, extra_data); + rebuild_space = extra_data[1]; } if (e->verbose) diff --git a/src/scheduler.c b/src/scheduler.c index fde7cd0e8e0816b7be899205ec5f160c3e8fb295..09e9ad6a6d559f09325c56fadda15955c10d87c3 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -103,7 +103,8 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta, * @param s The #scheduler we are working in. */ -void scheduler_splittasks_mapper(void *map_data, void *extra_data) { +void scheduler_splittasks_mapper(void *map_data, int num_elements, + void *extra_data) { /* Static constants. */ const static int pts[7][8] = {{-1, 12, 10, 9, 4, 3, 1, 0}, @@ -119,517 +120,521 @@ void scheduler_splittasks_mapper(void *map_data, void *extra_data) { /* Extract the parameters. */ struct scheduler *s = (struct scheduler *)extra_data; - struct task *t = (struct task *)map_data; - - /* Iterate on this task until we're done with it. */ - int redo = 1; - while (redo) { - - /* Reset the redo flag. */ - redo = 0; - - /* Non-splittable task? */ - if ((t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) || - ((t->type == task_type_kick) && t->ci->nodeID != s->nodeID) || - ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) || - ((t->type == task_type_init) && t->ci->nodeID != s->nodeID)) { - t->type = task_type_none; - t->skip = 1; - return; - } + struct task *tasks = (struct task *)map_data; + + for (int ind = 0; ind < num_elements; ind++) { + struct task *t = &tasks[ind]; - /* Self-interaction? */ - if (t->type == task_type_self) { + /* Iterate on this task until we're done with it. */ + int redo = 1; + while (redo) { - /* Get a handle on the cell involved. */ - struct cell *ci = t->ci; + /* Reset the redo flag. */ + redo = 0; - /* Foreign task? */ - if (ci->nodeID != s->nodeID) { + /* Non-splittable task? */ + if ((t->ci == NULL || (t->type == task_type_pair && t->cj == NULL)) || + ((t->type == task_type_kick) && t->ci->nodeID != s->nodeID) || + ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) || + ((t->type == task_type_init) && t->ci->nodeID != s->nodeID)) { + t->type = task_type_none; t->skip = 1; - return; + break; } - /* Is this cell even split? */ - if (ci->split) { + /* Self-interaction? */ + if (t->type == task_type_self) { - /* Make a sub? */ - if (scheduler_dosub && ci->count < space_subsize / ci->count) { + /* Get a handle on the cell involved. */ + struct cell *ci = t->ci; - /* convert to a self-subtask. */ - t->type = task_type_sub; - - /* Otherwise, make tasks explicitly. */ - } else { - - /* Take a step back (we're going to recycle the current task)... */ - redo = 1; - - /* Add the self tasks. 
*/ - int first_child = 0; - while (ci->progeny[first_child] == NULL) first_child++; - t->ci = ci->progeny[first_child]; - for (int k = first_child + 1; k < 8; k++) - if (ci->progeny[k] != NULL) - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_self, t->subtype, 0, 0, - ci->progeny[k], NULL, 0), - s); - - /* Make a task for each pair of progeny. */ - for (int j = 0; j < 8; j++) - if (ci->progeny[j] != NULL) - for (int k = j + 1; k < 8; k++) - if (ci->progeny[k] != NULL) - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, - pts[j][k], 0, ci->progeny[j], - ci->progeny[k], 0), - s); + /* Foreign task? */ + if (ci->nodeID != s->nodeID) { + t->skip = 1; + break; } - } - /* Pair interaction? */ - } else if (t->type == task_type_pair) { + /* Is this cell even split? */ + if (ci->split) { - /* Get a handle on the cells involved. */ - struct cell *ci = t->ci; - struct cell *cj = t->cj; - const double hi = ci->dmin; - const double hj = cj->dmin; + /* Make a sub? */ + if (scheduler_dosub && ci->count < space_subsize / ci->count) { - /* Foreign task? */ - if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) { - t->skip = 1; - return; - } + /* convert to a self-subtask. */ + t->type = task_type_sub; - /* Get the sort ID, use space_getsid and not t->flags - to make sure we get ci and cj swapped if needed. */ - double shift[3]; - int sid = space_getsid(s->space, &ci, &cj, shift); + /* Otherwise, make tasks explicitly. */ + } else { - /* Should this task be split-up? */ - if (ci->split && cj->split && - ci->h_max * kernel_gamma * space_stretch < hi / 2 && - cj->h_max * kernel_gamma * space_stretch < hj / 2) { + /* Take a step back (we're going to recycle the current task)... */ + redo = 1; - /* Replace by a single sub-task? */ - if (scheduler_dosub && - ci->count * sid_scale[sid] < space_subsize / cj->count && - sid != 0 && sid != 2 && sid != 6 && sid != 8) { + /* Add the self tasks. */ + int first_child = 0; + while (ci->progeny[first_child] == NULL) first_child++; + t->ci = ci->progeny[first_child]; + for (int k = first_child + 1; k < 8; k++) + if (ci->progeny[k] != NULL) + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_self, t->subtype, 0, 0, + ci->progeny[k], NULL, 0), + 1, s); + + /* Make a task for each pair of progeny. */ + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != NULL) + for (int k = j + 1; k < 8; k++) + if (ci->progeny[k] != NULL) + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, + pts[j][k], 0, ci->progeny[j], + ci->progeny[k], 0), + 1, s); + } + } - /* Make this task a sub task. */ - t->type = task_type_sub; + /* Pair interaction? */ + } else if (t->type == task_type_pair) { - /* Otherwise, split it. */ - } else { + /* Get a handle on the cells involved. */ + struct cell *ci = t->ci; + struct cell *cj = t->cj; + const double hi = ci->dmin; + const double hj = cj->dmin; - /* Take a step back (we're going to recycle the current task)... */ - redo = 1; - - /* For each different sorting type... 
*/ - switch (sid) { - - case 0: /* ( 1 , 1 , 1 ) */ - t->ci = ci->progeny[7]; - t->cj = cj->progeny[0]; - t->flags = 0; - break; - - case 1: /* ( 1 , 1 , 0 ) */ - t->ci = ci->progeny[6]; - t->cj = cj->progeny[0]; - t->flags = 1; - t->tight = 1; - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, - ci->progeny[7], cj->progeny[1], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[6], cj->progeny[1], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, - ci->progeny[7], cj->progeny[0], 1), - s); - break; - - case 2: /* ( 1 , 1 , -1 ) */ - t->ci = ci->progeny[6]; - t->cj = cj->progeny[1]; - t->flags = 2; - t->tight = 1; - break; - - case 3: /* ( 1 , 0 , 1 ) */ - t->ci = ci->progeny[5]; - t->cj = cj->progeny[0]; - t->flags = 3; - t->tight = 1; - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, - ci->progeny[7], cj->progeny[2], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[5], cj->progeny[2], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, - ci->progeny[7], cj->progeny[0], 1), - s); - break; - - case 4: /* ( 1 , 0 , 0 ) */ - t->ci = ci->progeny[4]; - t->cj = cj->progeny[0]; - t->flags = 4; - t->tight = 1; - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, - ci->progeny[5], cj->progeny[0], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, - ci->progeny[6], cj->progeny[0], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, - ci->progeny[7], cj->progeny[0], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, - ci->progeny[4], cj->progeny[1], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, - ci->progeny[5], cj->progeny[1], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, - ci->progeny[6], cj->progeny[1], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, - ci->progeny[7], cj->progeny[1], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, - ci->progeny[4], cj->progeny[2], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, - ci->progeny[5], cj->progeny[2], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, - ci->progeny[6], cj->progeny[2], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, - ci->progeny[7], cj->progeny[2], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[4], cj->progeny[3], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, - ci->progeny[5], cj->progeny[3], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, - ci->progeny[6], cj->progeny[3], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, - ci->progeny[7], cj->progeny[3], 1), - s); - break; - - case 5: /* ( 1 , 0 , -1 ) */ - t->ci = ci->progeny[4]; - t->cj = cj->progeny[1]; - t->flags = 5; - t->tight = 1; - scheduler_splittasks_mapper( - 
scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, - ci->progeny[6], cj->progeny[3], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, - ci->progeny[4], cj->progeny[3], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, - ci->progeny[6], cj->progeny[1], 1), - s); - break; - - case 6: /* ( 1 , -1 , 1 ) */ - t->ci = ci->progeny[5]; - t->cj = cj->progeny[2]; - t->flags = 6; - t->tight = 1; - break; - - case 7: /* ( 1 , -1 , 0 ) */ - t->ci = ci->progeny[4]; - t->cj = cj->progeny[3]; - t->flags = 6; - t->tight = 1; - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, - ci->progeny[5], cj->progeny[2], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, - ci->progeny[4], cj->progeny[2], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, - ci->progeny[5], cj->progeny[3], 1), - s); - break; - - case 8: /* ( 1 , -1 , -1 ) */ - t->ci = ci->progeny[4]; - t->cj = cj->progeny[3]; - t->flags = 8; - t->tight = 1; - break; - - case 9: /* ( 0 , 1 , 1 ) */ - t->ci = ci->progeny[3]; - t->cj = cj->progeny[0]; - t->flags = 9; - t->tight = 1; - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, - ci->progeny[7], cj->progeny[4], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[3], cj->progeny[4], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, - ci->progeny[7], cj->progeny[0], 1), - s); - break; - - case 10: /* ( 0 , 1 , 0 ) */ - t->ci = ci->progeny[2]; - t->cj = cj->progeny[0]; - t->flags = 10; - t->tight = 1; - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, - ci->progeny[3], cj->progeny[0], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, - ci->progeny[6], cj->progeny[0], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, - ci->progeny[7], cj->progeny[0], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, - ci->progeny[2], cj->progeny[1], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, - ci->progeny[3], cj->progeny[1], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, - ci->progeny[6], cj->progeny[1], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, - ci->progeny[7], cj->progeny[1], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, - ci->progeny[2], cj->progeny[4], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, - ci->progeny[3], cj->progeny[4], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, - ci->progeny[6], cj->progeny[4], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, - ci->progeny[7], cj->progeny[4], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[2], cj->progeny[5], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, - ci->progeny[3], cj->progeny[5], 1), - s); - scheduler_splittasks_mapper( - 
scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, - ci->progeny[6], cj->progeny[5], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, - ci->progeny[7], cj->progeny[5], 1), - s); - break; - - case 11: /* ( 0 , 1 , -1 ) */ - t->ci = ci->progeny[2]; - t->cj = cj->progeny[1]; - t->flags = 11; - t->tight = 1; - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, - ci->progeny[6], cj->progeny[5], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, - ci->progeny[2], cj->progeny[5], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, - ci->progeny[6], cj->progeny[1], 1), - s); - break; - - case 12: /* ( 0 , 0 , 1 ) */ - t->ci = ci->progeny[1]; - t->cj = cj->progeny[0]; - t->flags = 12; - t->tight = 1; - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, - ci->progeny[3], cj->progeny[0], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, - ci->progeny[5], cj->progeny[0], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, - ci->progeny[7], cj->progeny[0], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, - ci->progeny[1], cj->progeny[2], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, - ci->progeny[3], cj->progeny[2], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, - ci->progeny[5], cj->progeny[2], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, - ci->progeny[7], cj->progeny[2], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, - ci->progeny[1], cj->progeny[4], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, - ci->progeny[3], cj->progeny[4], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, - ci->progeny[5], cj->progeny[4], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, - ci->progeny[7], cj->progeny[4], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[1], cj->progeny[6], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, - ci->progeny[3], cj->progeny[6], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, - ci->progeny[5], cj->progeny[6], 1), - s); - scheduler_splittasks_mapper( - scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, - ci->progeny[7], cj->progeny[6], 1), - s); - break; - } /* switch(sid) */ + /* Foreign task? */ + if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) { + t->skip = 1; + break; } - /* Otherwise, break it up if it is too large? */ - } else if (scheduler_doforcesplit && ci->split && cj->split && - (ci->count > space_maxsize / cj->count)) { + /* Get the sort ID, use space_getsid and not t->flags + to make sure we get ci and cj swapped if needed. */ + double shift[3]; + int sid = space_getsid(s->space, &ci, &cj, shift); - // message( "force splitting pair with %i and %i parts." , ci->count , - // cj->count ); + /* Should this task be split-up? 
*/ + if (ci->split && cj->split && + ci->h_max * kernel_gamma * space_stretch < hi / 2 && + cj->h_max * kernel_gamma * space_stretch < hj / 2) { - /* Replace the current task. */ - t->type = task_type_none; + /* Replace by a single sub-task? */ + if (scheduler_dosub && + ci->count * sid_scale[sid] < space_subsize / cj->count && + sid != 0 && sid != 2 && sid != 6 && sid != 8) { + + /* Make this task a sub task. */ + t->type = task_type_sub; + + /* Otherwise, split it. */ + } else { + + /* Take a step back (we're going to recycle the current task)... */ + redo = 1; - for (int j = 0; j < 8; j++) - if (ci->progeny[j] != NULL) - for (int k = 0; k < 8; k++) - if (cj->progeny[k] != NULL) { + /* For each different sorting type... */ + switch (sid) { + + case 0: /* ( 1 , 1 , 1 ) */ + t->ci = ci->progeny[7]; + t->cj = cj->progeny[0]; + t->flags = 0; + break; + + case 1: /* ( 1 , 1 , 0 ) */ + t->ci = ci->progeny[6]; + t->cj = cj->progeny[0]; + t->flags = 1; + t->tight = 1; + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[7], cj->progeny[1], 1), + 1, s); scheduler_splittasks_mapper( scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[j], cj->progeny[k], 0), - s); - t->flags = space_getsid(s->space, &t->ci, &t->cj, shift); - } - - /* Otherwise, if not spilt, stitch-up the sorting. */ - } else { - - /* Create the sort for ci. */ - lock_lock(&ci->lock); - if (ci->sorts == NULL) - ci->sorts = - scheduler_addtask(s, task_type_sort, 0, 1 << sid, 0, ci, NULL, 0); - else - ci->sorts->flags |= (1 << sid); - lock_unlock_blind(&ci->lock); - scheduler_addunlock(s, ci->sorts, t); - - /* Create the sort for cj. */ - lock_lock(&cj->lock); - if (cj->sorts == NULL) - cj->sorts = - scheduler_addtask(s, task_type_sort, 0, 1 << sid, 0, cj, NULL, 0); - else - cj->sorts->flags |= (1 << sid); - lock_unlock_blind(&cj->lock); - scheduler_addunlock(s, cj->sorts, t); - } + ci->progeny[6], cj->progeny[1], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[7], cj->progeny[0], 1), + 1, s); + break; + + case 2: /* ( 1 , 1 , -1 ) */ + t->ci = ci->progeny[6]; + t->cj = cj->progeny[1]; + t->flags = 2; + t->tight = 1; + break; + + case 3: /* ( 1 , 0 , 1 ) */ + t->ci = ci->progeny[5]; + t->cj = cj->progeny[0]; + t->flags = 3; + t->tight = 1; + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[7], cj->progeny[2], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[5], cj->progeny[2], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[7], cj->progeny[0], 1), + 1, s); + break; + + case 4: /* ( 1 , 0 , 0 ) */ + t->ci = ci->progeny[4]; + t->cj = cj->progeny[0]; + t->flags = 4; + t->tight = 1; + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[5], cj->progeny[0], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[6], cj->progeny[0], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[7], cj->progeny[0], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[4], cj->progeny[1], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, + ci->progeny[5], 
cj->progeny[1], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[6], cj->progeny[1], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[7], cj->progeny[1], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[4], cj->progeny[2], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[5], cj->progeny[2], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, + ci->progeny[6], cj->progeny[2], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[7], cj->progeny[2], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[4], cj->progeny[3], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[5], cj->progeny[3], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[6], cj->progeny[3], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, + ci->progeny[7], cj->progeny[3], 1), + 1, s); + break; + + case 5: /* ( 1 , 0 , -1 ) */ + t->ci = ci->progeny[4]; + t->cj = cj->progeny[1]; + t->flags = 5; + t->tight = 1; + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[6], cj->progeny[3], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[4], cj->progeny[3], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[6], cj->progeny[1], 1), + 1, s); + break; + + case 6: /* ( 1 , -1 , 1 ) */ + t->ci = ci->progeny[5]; + t->cj = cj->progeny[2]; + t->flags = 6; + t->tight = 1; + break; + + case 7: /* ( 1 , -1 , 0 ) */ + t->ci = ci->progeny[4]; + t->cj = cj->progeny[3]; + t->flags = 6; + t->tight = 1; + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[5], cj->progeny[2], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[4], cj->progeny[2], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[5], cj->progeny[3], 1), + 1, s); + break; + + case 8: /* ( 1 , -1 , -1 ) */ + t->ci = ci->progeny[4]; + t->cj = cj->progeny[3]; + t->flags = 8; + t->tight = 1; + break; + + case 9: /* ( 0 , 1 , 1 ) */ + t->ci = ci->progeny[3]; + t->cj = cj->progeny[0]; + t->flags = 9; + t->tight = 1; + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[7], cj->progeny[4], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[3], cj->progeny[4], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[7], cj->progeny[0], 1), + 1, s); + break; + + case 10: /* ( 0 , 1 , 0 ) */ + t->ci = ci->progeny[2]; + t->cj = cj->progeny[0]; + t->flags = 10; + t->tight = 1; + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[3], cj->progeny[0], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, 
task_type_pair, t->subtype, 7, 0, + ci->progeny[6], cj->progeny[0], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[7], cj->progeny[0], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[2], cj->progeny[1], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, + ci->progeny[3], cj->progeny[1], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[6], cj->progeny[1], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, + ci->progeny[7], cj->progeny[1], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[2], cj->progeny[4], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[3], cj->progeny[4], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, + ci->progeny[6], cj->progeny[4], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[7], cj->progeny[4], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, + ci->progeny[2], cj->progeny[5], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, + ci->progeny[3], cj->progeny[5], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[6], cj->progeny[5], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, + ci->progeny[7], cj->progeny[5], 1), + 1, s); + break; + + case 11: /* ( 0 , 1 , -1 ) */ + t->ci = ci->progeny[2]; + t->cj = cj->progeny[1]; + t->flags = 11; + t->tight = 1; + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[6], cj->progeny[5], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[2], cj->progeny[5], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, + ci->progeny[6], cj->progeny[1], 1), + 1, s); + break; + + case 12: /* ( 0 , 0 , 1 ) */ + t->ci = ci->progeny[1]; + t->cj = cj->progeny[0]; + t->flags = 12; + t->tight = 1; + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, + ci->progeny[3], cj->progeny[0], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[5], cj->progeny[0], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, + ci->progeny[7], cj->progeny[0], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, + ci->progeny[1], cj->progeny[2], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, + ci->progeny[3], cj->progeny[2], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, + ci->progeny[5], cj->progeny[2], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, + ci->progeny[7], cj->progeny[2], 1), + 1, s); + scheduler_splittasks_mapper( + scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, + ci->progeny[1], 
cj->progeny[4], 1),
+                1, s);
+            scheduler_splittasks_mapper(
+                scheduler_addtask(s, task_type_pair, t->subtype, 6, 0,
+                                  ci->progeny[3], cj->progeny[4], 1),
+                1, s);
+            scheduler_splittasks_mapper(
+                scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
+                                  ci->progeny[5], cj->progeny[4], 1),
+                1, s);
+            scheduler_splittasks_mapper(
+                scheduler_addtask(s, task_type_pair, t->subtype, 11, 0,
+                                  ci->progeny[7], cj->progeny[4], 1),
+                1, s);
+            scheduler_splittasks_mapper(
+                scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                  ci->progeny[1], cj->progeny[6], 1),
+                1, s);
+            scheduler_splittasks_mapper(
+                scheduler_addtask(s, task_type_pair, t->subtype, 3, 0,
+                                  ci->progeny[3], cj->progeny[6], 1),
+                1, s);
+            scheduler_splittasks_mapper(
+                scheduler_addtask(s, task_type_pair, t->subtype, 9, 0,
+                                  ci->progeny[5], cj->progeny[6], 1),
+                1, s);
+            scheduler_splittasks_mapper(
+                scheduler_addtask(s, task_type_pair, t->subtype, 12, 0,
+                                  ci->progeny[7], cj->progeny[6], 1),
+                1, s);
+            break;
+          } /* switch(sid) */
+        }

+        /* Otherwise, break it up if it is too large? */
+      } else if (scheduler_doforcesplit && ci->split && cj->split &&
+                 (ci->count > space_maxsize / cj->count)) {
+
+        // message( "force splitting pair with %i and %i parts." , ci->count ,
+        // cj->count );
+
+        /* Replace the current task. */
+        t->type = task_type_none;
+
+        for (int j = 0; j < 8; j++)
+          if (ci->progeny[j] != NULL)
+            for (int k = 0; k < 8; k++)
+              if (cj->progeny[k] != NULL) {
+                scheduler_splittasks_mapper(
+                    scheduler_addtask(s, task_type_pair, t->subtype, 0, 0,
+                                      ci->progeny[j], cj->progeny[k], 0),
+                    1, s);
+                t->flags = space_getsid(s->space, &t->ci, &t->cj, shift);
+              }

-  } /* pair interaction? */
+        /* Otherwise, if not split, stitch-up the sorting. */
+      } else {

-  } /* iterate over the current task. */
+        /* Create the sort for ci. */
+        lock_lock(&ci->lock);
+        if (ci->sorts == NULL)
+          ci->sorts = scheduler_addtask(s, task_type_sort, 0, 1 << sid, 0, ci,
+                                        NULL, 0);
+        else
+          ci->sorts->flags |= (1 << sid);
+        lock_unlock_blind(&ci->lock);
+        scheduler_addunlock(s, ci->sorts, t);
+
+        /* Create the sort for cj. */
+        lock_lock(&cj->lock);
+        if (cj->sorts == NULL)
+          cj->sorts = scheduler_addtask(s, task_type_sort, 0, 1 << sid, 0, cj,
+                                        NULL, 0);
+        else
+          cj->sorts->flags |= (1 << sid);
+        lock_unlock_blind(&cj->lock);
+        scheduler_addunlock(s, cj->sorts, t);
+      }
+
+    } /* pair interaction? */
+
+  } /* iterate over the current task. */
+  }
 }

 void scheduler_splittasks(struct scheduler *s) {

   /* Call the mapper on each current task. */
   threadpool_map(s->threadpool, scheduler_splittasks_mapper, s->tasks,
-                 s->nr_tasks, sizeof(struct task), s);
+                 s->nr_tasks, sizeof(struct task), 1, s);
 }

 /**
@@ -758,16 +763,21 @@ void scheduler_set_unlocks(struct scheduler *s) {
 * graph and re-computes the task wait counters.
 */
-void scheduler_simple_rewait_mapper(void *map_data, void *extra_data) {
+void scheduler_simple_rewait_mapper(void *map_data, int num_elements,
+                                    void *extra_data) {

-  struct task *t = (struct task *)map_data;
+  struct task *tasks = (struct task *)map_data;
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];

-  /* Increment the waits of the dependances */
-  for (int k = 0; k < t->nr_unlock_tasks; k++) {
-    struct task *u = t->unlock_tasks[k];
-    atomic_inc(&u->wait);
+    /* Increment the waits of the dependencies. */
+    for (int k = 0; k < t->nr_unlock_tasks; k++) {
+      struct task *u = t->unlock_tasks[k];
+      atomic_inc(&u->wait);
+    }
   }
 }
+
 /**
 * @brief Sort the tasks in topological order over all queues.
 *
@@ -782,7 +792,7 @@ void scheduler_ranktasks(struct scheduler *s) {

   /* Run through the tasks and get all the waits right. */
   threadpool_map(s->threadpool, scheduler_simple_rewait_mapper, tasks, nr_tasks,
-                 sizeof(struct task), NULL);
+                 sizeof(struct task), 1000, NULL);

   /* Load the tids of tasks with no waits. */
   int left = 0;
@@ -960,33 +970,43 @@ void scheduler_reweight(struct scheduler *s) {
 * graph and re-computes the task wait counters.
 */
-void scheduler_rewait_mapper(void *map_data, void *extra_data) {
+void scheduler_rewait_mapper(void *map_data, int num_elements,
+                             void *extra_data) {

   struct scheduler *s = (struct scheduler *)extra_data;
-  struct task *t = (struct task *)map_data;
+  struct task *tasks = (struct task *)map_data;

-  if (t->skip) return;
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];

-  /* Skip tasks not in the mask */
-  if (!((1 << t->type) & s->mask) || !((1 << t->subtype) & s->submask)) return;
+    if (t->skip || !((1 << t->type) & s->mask) ||
+        !((1 << t->subtype) & s->submask))
+      continue;

-  /* Skip sort tasks that have already been performed */
-  if (t->type == task_type_sort && t->flags == 0) return;
+    /* A sort task with no flags set should never make it here; complain. */
+    if (t->type == task_type_sort && t->flags == 0) {
+      error("empty sort task, bad!");
+    }

-  /* Sets the waits of the dependances */
-  for (int k = 0; k < t->nr_unlock_tasks; k++) {
-    struct task *u = t->unlock_tasks[k];
-    atomic_inc(&u->wait);
+    /* Sets the waits of the dependencies. */
+    for (int k = 0; k < t->nr_unlock_tasks; k++) {
+      struct task *u = t->unlock_tasks[k];
+      atomic_inc(&u->wait);
+    }
   }
 }

-void scheduler_enqueue_mapper(void *map_data, void *extra_data) {
+void scheduler_enqueue_mapper(void *map_data, int num_elements,
+                              void *extra_data) {
   struct scheduler *s = (struct scheduler *)extra_data;
-  struct task *t = (struct task *)map_data;
-  if (atomic_dec(&t->wait) == 1 && !t->skip && ((1 << t->type) & s->mask) &&
-      ((1 << t->subtype) & s->submask)) {
-    scheduler_enqueue(s, t);
-    pthread_cond_signal(&s->sleep_cond);
+  struct task *tasks = (struct task *)map_data;
+  for (int ind = 0; ind < num_elements; ind++) {
+    struct task *t = &tasks[ind];
+    if (atomic_dec(&t->wait) == 1 && !t->skip && ((1 << t->type) & s->mask) &&
+        ((1 << t->subtype) & s->submask)) {
+      scheduler_enqueue(s, t);
+      pthread_cond_signal(&s->sleep_cond);
+    }
   }
 }

@@ -1015,11 +1035,11 @@ void scheduler_start(struct scheduler *s, unsigned int mask,

   /* Re-wait the tasks. */
   threadpool_map(s->threadpool, scheduler_rewait_mapper, s->tasks, s->nr_tasks,
-                 sizeof(struct task), s);
+                 sizeof(struct task), 1000, s);

   /* Loop over the tasks and enqueue whoever is ready. */
   threadpool_map(s->threadpool, scheduler_enqueue_mapper, s->tasks, s->nr_tasks,
-                 sizeof(struct task), s);
+                 sizeof(struct task), 1000, s);

   /* To be safe, fire of one last sleep_cond in a safe way.
*/ pthread_mutex_lock(&s->sleep_mutex); diff --git a/src/space.c b/src/space.c index 5f6e9aa4fb03ab72318578d5ec0180e0f52b5ca2..4f6a3362534b1454b5864bfde25a2a497ce72697 100644 --- a/src/space.c +++ b/src/space.c @@ -654,7 +654,7 @@ void space_split(struct space *s, struct cell *cells, int verbose) { const ticks tic = getticks(); threadpool_map(&s->e->threadpool, space_split_mapper, cells, s->nr_cells, - sizeof(struct cell), s); + sizeof(struct cell), 1, s); if (verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -684,8 +684,8 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, sort_struct.xparts = s->xparts; sort_struct.ind = ind; sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads; - if ((sort_struct.stack = malloc(sizeof(struct qstack) * - sort_struct.stack_size)) == NULL) + if ((sort_struct.stack = + malloc(sizeof(struct qstack) * sort_struct.stack_size)) == NULL) error("Failed to allocate sorting stack."); for (int i = 0; i < sort_struct.stack_size; i++) sort_struct.stack[i].ready = 0; @@ -702,8 +702,8 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, /* Launch the sorting tasks with a stride of zero such that the same map data is passed to each thread. */ - threadpool_map(&s->e->threadpool, space_parts_sort_mapper, - &sort_struct, s->e->threadpool.num_threads, 0, NULL); + threadpool_map(&s->e->threadpool, space_parts_sort_mapper, &sort_struct, + s->e->threadpool.num_threads, 0, 1, NULL); /* Verify sort_struct. */ /* for (int i = 1; i < N; i++) @@ -721,7 +721,7 @@ void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, clocks_getunit()); } -void space_parts_sort_mapper(void *map_data, void *extra_data) { +void space_parts_sort_mapper(void *map_data, int num_elements, void *extra_data) { /* Unpack the mapping data. */ struct parallel_sort *sort_struct = (struct parallel_sort *)map_data; @@ -735,8 +735,7 @@ void space_parts_sort_mapper(void *map_data, void *extra_data) { while (sort_struct->waiting) { /* Grab an interval off the queue. */ - int qid = - atomic_inc(&sort_struct->first) % sort_struct->stack_size; + int qid = atomic_inc(&sort_struct->first) % sort_struct->stack_size; /* Wait for the entry to be ready, or for the sorting do be done. */ while (!sort_struct->stack[qid].ready) @@ -795,16 +794,14 @@ void space_parts_sort_mapper(void *map_data, void *extra_data) { /* Recurse on the left? */ if (jj > i && pivot > min) { - qid = atomic_inc(&sort_struct->last) % - sort_struct->stack_size; + qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size; while (sort_struct->stack[qid].ready) ; sort_struct->stack[qid].i = i; sort_struct->stack[qid].j = jj; sort_struct->stack[qid].min = min; sort_struct->stack[qid].max = pivot; - if (atomic_inc(&sort_struct->waiting) >= - sort_struct->stack_size) + if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size) error("Qstack overflow."); sort_struct->stack[qid].ready = 1; } @@ -820,16 +817,14 @@ void space_parts_sort_mapper(void *map_data, void *extra_data) { /* Recurse on the right? 
*/ if (pivot + 1 < max) { - qid = atomic_inc(&sort_struct->last) % - sort_struct->stack_size; + qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size; while (sort_struct->stack[qid].ready) ; sort_struct->stack[qid].i = jj + 1; sort_struct->stack[qid].j = j; sort_struct->stack[qid].min = pivot + 1; sort_struct->stack[qid].max = max; - if (atomic_inc(&sort_struct->waiting) >= - sort_struct->stack_size) + if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size) error("Qstack overflow."); sort_struct->stack[qid].ready = 1; } @@ -870,8 +865,8 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, sort_struct.gparts = s->gparts; sort_struct.ind = ind; sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads; - if ((sort_struct.stack = malloc(sizeof(struct qstack) * - sort_struct.stack_size)) == NULL) + if ((sort_struct.stack = + malloc(sizeof(struct qstack) * sort_struct.stack_size)) == NULL) error("Failed to allocate sorting stack."); for (int i = 0; i < sort_struct.stack_size; i++) sort_struct.stack[i].ready = 0; @@ -888,8 +883,8 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, /* Launch the sorting tasks with a stride of zero such that the same map data is passed to each thread. */ - threadpool_map(&s->e->threadpool, space_gparts_sort_mapper, - &sort_struct, s->e->threadpool.num_threads, 0, NULL); + threadpool_map(&s->e->threadpool, space_gparts_sort_mapper, &sort_struct, + s->e->threadpool.num_threads, 0, 1, NULL); /* Verify sort_struct. */ /* for (int i = 1; i < N; i++) @@ -907,7 +902,7 @@ void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max, clocks_getunit()); } -void space_gparts_sort_mapper(void *map_data, void *extra_data) { +void space_gparts_sort_mapper(void *map_data, int num_elements, void *extra_data) { /* Unpack the mapping data. */ struct parallel_sort *sort_struct = (struct parallel_sort *)map_data; @@ -920,8 +915,7 @@ void space_gparts_sort_mapper(void *map_data, void *extra_data) { while (sort_struct->waiting) { /* Grab an interval off the queue. */ - int qid = - atomic_inc(&sort_struct->first) % sort_struct->stack_size; + int qid = atomic_inc(&sort_struct->first) % sort_struct->stack_size; /* Wait for the entry to be ready, or for the sorting do be done. */ while (!sort_struct->stack[qid].ready) @@ -977,16 +971,14 @@ void space_gparts_sort_mapper(void *map_data, void *extra_data) { /* Recurse on the left? */ if (jj > i && pivot > min) { - qid = atomic_inc(&sort_struct->last) % - sort_struct->stack_size; + qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size; while (sort_struct->stack[qid].ready) ; sort_struct->stack[qid].i = i; sort_struct->stack[qid].j = jj; sort_struct->stack[qid].min = min; sort_struct->stack[qid].max = pivot; - if (atomic_inc(&sort_struct->waiting) >= - sort_struct->stack_size) + if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size) error("Qstack overflow."); sort_struct->stack[qid].ready = 1; } @@ -1002,16 +994,14 @@ void space_gparts_sort_mapper(void *map_data, void *extra_data) { /* Recurse on the right? 
*/ if (pivot + 1 < max) { - qid = atomic_inc(&sort_struct->last) % - sort_struct->stack_size; + qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size; while (sort_struct->stack[qid].ready) ; sort_struct->stack[qid].i = jj + 1; sort_struct->stack[qid].j = j; sort_struct->stack[qid].min = pivot + 1; sort_struct->stack[qid].max = max; - if (atomic_inc(&sort_struct->waiting) >= - sort_struct->stack_size) + if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size) error("Qstack overflow."); sort_struct->stack[qid].ready = 1; } @@ -1211,124 +1201,129 @@ void space_map_cells_pre(struct space *s, int full, * too many particles. */ -void space_split_mapper(void *map_data, void *extra_data) { +void space_split_mapper(void *map_data, int num_elements, void *extra_data) { /* Unpack the inputs. */ struct space *s = (struct space *)extra_data; - struct cell *c = (struct cell *)map_data; - - const int count = c->count; - const int gcount = c->gcount; - int maxdepth = 0; - float h_max = 0.0f; - int ti_end_min = max_nr_timesteps, ti_end_max = 0; - struct cell *temp; - struct part *parts = c->parts; - struct gpart *gparts = c->gparts; - struct xpart *xparts = c->xparts; - - /* Check the depth. */ - if (c->depth > s->maxdepth) s->maxdepth = c->depth; - - /* Split or let it be? */ - if (count > space_splitsize || gcount > space_splitsize) { - - /* No longer just a leaf. */ - c->split = 1; - - /* Create the cell's progeny. */ - for (int k = 0; k < 8; k++) { - temp = space_getcell(s); - temp->count = 0; - temp->gcount = 0; - temp->loc[0] = c->loc[0]; - temp->loc[1] = c->loc[1]; - temp->loc[2] = c->loc[2]; - temp->h[0] = c->h[0] / 2; - temp->h[1] = c->h[1] / 2; - temp->h[2] = c->h[2] / 2; - temp->dmin = c->dmin / 2; - if (k & 4) temp->loc[0] += temp->h[0]; - if (k & 2) temp->loc[1] += temp->h[1]; - if (k & 1) temp->loc[2] += temp->h[2]; - temp->depth = c->depth + 1; - temp->split = 0; - temp->h_max = 0.0; - temp->dx_max = 0.f; - temp->nodeID = c->nodeID; - temp->parent = c; - c->progeny[k] = temp; - } - - /* Split the cell data. */ - cell_split(c); - - /* Remove any progeny with zero parts. */ - for (int k = 0; k < 8; k++) - if (c->progeny[k]->count == 0 && c->progeny[k]->gcount == 0) { - space_recycle(s, c->progeny[k]); - c->progeny[k] = NULL; - } else { - space_split_mapper(c->progeny[k], s); - h_max = fmaxf(h_max, c->progeny[k]->h_max); - ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); - ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); - if (c->progeny[k]->maxdepth > maxdepth) - maxdepth = c->progeny[k]->maxdepth; + struct cell *cells = (struct cell *)map_data; + + for (int ind = 0; ind < num_elements; ind++) { + + struct cell *c = &cells[ind]; + + const int count = c->count; + const int gcount = c->gcount; + int maxdepth = 0; + float h_max = 0.0f; + int ti_end_min = max_nr_timesteps, ti_end_max = 0; + struct cell *temp; + struct part *parts = c->parts; + struct gpart *gparts = c->gparts; + struct xpart *xparts = c->xparts; + + /* Check the depth. */ + if (c->depth > s->maxdepth) s->maxdepth = c->depth; + + /* Split or let it be? */ + if (count > space_splitsize || gcount > space_splitsize) { + + /* No longer just a leaf. */ + c->split = 1; + + /* Create the cell's progeny. 
*/
+      for (int k = 0; k < 8; k++) {
+        temp = space_getcell(s);
+        temp->count = 0;
+        temp->gcount = 0;
+        temp->loc[0] = c->loc[0];
+        temp->loc[1] = c->loc[1];
+        temp->loc[2] = c->loc[2];
+        temp->h[0] = c->h[0] / 2;
+        temp->h[1] = c->h[1] / 2;
+        temp->h[2] = c->h[2] / 2;
+        temp->dmin = c->dmin / 2;
+        if (k & 4) temp->loc[0] += temp->h[0];
+        if (k & 2) temp->loc[1] += temp->h[1];
+        if (k & 1) temp->loc[2] += temp->h[2];
+        temp->depth = c->depth + 1;
+        temp->split = 0;
+        temp->h_max = 0.0;
+        temp->dx_max = 0.f;
+        temp->nodeID = c->nodeID;
+        temp->parent = c;
+        c->progeny[k] = temp;
       }
 
-    /* Set the values for this cell. */
-    c->h_max = h_max;
-    c->ti_end_min = ti_end_min;
-    c->ti_end_max = ti_end_max;
-    c->maxdepth = maxdepth;
-  }
+      /* Split the cell data. */
+      cell_split(c);
+
+      /* Remove any progeny with zero parts. */
+      for (int k = 0; k < 8; k++)
+        if (c->progeny[k]->count == 0 && c->progeny[k]->gcount == 0) {
+          space_recycle(s, c->progeny[k]);
+          c->progeny[k] = NULL;
+        } else {
+          space_split_mapper(c->progeny[k], 1, s);
+          h_max = fmaxf(h_max, c->progeny[k]->h_max);
+          ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min);
+          ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max);
+          if (c->progeny[k]->maxdepth > maxdepth)
+            maxdepth = c->progeny[k]->maxdepth;
        }
 
-  /* Otherwise, collect the data for this cell. */
-  else {
+      /* Set the values for this cell. */
+      c->h_max = h_max;
+      c->ti_end_min = ti_end_min;
+      c->ti_end_max = ti_end_max;
+      c->maxdepth = maxdepth;
 
-    /* Clear the progeny. */
-    bzero(c->progeny, sizeof(struct cell *) * 8);
-    c->split = 0;
-    c->maxdepth = c->depth;
-
-    /* Get dt_min/dt_max. */
-    for (int k = 0; k < count; k++) {
-      struct part *p = &parts[k];
-      struct xpart *xp = &xparts[k];
-      const float h = p->h;
-      const int ti_end = p->ti_end;
-      xp->x_diff[0] = 0.f;
-      xp->x_diff[1] = 0.f;
-      xp->x_diff[2] = 0.f;
-      if (h > h_max) h_max = h;
-      if (ti_end < ti_end_min) ti_end_min = ti_end;
-      if (ti_end > ti_end_max) ti_end_max = ti_end;
     }
-    for (int k = 0; k < gcount; k++) {
-      struct gpart *gp = &gparts[k];
-      const int ti_end = gp->ti_end;
-      gp->x_diff[0] = 0.f;
-      gp->x_diff[1] = 0.f;
-      gp->x_diff[2] = 0.f;
-      if (ti_end < ti_end_min) ti_end_min = ti_end;
-      if (ti_end > ti_end_max) ti_end_max = ti_end;
+
+    /* Otherwise, collect the data for this cell. */
+    else {
+
+      /* Clear the progeny. */
+      bzero(c->progeny, sizeof(struct cell *) * 8);
+      c->split = 0;
+      c->maxdepth = c->depth;
+
+      /* Get ti_end_min/ti_end_max. */
+      for (int k = 0; k < count; k++) {
+        struct part *p = &parts[k];
+        struct xpart *xp = &xparts[k];
+        const float h = p->h;
+        const int ti_end = p->ti_end;
+        xp->x_diff[0] = 0.f;
+        xp->x_diff[1] = 0.f;
+        xp->x_diff[2] = 0.f;
+        if (h > h_max) h_max = h;
+        if (ti_end < ti_end_min) ti_end_min = ti_end;
+        if (ti_end > ti_end_max) ti_end_max = ti_end;
+      }
+      for (int k = 0; k < gcount; k++) {
+        struct gpart *gp = &gparts[k];
+        const int ti_end = gp->ti_end;
+        gp->x_diff[0] = 0.f;
+        gp->x_diff[1] = 0.f;
+        gp->x_diff[2] = 0.f;
+        if (ti_end < ti_end_min) ti_end_min = ti_end;
+        if (ti_end > ti_end_max) ti_end_max = ti_end;
+      }
+      c->h_max = h_max;
+      c->ti_end_min = ti_end_min;
+      c->ti_end_max = ti_end_max;
+    }
 
-    c->h_max = h_max;
-    c->ti_end_min = ti_end_min;
-    c->ti_end_max = ti_end_max;
-  }
 
-  /* Set ownership according to the start of the parts array.
*/
-  if (s->nr_parts > 0)
-    c->owner =
-        ((c->parts - s->parts) % s->nr_parts) * s->nr_queues / s->nr_parts;
-  else if (s->nr_gparts > 0)
-    c->owner =
-        ((c->gparts - s->gparts) % s->nr_gparts) * s->nr_queues / s->nr_gparts;
-  else
-    c->owner = 0; /* Ok, there is really nothing on this rank... */
+    /* Set ownership according to the start of the parts array. */
+    if (s->nr_parts > 0)
+      c->owner =
+          ((c->parts - s->parts) % s->nr_parts) * s->nr_queues / s->nr_parts;
+    else if (s->nr_gparts > 0)
+      c->owner = ((c->gparts - s->gparts) % s->nr_gparts) * s->nr_queues /
+                 s->nr_gparts;
+    else
+      c->owner = 0; /* Ok, there is really nothing on this rank... */
+  }
 }
 
 /**
diff --git a/src/space.h b/src/space.h
index d448cd1279611c26139672608798089f7c6a7afb..d6802db02fccc398f15ad5637d24c5ab7140d147 100644
--- a/src/space.h
+++ b/src/space.h
@@ -150,12 +150,14 @@ void space_map_parts_xparts(struct space *s,
                             struct cell *c));
 void space_map_cells_post(struct space *s, int full,
                           void (*fun)(struct cell *c, void *data), void *data);
-void space_parts_sort_mapper(void *map_data, void *extra_data);
-void space_gparts_sort_mapper(void *map_data, void *extra_data);
+void space_parts_sort_mapper(void *map_data, int num_elements,
+                             void *extra_data);
+void space_gparts_sort_mapper(void *map_data, int num_elements,
+                              void *extra_data);
 void space_rebuild(struct space *s, double h_max, int verbose);
 void space_recycle(struct space *s, struct cell *c);
 void space_split(struct space *s, struct cell *cells, int verbose);
-void space_split_mapper(void *map_data, void *extra_data);
+void space_split_mapper(void *map_data, int num_elements, void *extra_data);
 void space_do_parts_sort();
 void space_do_gparts_sort();
 void space_link_cleanup(struct space *s);
diff --git a/src/threadpool.c b/src/threadpool.c
index 09dd642e1c1bf061f9113440429abdb104c930c6..8605e04bac08bdf61295446bd5074de0182a573b 100644
--- a/src/threadpool.c
+++ b/src/threadpool.c
@@ -48,17 +48,25 @@ void *threadpool_runner(void *data) {
     if (tp->num_threads_waiting == tp->num_threads) {
       pthread_cond_signal(&tp->control_cond);
     }
-    
+
     /* Wait for the controller. */
     pthread_cond_wait(&tp->thread_cond, &tp->thread_mutex);
     tp->num_threads_waiting -= 1;
+    tp->num_threads_running += 1;
+    if (tp->num_threads_running == tp->num_threads) {
+      pthread_cond_signal(&tp->control_cond);
+    }
     pthread_mutex_unlock(&tp->thread_mutex);
 
     /* The index of the mapping task we will work on next. */
     size_t task_ind;
-    while ((task_ind = atomic_inc(&tp->map_data_count)) < tp->map_data_size) {
+    while ((task_ind = atomic_add(&tp->map_data_count, tp->map_data_chunk)) <
+           tp->map_data_size) {
+      const int num_elements = task_ind + tp->map_data_chunk > tp->map_data_size
+                                   ? tp->map_data_size - task_ind
+                                   : tp->map_data_chunk;
       tp->map_function(tp->map_data + tp->map_data_stride * task_ind,
-                       tp->map_extra_data);
+                       num_elements, tp->map_extra_data);
     }
   }
 }
@@ -80,6 +88,7 @@ void threadpool_init(struct threadpool *tp, int num_threads) {
   tp->map_data_size = 0;
   tp->map_data_count = 0;
   tp->map_data_stride = 0;
+  tp->map_data_chunk = 0;
   tp->map_function = NULL;
 
   /* Allocate the threads. */
@@ -89,13 +98,13 @@ void threadpool_init(struct threadpool *tp, int num_threads) {
   }
 
   /* Create and start the threads. */
+  pthread_mutex_lock(&tp->thread_mutex);
   for (int k = 0; k < num_threads; k++) {
     if (pthread_create(&tp->threads[k], NULL, &threadpool_runner, tp) != 0)
       error("Failed to create threadpool runner thread.");
   }
 
   /* Wait for all the threads to be up and running.
*/
-  pthread_mutex_lock(&tp->thread_mutex);
   while (tp->num_threads_waiting < tp->num_threads) {
     pthread_cond_wait(&tp->control_cond, &tp->thread_mutex);
   }
@@ -113,24 +122,35 @@
  * @param map_data The data on which the mapping function will be called.
  * @param N Number of elements in @c map_data.
  * @param stride Size, in bytes, of each element of @c map_data.
+ * @param chunk Number of map data elements to pass to the function at a time.
- * @param extra_data Addtitional pointer that will be passed to the mapping
+ * @param extra_data Additional pointer that will be passed to the mapping
  *        function, may contain additional data.
  */
 void threadpool_map(struct threadpool *tp, threadpool_map_function map_function,
-                    void *map_data, size_t N, int stride, void *extra_data) {
+                    void *map_data, size_t N, int stride, int chunk,
+                    void *extra_data) {
 
   /* Set the map data and signal the threads. */
   pthread_mutex_lock(&tp->thread_mutex);
   tp->map_data_stride = stride;
   tp->map_data_size = N;
   tp->map_data_count = 0;
+  tp->map_data_chunk = chunk;
   tp->map_function = map_function;
   tp->map_data = map_data;
   tp->map_extra_data = extra_data;
+  tp->num_threads_running = 0;
   pthread_cond_broadcast(&tp->thread_cond);
 
-  /* Wait for the threads to come home. */
-  pthread_cond_wait(&tp->control_cond, &tp->thread_mutex);
+  /* Wait for all the threads to be up and running. */
+  while (tp->num_threads_running < tp->num_threads) {
+    pthread_cond_wait(&tp->control_cond, &tp->thread_mutex);
+  }
+
+  /* Wait for all the threads to be done. */
+  while (tp->num_threads_waiting < tp->num_threads) {
+    pthread_cond_wait(&tp->control_cond, &tp->thread_mutex);
+  }
 
   pthread_mutex_unlock(&tp->thread_mutex);
 }
diff --git a/src/threadpool.h b/src/threadpool.h
index 8ebe34319b47fff35998355a77b8c2b6de30a856..66dc6752d29a654dbaacfd1d0267f1cedf4dd981 100644
--- a/src/threadpool.h
+++ b/src/threadpool.h
@@ -26,7 +26,8 @@
 #include <pthread.h>
 
 /* Function type for mappings. */
-typedef void (*threadpool_map_function)(void *map_data, void *extra_data);
+typedef void (*threadpool_map_function)(void *map_data, int num_elements,
+                                        void *extra_data);
 
 /* Data of a threadpool. */
 struct threadpool {
@@ -43,7 +44,8 @@ struct threadpool {
 
   /* Current map data and count. */
   void *map_data, *map_extra_data;
-  volatile size_t map_data_count, map_data_size, map_data_stride;
+  volatile size_t map_data_count, map_data_size, map_data_stride,
+      map_data_chunk;
   volatile threadpool_map_function map_function;
 
   /* Counter for the number of threads that are done. */
@@ -53,6 +55,7 @@
 
 /* Function prototypes.
*/
 void threadpool_init(struct threadpool *tp, int num_threads);
 void threadpool_map(struct threadpool *tp, threadpool_map_function map_function,
-                    void *map_data, size_t N, int stride, void *extra_data);
+                    void *map_data, size_t N, int stride, int chunk,
+                    void *extra_data);
 
 #endif /* SWIFT_THREADPOOL_H */
diff --git a/tests/threadpool_test.c b/tests/threadpool_test.c
index 9a7d6b1db4962e431ba15f324eb22e5792f98dfa..5207881c27b51ed4090c2c8c015f3b4f97e422fb 100644
--- a/tests/threadpool_test.c
+++ b/tests/threadpool_test.c
@@ -26,18 +26,24 @@
 #include "../src/threadpool.h"
 #include "../src/atomic.h"
 
-void map_function_first(void *map_data, void *extra_data) {
-  const int input = *(int *)map_data;
-  usleep(rand() % 1000000);
-  printf("map_function_first: got input %i.\n", input);
-  fflush(stdout);
+void map_function_first(void *map_data, int num_elements, void *extra_data) {
+  const int *inputs = (int *)map_data;
+  for (int ind = 0; ind < num_elements; ind++) {
+    int input = inputs[ind];
+    usleep(rand() % 1000000);
+    printf("map_function_first: got input %i.\n", input);
+    fflush(stdout);
+  }
 }
 
-void map_function_second(void *map_data, void *extra_data) {
-  const int input = *(int *)map_data;
-  usleep(rand() % 1000000);
-  printf("map_function_second: got input %i.\n", input);
-  fflush(stdout);
+void map_function_second(void *map_data, int num_elements, void *extra_data) {
+  const int *inputs = (int *)map_data;
+  for (int ind = 0; ind < num_elements; ind++) {
+    int input = inputs[ind];
+    usleep(rand() % 1000000);
+    printf("map_function_second: got input %i.\n", input);
+    fflush(stdout);
+  }
 }
 
 int main(int argc, char *argv[]) {
@@ -59,12 +65,16 @@ int main(int argc, char *argv[]) {
     for (int k = 0; k < N; k++) data[k] = k;
     printf("processing integers from 0..%i.\n", N);
     fflush(stdout);
-    threadpool_map(&tp, map_function_first, data, N, sizeof(int), NULL);
+    threadpool_map(&tp, map_function_first, data, N, sizeof(int), 1, NULL);
 
-    // Do the same thing again, with less jobs than threads.
-    printf("processing integers from 0..%i.\n", num_threads / 2);
+    // Do the same thing again, with half as many jobs.
+    printf("processing integers from 0..%i.\n", N / 2);
+    fflush(stdout);
+    threadpool_map(&tp, map_function_second, data, N / 2, sizeof(int), 1, NULL);
+
+    // Do the same thing again, with a chunk size of two.
+    printf("processing integers from 0..%i.\n", N);
     fflush(stdout);
-    threadpool_map(&tp, map_function_second, data, num_threads / 2, sizeof(int),
-                   NULL);
+    threadpool_map(&tp, map_function_first, data, N, sizeof(int), 2, NULL);
   }
 }
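
Reviewer note: the sketch below is illustrative only and is not part of the
patch. It shows how a caller is expected to use the chunked threadpool_map()
API introduced above; the mapper name (square_mapper), the caller() wrapper,
the thread count, and the data are made up for the example.

#include "threadpool.h"

/* Illustrative mapper: receives a pointer to the first element of its chunk
   and the number of elements in that chunk. */
void square_mapper(void *map_data, int num_elements, void *extra_data) {
  int *values = (int *)map_data;
  for (int ind = 0; ind < num_elements; ind++) values[ind] *= values[ind];
}

void caller(void) {
  struct threadpool tp;
  threadpool_init(&tp, 8);
  int data[1024];
  for (int k = 0; k < 1024; k++) data[k] = k;
  /* Hand each idle thread 64 elements per atomic update of the shared
     counter; a chunk of 1 reproduces the old one-element-at-a-time
     behaviour. */
  threadpool_map(&tp, square_mapper, data, 1024, sizeof(int), 64, NULL);
}

Larger chunks cut contention on map_data_count at the price of coarser load
balancing, so small chunks suit expensive, uneven elements and large chunks
suit cheap, uniform ones.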