diff --git a/examples/main.c b/examples/main.c index e884ea5b6bba907819e189b842f558302e4ca9af..8bd591ccbd1c2fd860fa8ae2321d2e34132b287f 100644 --- a/examples/main.c +++ b/examples/main.c @@ -120,6 +120,10 @@ int main(int argc, char *argv[]) { error("MPI_Comm_size failed with error %i.", res); if ((res = MPI_Comm_rank(MPI_COMM_WORLD, &myrank)) != MPI_SUCCESS) error("Call to MPI_Comm_rank failed with error %i.", res); + + /* Make sure messages are stamped with the correct rank. */ + engine_rank = myrank; + if ((res = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)) != MPI_SUCCESS) error("Call to MPI_Comm_set_errhandler failed with error %i.", res); @@ -131,6 +135,7 @@ int main(int argc, char *argv[]) { message("WARNING: you should use the non-MPI version of this program."); } fflush(stdout); + #endif /* Let's pin the main thread */ @@ -572,8 +577,8 @@ int main(int argc, char *argv[]) { fprintf(file_thread, " %03i 0 0 0 0 %lli %lli 0 0 0 0 %lli\n", myrank, e.tic_step, e.toc_step, cpufreq); int count = 0; - for (int l = 0; l < e.sched.nr_tasks; l++) - if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit) { + for (int l = 0; l < e.sched.nr_tasks; l++) { + if (!e.sched.tasks[l].implicit && e.sched.tasks[l].toc != 0) { fprintf( file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %i\n", myrank, e.sched.tasks[l].rid, e.sched.tasks[l].type, @@ -588,11 +593,10 @@ int main(int argc, char *argv[]) { (e.sched.tasks[l].cj != NULL) ? e.sched.tasks[l].cj->gcount : 0, e.sched.tasks[l].flags); - fflush(stdout); - count++; } - message("rank %d counted %d tasks", myrank, count); - + fflush(stdout); + count++; + } fclose(file_thread); } @@ -608,8 +612,8 @@ int main(int argc, char *argv[]) { /* Add some information to help with the plots */ fprintf(file_thread, " %i %i %i %i %lli %lli %i %i %i %lli\n", -2, -1, -1, 1, e.tic_step, e.toc_step, 0, 0, 0, cpufreq); - for (int l = 0; l < e.sched.nr_tasks; l++) - if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit) + for (int l = 0; l < e.sched.nr_tasks; l++) { + if (!e.sched.tasks[l].implicit && e.sched.tasks[l].toc != 0) { fprintf( file_thread, " %i %i %i %i %lli %lli %i %i %i %i\n", e.sched.tasks[l].rid, e.sched.tasks[l].type, @@ -619,6 +623,8 @@ int main(int argc, char *argv[]) { (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->count, (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->gcount, (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->gcount); + } + } fclose(file_thread); #endif } diff --git a/src/Makefile.am b/src/Makefile.am index 37c88f1f9cc5facd61beb5f840fb83f7fcd92d8f..30afe6cf16960fe5b13f7a0bf6cb0cae86eaa43b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -44,7 +44,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \ physical_constants.h physical_constants_cgs.h potential.h version.h \ hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \ - sourceterms_struct.h + sourceterms_struct.h statistics.h # Common source files @@ -53,7 +53,8 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ units.c common_io.c single_io.c multipole.c version.c map.c \ kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \ physical_constants.c potential.c hydro_properties.c \ - runner_doiact_fft.c threadpool.c cooling.c sourceterms.c + runner_doiact_fft.c threadpool.c cooling.c sourceterms.c \ + statistics.c # Include files for distribution, not installation. nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ diff --git a/src/cell.c b/src/cell.c index c694ef9abff1dd183ec135765b3d227f55116ead..3e3a8e6cc537f948a809c1a3ebf3cb18194fedd1 100644 --- a/src/cell.c +++ b/src/cell.c @@ -52,6 +52,7 @@ #include "gravity.h" #include "hydro.h" #include "hydro_properties.h" +#include "scheduler.h" #include "space.h" #include "timers.h" @@ -763,6 +764,9 @@ void cell_clean_links(struct cell *c, void *data) { c->force = NULL; c->nr_force = 0; + + c->grav = NULL; + c->nr_grav = 0; } /** @@ -893,6 +897,120 @@ int cell_is_drift_needed(struct cell *c, int ti_current) { return 0; } +/** + * @brief Un-skips all the tasks associated with a given cell and checks + * if the space needs to be rebuilt. + * + * @param c the #cell. + * @param s the #scheduler. + * + * @return 1 If the space needs rebuilding. 0 otherwise. + */ +int cell_unskip_tasks(struct cell *c, struct scheduler *s) { + + /* Un-skip the density tasks involved with this cell. */ + for (struct link *l = c->density; l != NULL; l = l->next) { + struct task *t = l->t; + const struct cell *ci = t->ci; + const struct cell *cj = t->cj; + scheduler_activate(s, t); + + /* Set the correct sorting flags */ + if (t->type == task_type_pair) { + if (!(ci->sorted & (1 << t->flags))) { + atomic_or(&ci->sorts->flags, (1 << t->flags)); + scheduler_activate(s, ci->sorts); + } + if (!(cj->sorted & (1 << t->flags))) { + atomic_or(&cj->sorts->flags, (1 << t->flags)); + scheduler_activate(s, cj->sorts); + } + } + + /* Check whether there was too much particle motion */ + if (t->type == task_type_pair || t->type == task_type_sub_pair) { + if (t->tight && + (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || + ci->dx_max > space_maxreldx * ci->h_max || + cj->dx_max > space_maxreldx * cj->h_max)) + return 1; + +#ifdef WITH_MPI + /* Activate the send/recv flags. */ + if (ci->nodeID != engine_rank) { + + /* Activate the tasks to recv foreign cell ci's data. */ + scheduler_activate(s, ci->recv_xv); + scheduler_activate(s, ci->recv_rho); + scheduler_activate(s, ci->recv_ti); + + /* Look for the local cell cj's send tasks. */ + struct link *l = NULL; + for (l = cj->send_xv; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_xv task."); + scheduler_activate(s, l->t); + + for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + scheduler_activate(s, l->t); + + for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + scheduler_activate(s, l->t); + + } else if (cj->nodeID != engine_rank) { + + /* Activate the tasks to recv foreign cell cj's data. */ + scheduler_activate(s, cj->recv_xv); + scheduler_activate(s, cj->recv_rho); + scheduler_activate(s, cj->recv_ti); + /* Look for the local cell ci's send tasks. */ + struct link *l = NULL; + for (l = ci->send_xv; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_xv task."); + scheduler_activate(s, l->t); + + for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_rho task."); + scheduler_activate(s, l->t); + + for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; + l = l->next) + ; + if (l == NULL) error("Missing link to send_ti task."); + scheduler_activate(s, l->t); + } +#endif + } + } + + /* Unskip all the other task types. */ + for (struct link *l = c->gradient; l != NULL; l = l->next) + scheduler_activate(s, l->t); + for (struct link *l = c->force; l != NULL; l = l->next) + scheduler_activate(s, l->t); + for (struct link *l = c->grav; l != NULL; l = l->next) + scheduler_activate(s, l->t); + if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost); + if (c->ghost != NULL) scheduler_activate(s, c->ghost); + if (c->init != NULL) scheduler_activate(s, c->init); + if (c->kick != NULL) scheduler_activate(s, c->kick); + if (c->cooling != NULL) scheduler_activate(s, c->cooling); + if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms); + + return 0; +} + /** * @brief Set the super-cell pointers for all cells in a hierarchy. * diff --git a/src/cell.h b/src/cell.h index 6bdadfb6e543882ee5cc98499f6ac1c4205b0873..dfe0261f628799980111f4bedb8fcfdad90ef043 100644 --- a/src/cell.h +++ b/src/cell.h @@ -39,6 +39,7 @@ /* Avoid cyclic inclusions */ struct space; +struct scheduler; /* Max tag size set to 2^29 to take into account some MPI implementations * that use 2^31 as the upper bound on MPI tags and the fact that @@ -158,9 +159,6 @@ struct cell { /* Tasks for gravity tree. */ struct task *grav_up, *grav_down; - /* Task for external gravity */ - struct task *grav_external; - /* Task for cooling */ struct task *cooling; @@ -179,12 +177,6 @@ struct cell { /* ID of the previous owner, e.g. runner. */ int owner; - /* Momentum of particles in cell. */ - double mom[3], ang_mom[3]; - - /* Mass, potential, internal and kinetic energy of particles in this cell. */ - double mass, e_pot_self, e_pot_ext, e_pot, e_int, e_kin, e_rad, entropy; - /* Number of particles updated in this cell. */ int updated, g_updated; @@ -236,6 +228,7 @@ int cell_are_neighbours(const struct cell *restrict ci, void cell_check_multipole(struct cell *c, void *data); void cell_clean(struct cell *c); int cell_is_drift_needed(struct cell *c, int ti_current); +int cell_unskip_tasks(struct cell *c, struct scheduler *s); void cell_set_super(struct cell *c, struct cell *super); #endif /* SWIFT_CELL_H */ diff --git a/src/engine.c b/src/engine.c index 1323cd7c7d30f5e24b5faee1f2fcb9c0ea85e592..b841cdd299dfb27d302c23ba786c43e6d3d26389 100644 --- a/src/engine.c +++ b/src/engine.c @@ -63,6 +63,7 @@ #include "runner.h" #include "serial_io.h" #include "single_io.h" +#include "statistics.h" #include "timers.h" #include "tools.h" #include "units.h" @@ -1249,7 +1250,7 @@ void engine_make_external_gravity_tasks(struct engine *e) { /* Is that neighbour local ? */ if (ci->nodeID != nodeID) continue; - /* If the cells is local build a self-interaction */ + /* If the cell is local, build a self-interaction */ scheduler_addtask(sched, task_type_self, task_subtype_external_grav, 0, 0, ci, NULL, 0); } @@ -1330,59 +1331,101 @@ void engine_make_hydroloop_tasks(struct engine *e) { /** * @brief Counts the tasks associated with one cell and constructs the links * - * For each hydrodynamic task, construct the links with the corresponding cell. - * Similarly, construct the dependencies for all the sorting tasks. + * For each hydrodynamic and gravity task, construct the links with + * the corresponding cell. Similarly, construct the dependencies for + * all the sorting tasks. * * @param e The #engine. */ void engine_count_and_link_tasks(struct engine *e) { - struct scheduler *sched = &e->sched; - - for (int ind = 0; ind < sched->nr_tasks; ind++) { + struct scheduler *const sched = &e->sched; + const int nr_tasks = sched->nr_tasks; - struct task *t = &sched->tasks[ind]; + for (int ind = 0; ind < nr_tasks; ind++) { - if (t->skip) continue; + struct task *const t = &sched->tasks[ind]; + struct cell *const ci = t->ci; + struct cell *const cj = t->cj; /* Link sort tasks together. */ - if (t->type == task_type_sort && t->ci->split) + if (t->type == task_type_sort && ci->split) for (int j = 0; j < 8; j++) - if (t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts != NULL) { - t->ci->progeny[j]->sorts->skip = 0; - scheduler_addunlock(sched, t->ci->progeny[j]->sorts, t); + if (ci->progeny[j] != NULL && ci->progeny[j]->sorts != NULL) { + scheduler_addunlock(sched, ci->progeny[j]->sorts, t); } - /* Link density tasks to cells. */ + /* Link self tasks to cells. */ if (t->type == task_type_self) { - atomic_inc(&t->ci->nr_tasks); + atomic_inc(&ci->nr_tasks); if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); + engine_addlink(e, &ci->density, t); + atomic_inc(&ci->nr_density); } + if (t->subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + } + if (t->subtype == task_subtype_external_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + } + + /* Link pair tasks to cells. */ } else if (t->type == task_type_pair) { - atomic_inc(&t->ci->nr_tasks); - atomic_inc(&t->cj->nr_tasks); + atomic_inc(&ci->nr_tasks); + atomic_inc(&cj->nr_tasks); if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); - engine_addlink(e, &t->cj->density, t); - atomic_inc(&t->cj->nr_density); + engine_addlink(e, &ci->density, t); + atomic_inc(&ci->nr_density); + engine_addlink(e, &cj->density, t); + atomic_inc(&cj->nr_density); } + if (t->subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + engine_addlink(e, &cj->grav, t); + atomic_inc(&cj->nr_grav); + } + + /* Link sub-self tasks to cells. */ } else if (t->type == task_type_sub_self) { - atomic_inc(&t->ci->nr_tasks); + atomic_inc(&ci->nr_tasks); if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); + engine_addlink(e, &ci->density, t); + atomic_inc(&ci->nr_density); + } + if (t->subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); } + if (t->subtype == task_subtype_external_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + } + + /* Link sub-pair tasks to cells. */ } else if (t->type == task_type_sub_pair) { - atomic_inc(&t->ci->nr_tasks); - atomic_inc(&t->cj->nr_tasks); + atomic_inc(&ci->nr_tasks); + atomic_inc(&cj->nr_tasks); if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); - engine_addlink(e, &t->cj->density, t); - atomic_inc(&t->cj->nr_density); + engine_addlink(e, &ci->density, t); + atomic_inc(&ci->nr_density); + engine_addlink(e, &cj->density, t); + atomic_inc(&cj->nr_density); + } + if (t->subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + engine_addlink(e, &cj->grav, t); + atomic_inc(&cj->nr_grav); + } + if (t->subtype == task_subtype_external_grav) { + error("Found a sub-pair/external-gravity task..."); + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + engine_addlink(e, &cj->grav, t); + atomic_inc(&cj->nr_grav); } } } @@ -1449,9 +1492,6 @@ void engine_link_gravity_tasks(struct engine *e) { /* Get a pointer to the task. */ struct task *t = &sched->tasks[k]; - /* Skip? */ - if (t->skip) continue; - /* Multipole construction */ if (t->type == task_type_grav_up) { scheduler_addunlock(sched, t, gather); @@ -1599,9 +1639,6 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { for (int ind = 0; ind < nr_tasks; ind++) { struct task *t = &sched->tasks[ind]; - /* Skip? */ - if (t->skip) continue; - /* Self-interaction? */ if (t->type == task_type_self && t->subtype == task_subtype_density) { @@ -1874,14 +1911,16 @@ void engine_maketasks(struct engine *e) { /* Split the tasks. */ scheduler_splittasks(sched); - /* Allocate the list of cell-task links. The maximum number of links - is the number of cells (s->tot_cells) times the number of neighbours (27) - times the number of interaction types (2, density and force). */ + /* Allocate the list of cell-task links. The maximum number of links is the + * number of cells (s->tot_cells) times the number of neighbours (26) times + * the number of interaction types, so 26 * 3 (density, force, grav) pairs + * and 4 (density, force, grav, ext_grav) self. + */ if (e->links != NULL) free(e->links); #ifdef EXTRA_HYDRO_LOOP - e->size_links = s->tot_cells * 27 * 3; + e->size_links = s->tot_cells * (26 * 4 + 4); #else - e->size_links = s->tot_cells * 27 * 2; + e->size_links = s->tot_cells * (26 * 3 + 4); #endif if ((e->links = malloc(sizeof(struct link) * e->size_links)) == NULL) error("Failed to allocate cell-task links."); @@ -1956,7 +1995,7 @@ void engine_maketasks(struct engine *e) { } /** - * @brief Mark tasks to be skipped and set the sort flags accordingly. + * @brief Mark tasks to be un-skipped and set the sort flags accordingly. * Threadpool mapper function for fixdt version. * * @param map_data pointer to the tasks @@ -1967,11 +2006,15 @@ void engine_marktasks_fixdt_mapper(void *map_data, int num_elements, void *extra_data) { /* Unpack the arguments. */ struct task *tasks = (struct task *)map_data; - int *rebuild_space = (int *)extra_data; + size_t *rebuild_space = &((size_t *)extra_data)[0]; + struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[1]); for (int ind = 0; ind < num_elements; ind++) { struct task *t = &tasks[ind]; + /* All tasks are unskipped (we skip by default). */ + scheduler_activate(s, t); + /* Pair? */ if (t->type == task_type_pair || t->type == task_type_sub_pair) { @@ -1998,28 +2041,7 @@ void engine_marktasks_fixdt_mapper(void *map_data, int num_elements, } /** - * @brief Mark any sort tasks as initially skipped. - * Threadpool mapper function. - * - * @param map_data pointer to the tasks - * @param num_elements number of tasks - * @param extra_data unused - */ -void engine_marktasks_sorts_mapper(void *map_data, int num_elements, - void *extra_data) { - /* Unpack the arguments. */ - struct task *tasks = (struct task *)map_data; - for (int ind = 0; ind < num_elements; ind++) { - struct task *t = &tasks[ind]; - if (t->type == task_type_sort) { - t->flags = 0; - t->skip = 1; - } - } -} - -/** - * @brief Mark tasks to be skipped and set the sort flags accordingly. + * @brief Mark tasks to be un-skipped and set the sort flags accordingly. * Threadpool mapper function. * * @param map_data pointer to the tasks @@ -2030,18 +2052,20 @@ void engine_marktasks_mapper(void *map_data, int num_elements, void *extra_data) { /* Unpack the arguments. */ struct task *tasks = (struct task *)map_data; - const int ti_end = ((int *)extra_data)[0]; - int *rebuild_space = &((int *)extra_data)[1]; + const int ti_end = ((size_t *)extra_data)[0]; + size_t *rebuild_space = &((size_t *)extra_data)[1]; + struct scheduler *s = (struct scheduler *)(((size_t *)extra_data)[2]); for (int ind = 0; ind < num_elements; ind++) { struct task *t = &tasks[ind]; /* Single-cell task? */ if (t->type == task_type_self || t->type == task_type_ghost || - t->type == task_type_sub_self) { + t->type == task_type_extra_ghost || t->type == task_type_cooling || + t->type == task_type_sourceterms || t->type == task_type_sub_self) { /* Set this task's skip. */ - t->skip = (t->ci->ti_end_min > ti_end); + if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); } /* Pair? */ @@ -2058,19 +2082,24 @@ void engine_marktasks_mapper(void *map_data, int num_elements, cj->dx_max > space_maxreldx * cj->h_max)) *rebuild_space = 1; - /* Set this task's skip. */ - if ((t->skip = (ci->ti_end_min > ti_end && cj->ti_end_min > ti_end)) == 1) + /* Set this task's skip, otherwise nothing to do. */ + if (ci->ti_end_min <= ti_end || cj->ti_end_min <= ti_end) + scheduler_activate(s, t); + else continue; + /* If this is not a density task, we don't have to do any of the below. */ + if (t->subtype != task_subtype_density) continue; + /* Set the sort flags. */ - if (t->type == task_type_pair && t->subtype != task_subtype_grav) { + if (t->type == task_type_pair) { if (!(ci->sorted & (1 << t->flags))) { atomic_or(&ci->sorts->flags, (1 << t->flags)); - ci->sorts->skip = 0; + scheduler_activate(s, ci->sorts); } if (!(cj->sorted & (1 << t->flags))) { atomic_or(&cj->sorts->flags, (1 << t->flags)); - cj->sorts->skip = 0; + scheduler_activate(s, cj->sorts); } } @@ -2080,9 +2109,9 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (ci->nodeID != engine_rank) { /* Activate the tasks to recv foreign cell ci's data. */ - ci->recv_xv->skip = 0; - ci->recv_rho->skip = 0; - ci->recv_ti->skip = 0; + scheduler_activate(s, ci->recv_xv); + scheduler_activate(s, ci->recv_rho); + scheduler_activate(s, ci->recv_ti); /* Look for the local cell cj's send tasks. */ struct link *l = NULL; @@ -2090,45 +2119,46 @@ void engine_marktasks_mapper(void *map_data, int num_elements, l = l->next) ; if (l == NULL) error("Missing link to send_xv task."); - l->t->skip = 0; + scheduler_activate(s, l->t); for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; l = l->next) ; if (l == NULL) error("Missing link to send_rho task."); - l->t->skip = 0; + scheduler_activate(s, l->t); for (l = cj->send_ti; l != NULL && l->t->cj->nodeID != ci->nodeID; l = l->next) ; if (l == NULL) error("Missing link to send_ti task."); - l->t->skip = 0; + scheduler_activate(s, l->t); } else if (cj->nodeID != engine_rank) { /* Activate the tasks to recv foreign cell cj's data. */ - cj->recv_xv->skip = 0; - cj->recv_rho->skip = 0; - cj->recv_ti->skip = 0; + scheduler_activate(s, cj->recv_xv); + scheduler_activate(s, cj->recv_rho); + scheduler_activate(s, cj->recv_ti); + /* Look for the local cell ci's send tasks. */ struct link *l = NULL; for (l = ci->send_xv; l != NULL && l->t->cj->nodeID != cj->nodeID; l = l->next) ; if (l == NULL) error("Missing link to send_xv task."); - l->t->skip = 0; + scheduler_activate(s, l->t); for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; l = l->next) ; if (l == NULL) error("Missing link to send_rho task."); - l->t->skip = 0; + scheduler_activate(s, l->t); for (l = ci->send_ti; l != NULL && l->t->cj->nodeID != cj->nodeID; l = l->next) ; if (l == NULL) error("Missing link to send_ti task."); - l->t->skip = 0; + scheduler_activate(s, l->t); } #endif @@ -2136,25 +2166,26 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* Kick? */ else if (t->type == task_type_kick) { - t->skip = (t->ci->ti_end_min > ti_end); t->ci->updated = 0; t->ci->g_updated = 0; + if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); } /* Init? */ else if (t->type == task_type_init) { - /* Set this task's skip. */ - t->skip = (t->ci->ti_end_min > ti_end); + if (t->ci->ti_end_min <= ti_end) scheduler_activate(s, t); } - /* None? */ - else if (t->type == task_type_none) - t->skip = 1; + /* Tasks with no cells should not be skipped? */ + else if (t->type == task_type_grav_gather_m || + t->type == task_type_grav_fft) { + scheduler_activate(s, t); + } } } /** - * @brief Mark tasks to be skipped and set the sort flags accordingly. + * @brief Mark tasks to be un-skipped and set the sort flags accordingly. * * @return 1 if the space has to be rebuilt, 0 otherwise. */ @@ -2168,31 +2199,16 @@ int engine_marktasks(struct engine *e) { if (e->policy & engine_policy_fixdt) { /* Run through the tasks and mark as skip or not. */ + size_t extra_data[2] = {rebuild_space, (size_t)&e->sched}; threadpool_map(&e->threadpool, engine_marktasks_fixdt_mapper, s->tasks, - s->nr_tasks, sizeof(struct task), 1000, &rebuild_space); + s->nr_tasks, sizeof(struct task), 1000, extra_data); return rebuild_space; /* Multiple-timestep case */ } else { /* Run through the tasks and mark as skip or not. */ - int extra_data[2] = {e->ti_current, rebuild_space}; - threadpool_map(&e->threadpool, engine_marktasks_sorts_mapper, s->tasks, - s->nr_tasks, sizeof(struct task), 10000, NULL); - -#ifdef WITH_MPI - if (e->policy & engine_policy_mpi) { - - /* Skip all sends and recvs, we will unmark if needed. */ - for (int k = 0; k < s->nr_tasks; k++) { - struct task *t = &s->tasks[k]; - if (t->type == task_type_send || t->type == task_type_recv) { - t->skip = 1; - } - } - } -#endif - + size_t extra_data[3] = {e->ti_current, rebuild_space, (size_t)&e->sched}; threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks, s->nr_tasks, sizeof(struct task), 10000, extra_data); rebuild_space = extra_data[1]; @@ -2213,16 +2229,21 @@ int engine_marktasks(struct engine *e) { */ void engine_print_task_counts(struct engine *e) { - struct scheduler *sched = &e->sched; + const ticks tic = getticks(); + struct scheduler *const sched = &e->sched; + const int nr_tasks = sched->nr_tasks; + const struct task *const tasks = sched->tasks; /* Count and print the number of each task type. */ int counts[task_type_count + 1]; for (int k = 0; k <= task_type_count; k++) counts[k] = 0; - for (int k = 0; k < sched->nr_tasks; k++) - if (!sched->tasks[k].skip) - counts[(int)sched->tasks[k].type] += 1; - else + for (int k = 0; k < nr_tasks; k++) { + if (tasks[k].skip) counts[task_type_count] += 1; + else + counts[(int)tasks[k].type] += 1; + } + message("Total = %d", nr_tasks); #ifdef WITH_MPI printf("[%04i] %s engine_print_task_counts: task counts are [ %s=%i", e->nodeID, clocks_get_timesincestart(), taskID_names[0], counts[0]); @@ -2236,6 +2257,10 @@ void engine_print_task_counts(struct engine *e) { fflush(stdout); message("nr_parts = %zu.", e->s->nr_parts); message("nr_gparts = %zu.", e->s->nr_gparts); + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); } /** @@ -2288,7 +2313,7 @@ void engine_prepare(struct engine *e, int nodrift) { TIMER_TIC; /* Run through the tasks and mark as skip or not. */ - int rebuild = (e->forcerebuild || engine_marktasks(e)); + int rebuild = e->forcerebuild; /* Collect the values of rebuild from all nodes. */ #ifdef WITH_MPI @@ -2398,6 +2423,10 @@ void engine_collect_kick(struct cell *c) { ti_end_min = min(ti_end_min, cp->ti_end_min); updated += cp->updated; g_updated += cp->g_updated; + + /* Collected, so clear for next time. */ + cp->updated = 0; + cp->g_updated = 0; } } } @@ -2433,6 +2462,10 @@ void engine_collect_timestep(struct engine *e) { ti_end_min = min(ti_end_min, c->ti_end_min); updates += c->updated; g_updates += c->g_updated; + + /* Collected, so clear for next time. */ + c->updated = 0; + c->g_updated = 0; } /* Aggregate the data from the different nodes. */ @@ -2475,80 +2508,28 @@ void engine_collect_timestep(struct engine *e) { void engine_print_stats(struct engine *e) { const ticks tic = getticks(); - const struct space *s = e->s; - double e_kin = 0.0, e_int = 0.0, e_pot_self = 0.0, e_pot_ext = 0.0 ,e_rad = 0.0; - double entropy = 0.0, mass = 0.0; - double mom[3] = {0.0, 0.0, 0.0}, ang_mom[3] = {0.0, 0.0, 0.0}; + struct statistics stats; + stats_init(&stats); - /* Collect the cell data. */ - for (int k = 0; k < s->nr_cells; k++) - if (s->cells_top[k].nodeID == e->nodeID) { - struct cell *c = &s->cells_top[k]; - mass += c->mass; - e_kin += c->e_kin; - e_int += c->e_int; - e_pot_self += c->e_pot_self; - e_pot_ext += c->e_pot_ext; - e_rad += c->e_rad; - entropy += c->entropy; - mom[0] += c->mom[0]; - mom[1] += c->mom[1]; - mom[2] += c->mom[2]; - ang_mom[0] += c->ang_mom[0]; - ang_mom[1] += c->ang_mom[1]; - ang_mom[2] += c->ang_mom[2]; - } + /* Collect the stats on this node */ + stats_collect(e->s, &stats); /* Aggregate the data from the different nodes. */ #ifdef WITH_MPI - { - double in[13] = {0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.}; - double out[13]; - out[0] = e_kin; - out[1] = e_int; - out[2] = e_pot_self; - out[3] = e_pot_ext; - out[4] = e_rad; - out[5] = mom[0]; - out[6] = mom[1]; - out[7] = mom[2]; - out[8] = ang_mom[0]; - out[9] = ang_mom[1]; - out[10] = ang_mom[2]; - out[11] = mass; - out[12] = entropy; - if (MPI_Reduce(out, in, 11, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD) != - MPI_SUCCESS) - error("Failed to aggregate stats."); - e_kin = out[0]; - e_int = out[1]; - e_pot_self = out[2]; - e_pot_ext = out[3]; - e_rad = out[4]; - mom[0] = out[5]; - mom[1] = out[6]; - mom[2] = out[7]; - ang_mom[0] = out[8]; - ang_mom[1] = out[9]; - ang_mom[2] = out[10]; - mass = out[11]; - entropy = out[12]; - } -#endif + struct statistics global_stats; + stats_init(&global_stats); - const double e_pot = e_pot_self + e_pot_ext; - const double e_tot = e_kin + e_int + e_pot; + if (MPI_Reduce(&stats, &global_stats, 1, statistics_mpi_type, + statistics_mpi_reduce_op, 0, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to aggregate stats."); +#else + struct statistics global_stats = stats; +#endif /* Print info */ - if (e->nodeID == 0) { - fprintf(e->file_stats, - " %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e " - "%14e %14e %14e\n", - e->time, mass, e_tot, e_kin, e_int, e_pot, e_pot_self, e_pot_ext, e_rad, entropy, mom[0], - mom[1], mom[2], ang_mom[0], ang_mom[1], ang_mom[2]); - fflush(e->file_stats); - } + if (e->nodeID == 0) + stats_print_to_file(e->file_stats, &global_stats, e->time); if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -2759,13 +2740,7 @@ void engine_step(struct engine *e) { fflush(e->file_timesteps); } - /* Save some statistics */ - if (e->time - e->timeLastStatistics >= e->deltaTimeStatistics) { - engine_print_stats(e); - e->timeLastStatistics += e->deltaTimeStatistics; - } - - /* Drift only the necessary particles, that all means all particles + /* Drift only the necessary particles, that means all particles * if we are about to repartition. */ int repart = (e->forcerepart != REPART_NONE); e->drift_all = repart || e->drift_all; @@ -2861,6 +2836,12 @@ void engine_step(struct engine *e) { engine_launch(e, e->nr_threads, mask, submask); TIMER_TOC(timer_runners); + /* Save some statistics */ + if (e->time - e->timeLastStatistics >= e->deltaTimeStatistics) { + engine_print_stats(e); + e->timeLastStatistics += e->deltaTimeStatistics; + } + TIMER_TOC2(timer_step); clocks_gettime(&time2); @@ -2886,9 +2867,10 @@ void engine_drift(struct engine *e) { const ticks tic = getticks(); threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 1, e); + if (e->verbose) - message("took %.3f %s.", clocks_from_ticks(getticks() - tic), - clocks_getunit()); + message("took %.3f %s (including task unskipping).", + clocks_from_ticks(getticks() - tic), clocks_getunit()); } /** @@ -3516,6 +3498,7 @@ void engine_init(struct engine *e, struct space *s, /* Construct types for MPI communications */ #ifdef WITH_MPI part_create_mpi_types(); + stats_create_MPI_type(); #endif /* Initialize the threadpool. */ diff --git a/src/potential/disc_patch/potential.h b/src/potential/disc_patch/potential.h index 3a2205c3bdb33d0e43badca8eb35175d3477871a..725a7a7634cc34291d6c9d9fdf29acf8c5a035af 100644 --- a/src/potential/disc_patch/potential.h +++ b/src/potential/disc_patch/potential.h @@ -163,7 +163,7 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration( __attribute__((always_inline)) INLINE static float external_gravity_get_potential_energy( const struct external_potential* potential, - const struct phys_const* const phys_const, struct part* p) { + const struct phys_const* const phys_const, const struct part* p) { return 0.; } diff --git a/src/potential/isothermal/potential.h b/src/potential/isothermal/potential.h index 6ac51e3e77e3cd8151adfc6b30a81e14827f72ff..bb9206172d245b3bded709b4d04328523d417c47 100644 --- a/src/potential/isothermal/potential.h +++ b/src/potential/isothermal/potential.h @@ -122,16 +122,16 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration( * * @param potential The #external_potential used in the run. * @param phys_const Physical constants in internal units. - * @param p Pointer to the particle data. + * @param g Pointer to the particle data. */ __attribute__((always_inline)) INLINE static float external_gravity_get_potential_energy( const struct external_potential* potential, - const struct phys_const* const phys_const, struct part* p) { + const struct phys_const* const phys_const, const struct gpart* g) { - const float dx = p->x[0] - potential->x; - const float dy = p->x[1] - potential->y; - const float dz = p->x[2] - potential->z; + const float dx = g->x[0] - potential->x; + const float dy = g->x[1] - potential->y; + const float dz = g->x[2] - potential->z; return potential->vrot * potential->vrot * 0.5 * log(dx*dx + dy*dy * dz*dz); } diff --git a/src/potential/none/potential.h b/src/potential/none/potential.h index 3f075f8fdc9b10e107bb4663b26d10e37119c9bb..fb3b27636cd9840f6b0b8fe1b7c6a9e20e851e82 100644 --- a/src/potential/none/potential.h +++ b/src/potential/none/potential.h @@ -68,6 +68,21 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration( double time, const struct external_potential* restrict potential, const struct phys_const* restrict phys_const, struct gpart* restrict g) {} +/** + * @brief Computes the gravitational potential energy due to nothing. + * + * @param potential The #external_potential used in the run. + * @param phys_const Physical constants in internal units. + * @param g Pointer to the particle data. + */ + + __attribute__((always_inline)) INLINE static float external_gravity_get_potential_energy( + const struct external_potential* potential, + const struct phys_const* const phys_const, const struct part* g) { + + return 0.; + } + /** * @brief Initialises the external potential properties in the internal system * of units. diff --git a/src/potential/point_mass/potential.h b/src/potential/point_mass/potential.h index eafb27f35a56900cf814c218f370a6821ef07230..27ed2bcd4be1d2d85800a9e296e327755e90dc4c 100644 --- a/src/potential/point_mass/potential.h +++ b/src/potential/point_mass/potential.h @@ -118,20 +118,20 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration( /** - * @brief Computes the gravitational potential energy of a particle in an isothermal potential. + * @brief Computes the gravitational potential energy of a particle in a point mass potential. * * @param potential The #external_potential used in the run. * @param phys_const Physical constants in internal units. - * @param p Pointer to the particle data. + * @param g Pointer to the particle data. */ __attribute__((always_inline)) INLINE static float external_gravity_get_potential_energy( const struct external_potential* potential, - const struct phys_const* const phys_const, struct part* p) { + const struct phys_const* const phys_const, const struct part* g) { - const float dx = p->x[0] - potential->x; - const float dy = p->x[1] - potential->y; - const float dz = p->x[2] - potential->z; + const float dx = g->x[0] - potential->x; + const float dy = g->x[1] - potential->y; + const float dz = g->x[2] - potential->z; const float rinv = 1./sqrtf(dx * dx + dy * dy + dz * dz); return -phys_const->const_newton_G * potential->mass * r_inv; } diff --git a/src/potential/softened_isothermal/potential.h b/src/potential/softened_isothermal/potential.h index 639c7530d2bee87567dcd35527f8bb0965da0295..9533986f3b6bb6e98f4aba40ae09065e967d71cb 100644 --- a/src/potential/softened_isothermal/potential.h +++ b/src/potential/softened_isothermal/potential.h @@ -125,16 +125,16 @@ __attribute__((always_inline)) INLINE static void external_gravity_acceleration( * * @param potential The #external_potential used in the run. * @param phys_const Physical constants in internal units. - * @param p Pointer to the particle data. + * @param g Pointer to the particle data. */ __attribute__((always_inline)) INLINE static float external_gravity_get_potential_energy( const struct external_potential* potential, - const struct phys_const* const phys_const, struct part* p) { + const struct phys_const* const phys_const, const struct part* g) { - const float dx = p->x[0] - potential->x; - const float dy = p->x[1] - potential->y; - const float dz = p->x[2] - potential->z; + const float dx = g->x[0] - potential->x; + const float dy = g->x[1] - potential->y; + const float dz = g->x[2] - potential->z; return potential->vrot * potential->vrot * 0.5 * log(dx*dx + dy*dy * dz*dz + potential->epsilon2); } diff --git a/src/runner.c b/src/runner.c index 132bbd7feba6e7a708331a81a35855f9ace56970..2f9ca45d6d7d2d6ac01668acdc0ec74010800402 100644 --- a/src/runner.c +++ b/src/runner.c @@ -748,114 +748,99 @@ void runner_do_ghost(struct runner *r, struct cell *c) { } /** - * @brief Drift particles and g-particles in a cell forward in time + * @brief Drift particles and g-particles in a cell forward in time, + * unskipping any tasks associated with active cells. * * @param c The cell. * @param e The engine. + * @param drift whether to actually drift the particles, will not be + * necessary for non-local cells. */ -static void runner_do_drift(struct cell *c, struct engine *e) { +static void runner_do_drift(struct cell *c, struct engine *e, int drift) { + + const int ti_current = e->ti_current; + + /* Unskip any active tasks. */ + if (c->ti_end_min == e->ti_current) { + const int forcerebuild = cell_unskip_tasks(c, &e->sched); + if (forcerebuild) atomic_inc(&e->forcerebuild); + } + + /* Do we really need to drift? */ + if (drift) { + if (!e->drift_all && !cell_is_drift_needed(c, ti_current)) return; + } else { + + /* Not drifting, but may still need to recurse for task skipping. */ + if (c->split) { + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + struct cell *cp = c->progeny[k]; + runner_do_drift(cp, e, 0); + } + } + } + return; + } const double timeBase = e->timeBase; const int ti_old = c->ti_old; - const int ti_current = e->ti_current; struct part *const parts = c->parts; struct xpart *const xparts = c->xparts; struct gpart *const gparts = c->gparts; - const struct phys_const *constants = e->physical_constants; - const struct external_potential *potential = e->external_potential; - - /* Do we need to drift ? */ - if (!e->drift_all && !cell_is_drift_needed(c, ti_current)) return; - - /* Check that we are actually going to move forward. */ - if (ti_current == ti_old) return; /* Drift from the last time the cell was drifted to the current time */ const double dt = (ti_current - ti_old) * timeBase; - float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f; - double e_kin = 0.0, e_int = 0.0, e_pot_self = 0.0, e_pot_ext = 0.0, e_rad = 0.0; - double entropy = 0.0, mass = 0.0; - double mom[3] = {0.0, 0.0, 0.0}; - double ang_mom[3] = {0.0, 0.0, 0.0}; /* No children? */ if (!c->split) { - /* Loop over all the g-particles in the cell */ - const size_t nr_gparts = c->gcount; - for (size_t k = 0; k < nr_gparts; k++) { + /* Check that we are actually going to move forward. */ + if (ti_current >= ti_old) { - /* Get a handle on the gpart. */ - struct gpart *const gp = &gparts[k]; + /* Loop over all the g-particles in the cell */ + const size_t nr_gparts = c->gcount; + for (size_t k = 0; k < nr_gparts; k++) { - /* Drift... */ - drift_gpart(gp, dt, timeBase, ti_old, ti_current); + /* Get a handle on the gpart. */ + struct gpart *const gp = &gparts[k]; - /* Compute (square of) motion since last cell construction */ - const float dx2 = gp->x_diff[0] * gp->x_diff[0] + - gp->x_diff[1] * gp->x_diff[1] + - gp->x_diff[2] * gp->x_diff[2]; - dx2_max = (dx2_max > dx2) ? dx2_max : dx2; - } + /* Drift... */ + drift_gpart(gp, dt, timeBase, ti_old, ti_current); - /* Loop over all the particles in the cell (more work for these !) */ - const size_t nr_parts = c->count; - for (size_t k = 0; k < nr_parts; k++) { + /* Compute (square of) motion since last cell construction */ + const float dx2 = gp->x_diff[0] * gp->x_diff[0] + + gp->x_diff[1] * gp->x_diff[1] + + gp->x_diff[2] * gp->x_diff[2]; + dx2_max = (dx2_max > dx2) ? dx2_max : dx2; + } - /* Get a handle on the part. */ - struct part *const p = &parts[k]; - struct xpart *const xp = &xparts[k]; - - /* Drift... */ - drift_part(p, xp, dt, timeBase, ti_old, ti_current); - - /* Compute (square of) motion since last cell construction */ - const float dx2 = xp->x_diff[0] * xp->x_diff[0] + - xp->x_diff[1] * xp->x_diff[1] + - xp->x_diff[2] * xp->x_diff[2]; - dx2_max = (dx2_max > dx2) ? dx2_max : dx2; - - /* Maximal smoothing length */ - h_max = (h_max > p->h) ? h_max : p->h; - - /* Now collect quantities for statistics */ - - const float half_dt = - (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase; - const double x[3] = {p->x[0], p->x[1], p->x[2]}; - const float v[3] = {xp->v_full[0] + p->a_hydro[0] * half_dt, - xp->v_full[1] + p->a_hydro[1] * half_dt, - xp->v_full[2] + p->a_hydro[2] * half_dt}; - - const float m = hydro_get_mass(p); - - /* Collect mass */ - mass += m; - - /* Collect momentum */ - mom[0] += m * v[0]; - mom[1] += m * v[1]; - mom[2] += m * v[2]; - - /* Collect angular momentum */ - ang_mom[0] += m * (x[1] * v[2] - x[2] * v[1]); - ang_mom[1] += m * (x[2] * v[0] - x[0] * v[2]); - ang_mom[2] += m * (x[0] * v[1] - x[1] * v[0]); - - /* Collect energies. */ - e_kin += 0.5 * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); - e_pot_self += 0.; - e_pot_ext += m * external_gravity_get_potential_energy(potential,constants,p); - e_int += m * hydro_get_internal_energy(p, half_dt); - e_rad += cooling_get_radiated_energy(xp); - - /* Collect entropy */ - entropy += m * hydro_get_entropy(p, half_dt); - } + /* Loop over all the particles in the cell */ + const size_t nr_parts = c->count; + for (size_t k = 0; k < nr_parts; k++) { + + /* Get a handle on the part. */ + struct part *const p = &parts[k]; + struct xpart *const xp = &xparts[k]; + + /* Drift... */ + drift_part(p, xp, dt, timeBase, ti_old, ti_current); + + /* Compute (square of) motion since last cell construction */ + const float dx2 = xp->x_diff[0] * xp->x_diff[0] + + xp->x_diff[1] * xp->x_diff[1] + + xp->x_diff[2] * xp->x_diff[2]; + dx2_max = (dx2_max > dx2) ? dx2_max : dx2; + + /* Maximal smoothing length */ + h_max = (h_max > p->h) ? h_max : p->h; + } + + /* Now, get the maximal particle motion from its square */ + dx_max = sqrtf(dx2_max); - /* Now, get the maximal particle motion from its square */ - dx_max = sqrtf(dx2_max); + } /* Check that we are actually going to move forward. */ } /* Otherwise, aggregate data from children. */ @@ -867,42 +852,15 @@ static void runner_do_drift(struct cell *c, struct engine *e) { struct cell *cp = c->progeny[k]; /* Recurse. */ - runner_do_drift(cp, e); - + runner_do_drift(cp, e, drift); dx_max = max(dx_max, cp->dx_max); h_max = max(h_max, cp->h_max); - mass += cp->mass; - e_kin += cp->e_kin; - e_int += cp->e_int; - e_pot_self += cp->e_pot_self; - e_pot_ext += cp->e_pot_ext; - e_rad += cp->e_rad; - entropy += cp->entropy; - mom[0] += cp->mom[0]; - mom[1] += cp->mom[1]; - mom[2] += cp->mom[2]; - ang_mom[0] += cp->ang_mom[0]; - ang_mom[1] += cp->ang_mom[1]; - ang_mom[2] += cp->ang_mom[2]; } } /* Store the values */ c->h_max = h_max; c->dx_max = dx_max; - c->mass = mass; - c->e_kin = e_kin; - c->e_int = e_int; - c->e_pot_self = e_pot_self; - c->e_pot_ext = e_pot_ext; - c->e_rad = e_rad; - c->entropy = entropy; - c->mom[0] = mom[0]; - c->mom[1] = mom[1]; - c->mom[2] = mom[2]; - c->ang_mom[0] = ang_mom[0]; - c->ang_mom[1] = ang_mom[1]; - c->ang_mom[2] = ang_mom[2]; /* Update the time of the last drift */ c->ti_old = ti_current; @@ -924,9 +882,11 @@ void runner_do_drift_mapper(void *map_data, int num_elements, for (int ind = 0; ind < num_elements; ind++) { struct cell *c = &cells[ind]; - - /* Only drift local particles. */ - if (c != NULL && c->nodeID == e->nodeID) runner_do_drift(c, e); +#ifdef WITH_MPI + if (c != NULL) runner_do_drift(c, e, (c->nodeID == e->nodeID)); +#else + if (c != NULL) runner_do_drift(c, e, 1); +#endif } } @@ -1274,6 +1234,35 @@ void *runner_main(void *data) { struct cell *cj = t->cj; t->rid = r->cpuid; +/* Check that we haven't scheduled an inactive task */ +#ifdef SWIFT_DEBUG_CHECKS + if (cj == NULL) { /* self */ + if (ci->ti_end_min > e->ti_current && t->type != task_type_sort) + error( + "Task (type='%s/%s') should have been skipped ti_current=%d " + "c->ti_end_min=%d", + taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, + ci->ti_end_min); + + /* Special case for sorts */ + if (ci->ti_end_min > e->ti_current && t->type == task_type_sort && + t->flags == 0) + error( + "Task (type='%s/%s') should have been skipped ti_current=%d " + "c->ti_end_min=%d t->flags=%d", + taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, + ci->ti_end_min, t->flags); + + } else { /* pair */ + if (ci->ti_end_min > e->ti_current && cj->ti_end_min > e->ti_current) + error( + "Task (type='%s/%s') should have been skipped ti_current=%d " + "ci->ti_end_min=%d cj->ti_end_min=%d", + taskID_names[t->type], subtaskID_names[t->subtype], e->ti_current, + ci->ti_end_min, cj->ti_end_min); + } +#endif + /* Different types of tasks... */ switch (t->type) { case task_type_self: diff --git a/src/scheduler.c b/src/scheduler.c index 76e0ecb41b9068e471caa75db95d572265bdcb22..44790fcd2fa5f67e6f325ba5849da19e35ab285a 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -50,6 +50,12 @@ #include "task.h" #include "timers.h" +/** + * @brief Re-set the list of active tasks. + */ + +void scheduler_clear_active(struct scheduler *s) { s->active_count = 0; } + /** * @brief Add an unlock_task to the given task. * @@ -697,7 +703,7 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, t->wait = wait; t->ci = ci; t->cj = cj; - t->skip = 0; + t->skip = 1; /* Mark tasks as skip by default. */ t->tight = tight; t->implicit = 0; t->weight = 0; @@ -863,6 +869,7 @@ void scheduler_reset(struct scheduler *s, int size) { /* Free existing task lists if necessary. */ if (s->tasks != NULL) free(s->tasks); if (s->tasks_ind != NULL) free(s->tasks_ind); + if (s->tid_active != NULL) free(s->tid_active); /* Allocate the new lists. */ if (posix_memalign((void *)&s->tasks, task_align, @@ -871,6 +878,9 @@ void scheduler_reset(struct scheduler *s, int size) { if ((s->tasks_ind = (int *)malloc(sizeof(int) * size)) == NULL) error("Failed to allocate task lists."); + + if ((s->tid_active = (int *)malloc(sizeof(int) * size)) == NULL) + error("Failed to allocate aactive task lists."); } /* Reset the counters. */ @@ -882,6 +892,7 @@ void scheduler_reset(struct scheduler *s, int size) { s->submask = 0; s->nr_unlocks = 0; s->completed_unlock_writes = 0; + s->active_count = 0; /* Set the task pointers in the queues. */ for (int k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks; @@ -891,11 +902,11 @@ void scheduler_reset(struct scheduler *s, int size) { * @brief Compute the task weights * * @param s The #scheduler. - * @param verbose Are we talkative ? + * @param verbose Are we talkative? */ + void scheduler_reweight(struct scheduler *s, int verbose) { - const ticks tic = getticks(); const int nr_tasks = s->nr_tasks; int *tid = s->tasks_ind; struct task *tasks = s->tasks; @@ -904,6 +915,7 @@ void scheduler_reweight(struct scheduler *s, int verbose) { 0.4025, 0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.5788}; const float wscale = 0.001; + const ticks tic = getticks(); /* Run through the tasks backwards and set their weights. */ for (int k = nr_tasks - 1; k >= 0; k--) { @@ -1033,33 +1045,84 @@ void scheduler_enqueue_mapper(void *map_data, int num_elements, void scheduler_start(struct scheduler *s, unsigned int mask, unsigned int submask) { - // ticks tic = getticks(); - /* Store the masks */ s->mask = mask; s->submask = submask | (1 << task_subtype_none); - /* Clear all the waits and rids. */ + /* Clear all the waits, rids, and times. */ for (int k = 0; k < s->nr_tasks; k++) { s->tasks[k].wait = 1; s->tasks[k].rid = -1; + s->tasks[k].tic = 0; + s->tasks[k].toc = 0; + if (((1 << s->tasks[k].type) & mask) == 0 || + ((1 << s->tasks[k].subtype) & s->submask) == 0) + s->tasks[k].skip = 1; } /* Re-wait the tasks. */ threadpool_map(s->threadpool, scheduler_rewait_mapper, s->tasks, s->nr_tasks, sizeof(struct task), 1000, s); +/* Check we have not missed an active task */ +#ifdef SWIFT_DEBUG_CHECKS + + const int ti_current = s->space->e->ti_current; + for (int k = 0; k < s->nr_tasks; k++) { + + struct task *t = &s->tasks[k]; + struct cell *ci = t->ci; + struct cell *cj = t->cj; + + if (cj == NULL) { /* self */ + + if (ci->ti_end_min == ti_current && t->skip && t->type != task_type_sort) + error( + "Task (type='%s/%s') should not have been skipped ti_current=%d " + "c->ti_end_min=%d", + taskID_names[t->type], subtaskID_names[t->subtype], ti_current, + ci->ti_end_min); + + /* Special treatment for sort tasks */ + if (ci->ti_end_min == ti_current && t->skip && + t->type == task_type_sort && t->flags == 0) + error( + "Task (type='%s/%s') should not have been skipped ti_current=%d " + "c->ti_end_min=%d t->flags=%d", + taskID_names[t->type], subtaskID_names[t->subtype], ti_current, + ci->ti_end_min, t->flags); + + } else { /* pair */ + + if ((ci->ti_end_min == ti_current || cj->ti_end_min == ti_current) && + t->skip) + error( + "Task (type='%s/%s') should not have been skipped ti_current=%d " + "ci->ti_end_min=%d cj->ti_end_min=%d", + taskID_names[t->type], subtaskID_names[t->subtype], ti_current, + ci->ti_end_min, cj->ti_end_min); + } + } + +#endif + /* Loop over the tasks and enqueue whoever is ready. */ - threadpool_map(s->threadpool, scheduler_enqueue_mapper, s->tasks_ind, - s->nr_tasks, sizeof(int), 1000, s); + for (int k = 0; k < s->active_count; k++) { + struct task *t = &s->tasks[s->tid_active[k]]; + if (atomic_dec(&t->wait) == 1 && !t->skip && ((1 << t->type) & s->mask) && + ((1 << t->subtype) & s->submask)) { + scheduler_enqueue(s, t); + pthread_cond_signal(&s->sleep_cond); + } + } + + /* Clear the list of active tasks. */ + s->active_count = 0; /* To be safe, fire of one last sleep_cond in a safe way. */ pthread_mutex_lock(&s->sleep_mutex); pthread_cond_broadcast(&s->sleep_cond); pthread_mutex_unlock(&s->sleep_mutex); - - /* message("enqueueing tasks took %.3f %s." , - clocks_from_ticks( getticks() - tic ), clocks_getunit()); */ } /** @@ -1212,6 +1275,9 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) { pthread_mutex_unlock(&s->sleep_mutex); } + /* Mark the task as skip. */ + t->skip = 1; + /* Return the next best task. Note that we currently do not implement anything that does this, as getting it to respect priorities is too tricky and currently unnecessary. */ @@ -1427,6 +1493,7 @@ void scheduler_clean(struct scheduler *s) { free(s->tasks_ind); free(s->unlocks); free(s->unlock_ind); + free(s->tid_active); for (int i = 0; i < s->nr_queues; ++i) queue_clean(&s->queues[i]); free(s->queues); } diff --git a/src/scheduler.h b/src/scheduler.h index c4eb5e99447d623e5fb8e442efc1c254c00bfadd..22c83ae3c551d4dc7e601e23f7a5301241bd1fa4 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -33,6 +33,7 @@ /* Includes. */ #include "cell.h" +#include "inline.h" #include "lock.h" #include "queue.h" #include "task.h" @@ -83,6 +84,10 @@ struct scheduler { /* The task indices. */ int *tasks_ind; + /* List of initial tasks. */ + int *tid_active; + int active_count; + /* The task unlocks. */ struct task **volatile unlocks; int *volatile unlock_ind; @@ -105,7 +110,24 @@ struct scheduler { int nodeID; }; +/* Inlined functions (for speed). */ +/** + * @brief Add a task to the list of active tasks. + * + * @param s The #scheduler. + * @param t The task to be added. + */ + +__attribute__((always_inline)) INLINE static void scheduler_activate( + struct scheduler *s, struct task *t) { + if (atomic_cas(&t->skip, 1, 0)) { + int ind = atomic_inc(&s->active_count); + s->tid_active[ind] = t - s->tasks; + } +} + /* Function prototypes. */ +void scheduler_clear_active(struct scheduler *s); void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, int nr_queues, unsigned int flags, int nodeID, struct threadpool *tp); diff --git a/src/space.c b/src/space.c index 7f6e2880304939c96397563be299158817e2dfff..d2dc6cf511a5c5d8cb9a83dc563f88826836f645 100644 --- a/src/space.c +++ b/src/space.c @@ -395,9 +395,11 @@ void space_regrid(struct space *s, double cell_max, int verbose) { s->cells_top[k].nr_density = 0; s->cells_top[k].nr_gradient = 0; s->cells_top[k].nr_force = 0; + s->cells_top[k].nr_grav = 0; s->cells_top[k].density = NULL; s->cells_top[k].gradient = NULL; s->cells_top[k].force = NULL; + s->cells_top[k].grav = NULL; s->cells_top[k].dx_max = 0.0f; s->cells_top[k].sorted = 0; s->cells_top[k].count = 0; @@ -406,7 +408,20 @@ void space_regrid(struct space *s, double cell_max, int verbose) { s->cells_top[k].extra_ghost = NULL; s->cells_top[k].ghost = NULL; s->cells_top[k].kick = NULL; + s->cells_top[k].cooling = NULL; + s->cells_top[k].sourceterms = NULL; s->cells_top[k].super = &s->cells_top[k]; +#if WITH_MPI + s->cells_top[k].recv_xv = NULL; + s->cells_top[k].recv_rho = NULL; + s->cells_top[k].recv_gradient = NULL; + s->cells_top[k].recv_ti = NULL; + + s->cells_top[k].send_xv = NULL; + s->cells_top[k].send_rho = NULL; + s->cells_top[k].send_gradient = NULL; + s->cells_top[k].send_ti = NULL; +#endif } s->maxdepth = 0; } @@ -587,8 +602,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { for (size_t k = 1; k < nr_parts; k++) { if (ind[k - 1] > ind[k]) { error("Sort failed!"); - } else if (ind[k] != cell_getid(s->cdim, - s->parts[k].x[0] * s->iwidth[0], + } else if (ind[k] != cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0], s->parts[k].x[1] * s->iwidth[1], s->parts[k].x[2] * s->iwidth[2])) { error("Incorrect indices!"); diff --git a/src/statistics.c b/src/statistics.c new file mode 100644 index 0000000000000000000000000000000000000000..96d73b0e610d616dc05aff4dec76961221db1c1f --- /dev/null +++ b/src/statistics.c @@ -0,0 +1,331 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <string.h> + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* This object's header. */ +#include "statistics.h" + +/* Local headers. */ +#include "cooling.h" +#include "engine.h" +#include "error.h" +#include "hydro.h" +#include "threadpool.h" + +/** + * @brief Information required to compute the statistics in the mapper + */ +struct index_data { + /*! The space we play with */ + const struct space *s; + + /*! The #statistics aggregator to fill */ + struct statistics *stats; +}; + +/** + * @brief Adds the content of one #statistics aggregator to another one. + * + * Performs a += b; + * + * @param a The #statistics structure to update. + * @param b The #statistics structure to add to a. + */ +void stats_add(struct statistics *a, const struct statistics *b) { + + /* Add everything */ + a->E_kin += b->E_kin; + a->E_int += b->E_int; + a->E_pot_self += b->E_pot_self; + a->E_pot_ext += b->E_pot_ext; + a->E_rad += b->E_rad; + a->entropy += b->entropy; + a->mass += b->mass; + a->mom[0] += b->mom[0]; + a->mom[1] += b->mom[1]; + a->mom[2] += b->mom[2]; + a->ang_mom[0] += b->ang_mom[0]; + a->ang_mom[1] += b->ang_mom[1]; + a->ang_mom[2] += b->ang_mom[2]; +} + +/** + * @brief Initialises a statistics aggregator to a valid state. + * + * @param s The #statistics aggregator to initialise + */ +void stats_init(struct statistics *s) { + + /* Zero everything */ + bzero(s, sizeof(struct statistics)); + + /* Set the lock */ + lock_init(&s->lock); +} + +/** + * @brief The #threadpool mapper function used to collect statistics for #part. + * + * @param map_data Pointer to the particles. + * @param nr_parts The number of particles in this chunk + * @param extra_data The #statistics aggregator. + */ +void stats_collect_part_mapper(void *map_data, int nr_parts, void *extra_data) { + + /* Unpack the data */ + const struct index_data *data = (struct index_data *)extra_data; + const struct space *s = data->s; + const struct part *restrict parts = (struct part *)map_data; + const struct xpart *restrict xparts = + s->xparts + (ptrdiff_t)(parts - s->parts); + const int ti_current = s->e->ti_current; + const double timeBase = s->e->timeBase; + struct statistics *const global_stats = data->stats; + + /* Required for external potential energy */ + const struct external_potential *potential = s->e->external_potential; + const struct phys_const *phys_const = s->e->physical_constants; + + /* Local accumulator */ + struct statistics stats; + stats_init(&stats); + + /* Loop over particles */ + for (int k = 0; k < nr_parts; k++) { + + /* Get the particle */ + const struct part *p = &parts[k]; + const struct xpart *xp = &xparts[k]; + struct gpart *gp = NULL; + if (p->gpart != NULL) + gp = p->gpart; + + /* Get useful variables */ + const float dt = (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase; + const double x[3] = {p->x[0], p->x[1], p->x[2]}; + float a_tot[3] = {p->a_hydro[0], p->a_hydro[1], p->a_hydro[2]}; + if (p->gpart != NULL) { + a_tot[0] += p->gpart->a_grav[0]; + a_tot[1] += p->gpart->a_grav[1]; + a_tot[2] += p->gpart->a_grav[2]; + } + const float v[3] = {xp->v_full[0] + a_tot[0] * dt, + xp->v_full[1] + a_tot[1] * dt, + xp->v_full[2] + a_tot[2] * dt}; + + const float m = hydro_get_mass(p); + + /* Collect mass */ + stats.mass += m; + + /* Collect momentum */ + stats.mom[0] += m * v[0]; + stats.mom[1] += m * v[1]; + stats.mom[2] += m * v[2]; + + /* Collect angular momentum */ + stats.ang_mom[0] += m * (x[1] * v[2] - x[2] * v[1]); + stats.ang_mom[1] += m * (x[2] * v[0] - x[0] * v[2]); + stats.ang_mom[2] += m * (x[0] * v[1] - x[1] * v[0]); + + /* Collect energies. */ + stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); + stats.E_pot_self += 0.; + stats.E_pot_ext += m * external_gravity_get_potential_energy(potential,phys_const,gp); + stats.E_int += m * hydro_get_internal_energy(p, dt); + stats.E_rad += cooling_get_radiated_energy(xp); + + /* Collect entropy */ + stats.entropy += m * hydro_get_entropy(p, dt); + } + + /* Now write back to memory */ + if (lock_lock(&global_stats->lock) == 0) stats_add(global_stats, &stats); + if (lock_unlock(&global_stats->lock) != 0) error("Failed to unlock stats."); +} + +/** + * @brief The #threadpool mapper function used to collect statistics for #gpart. + * + * @param map_data Pointer to the g-particles. + * @param nr_gparts The number of g-particles in this chunk + * @param extra_data The #statistics aggregator. + */ +void stats_collect_gpart_mapper(void *map_data, int nr_gparts, + void *extra_data) { + + /* Unpack the data */ + const struct index_data *data = (struct index_data *)extra_data; + const struct space *s = data->s; + const struct gpart *restrict gparts = (struct gpart *)map_data; + const int ti_current = s->e->ti_current; + const double timeBase = s->e->timeBase; + struct statistics *const global_stats = data->stats; + + /* Required for external potential energy */ + const struct external_potential *potential = s->e->external_potential; + const struct phys_const *phys_const = s->e->physical_constants; + + /* Local accumulator */ + struct statistics stats; + stats_init(&stats); + + /* Loop over particles */ + for (int k = 0; k < nr_gparts; k++) { + + /* Get the particle */ + const struct gpart *gp = &gparts[k]; + + /* If the g-particle has a counterpart, ignore it */ + if (gp->id_or_neg_offset < 0) continue; + + /* Get useful variables */ + const float dt = (ti_current - (gp->ti_begin + gp->ti_end) / 2) * timeBase; + const double x[3] = {gp->x[0], gp->x[1], gp->x[2]}; + const float v[3] = {gp->v_full[0] + gp->a_grav[0] * dt, + gp->v_full[1] + gp->a_grav[1] * dt, + gp->v_full[2] + gp->a_grav[2] * dt}; + + const float m = gp->mass; + + /* Collect mass */ + stats.mass += m; + + /* Collect momentum */ + stats.mom[0] += m * v[0]; + stats.mom[1] += m * v[1]; + stats.mom[2] += m * v[2]; + + /* Collect angular momentum */ + stats.ang_mom[0] += m * (x[1] * v[2] - x[2] * v[1]); + stats.ang_mom[1] += m * (x[2] * v[0] - x[0] * v[2]); + stats.ang_mom[2] += m * (x[0] * v[1] - x[1] * v[0]); + + /* Collect energies. */ + stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); + stats.E_pot_self += 0.; + stats.E_pot_ext += m * external_gravity_get_potential_energy(potential,phys_const,gp); + } + + /* Now write back to memory */ + if (lock_lock(&global_stats->lock) == 0) stats_add(global_stats, &stats); + if (lock_unlock(&global_stats->lock) != 0) error("Failed to unlock stats."); +} + +/** + * @brief Collect physical statistics over all particles in a #space. + * + * @param s The #space to collect from. + * @param stats The #statistics aggregator to fill. + */ +void stats_collect(const struct space *s, struct statistics *stats) { + + /* Prepare the data */ + struct index_data extra_data; + extra_data.s = s; + extra_data.stats = stats; + + /* Run parallel collection of statistics for parts */ + if (s->nr_parts > 0) + threadpool_map(&s->e->threadpool, stats_collect_part_mapper, s->parts, + s->nr_parts, sizeof(struct part), 10000, &extra_data); + + /* Run parallel collection of statistics for gparts */ + if (s->nr_gparts > 0) + threadpool_map(&s->e->threadpool, stats_collect_gpart_mapper, s->gparts, + s->nr_gparts, sizeof(struct gpart), 10000, &extra_data); +} + +/** + * @brief Prints the content of a #statistics aggregator to a file + * + * @param file File to write to. + * @param stats The #statistics object to write to the file + * @param time The current physical time. + */ +void stats_print_to_file(FILE *file, const struct statistics *stats, + double time) { + + const double E_pot = stats->E_pot_self + stats->E_pot_ext; + const double E_tot = stats->E_kin + stats->E_int + E_pot; + + fprintf(file, + " %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e %14e " + "%14e %14e %14e\n", + time, stats->mass, E_tot, stats->E_kin, stats->E_int, E_pot, + stats->E_pot_self, stats->E_pot_ext, stats->E_rad, stats->entropy, + stats->mom[0], stats->mom[1], stats->mom[2], stats->ang_mom[0], + stats->ang_mom[1], stats->ang_mom[2]); + fflush(file); +} + +/* Extra stuff in MPI-land */ +#ifdef WITH_MPI + +/** + * @brief MPI datatype corresponding to the #statistics structure. + */ +MPI_Datatype statistics_mpi_type; + +/** + * @brief MPI operator used for the reduction of #statistics structure. + */ +MPI_Op statistics_mpi_reduce_op; + +/** + * @brief MPI reduce operator for #statistics structures. + */ +void stats_add_MPI(void *in, void *inout, int *len, MPI_Datatype *datatype) { + + for (int i = 0; i < *len; ++i) + stats_add(&((struct statistics *)inout)[0], + &((const struct statistics *)in)[i]); +} + +/** + * @brief Registers MPI #statistics type and reduction function. + */ +void stats_create_MPI_type() { + + /* This is not the recommended way of doing this. + One should define the structure field by field + But as long as we don't do serialization via MPI-IO + we don't really care. + Also we would have to modify this function everytime something + is added to the statistics structure. */ + if (MPI_Type_contiguous(sizeof(struct statistics) / sizeof(unsigned char), + MPI_BYTE, &statistics_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&statistics_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for statistics."); + } + + /* Create the reduction operation */ + MPI_Op_create(stats_add_MPI, 1, &statistics_mpi_reduce_op); +} +#endif diff --git a/src/statistics.h b/src/statistics.h new file mode 100644 index 0000000000000000000000000000000000000000..b007d1314c458254986862f99b7cb1669d995545 --- /dev/null +++ b/src/statistics.h @@ -0,0 +1,79 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_STATISTICS_H +#define SWIFT_STATISTICS_H + +/* Config parameters. */ +#include "../config.h" + +/* Local headers. */ +#include "lock.h" +#include "space.h" + +/** + * @brief Quantities collected for physics statistics + */ +struct statistics { + + /*! Kinetic energy */ + double E_kin; + + /*! Internal energy */ + double E_int; + + /*! Self potential energy */ + double E_pot_self; + + /*! External potential energy */ + double E_pot_ext; + + /*! Radiative energy */ + double E_rad; + + /*! Entropy */ + double entropy; + + /*! Mass */ + double mass; + + /*! Momentum */ + double mom[3]; + + /*! Angular momentum */ + double ang_mom[3]; + + /*! Lock for threaded access */ + swift_lock_type lock; +}; + +void stats_collect(const struct space* s, struct statistics* stats); +void stats_add(struct statistics* a, const struct statistics* b); +void stats_print_to_file(FILE* file, const struct statistics* stats, + double time); +void stats_init(struct statistics* s); + +#ifdef WITH_MPI +extern MPI_Datatype statistics_mpi_type; +extern MPI_Op statistics_mpi_reduce_op; + +void stats_add_MPI(void* in, void* out, int* len, MPI_Datatype* datatype); +void stats_create_MPI_type(); +#endif + +#endif /* SWIFT_STATISTICS_H */ diff --git a/src/threadpool.c b/src/threadpool.c index 4ef75954b39603db0d442acc9be2bd95b39614d3..6bc887d96cb72804f0fbc8e2801a6522bf27f947 100644 --- a/src/threadpool.c +++ b/src/threadpool.c @@ -59,14 +59,22 @@ void *threadpool_runner(void *data) { pthread_mutex_unlock(&tp->thread_mutex); /* The index of the mapping task we will work on next. */ - size_t task_ind; - while ((task_ind = atomic_add(&tp->map_data_count, tp->map_data_chunk)) < - tp->map_data_size) { - const int num_elements = task_ind + tp->map_data_chunk > tp->map_data_size - ? tp->map_data_size - task_ind - : tp->map_data_chunk; + while (1) { + /* Desired chunk size. */ + size_t chunk_size = + (tp->map_data_size - tp->map_data_count) / (2 * tp->num_threads); + if (chunk_size > tp->map_data_chunk) chunk_size = tp->map_data_chunk; + if (chunk_size < 1) chunk_size = 1; + + /* Get a chunk and check its size. */ + size_t task_ind = atomic_add(&tp->map_data_count, chunk_size); + if (task_ind >= tp->map_data_size) break; + if (task_ind + chunk_size > tp->map_data_size) + chunk_size = tp->map_data_size - task_ind; + + /* Call the mapper function. */ tp->map_function((char *)tp->map_data + (tp->map_data_stride * task_ind), - num_elements, tp->map_extra_data); + chunk_size, tp->map_extra_data); } } }