diff --git a/INSTALL.swift b/INSTALL.swift index a07d5b24c2d8c75778e2a24d90f77724459ab61f..8e1635b0715426512503fd9dcde32f59a7ad1b62 100644 --- a/INSTALL.swift +++ b/INSTALL.swift @@ -83,39 +83,65 @@ SWIFT depends on a number of third party libraries that should be available before you can build it. -HDF5: a HDF5 library (v. 1.8.x or higher) is required to read and write -particle data. One of the commands "h5cc" or "h5pcc" should be -available. If "h5pcc" is located them a parallel HDF5 built for the version -of MPI located should be provided. If the command is not available then it -can be located using the "--with-hfd5" configure option. The value should -be the full path to the "h5cc" or "h5pcc" commands. + - HDF5: a HDF5 library (v. 1.8.x or higher) is required to read and + write particle data. One of the commands "h5cc" or "h5pcc" + should be available. If "h5pcc" is located them a parallel + HDF5 built for the version of MPI located should be + provided. If the command is not available then it can be + located using the "--with-hfd5" configure option. The value + should be the full path to the "h5cc" or "h5pcc" commands. -MPI: an optional MPI library that fully supports MPI_THREAD_MULTIPLE. -Before running configure the "mpirun" command should be available in the -shell. If your command isn't called "mpirun" then define the "MPIRUN" -environment variable, either in the shell or when running configure. + - MPI: to run on more than one node an MPI library that fully + supports MPI_THREAD_MULTIPLE. Before running configure the + "mpirun" command should be available in the shell. If your + command isn't called "mpirun" then define the "MPIRUN" + environment variable, either in the shell or when running + configure. -The MPI compiler can be controlled using the MPICC variable, much like -the CC one. Use this when your MPI compiler has a none-standard name. + The MPI compiler can be controlled using the MPICC variable, + much like the CC one. Use this when your MPI compiler has a + none-standard name. -METIS: a build of the METIS library can be optionally used to optimize the -load between MPI nodes (requires an MPI library). This should be found in -the standard installation directories, or pointed at using the -"--with-metis" configuration option. In this case the top-level -installation directory of the METIS build should be given. Note to use -METIS you should at least supply "--with-metis". + - libtool: The build system relies on libtool. -libNUMA: a build of the NUMA library can be used to pin the threads to -the physical core of the machine SWIFT is running on. This is not always -necessary as the OS scheduler may do a good job at distributing the threads -among the different cores on each computing node. + Optional Dependencies + ===================== -DOXYGEN: the doxygen library is required to create the SWIFT API -documentation. + - METIS: a build of the METIS library can be optionally used to + optimize the load between MPI nodes (requires an MPI + library). This should be found in the standard installation + directories, or pointed at using the "--with-metis" + configuration option. In this case the top-level + installation directory of the METIS build should be + given. Note to use METIS you should at least supply + "--with-metis". + + + - libNUMA: a build of the NUMA library can be used to pin the threads + to the physical core of the machine SWIFT is running + on. This is not always necessary as the OS scheduler may + do a good job at distributing the threads among the + different cores on each computing node. + + + - TCMalloc: a build of the TCMalloc library (part of gperftools) can + be used to obtain faster allocations than the standard C + malloc function part of glibc. The option "-with-tcmalloc" + should be passed to the configuration script to use it. + + + - gperftools: a build of gperftools can be used to obtain good + profiling of the code. The option "-with-profiler" + needs to be passed to the configuration script to use + it. + + + - DOXYGEN: the doxygen library is required to create the SWIFT API + documentation. diff --git a/configure.ac b/configure.ac index 977a86a03d5e4c82139dfd56d3f1ee7afe3ea6e8..f6e2ea0db8e9829b719a0190eea7a1d891bfbbd6 100644 --- a/configure.ac +++ b/configure.ac @@ -355,7 +355,7 @@ fi AC_SUBST([TCMALLOC_LIBS]) AM_CONDITIONAL([HAVETCMALLOC],[test -n "$TCMALLOC_LIBS"]) -# Check for -lprofiler usually part of the gpreftools along with tcmalloc. +# Check for -lprofiler usually part of the gperftools along with tcmalloc. have_profiler="no" AC_ARG_WITH([profiler], [AS_HELP_STRING([--with-profiler], diff --git a/src/cell.c b/src/cell.c index 166538aa2fd9fe20de9c2a9b43abad6f13383ec4..3e3a8e6cc537f948a809c1a3ebf3cb18194fedd1 100644 --- a/src/cell.c +++ b/src/cell.c @@ -764,6 +764,9 @@ void cell_clean_links(struct cell *c, void *data) { c->force = NULL; c->nr_force = 0; + + c->grav = NULL; + c->nr_grav = 0; } /** @@ -1004,7 +1007,6 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (c->kick != NULL) scheduler_activate(s, c->kick); if (c->cooling != NULL) scheduler_activate(s, c->cooling); if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms); - if (c->grav_external != NULL) scheduler_activate(s, c->grav_external); return 0; } diff --git a/src/cell.h b/src/cell.h index 7c6eed55b83277fe9c489d2ab58683d4859d3d59..f1b5359e92251d73733a49eccd8029af993aa3f9 100644 --- a/src/cell.h +++ b/src/cell.h @@ -158,9 +158,6 @@ struct cell { /* Tasks for gravity tree. */ struct task *grav_up, *grav_down; - /* Task for external gravity */ - struct task *grav_external; - /* Task for cooling */ struct task *cooling; diff --git a/src/engine.c b/src/engine.c index e0ad2c2028b660ac473bf3fe6c9c587a3c6c179a..d659c5b15cea81f450b3c99b64b840735bd8e2b1 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1248,10 +1248,9 @@ void engine_make_external_gravity_tasks(struct engine *e) { /* Is that neighbour local ? */ if (ci->nodeID != nodeID) continue; - /* If the cells is local build a self-interaction */ - ci->grav_external = scheduler_addtask(sched, task_type_self, - task_subtype_external_grav, 0, 0, - ci, NULL, 0); + /* If the cell is local build a self-interaction */ + scheduler_addtask(sched, task_type_self, task_subtype_external_grav, 0, 0, + ci, NULL, 0); } } @@ -1330,56 +1329,101 @@ void engine_make_hydroloop_tasks(struct engine *e) { /** * @brief Counts the tasks associated with one cell and constructs the links * - * For each hydrodynamic task, construct the links with the corresponding cell. - * Similarly, construct the dependencies for all the sorting tasks. + * For each hydrodynamic and gravity task, construct the links with + * the corresponding cell. Similarly, construct the dependencies for + * all the sorting tasks. * * @param e The #engine. */ void engine_count_and_link_tasks(struct engine *e) { - struct scheduler *sched = &e->sched; + struct scheduler *const sched = &e->sched; + const int nr_tasks = sched->nr_tasks; - for (int ind = 0; ind < sched->nr_tasks; ind++) { + for (int ind = 0; ind < nr_tasks; ind++) { - struct task *t = &sched->tasks[ind]; + struct task *const t = &sched->tasks[ind]; + struct cell *const ci = t->ci; + struct cell *const cj = t->cj; /* Link sort tasks together. */ - if (t->type == task_type_sort && t->ci->split) + if (t->type == task_type_sort && ci->split) for (int j = 0; j < 8; j++) - if (t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts != NULL) { - scheduler_addunlock(sched, t->ci->progeny[j]->sorts, t); + if (ci->progeny[j] != NULL && ci->progeny[j]->sorts != NULL) { + scheduler_addunlock(sched, ci->progeny[j]->sorts, t); } - /* Link density tasks to cells. */ + /* Link self tasks to cells. */ if (t->type == task_type_self) { - atomic_inc(&t->ci->nr_tasks); + atomic_inc(&ci->nr_tasks); if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); + engine_addlink(e, &ci->density, t); + atomic_inc(&ci->nr_density); + } + if (t->subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); } + if (t->subtype == task_subtype_external_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + } + + /* Link pair tasks to cells. */ } else if (t->type == task_type_pair) { - atomic_inc(&t->ci->nr_tasks); - atomic_inc(&t->cj->nr_tasks); + atomic_inc(&ci->nr_tasks); + atomic_inc(&cj->nr_tasks); if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); - engine_addlink(e, &t->cj->density, t); - atomic_inc(&t->cj->nr_density); + engine_addlink(e, &ci->density, t); + atomic_inc(&ci->nr_density); + engine_addlink(e, &cj->density, t); + atomic_inc(&cj->nr_density); } + if (t->subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + engine_addlink(e, &cj->grav, t); + atomic_inc(&cj->nr_grav); + } + + /* Link sub-self tasks to cells. */ } else if (t->type == task_type_sub_self) { - atomic_inc(&t->ci->nr_tasks); + atomic_inc(&ci->nr_tasks); if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); + engine_addlink(e, &ci->density, t); + atomic_inc(&ci->nr_density); + } + if (t->subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + } + if (t->subtype == task_subtype_external_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); } + + /* Link sub-pair tasks to cells. */ } else if (t->type == task_type_sub_pair) { - atomic_inc(&t->ci->nr_tasks); - atomic_inc(&t->cj->nr_tasks); + atomic_inc(&ci->nr_tasks); + atomic_inc(&cj->nr_tasks); if (t->subtype == task_subtype_density) { - engine_addlink(e, &t->ci->density, t); - atomic_inc(&t->ci->nr_density); - engine_addlink(e, &t->cj->density, t); - atomic_inc(&t->cj->nr_density); + engine_addlink(e, &ci->density, t); + atomic_inc(&ci->nr_density); + engine_addlink(e, &cj->density, t); + atomic_inc(&cj->nr_density); + } + if (t->subtype == task_subtype_grav) { + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + engine_addlink(e, &cj->grav, t); + atomic_inc(&cj->nr_grav); + } + if (t->subtype == task_subtype_external_grav) { + error("Found a sub-pair/external-gravity task..."); + engine_addlink(e, &ci->grav, t); + atomic_inc(&ci->nr_grav); + engine_addlink(e, &cj->grav, t); + atomic_inc(&cj->nr_grav); } } } @@ -1865,14 +1909,16 @@ void engine_maketasks(struct engine *e) { /* Split the tasks. */ scheduler_splittasks(sched); - /* Allocate the list of cell-task links. The maximum number of links - is the number of cells (s->tot_cells) times the number of neighbours (27) - times the number of interaction types (2, density and force). */ + /* Allocate the list of cell-task links. The maximum number of links is the + * number of cells (s->tot_cells) times the number of neighbours (26) times + * the number of interaction types, so 26 * 3 (density, force, grav) pairs + * and 4 (density, force, grav, ext_grav) self. + */ if (e->links != NULL) free(e->links); #ifdef EXTRA_HYDRO_LOOP - e->size_links = s->tot_cells * 27 * 3; + e->size_links = s->tot_cells * (26 * 4 + 4); #else - e->size_links = s->tot_cells * 27 * 2; + e->size_links = s->tot_cells * (26 * 3 + 4); #endif if ((e->links = malloc(sizeof(struct link) * e->size_links)) == NULL) error("Failed to allocate cell-task links."); diff --git a/src/space.c b/src/space.c index 186e11c066f3c8d788c3eaa189da3ed2461aed87..432f26621da978e9cea882b10934dfe9412112cc 100644 --- a/src/space.c +++ b/src/space.c @@ -395,6 +395,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { s->cells_top[k].nr_density = 0; s->cells_top[k].nr_gradient = 0; s->cells_top[k].nr_force = 0; + s->cells_top[k].nr_grav = 0; s->cells_top[k].density = NULL; s->cells_top[k].gradient = NULL; s->cells_top[k].force = NULL; @@ -406,7 +407,6 @@ void space_regrid(struct space *s, double cell_max, int verbose) { s->cells_top[k].extra_ghost = NULL; s->cells_top[k].ghost = NULL; s->cells_top[k].kick = NULL; - s->cells_top[k].grav_external = NULL; s->cells_top[k].cooling = NULL; s->cells_top[k].sourceterms = NULL; s->cells_top[k].super = &s->cells_top[k]; @@ -601,8 +601,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { for (size_t k = 1; k < nr_parts; k++) { if (ind[k - 1] > ind[k]) { error("Sort failed!"); - } else if (ind[k] != cell_getid(s->cdim, - s->parts[k].x[0] * s->iwidth[0], + } else if (ind[k] != cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0], s->parts[k].x[1] * s->iwidth[1], s->parts[k].x[2] * s->iwidth[2])) { error("Incorrect indices!");