Commit c67605d6 authored by Peter W. Draper

Merge remote-tracking branch 'origin/master' into stats_after_kick

Conflicts:
	src/engine.c
parents 1a3e3902 d89f9b87
```
@@ -83,39 +83,65 @@ SWIFT depends on a number of third party libraries that should be available
before you can build it.

 - HDF5: an HDF5 library (v. 1.8.x or higher) is required to read and
   write particle data. One of the commands "h5cc" or "h5pcc" should be
   available. If "h5pcc" is located then a parallel HDF5, built for the
   version of MPI in use, should be provided. If the command is not
   available then it can be located using the "--with-hdf5" configure
   option. The value should be the full path to the "h5cc" or "h5pcc"
   commands.

 - MPI: to run on more than one node, an MPI library that fully
   supports MPI_THREAD_MULTIPLE is required. Before running configure
   the "mpirun" command should be available in the shell. If your
   command isn't called "mpirun" then define the "MPIRUN" environment
   variable, either in the shell or when running configure.

   The MPI compiler can be controlled using the MPICC variable, much
   like the CC one. Use this when your MPI compiler has a non-standard
   name.

 - libtool: the build system relies on libtool.

Optional Dependencies
=====================

 - METIS: a build of the METIS library can be optionally used to
   optimize the load between MPI nodes (requires an MPI library). This
   should be found in the standard installation directories, or pointed
   at using the "--with-metis" configuration option. In this case the
   top-level installation directory of the METIS build should be given.
   Note that to use METIS you should supply at least "--with-metis".

 - libNUMA: a build of the NUMA library can be used to pin the threads
   to the physical cores of the machine SWIFT is running on. This is
   not always necessary as the OS scheduler may do a good job at
   distributing the threads among the different cores on each computing
   node.

 - TCMalloc: a build of the TCMalloc library (part of gperftools) can
   be used to obtain faster allocations than the standard C malloc
   function that is part of glibc. The option "--with-tcmalloc" should
   be passed to the configuration script to use it.

 - gperftools: a build of gperftools can be used to obtain good
   profiling of the code. The option "--with-profiler" needs to be
   passed to the configuration script to use it.

 - DOXYGEN: the doxygen tool is required to create the SWIFT API
   documentation.
```
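The MPI requirement above is stricter than many default MPI builds provide: MPI_THREAD_MULTIPLE must actually be granted at initialisation, not merely compiled in. As a minimal, self-contained sketch of such a check (illustrative code, not SWIFT source):

```c
/* Minimal sketch (not SWIFT source): verify that the MPI library grants
 * the MPI_THREAD_MULTIPLE support the requirement above refers to. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[]) {
  int provided = 0;
  if (MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided) !=
      MPI_SUCCESS) {
    fprintf(stderr, "MPI_Init_thread failed.\n");
    return EXIT_FAILURE;
  }
  if (provided < MPI_THREAD_MULTIPLE) {
    fprintf(stderr, "MPI library lacks full thread support (got %d).\n",
            provided);
    MPI_Finalize();
    return EXIT_FAILURE;
  }
  MPI_Finalize();
  return EXIT_SUCCESS;
}
```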
```diff
@@ -355,7 +355,7 @@ fi
 AC_SUBST([TCMALLOC_LIBS])
 AM_CONDITIONAL([HAVETCMALLOC],[test -n "$TCMALLOC_LIBS"])
 
-# Check for -lprofiler usually part of the gpreftools along with tcmalloc.
+# Check for -lprofiler usually part of the gperftools along with tcmalloc.
 have_profiler="no"
 AC_ARG_WITH([profiler],
    [AS_HELP_STRING([--with-profiler],
```
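The --with-profiler option checked here links the binary against -lprofiler. For context, a minimal sketch of how code linked this way can scope a profile explicitly using the gperftools CPU profiler API (the output file name is illustrative, not SWIFT's):

```c
/* Minimal sketch: explicit profiling scope with gperftools' -lprofiler.
 * Requires linking with -lprofiler; the file name is illustrative. */
#include <gperftools/profiler.h>

void run_profiled(void (*work)(void)) {
  ProfilerStart("swift.prof"); /* begin writing CPU samples to this file */
  work();                      /* the region being profiled */
  ProfilerStop();              /* flush and close the profile */
}
```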
```diff
@@ -120,6 +120,10 @@ int main(int argc, char *argv[]) {
     error("MPI_Comm_size failed with error %i.", res);
   if ((res = MPI_Comm_rank(MPI_COMM_WORLD, &myrank)) != MPI_SUCCESS)
     error("Call to MPI_Comm_rank failed with error %i.", res);
+
+  /* Make sure messages are stamped with the correct rank. */
+  engine_rank = myrank;
+
   if ((res = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)) !=
       MPI_SUCCESS)
     error("Call to MPI_Comm_set_errhandler failed with error %i.", res);
@@ -131,6 +135,7 @@ int main(int argc, char *argv[]) {
     message("WARNING: you should use the non-MPI version of this program.");
   }
   fflush(stdout);
+
 #endif
 
   /* Let's pin the main thread */
```
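The hunk above depends on MPI_ERRORS_RETURN: the default handler, MPI_ERRORS_ARE_FATAL, would abort the program before the `(res = ...) != MPI_SUCCESS` checks ever ran. A minimal sketch (not SWIFT source) of turning a returned code into readable text with the standard MPI API:

```c
/* Minimal sketch: with MPI_ERRORS_RETURN installed, a failed call hands
 * back an error code that MPI_Error_string can expand for the user. */
#include <mpi.h>
#include <stdio.h>

static void report_mpi_error(int res) {
  char buf[MPI_MAX_ERROR_STRING];
  int len = 0;
  MPI_Error_string(res, buf, &len);
  fprintf(stderr, "MPI error %d: %.*s\n", res, len, buf);
}
```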
```diff
@@ -764,6 +764,9 @@ void cell_clean_links(struct cell *c, void *data) {
   c->force = NULL;
   c->nr_force = 0;
+
+  c->grav = NULL;
+  c->nr_grav = 0;
 }
 
 /**
@@ -1004,7 +1007,6 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
   if (c->kick != NULL) scheduler_activate(s, c->kick);
   if (c->cooling != NULL) scheduler_activate(s, c->cooling);
   if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms);
-  if (c->grav_external != NULL) scheduler_activate(s, c->grav_external);
 
   return 0;
 }
```
```diff
@@ -158,9 +158,6 @@ struct cell {
   /* Tasks for gravity tree. */
   struct task *grav_up, *grav_down;
 
-  /* Task for external gravity */
-  struct task *grav_external;
-
   /* Task for cooling */
   struct task *cooling;
```
```diff
@@ -1249,9 +1249,9 @@ void engine_make_external_gravity_tasks(struct engine *e) {
     /* Is that neighbour local ? */
     if (ci->nodeID != nodeID) continue;
 
-    /* If the cells is local build a self-interaction */
-    ci->grav_external = scheduler_addtask(
-        sched, task_type_self, task_subtype_external_grav, 0, 0, ci, NULL, 0);
+    /* If the cell is local, build a self-interaction */
+    scheduler_addtask(sched, task_type_self, task_subtype_external_grav, 0, 0,
+                      ci, NULL, 0);
   }
 }
```
```c
@@ -1330,56 +1330,101 @@ void engine_make_hydroloop_tasks(struct engine *e) {
/**
 * @brief Counts the tasks associated with one cell and constructs the links
 *
 * For each hydrodynamic and gravity task, construct the links with
 * the corresponding cell. Similarly, construct the dependencies for
 * all the sorting tasks.
 *
 * @param e The #engine.
 */
void engine_count_and_link_tasks(struct engine *e) {

  struct scheduler *const sched = &e->sched;
  const int nr_tasks = sched->nr_tasks;

  for (int ind = 0; ind < nr_tasks; ind++) {

    struct task *const t = &sched->tasks[ind];
    struct cell *const ci = t->ci;
    struct cell *const cj = t->cj;

    /* Link sort tasks together. */
    if (t->type == task_type_sort && ci->split)
      for (int j = 0; j < 8; j++)
        if (ci->progeny[j] != NULL && ci->progeny[j]->sorts != NULL) {
          scheduler_addunlock(sched, ci->progeny[j]->sorts, t);
        }

    /* Link self tasks to cells. */
    if (t->type == task_type_self) {
      atomic_inc(&ci->nr_tasks);
      if (t->subtype == task_subtype_density) {
        engine_addlink(e, &ci->density, t);
        atomic_inc(&ci->nr_density);
      }
      if (t->subtype == task_subtype_grav) {
        engine_addlink(e, &ci->grav, t);
        atomic_inc(&ci->nr_grav);
      }
      if (t->subtype == task_subtype_external_grav) {
        engine_addlink(e, &ci->grav, t);
        atomic_inc(&ci->nr_grav);
      }

      /* Link pair tasks to cells. */
    } else if (t->type == task_type_pair) {
      atomic_inc(&ci->nr_tasks);
      atomic_inc(&cj->nr_tasks);
      if (t->subtype == task_subtype_density) {
        engine_addlink(e, &ci->density, t);
        atomic_inc(&ci->nr_density);
        engine_addlink(e, &cj->density, t);
        atomic_inc(&cj->nr_density);
      }
      if (t->subtype == task_subtype_grav) {
        engine_addlink(e, &ci->grav, t);
        atomic_inc(&ci->nr_grav);
        engine_addlink(e, &cj->grav, t);
        atomic_inc(&cj->nr_grav);
      }

      /* Link sub-self tasks to cells. */
    } else if (t->type == task_type_sub_self) {
      atomic_inc(&ci->nr_tasks);
      if (t->subtype == task_subtype_density) {
        engine_addlink(e, &ci->density, t);
        atomic_inc(&ci->nr_density);
      }
      if (t->subtype == task_subtype_grav) {
        engine_addlink(e, &ci->grav, t);
        atomic_inc(&ci->nr_grav);
      }
      if (t->subtype == task_subtype_external_grav) {
        engine_addlink(e, &ci->grav, t);
        atomic_inc(&ci->nr_grav);
      }

      /* Link sub-pair tasks to cells. */
    } else if (t->type == task_type_sub_pair) {
      atomic_inc(&ci->nr_tasks);
      atomic_inc(&cj->nr_tasks);
      if (t->subtype == task_subtype_density) {
        engine_addlink(e, &ci->density, t);
        atomic_inc(&ci->nr_density);
        engine_addlink(e, &cj->density, t);
        atomic_inc(&cj->nr_density);
      }
      if (t->subtype == task_subtype_grav) {
        engine_addlink(e, &ci->grav, t);
        atomic_inc(&ci->nr_grav);
        engine_addlink(e, &cj->grav, t);
        atomic_inc(&cj->nr_grav);
      }
      if (t->subtype == task_subtype_external_grav) {
        error("Found a sub-pair/external-gravity task...");
        engine_addlink(e, &ci->grav, t);
        atomic_inc(&ci->nr_grav);
        engine_addlink(e, &cj->grav, t);
        atomic_inc(&cj->nr_grav);
      }
    }
  }
```
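engine_addlink above prepends each task to a per-cell list (c->density, c->grav, ...), so a cell can later reach all of its tasks without dedicated pointers like the removed grav_external. A self-contained sketch of the idea, with hypothetical names (SWIFT's real engine_addlink draws nodes from the preallocated e->links pool rather than calling malloc):

```c
/* Hypothetical sketch of per-cell task links; SWIFT's engine_addlink
 * takes nodes from a preallocated pool instead of calling malloc. */
#include <stdlib.h>

struct task; /* opaque here */

struct link {
  struct task *t;    /* the linked task */
  struct link *next; /* next link in this cell's list */
};

/* Prepend task t to the list whose head is *head. */
static void addlink(struct link **head, struct task *t) {
  struct link *l = malloc(sizeof(*l));
  if (l == NULL) return; /* real code would signal the error */
  l->t = t;
  l->next = *head;
  *head = l;
}

/* Visit every linked task, e.g. to activate each one after a skip. */
static void foreach_link(struct link *head, void (*fn)(struct task *)) {
  for (struct link *l = head; l != NULL; l = l->next) fn(l->t);
}
```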
```diff
@@ -1865,14 +1910,16 @@ void engine_maketasks(struct engine *e) {
   /* Split the tasks. */
   scheduler_splittasks(sched);
 
-  /* Allocate the list of cell-task links. The maximum number of links
-     is the number of cells (s->tot_cells) times the number of neighbours (27)
-     times the number of interaction types (2, density and force). */
+  /* Allocate the list of cell-task links. The maximum number of links is the
+   * number of cells (s->tot_cells) times the number of neighbours (26) times
+   * the number of interaction types, so 26 * 3 (density, force, grav) pairs
+   * and 4 (density, force, grav, ext_grav) self.
+   */
   if (e->links != NULL) free(e->links);
 #ifdef EXTRA_HYDRO_LOOP
-  e->size_links = s->tot_cells * 27 * 3;
+  e->size_links = s->tot_cells * (26 * 4 + 4);
 #else
-  e->size_links = s->tot_cells * 27 * 2;
+  e->size_links = s->tot_cells * (26 * 3 + 4);
 #endif
   if ((e->links = malloc(sizeof(struct link) * e->size_links)) == NULL)
     error("Failed to allocate cell-task links.");
```
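To make the new bound concrete: without EXTRA_HYDRO_LOOP, each cell can appear in at most 26 pair tasks per pair subtype (3 subtypes) plus 4 self subtypes, i.e. 26 * 3 + 4 = 82 links per cell. Assuming, purely for illustration, a 16^3 = 4096-cell top-level grid and a 16-byte struct link (two pointers on a 64-bit machine; an assumption, not a figure from the commit), that reserves 4096 * 82 = 335,872 links, roughly 5.4 MB.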
```diff
@@ -395,9 +395,11 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       s->cells_top[k].nr_density = 0;
       s->cells_top[k].nr_gradient = 0;
       s->cells_top[k].nr_force = 0;
+      s->cells_top[k].nr_grav = 0;
       s->cells_top[k].density = NULL;
       s->cells_top[k].gradient = NULL;
       s->cells_top[k].force = NULL;
+      s->cells_top[k].grav = NULL;
       s->cells_top[k].dx_max = 0.0f;
       s->cells_top[k].sorted = 0;
       s->cells_top[k].count = 0;
@@ -406,7 +408,6 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       s->cells_top[k].extra_ghost = NULL;
       s->cells_top[k].ghost = NULL;
       s->cells_top[k].kick = NULL;
-      s->cells_top[k].grav_external = NULL;
       s->cells_top[k].cooling = NULL;
       s->cells_top[k].sourceterms = NULL;
       s->cells_top[k].super = &s->cells_top[k];
```