Commit 98fff2d2 authored by Peter W. Draper

Merge remote-tracking branch 'origin/master' into mark_tasks_in_drift2

Conflicts:
   src/engine.c
parents 09aeffb8 cd34c3b6
@@ -83,39 +83,65 @@ SWIFT depends on a number of third party libraries that should be available
 before you can build it.
 
-HDF5: an HDF5 library (v. 1.8.x or higher) is required to read and write
-particle data. One of the commands "h5cc" or "h5pcc" should be
-available. If "h5pcc" is located then a parallel HDF5 build for the version
-of MPI found should be provided. If the command is not available then it
-can be located using the "--with-hdf5" configure option. The value should
-be the full path to the "h5cc" or "h5pcc" commands.
+ - HDF5: an HDF5 library (v. 1.8.x or higher) is required to read and
+         write particle data. One of the commands "h5cc" or "h5pcc"
+         should be available. If "h5pcc" is located then a parallel
+         HDF5 build for the version of MPI found should be
+         provided. If the command is not available then it can be
+         located using the "--with-hdf5" configure option. The value
+         should be the full path to the "h5cc" or "h5pcc" commands.
 
-MPI: an optional MPI library that fully supports MPI_THREAD_MULTIPLE.
-Before running configure the "mpirun" command should be available in the
-shell. If your command isn't called "mpirun" then define the "MPIRUN"
-environment variable, either in the shell or when running configure.
+ - MPI: to run on more than one node, an MPI library that fully
+        supports MPI_THREAD_MULTIPLE. Before running configure the
+        "mpirun" command should be available in the shell. If your
+        command isn't called "mpirun" then define the "MPIRUN"
+        environment variable, either in the shell or when running
+        configure.
 
-The MPI compiler can be controlled using the MPICC variable, much like
-the CC one. Use this when your MPI compiler has a non-standard name.
+        The MPI compiler can be controlled using the MPICC variable,
+        much like the CC one. Use this when your MPI compiler has a
+        non-standard name.
 
-METIS: a build of the METIS library can optionally be used to optimize the
-load between MPI nodes (requires an MPI library). This should be found in
-the standard installation directories, or pointed at using the
-"--with-metis" configuration option. In this case the top-level
-installation directory of the METIS build should be given. Note that to
-use METIS you should at least supply "--with-metis".
+ - libtool: The build system relies on libtool.
 
-libNUMA: a build of the NUMA library can be used to pin the threads to
-the physical cores of the machine SWIFT is running on. This is not always
-necessary as the OS scheduler may do a good job at distributing the threads
-among the different cores on each computing node.
 
 Optional Dependencies
 =====================
 
-DOXYGEN: the doxygen library is required to create the SWIFT API
-documentation.
+ - METIS: a build of the METIS library can optionally be used to
+          optimize the load between MPI nodes (requires an MPI
+          library). This should be found in the standard installation
+          directories, or pointed at using the "--with-metis"
+          configuration option. In this case the top-level
+          installation directory of the METIS build should be
+          given. Note that to use METIS you should at least supply
+          "--with-metis".
+
+ - libNUMA: a build of the NUMA library can be used to pin the threads
+            to the physical cores of the machine SWIFT is running
+            on. This is not always necessary as the OS scheduler may
+            do a good job at distributing the threads among the
+            different cores on each computing node.
+
+ - TCMalloc: a build of the TCMalloc library (part of gperftools) can
+             be used to obtain faster allocations than the standard C
+             malloc function that is part of glibc. The option
+             "--with-tcmalloc" should be passed to the configuration
+             script to use it.
+
+ - gperftools: a build of gperftools can be used to obtain good
+               profiling of the code. The option "--with-profiler"
+               needs to be passed to the configuration script to use
+               it.
+
+ - DOXYGEN: the doxygen library is required to create the SWIFT API
+            documentation.
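All of the options above are flags to SWIFT's configure script. As a quick
reference, a typical invocation enabling the optional libraries might look
like the following, where the installation paths are hypothetical
placeholders rather than part of this commit:

    ./configure --with-hdf5=/usr/local/bin/h5pcc --with-metis=/opt/metis \
                --with-tcmalloc --with-profiler
    make

"--with-hdf5" takes the full path to "h5cc"/"h5pcc" and "--with-metis" the
top-level METIS installation directory, as described above.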
@@ -355,7 +355,7 @@ fi
 AC_SUBST([TCMALLOC_LIBS])
 AM_CONDITIONAL([HAVETCMALLOC],[test -n "$TCMALLOC_LIBS"])
 
-# Check for -lprofiler usually part of the gpreftools along with tcmalloc.
+# Check for -lprofiler usually part of the gperftools along with tcmalloc.
 have_profiler="no"
 AC_ARG_WITH([profiler],
    [AS_HELP_STRING([--with-profiler],
@@ -764,6 +764,9 @@ void cell_clean_links(struct cell *c, void *data) {
   c->force = NULL;
   c->nr_force = 0;
+
+  c->grav = NULL;
+  c->nr_grav = 0;
 }
 
 /**
@@ -1004,7 +1007,6 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
   if (c->kick != NULL) scheduler_activate(s, c->kick);
   if (c->cooling != NULL) scheduler_activate(s, c->cooling);
   if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms);
-  if (c->grav_external != NULL) scheduler_activate(s, c->grav_external);
 
   return 0;
 }
@@ -158,9 +158,6 @@ struct cell {
   /* Tasks for gravity tree. */
   struct task *grav_up, *grav_down;
 
-  /* Task for external gravity */
-  struct task *grav_external;
-
   /* Task for cooling */
   struct task *cooling;
@@ -1248,10 +1248,9 @@ void engine_make_external_gravity_tasks(struct engine *e) {
     /* Is that neighbour local ? */
     if (ci->nodeID != nodeID) continue;
 
-    /* If the cells is local build a self-interaction */
-    ci->grav_external = scheduler_addtask(sched, task_type_self,
-                                          task_subtype_external_grav, 0, 0,
-                                          ci, NULL, 0);
+    /* If the cell is local build a self-interaction */
+    scheduler_addtask(sched, task_type_self, task_subtype_external_grav, 0, 0,
+                      ci, NULL, 0);
   }
 }
@@ -1330,56 +1329,101 @@ void engine_make_hydroloop_tasks(struct engine *e) {
 /**
  * @brief Counts the tasks associated with one cell and constructs the links
  *
- * For each hydrodynamic task, construct the links with the corresponding cell.
- * Similarly, construct the dependencies for all the sorting tasks.
+ * For each hydrodynamic and gravity task, construct the links with
+ * the corresponding cell. Similarly, construct the dependencies for
+ * all the sorting tasks.
  *
  * @param e The #engine.
  */
 void engine_count_and_link_tasks(struct engine *e) {
 
-  struct scheduler *sched = &e->sched;
+  struct scheduler *const sched = &e->sched;
+  const int nr_tasks = sched->nr_tasks;
 
-  for (int ind = 0; ind < sched->nr_tasks; ind++) {
+  for (int ind = 0; ind < nr_tasks; ind++) {
 
-    struct task *t = &sched->tasks[ind];
+    struct task *const t = &sched->tasks[ind];
+    struct cell *const ci = t->ci;
+    struct cell *const cj = t->cj;
 
     /* Link sort tasks together. */
-    if (t->type == task_type_sort && t->ci->split)
+    if (t->type == task_type_sort && ci->split)
       for (int j = 0; j < 8; j++)
-        if (t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts != NULL) {
-          scheduler_addunlock(sched, t->ci->progeny[j]->sorts, t);
+        if (ci->progeny[j] != NULL && ci->progeny[j]->sorts != NULL) {
+          scheduler_addunlock(sched, ci->progeny[j]->sorts, t);
         }
 
-    /* Link density tasks to cells. */
+    /* Link self tasks to cells. */
     if (t->type == task_type_self) {
-      atomic_inc(&t->ci->nr_tasks);
+      atomic_inc(&ci->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        engine_addlink(e, &t->ci->density, t);
-        atomic_inc(&t->ci->nr_density);
+        engine_addlink(e, &ci->density, t);
+        atomic_inc(&ci->nr_density);
       }
+      if (t->subtype == task_subtype_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+      }
+      if (t->subtype == task_subtype_external_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+      }
 
+      /* Link pair tasks to cells. */
     } else if (t->type == task_type_pair) {
-      atomic_inc(&t->ci->nr_tasks);
-      atomic_inc(&t->cj->nr_tasks);
+      atomic_inc(&ci->nr_tasks);
+      atomic_inc(&cj->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        engine_addlink(e, &t->ci->density, t);
-        atomic_inc(&t->ci->nr_density);
-        engine_addlink(e, &t->cj->density, t);
-        atomic_inc(&t->cj->nr_density);
+        engine_addlink(e, &ci->density, t);
+        atomic_inc(&ci->nr_density);
+        engine_addlink(e, &cj->density, t);
+        atomic_inc(&cj->nr_density);
       }
+      if (t->subtype == task_subtype_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+        engine_addlink(e, &cj->grav, t);
+        atomic_inc(&cj->nr_grav);
+      }
 
+      /* Link sub-self tasks to cells. */
     } else if (t->type == task_type_sub_self) {
-      atomic_inc(&t->ci->nr_tasks);
+      atomic_inc(&ci->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        engine_addlink(e, &t->ci->density, t);
-        atomic_inc(&t->ci->nr_density);
+        engine_addlink(e, &ci->density, t);
+        atomic_inc(&ci->nr_density);
      }
+      if (t->subtype == task_subtype_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+      }
+      if (t->subtype == task_subtype_external_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+      }
 
+      /* Link sub-pair tasks to cells. */
    } else if (t->type == task_type_sub_pair) {
-      atomic_inc(&t->ci->nr_tasks);
-      atomic_inc(&t->cj->nr_tasks);
+      atomic_inc(&ci->nr_tasks);
+      atomic_inc(&cj->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        engine_addlink(e, &t->ci->density, t);
-        atomic_inc(&t->ci->nr_density);
-        engine_addlink(e, &t->cj->density, t);
-        atomic_inc(&t->cj->nr_density);
+        engine_addlink(e, &ci->density, t);
+        atomic_inc(&ci->nr_density);
+        engine_addlink(e, &cj->density, t);
+        atomic_inc(&cj->nr_density);
      }
+      if (t->subtype == task_subtype_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+        engine_addlink(e, &cj->grav, t);
+        atomic_inc(&cj->nr_grav);
+      }
+      if (t->subtype == task_subtype_external_grav) {
+        error("Found a sub-pair/external-gravity task...");
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+        engine_addlink(e, &cj->grav, t);
+        atomic_inc(&cj->nr_grav);
+      }
     }
   }
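For readers following the linking logic above: each cell keeps singly-linked
lists of the tasks that touch it (density, force, and now grav), with the
nodes drawn from the pre-allocated e->links pool. A minimal sketch of that
machinery, with simplified non-atomic list updates and an assumed nr_links
counter (the real SWIFT definitions may differ in detail):

    /* One node in a cell's list of associated tasks. */
    struct link {
      struct task *t;    /* The task this link refers to. */
      struct link *next; /* Next link in this cell's list. */
    };

    /* Claim the next free slot in the engine's link pool and push it
     * onto the front of the list rooted at *l, e.g. &ci->grav. */
    void engine_addlink(struct engine *e, struct link **l, struct task *t) {
      const int ind = e->nr_links++; /* atomic in the real code */
      if (ind >= e->size_links) error("Link table overflow.");
      struct link *res = &e->links[ind];
      res->t = t;
      res->next = *l;
      *l = res;
    }

This also explains the pairing in the hunk above: engine_addlink() grows the
list while the matching atomic_inc() of nr_density or nr_grav keeps a count
of its length.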
@@ -1865,14 +1909,16 @@ void engine_maketasks(struct engine *e) {
   /* Split the tasks. */
   scheduler_splittasks(sched);
 
-  /* Allocate the list of cell-task links. The maximum number of links
-     is the number of cells (s->tot_cells) times the number of neighbours (27)
-     times the number of interaction types (2, density and force). */
+  /* Allocate the list of cell-task links. The maximum number of links is the
+   * number of cells (s->tot_cells) times the number of neighbours (26) times
+   * the number of interaction types, i.e. 26 * 3 pair links (density, force,
+   * grav) plus 4 self links (density, force, grav, ext_grav) per cell.
+   */
   if (e->links != NULL) free(e->links);
 #ifdef EXTRA_HYDRO_LOOP
-  e->size_links = s->tot_cells * 27 * 3;
+  e->size_links = s->tot_cells * (26 * 4 + 4);
 #else
-  e->size_links = s->tot_cells * 27 * 2;
+  e->size_links = s->tot_cells * (26 * 3 + 4);
 #endif
   if ((e->links = malloc(sizeof(struct link) * e->size_links)) == NULL)
     error("Failed to allocate cell-task links.");
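To make the new bound concrete in the non-EXTRA_HYDRO_LOOP case: a top-level
cell can be linked by pair tasks with up to 26 neighbours in 3 linkable
subtypes, plus up to 4 linkable self tasks, so the pool is sized as follows
(illustration only, mirroring the allocation above):

    const int links_per_cell = 26 * 3 + 4; /* = 82 links per cell */
    e->size_links = s->tot_cells * links_per_cell;

At sizeof(struct link) bytes per entry this is a deliberately generous upper
bound rather than an exact count.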
@@ -395,6 +395,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       s->cells_top[k].nr_density = 0;
       s->cells_top[k].nr_gradient = 0;
       s->cells_top[k].nr_force = 0;
+      s->cells_top[k].nr_grav = 0;
       s->cells_top[k].density = NULL;
       s->cells_top[k].gradient = NULL;
       s->cells_top[k].force = NULL;
@@ -406,7 +407,6 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       s->cells_top[k].extra_ghost = NULL;
       s->cells_top[k].ghost = NULL;
       s->cells_top[k].kick = NULL;
-      s->cells_top[k].grav_external = NULL;
       s->cells_top[k].cooling = NULL;
       s->cells_top[k].sourceterms = NULL;
       s->cells_top[k].super = &s->cells_top[k];
@@ -601,8 +601,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   for (size_t k = 1; k < nr_parts; k++) {
     if (ind[k - 1] > ind[k]) {
       error("Sort failed!");
-    } else if (ind[k] != cell_getid(s->cdim,
-                                    s->parts[k].x[0] * s->iwidth[0],
+    } else if (ind[k] != cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0],
                                     s->parts[k].x[1] * s->iwidth[1],
                                     s->parts[k].x[2] * s->iwidth[2])) {
       error("Incorrect indices!");
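The consistency check above assumes cell_getid flattens a 3D cell coordinate
into a top-level cell index in row-major order. A minimal sketch of that
mapping under this assumption (the actual definition lives elsewhere in the
SWIFT sources and is not part of this diff):

    /* Flatten coordinates (i, j, k) of a cdim[0] x cdim[1] x cdim[2]
     * grid; the call site passes i = s->parts[k].x[0] * s->iwidth[0],
     * etc., truncated to int. */
    static int cell_getid(const int cdim[3], const int i, const int j,
                          const int k) {
      return (i * cdim[1] + j) * cdim[2] + k;
    }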