diff --git a/INSTALL.swift b/INSTALL.swift
index a07d5b24c2d8c75778e2a24d90f77724459ab61f..8e1635b0715426512503fd9dcde32f59a7ad1b62 100644
--- a/INSTALL.swift
+++ b/INSTALL.swift
@@ -83,39 +83,65 @@ SWIFT depends on a number of third party libraries that should be available
 before you can build it.
 
 
-HDF5: a HDF5 library (v. 1.8.x or higher) is required to read and write
-particle data. One of the commands "h5cc" or "h5pcc" should be
-available. If "h5pcc" is located them a parallel HDF5 built for the version
-of MPI located should be provided. If the command is not available then it
-can be located using the "--with-hfd5" configure option. The value should
-be the full path to the "h5cc" or "h5pcc" commands.
+ - HDF5: an HDF5 library (v. 1.8.x or higher) is required to read and
+         write particle data. One of the commands "h5cc" or "h5pcc"
+         should be available. If "h5pcc" is located, then a parallel
+         HDF5 build for the version of MPI found should be
+         provided. If the command is not available, it can be
+         located using the "--with-hdf5" configure option. The value
+         should be the full path to the "h5cc" or "h5pcc" command.
 
 
-MPI: an optional MPI library that fully supports MPI_THREAD_MULTIPLE.
-Before running configure the "mpirun" command should be available in the
-shell. If your command isn't called "mpirun" then define the "MPIRUN"
-environment variable, either in the shell or when running configure.
+ - MPI: to run on more than one node, an MPI library that fully
+        supports MPI_THREAD_MULTIPLE is required. Before running
+        configure, the "mpirun" command should be available in the
+        shell. If your command isn't called "mpirun", define the
+        "MPIRUN" environment variable, either in the shell or when
+        running configure.
 
-The MPI compiler can be controlled using the MPICC variable, much like
-the CC one. Use this when your MPI compiler has a none-standard name.
+        The MPI compiler can be controlled using the MPICC variable,
+        much like the CC one. Use this when your MPI compiler has a
+        non-standard name.
 
 
-METIS: a build of the METIS library can be optionally used to optimize the
-load between MPI nodes (requires an MPI library). This should be found in
-the standard installation directories, or pointed at using the
-"--with-metis" configuration option.  In this case the top-level
-installation directory of the METIS build should be given. Note to use
-METIS you should at least supply "--with-metis".
+ - libtool: the build system relies on libtool.
 
 
-libNUMA: a build of the NUMA library can be used to pin the threads to
-the physical core of the machine SWIFT is running on. This is not always
-necessary as the OS scheduler may do a good job at distributing the threads
-among the different cores on each computing node.
+                           Optional Dependencies
+                           =====================
 
 
-DOXYGEN: the doxygen library is required to create the SWIFT API
-documentation.
+ - METIS: a build of the METIS library can optionally be used to
+          optimize the load between MPI nodes (requires an MPI
+          library). This should be found in the standard installation
+          directories, or pointed at using the "--with-metis"
+          configuration option. In this case the top-level
+          installation directory of the METIS build should be
+          given. Note that to use METIS you must at least supply
+          "--with-metis".
+
+
+ - libNUMA: a build of the NUMA library can be used to pin the threads
+            to the physical core of the machine SWIFT is running
+            on. This is not always necessary as the OS scheduler may
+            do a good job at distributing the threads among the
+            different cores on each computing node.
+
+
+ - TCMalloc: a build of the TCMalloc library (part of gperftools) can
+             be used to obtain faster allocations than the standard C
+             malloc provided by glibc. The option "--with-tcmalloc"
+             should be passed to the configuration script to use it.
+
+
+ - gperftools: a build of gperftools can be used to obtain detailed
+               profiling of the code. The option "--with-profiler"
+               needs to be passed to the configuration script to use
+               it.
+
+
+ - DOXYGEN: the doxygen tool is required to create the SWIFT API
+            documentation.
 
 
 
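As a worked example of the options described in the INSTALL text
above: a hypothetical configure invocation might look as follows. The
paths are placeholders, not real locations, and must be adjusted to
the local system.

    # Placeholder paths -- adjust to where HDF5 and METIS are installed.
    ./configure --with-hdf5=/opt/hdf5/bin/h5pcc \
                --with-metis=/opt/metis \
                --with-tcmalloc --with-profiler
    make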
diff --git a/configure.ac b/configure.ac
index 977a86a03d5e4c82139dfd56d3f1ee7afe3ea6e8..f6e2ea0db8e9829b719a0190eea7a1d891bfbbd6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -355,7 +355,7 @@ fi
 AC_SUBST([TCMALLOC_LIBS])
 AM_CONDITIONAL([HAVETCMALLOC],[test -n "$TCMALLOC_LIBS"])
 
-#  Check for -lprofiler usually part of the gpreftools along with tcmalloc.
+#  Check for -lprofiler usually part of the gperftools along with tcmalloc.
 have_profiler="no"
 AC_ARG_WITH([profiler],
    [AS_HELP_STRING([--with-profiler],
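For context on what "--with-profiler" enables: gperftools' CPU
profiler is driven by the CPUPROFILE environment variable at run
time, and the resulting file is read with pprof (google-pprof on some
distributions). A sketch of a profiling run; the binary name and
arguments are illustrative only:

    # Assumes SWIFT was configured with --with-profiler.
    CPUPROFILE=./swift.prof ./swift <usual arguments>
    pprof --text ./swift ./swift.prof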
diff --git a/src/cell.c b/src/cell.c
index 166538aa2fd9fe20de9c2a9b43abad6f13383ec4..3e3a8e6cc537f948a809c1a3ebf3cb18194fedd1 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -764,6 +764,9 @@ void cell_clean_links(struct cell *c, void *data) {
 
   c->force = NULL;
   c->nr_force = 0;
+
+  c->grav = NULL;
+  c->nr_grav = 0;
 }
 
 /**
@@ -1004,7 +1007,6 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
   if (c->kick != NULL) scheduler_activate(s, c->kick);
   if (c->cooling != NULL) scheduler_activate(s, c->cooling);
   if (c->sourceterms != NULL) scheduler_activate(s, c->sourceterms);
-  if (c->grav_external != NULL) scheduler_activate(s, c->grav_external);
 
   return 0;
 }
diff --git a/src/cell.h b/src/cell.h
index 7c6eed55b83277fe9c489d2ab58683d4859d3d59..f1b5359e92251d73733a49eccd8029af993aa3f9 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -158,9 +158,6 @@ struct cell {
   /* Tasks for gravity tree. */
   struct task *grav_up, *grav_down;
 
-  /* Task for external gravity */
-  struct task *grav_external;
-
   /* Task for cooling */
   struct task *cooling;
 
diff --git a/src/engine.c b/src/engine.c
index e0ad2c2028b660ac473bf3fe6c9c587a3c6c179a..d659c5b15cea81f450b3c99b64b840735bd8e2b1 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -1248,10 +1248,9 @@ void engine_make_external_gravity_tasks(struct engine *e) {
     /* Is that neighbour local ? */
     if (ci->nodeID != nodeID) continue;
 
-    /* If the cells is local build a self-interaction */
-    ci->grav_external = scheduler_addtask(sched, task_type_self,
-                                          task_subtype_external_grav, 0, 0,
-                                          ci, NULL, 0);
+    /* If the cell is local, build a self-interaction. */
+    scheduler_addtask(sched, task_type_self, task_subtype_external_grav, 0, 0,
+                      ci, NULL, 0);
   }
 }
 
@@ -1330,56 +1329,101 @@ void engine_make_hydroloop_tasks(struct engine *e) {
 /**
  * @brief Counts the tasks associated with one cell and constructs the links
  *
- * For each hydrodynamic task, construct the links with the corresponding cell.
- * Similarly, construct the dependencies for all the sorting tasks.
+ * For each hydrodynamic and gravity task, construct the links with
+ * the corresponding cell.  Similarly, construct the dependencies for
+ * all the sorting tasks.
  *
  * @param e The #engine.
  */
 void engine_count_and_link_tasks(struct engine *e) {
 
-  struct scheduler *sched = &e->sched;
+  struct scheduler *const sched = &e->sched;
+  const int nr_tasks = sched->nr_tasks;
 
-  for (int ind = 0; ind < sched->nr_tasks; ind++) {
+  for (int ind = 0; ind < nr_tasks; ind++) {
 
-    struct task *t = &sched->tasks[ind];
+    struct task *const t = &sched->tasks[ind];
+    struct cell *const ci = t->ci;
+    struct cell *const cj = t->cj;
 
     /* Link sort tasks together. */
-    if (t->type == task_type_sort && t->ci->split)
+    if (t->type == task_type_sort && ci->split)
       for (int j = 0; j < 8; j++)
-        if (t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts != NULL) {
-          scheduler_addunlock(sched, t->ci->progeny[j]->sorts, t);
+        if (ci->progeny[j] != NULL && ci->progeny[j]->sorts != NULL) {
+          scheduler_addunlock(sched, ci->progeny[j]->sorts, t);
         }
 
-    /* Link density tasks to cells. */
+    /* Link self tasks to cells. */
     if (t->type == task_type_self) {
-      atomic_inc(&t->ci->nr_tasks);
+      atomic_inc(&ci->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        engine_addlink(e, &t->ci->density, t);
-        atomic_inc(&t->ci->nr_density);
+        engine_addlink(e, &ci->density, t);
+        atomic_inc(&ci->nr_density);
+      }
+      if (t->subtype == task_subtype_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
       }
+      if (t->subtype == task_subtype_external_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+      }
+
+      /* Link pair tasks to cells. */
     } else if (t->type == task_type_pair) {
-      atomic_inc(&t->ci->nr_tasks);
-      atomic_inc(&t->cj->nr_tasks);
+      atomic_inc(&ci->nr_tasks);
+      atomic_inc(&cj->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        engine_addlink(e, &t->ci->density, t);
-        atomic_inc(&t->ci->nr_density);
-        engine_addlink(e, &t->cj->density, t);
-        atomic_inc(&t->cj->nr_density);
+        engine_addlink(e, &ci->density, t);
+        atomic_inc(&ci->nr_density);
+        engine_addlink(e, &cj->density, t);
+        atomic_inc(&cj->nr_density);
       }
+      if (t->subtype == task_subtype_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+        engine_addlink(e, &cj->grav, t);
+        atomic_inc(&cj->nr_grav);
+      }
+
+      /* Link sub-self tasks to cells. */
     } else if (t->type == task_type_sub_self) {
-      atomic_inc(&t->ci->nr_tasks);
+      atomic_inc(&ci->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        engine_addlink(e, &t->ci->density, t);
-        atomic_inc(&t->ci->nr_density);
+        engine_addlink(e, &ci->density, t);
+        atomic_inc(&ci->nr_density);
+      }
+      if (t->subtype == task_subtype_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+      }
+      if (t->subtype == task_subtype_external_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
       }
+
+      /* Link sub-pair tasks to cells. */
     } else if (t->type == task_type_sub_pair) {
-      atomic_inc(&t->ci->nr_tasks);
-      atomic_inc(&t->cj->nr_tasks);
+      atomic_inc(&ci->nr_tasks);
+      atomic_inc(&cj->nr_tasks);
       if (t->subtype == task_subtype_density) {
-        engine_addlink(e, &t->ci->density, t);
-        atomic_inc(&t->ci->nr_density);
-        engine_addlink(e, &t->cj->density, t);
-        atomic_inc(&t->cj->nr_density);
+        engine_addlink(e, &ci->density, t);
+        atomic_inc(&ci->nr_density);
+        engine_addlink(e, &cj->density, t);
+        atomic_inc(&cj->nr_density);
+      }
+      if (t->subtype == task_subtype_grav) {
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+        engine_addlink(e, &cj->grav, t);
+        atomic_inc(&cj->nr_grav);
+      }
+      if (t->subtype == task_subtype_external_grav) {
+        error("Found a sub-pair/external-gravity task...");
+        engine_addlink(e, &ci->grav, t);
+        atomic_inc(&ci->nr_grav);
+        engine_addlink(e, &cj->grav, t);
+        atomic_inc(&cj->nr_grav);
       }
     }
   }
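The linking pattern repeated throughout the hunk above is a plain
prepend onto per-cell singly-linked lists (ci->density, ci->grav, and
so on), whose nodes come from the preallocated e->links pool. A
minimal single-threaded C sketch of the idea, not SWIFT's exact
engine_addlink:

    struct task;                      /* opaque for this sketch */

    struct link {
      struct task *t;                 /* task this link refers to */
      struct link *next;              /* next link in the cell's list */
    };

    /* Prepend a link node for task t onto a list head such as
     * &ci->grav. The real code hands out nodes from the shared pool
     * and bumps the matching counter (e.g. ci->nr_grav) atomically. */
    static void addlink(struct link **head, struct link *node,
                        struct task *t) {
      node->t = t;
      node->next = *head;             /* old head becomes the tail */
      *head = node;
    }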
@@ -1865,14 +1909,16 @@ void engine_maketasks(struct engine *e) {
   /* Split the tasks. */
   scheduler_splittasks(sched);
 
-  /* Allocate the list of cell-task links. The maximum number of links
-     is the number of cells (s->tot_cells) times the number of neighbours (27)
-     times the number of interaction types (2, density and force). */
+  /* Allocate the list of cell-task links. The maximum number of links is the
+   * number of cells (s->tot_cells) times the number of neighbours (26) times
+   * the number of pair interaction types (3: density, force, grav), plus 4
+   * self links (density, force, grav, ext_grav) per cell. EXTRA_HYDRO_LOOP
+   * adds a gradient pair type, giving the 26 * 4 + 4 branch. */
   if (e->links != NULL) free(e->links);
 #ifdef EXTRA_HYDRO_LOOP
-  e->size_links = s->tot_cells * 27 * 3;
+  e->size_links = s->tot_cells * (26 * 4 + 4);
 #else
-  e->size_links = s->tot_cells * 27 * 2;
+  e->size_links = s->tot_cells * (26 * 3 + 4);
 #endif
   if ((e->links = malloc(sizeof(struct link) * e->size_links)) == NULL)
     error("Failed to allocate cell-task links.");
diff --git a/src/space.c b/src/space.c
index 186e11c066f3c8d788c3eaa189da3ed2461aed87..432f26621da978e9cea882b10934dfe9412112cc 100644
--- a/src/space.c
+++ b/src/space.c
@@ -395,6 +395,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       s->cells_top[k].nr_density = 0;
       s->cells_top[k].nr_gradient = 0;
       s->cells_top[k].nr_force = 0;
+      s->cells_top[k].nr_grav = 0;
       s->cells_top[k].density = NULL;
       s->cells_top[k].gradient = NULL;
       s->cells_top[k].force = NULL;
@@ -406,7 +407,6 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
       s->cells_top[k].extra_ghost = NULL;
       s->cells_top[k].ghost = NULL;
       s->cells_top[k].kick = NULL;
-      s->cells_top[k].grav_external = NULL;
       s->cells_top[k].cooling = NULL;
       s->cells_top[k].sourceterms = NULL;
       s->cells_top[k].super = &s->cells_top[k];
@@ -601,8 +601,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   for (size_t k = 1; k < nr_parts; k++) {
     if (ind[k - 1] > ind[k]) {
       error("Sort failed!");
-    } else if (ind[k] != cell_getid(s->cdim, 
-                                    s->parts[k].x[0] * s->iwidth[0],
+    } else if (ind[k] != cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0],
                                     s->parts[k].x[1] * s->iwidth[1],
                                     s->parts[k].x[2] * s->iwidth[2])) {
       error("Incorrect indices!");