diff --git a/examples/analyse_tasks.py b/examples/analyse_tasks.py
index 17816cab62a343711abee14e0c9d6b7e70ce8e5c..bb16a143182cd749d2a42dd81ac787425ca18b65 100755
--- a/examples/analyse_tasks.py
+++ b/examples/analyse_tasks.py
@@ -56,6 +56,11 @@ TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
 SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
             "tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
 
+SIDS = ["(-1,-1,-1)", "(-1,-1, 0)", "(-1,-1, 1)", "(-1, 0,-1)",
+        "(-1, 0, 0)", "(-1, 0, 1)", "(-1, 1,-1)", "(-1, 1, 0)",
+        "(-1, 1, 1)", "( 0,-1,-1)", "( 0,-1, 0)", "( 0,-1, 1)",
+        "( 0, 0,-1)"]
+
 #  Read input.
 data = pl.loadtxt( infile )
 
@@ -66,11 +71,17 @@ print "# Maximum thread id:", maxthread
 full_step = data[0,:]
 tic_step = int(full_step[4])
 toc_step = int(full_step[5])
+updates = int(full_step[6])
+g_updates = int(full_step[7])
+s_updates = int(full_step[8])
 CPU_CLOCK = float(full_step[-1]) / 1000.0
 data = data[1:,:]
 if args.verbose:
-    print "CPU frequency:", CPU_CLOCK * 1000.0
-
+    print "# CPU frequency:", CPU_CLOCK * 1000.0
+print "#   updates:", updates
+print "# g_updates:", g_updates
+print "# s_updates:", s_updates
+    
 #  Avoid start and end times of zero.
 data = data[data[:,4] != 0]
 data = data[data[:,5] != 0]
@@ -78,6 +89,7 @@ data = data[data[:,5] != 0]
 #  Calculate the time range.
 total_t = (toc_step - tic_step)/ CPU_CLOCK
 print "# Data range: ", total_t, "ms"
+print
 
 #  Correct times to relative values.
 start_t = float(tic_step)
@@ -90,15 +102,16 @@ for i in range(maxthread):
     tasks[i] = []
 
 #  Gather into by thread data.
-num_lines = pl.size(data) / 10
+num_lines = pl.size(data) / pl.size(full_step)
 for line in range(num_lines):
     thread = int(data[line,0])
     tic = int(data[line,4]) / CPU_CLOCK
     toc = int(data[line,5]) / CPU_CLOCK
     tasktype = int(data[line,1])
     subtype = int(data[line,2])
+    sid = int(data[line, -1])
 
-    tasks[thread].append([tic,toc,tasktype,subtype])
+    tasks[thread].append([tic,toc,tasktype,subtype, sid])
 
 #  Sort by tic and gather used thread ids.
 threadids = []
@@ -110,10 +123,11 @@ for i in range(maxthread):
 #  Times per task.
 print "# Task times:"
 print "# -----------"
-print "# {0:<16s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
+print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
       .format("type/subtype", "count","minimum", "maximum",
               "sum", "mean", "percent")
 alltasktimes = {}
+sidtimes = {}
 for i in threadids:
     tasktimes = {}
     for task in tasks[i]:
@@ -127,12 +141,19 @@ for i in threadids:
             alltasktimes[key] = []
         alltasktimes[key].append(dt)
 
+        my_sid = task[4]
+        if my_sid > -1:
+            if not my_sid in sidtimes:
+                sidtimes[my_sid] = []
+            sidtimes[my_sid].append(dt)
+                
+        
     print "# Thread : ", i
     for key in sorted(tasktimes.keys()):
         taskmin = min(tasktimes[key])
         taskmax = max(tasktimes[key])
         tasksum = sum(tasktimes[key])
-        print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
+        print "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
               .format(key, len(tasktimes[key]), taskmin, taskmax, tasksum,
                       tasksum / len(tasktimes[key]), tasksum / total_t * 100.0)
     print
@@ -142,12 +163,28 @@ for key in sorted(alltasktimes.keys()):
     taskmin = min(alltasktimes[key])
     taskmax = max(alltasktimes[key])
     tasksum = sum(alltasktimes[key])
-    print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
+    print "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
           .format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum,
                   tasksum / len(alltasktimes[key]),
                   tasksum / (len(threadids) * total_t) * 100.0)
 print
 
+# For pairs, show timing statistics sorted by SID
+print "# By SID (all threads): "
+print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
+    .format("Pair/Sub-pair SID", "count","minimum", "maximum",
+            "sum", "mean", "percent")
+
+for sid in sorted(sidtimes.keys()):  # only SIDs present in the dump; a fixed range(0,13) raised KeyError for absent ones
+    sidmin = min(sidtimes[sid])
+    sidmax = max(sidtimes[sid])
+    sidsum = sum(sidtimes[sid])
+    print "{0:3d} {1:15s}: {2:7d} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.4f} {7:9.2f}"\
+        .format(sid, SIDS[sid], len(sidtimes[sid]), sidmin, sidmax, sidsum,
+                sidsum / len(sidtimes[sid]),
+                sidsum / (len(threadids) * total_t) * 100.0)
+print
+
 #  Dead times.
 print "# Times not in tasks (deadtimes)"
 print "# ------------------------------"
diff --git a/examples/main.c b/examples/main.c
index 5a4e56ad4f5744345a7c2a61fbe008c8e750685b..5d747b2c4fd493a4b2b5edf43c2841584c8a88cf 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -717,19 +717,21 @@ int main(int argc, char *argv[]) {
       FILE *file_thread;
       file_thread = fopen(dumpfile, "w");
       /* Add some information to help with the plots */
-      fprintf(file_thread, " %i %i %i %i %lli %lli %i %i %i %lli\n", -2, -1, -1,
-              1, e.tic_step, e.toc_step, 0, 0, 0, cpufreq);
+      fprintf(file_thread, " %i %i %i %i %lli %lli %zi %zi %zi %i %lli\n", -2,
+              -1, -1, 1, e.tic_step, e.toc_step, e.updates, e.g_updates,
+              e.s_updates, 0, cpufreq);
       for (int l = 0; l < e.sched.nr_tasks; l++) {
         if (!e.sched.tasks[l].implicit && e.sched.tasks[l].toc != 0) {
           fprintf(
-              file_thread, " %i %i %i %i %lli %lli %i %i %i %i\n",
+              file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n",
               e.sched.tasks[l].rid, e.sched.tasks[l].type,
               e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL),
               e.sched.tasks[l].tic, e.sched.tasks[l].toc,
               (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->count,
               (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->count,
               (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->gcount,
-              (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->gcount);
+              (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->gcount,
+              e.sched.tasks[l].sid);
         }
       }
       fclose(file_thread);
diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py
index 88f176687db8116cfd4370970769164985e4d366..43fb5f004e2397a830c47725f3564c08cf004f97 100755
--- a/examples/plot_tasks.py
+++ b/examples/plot_tasks.py
@@ -183,7 +183,7 @@ ecounter = []
 for i in range(nthread):
     ecounter.append(0)
 
-num_lines = pl.size(data) / 10
+num_lines = pl.size(data) / pl.size(full_step)
 for line in range(num_lines):
     thread = int(data[line,0])
 
diff --git a/src/engine.c b/src/engine.c
index 1281470dbb0c8a506997929fd01e9c9eb5343f71..827778100fcdea0a89afb210677bf26fc9acd8b8 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -331,7 +331,8 @@ static void *engine_do_redistribute(int *counts, char *parts,
       for (int k = 0; k < 2 * nr_nodes; k++) {
         char buff[MPI_MAX_ERROR_STRING];
         MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
-        message("request from source %i, tag %i has error '%s'.", stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff);
+        message("request from source %i, tag %i has error '%s'.",
+                stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff);
       }
       error("Failed during waitall for part data.");
     }
@@ -605,7 +606,6 @@ void engine_redistribute(struct engine *e) {
   if (s->nr_gparts > 0)
     space_gparts_sort(s, g_dest, s->nr_gparts, 0, nr_nodes - 1, e->verbose);
 
-
 #ifdef SWIFT_DEBUG_CHECKS
   /* Verify that the gpart have been sorted correctly. */
   for (size_t k = 0; k < s->nr_gparts; k++) {
@@ -621,8 +621,8 @@ void engine_redistribute(struct engine *e) {
     const int new_node = c->nodeID;
 
     if (g_dest[k] != new_node)
-        error("gpart's new node index not matching sorted index (%d != %d).",
-              g_dest[k], new_node);
+      error("gpart's new node index not matching sorted index (%d != %d).",
+            g_dest[k], new_node);
 
     if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] ||
         gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] ||
diff --git a/src/runner.c b/src/runner.c
index 08605b9093e93e8988ca462e92507a7d8f5fc068..735865bad088f09e01756a4e9c6ee4c2f81ab4aa 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -1739,9 +1739,19 @@ void *runner_main(void *data) {
       struct cell *ci = t->ci;
       struct cell *cj = t->cj;
 
-/* Mark the thread we run on */
 #ifdef SWIFT_DEBUG_TASKS
+      /* Mark the thread we run on */
       t->rid = r->cpuid;
+
+      /* And recover the pair direction */
+      if (t->type == task_type_pair || t->type == task_type_sub_pair) {
+        struct cell *ci_temp = ci;
+        struct cell *cj_temp = cj;
+        double shift[3];
+        t->sid = space_getsid(e->s, &ci_temp, &cj_temp, shift);
+      } else {
+        t->sid = -1;
+      }
 #endif
 
 /* Check that we haven't scheduled an inactive task */
diff --git a/src/task.h b/src/task.h
index 052f3e8036381441e283d3f7847d09e98ec1dac2..cd15e09cc5a1b597f96dad7dfeaa479f721f0c8b 100644
--- a/src/task.h
+++ b/src/task.h
@@ -162,6 +162,9 @@ struct task {
   /*! ID of the queue or runner owning this task */
   short int rid;
 
+  /*! Information about the direction of the pair task */
+  short int sid;
+
   /*! Start and end time of this task */
   ticks tic, toc;
 #endif