Merge MPI and non-MPI task dump plotting and analysis scripts.

Keep maintenance down.

Merge MPI and non-MPI task dump plotting and analysis scripts.
24132c48 · Peter W. Draper · 637853ce · 24132c48 · 637853ce · 24132c48
Commit 24132c48 authored Sep 15, 2017 by Peter W. Draper
--- a/examples/analyse_tasks.py
+++ b/examples/analyse_tasks.py
@@ -3,8 +3,9 @@
 Usage:
    analyse_tasks.py [options] input.dat

-where input.dat is a thread info file for a step.  Use the '-y interval' flag
-of the swift command to create these.
+where input.dat is a thread info file for a step (MPI or non-MPI). Use the
+'-y interval' flag of the swift and swift_mpi commands to create these
+(you will also need to configure with the --enable-task-debugging option).

 The output is an analysis of the task timings, including deadtime per thread
 and step, total amount of time spent for each task type, for the whole step
@@ -42,6 +43,9 @@ parser.add_argument("input", help="Thread data file (-y output)")
 parser.add_argument("-v", "--verbose", dest="verbose",
                    help="Verbose output (default: False)",
                    default=False, action="store_true")
+parser.add_argument("-r", "--rank", dest="rank",
+                    help="Rank to process (default: all)",
+                    default="all", action="store")

 args = parser.parse_args()
 infile = args.input
@@ -50,8 +54,8 @@ infile = args.input
 TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
             "init_grav", "ghost", "extra_ghost", "drift_part",
             "drift_gpart", "kick1", "kick2", "timestep", "send", "recv",
-             "grav_top_level", "grav_long_range", "grav_ghost", "grav_mm",
-             "grav_down", "cooling", "sourceterms", "count"]
+             "grav_top_level", "grav_long_range", "grav_mm", "grav_down",
+             "cooling", "sourceterms", "count"]

 SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
            "tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
@@ -63,28 +67,81 @@ SIDS = ["(-1,-1,-1)", "(-1,-1, 0)", "(-1,-1, 1)", "(-1, 0,-1)",

 #  Read input.
 data = pl.loadtxt( infile )
+full_step = data[0,:]

-maxthread = int(max(data[:,0])) + 1
-print "# Maximum thread id:", maxthread
-
-#  Recover the start and end time
+#  Do we have an MPI file?
 full_step = data[0,:]
-tic_step = int(full_step[4])
-toc_step = int(full_step[5])
+if full_step.size == 13:
+    print "# MPI mode"
+    mpimode = True
+    nranks = int(max(data[:,0])) + 1
+    print "# Number of ranks:", nranks
+    rankcol = 0
+    threadscol = 1
+    taskcol = 2
+    subtaskcol = 3
+    ticcol = 5
+    toccol = 6
+    updates = int(full_step[7])
+    g_updates = int(full_step[8])
+    s_updates = int(full_step[9])
+else:
+    print "# non MPI mode"
+    nranks = 1
+    mpimode = False
+    rankcol = -1
+    threadscol = 0
+    taskcol = 1
+    subtaskcol = 2
+    ticcol = 4
+    toccol = 5
    updates = int(full_step[6])
    g_updates = int(full_step[7])
    s_updates = int(full_step[8])
+
+#  Get the CPU clock to convert ticks into milliseconds.
 CPU_CLOCK = float(full_step[-1]) / 1000.0
-data = data[1:,:]
 if args.verbose:
    print "# CPU frequency:", CPU_CLOCK * 1000.0
 print "#   updates:", updates
 print "# g_updates:", g_updates
 print "# s_updates:", s_updates

+#  Select the ranks to process. Ranks are zero-based: MPI files have
+#  0 .. nranks-1, non-MPI files are treated as the single rank 0.
+if mpimode:
+    if args.rank == "all":
+        ranks = range(nranks)
+    else:
+        ranks = [int(args.rank)]
+        #  Negative ranks match no rows, so reject them here as well rather
+        #  than failing later when the step line of the rank is read.
+        if ranks[0] < 0 or ranks[0] >= nranks:
+            print "Error: rank must be in the range 0 to " + str(nranks - 1)
+            sys.exit(1)
+else:
+    ranks = [0]
+
+maxthread = int(max(data[:,threadscol])) + 1
+print "# Maximum thread id:", maxthread
+
 #  Avoid start and end times of zero.
-data = data[data[:,4] != 0]
-data = data[data[:,5] != 0]
+sdata = data[data[:,ticcol] != 0]
+sdata = sdata[sdata[:,toccol] != 0]  # chain on sdata to keep the tic filter
+
+#  Now we process the required ranks.
+for rank in ranks:
+    if mpimode:
+        print "# Rank", rank
+        data = sdata[sdata[:,rankcol] == rank]
+        full_step = data[0,:]
+    else:
+        data = sdata
+
+    #  Recover the start and end time
+    tic_step = int(full_step[ticcol])
+    toc_step = int(full_step[toccol])
+    data = data[1:,:]
+
+    #  Avoid start and end times of zero.
+    data = data[data[:,ticcol] != 0]
+    data = data[data[:,toccol] != 0]

    #  Calculate the time range.
    total_t = (toc_step - tic_step)/ CPU_CLOCK
@@ -93,8 +150,9 @@ print

    #  Correct times to relative values.
    start_t = float(tic_step)
-data[:,4] -= start_t
-data[:,5] -= start_t
+    data[:,ticcol] -= start_t
+    data[:,toccol] -= start_t
+    end_t = (toc_step - start_t) / CPU_CLOCK

    tasks = {}
    tasks[-1] = []
@@ -102,21 +160,20 @@ for i in range(maxthread):
        tasks[i] = []

    #  Gather into by thread data.
-num_lines = pl.size(data) / pl.size(full_step)
+    num_lines = pl.shape(data)[0]
    for line in range(num_lines):
-    thread = int(data[line,0])
-    tic = int(data[line,4]) / CPU_CLOCK
-    toc = int(data[line,5]) / CPU_CLOCK
-    tasktype = int(data[line,1])
-    subtype = int(data[line,2])
+        thread = int(data[line,threadscol])
+        tic = int(data[line,ticcol]) / CPU_CLOCK
+        toc = int(data[line,toccol]) / CPU_CLOCK
+        tasktype = int(data[line,taskcol])
+        subtype = int(data[line,subtaskcol])
        sid = int(data[line, -1])

        tasks[thread].append([tic,toc,tasktype,subtype, sid])

-#  Sort by tic and gather used thread ids.
+    #  Sort by tic and gather used threads.
    threadids = []
    for i in range(maxthread):
-    if len(tasks[i]) > 0:
        tasks[i] = sorted(tasks[i], key=lambda task: task[0])
        threadids.append(i)

@@ -126,6 +183,7 @@ print "# -----------"
    print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
          .format("type/subtype", "count","minimum", "maximum",
                  "sum", "mean", "percent")
+
    alltasktimes = {}
    sidtimes = {}
    for i in threadids:
@@ -147,7 +205,6 @@ for i in threadids:
                    sidtimes[my_sid] = []
                sidtimes[my_sid].append(dt)

-        
        print "# Thread : ", i
        for key in sorted(tasktimes.keys()):
            taskmin = min(tasktimes[key])
@@ -163,13 +220,13 @@ for key in sorted(alltasktimes.keys()):
        taskmin = min(alltasktimes[key])
        taskmax = max(alltasktimes[key])
        tasksum = sum(alltasktimes[key])
-    print "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
+        print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
              .format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum,
                      tasksum / len(alltasktimes[key]),
                      tasksum / (len(threadids) * total_t) * 100.0)
    print

-# For pairs, show stuf sorted by SID
+    # For pairs, show stuff sorted by SID
    print "# By SID (all threads): "
    print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
        .format("Pair/Sub-pair SID", "count","minimum", "maximum",
@@ -200,10 +257,13 @@ print "# Time before first task:"
    print "# no.    : {0:>9s} {1:>9s}".format("value", "percent")
    predeadtimes = []
    for i in threadids:
+        if len(tasks[i]) > 0:
            predeadtime = tasks[i][0][0]
            print "thread {0:2d}: {1:9.4f} {2:9.4f}"\
                  .format(i, predeadtime, predeadtime / total_t * 100.0)
            predeadtimes.append(predeadtime)
+        else:
+            predeadtimes.append(0.0)

    predeadmin = min(predeadtimes)
    predeadmax = max(predeadtimes)
@@ -220,10 +280,13 @@ print "# Time after last task:"
    print "# no.    : {0:>9s} {1:>9s}".format("value", "percent")
    postdeadtimes = []
    for i in threadids:
+        if len(tasks[i]) > 0:
            postdeadtime = total_t - tasks[i][-1][1]
            print "thread {0:2d}: {1:9.4f} {2:9.4f}"\
                  .format(i, postdeadtime, postdeadtime / total_t * 100.0)
            postdeadtimes.append(postdeadtime)
+        else:
+            postdeadtimes.append(0.0)

    postdeadmin = min(postdeadtimes)
    postdeadmax = max(postdeadtimes)
@@ -243,7 +306,10 @@ print "# no.    : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
    enginedeadtimes = []
    for i in threadids:
        deadtimes = []
+        if len(tasks[i]) > 0:
            last = tasks[i][0][0]
+        else:
+            last = 0.0
        for task in tasks[i]:
            dt = task[0] - last
            deadtimes.append(dt)
@@ -253,7 +319,7 @@ for i in threadids:
        if len(deadtimes) > 1:
            deadtimes = deadtimes[1:]
        else:
-        #  Only one task, so no deadtime by definition.
+            #  Only one or fewer tasks, so no deadtime by definition.
            deadtimes = [0.0]

        deadmin = min(deadtimes)


--- a/examples/analyse_tasks_MPI.py
+++ b/examples/analyse_tasks_MPI.py
-#!/usr/bin/env python
-"""
-Usage:
-    analsyse_tasks_MPI.py [options] input.dat
-
-where input.dat is a thread info file for an MPI step.  Use the '-y interval'
-flag of the swift command to create these.
-
-The output is an analysis of the task timings, including deadtime per thread
-and step, total amount of time spent for each task type, for the whole step
-and per thread and the minimum and maximum times spent per task type.
-
-This file is part of SWIFT.
-Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published
-by the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with this program.  If not, see <http://www.gnu.org/licenses/>.
-"""
-
-import matplotlib
-matplotlib.use("Agg")
-import matplotlib.collections as collections
-import matplotlib.ticker as plticker
-import pylab as pl
-import sys
-import argparse
-
-#  Handle the command line.
-parser = argparse.ArgumentParser(description="Analyse task dumps")
-
-parser.add_argument("input", help="Thread data file (-y output)")
-parser.add_argument("-v", "--verbose", dest="verbose",
-                    help="Verbose output (default: False)",
-                    default=False, action="store_true")
-parser.add_argument("-r", "--rank", dest="rank",
-                    help="Rank to process (default: all)",
-                    default="all", action="store")
-
-args = parser.parse_args()
-infile = args.input
-
-#  Tasks and subtypes. Indexed as in tasks.h.
-TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
-             "init_grav", "ghost", "extra_ghost", "drift_part",
-             "drift_gpart", "kick1", "kick2", "timestep", "send", "recv",
-             "grav_top_level", "grav_long_range", "grav_mm", "grav_down",
-             "cooling", "sourceterms", "count"]
-
-SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
-            "tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
-
-SIDS = ["(-1,-1,-1)", "(-1,-1, 0)", "(-1,-1, 1)", "(-1, 0,-1)",
-        "(-1, 0, 0)", "(-1, 0, 1)", "(-1, 1,-1)", "(-1, 1, 0)",
-        "(-1, 1, 1)", "( 0,-1,-1)", "( 0,-1, 0)", "( 0,-1, 1)",
-        "( 0, 0,-1)"]
-
-#  Read input.
-data = pl.loadtxt( infile )
-
-#  Get the CPU clock to convert ticks into milliseconds.
-full_step = data[0,:]
-updates = int(full_step[7])
-g_updates = int(full_step[8])
-s_updates = int(full_step[9])
-CPU_CLOCK = float(full_step[-1]) / 1000.0
-if args.verbose:
-    print "# CPU frequency:", CPU_CLOCK * 1000.0
-print "#   updates:", updates
-print "# g_updates:", g_updates
-print "# s_updates:", s_updates
-
-nranks = int(max(data[:,0])) + 1
-print "# Number of ranks:", nranks
-if args.rank == "all":
-    ranks = range(nranks)
-else:
-    ranks = [int(args.rank)]
-    if ranks[0] >= nranks:
-        print "Error: maximum rank is " + str(nranks - 1)
-        sys.exit(1)
-
-maxthread = int(max(data[:,1])) + 1
-print "# Maximum thread id:", maxthread
-
-#  Avoid start and end times of zero.
-sdata = data[data[:,5] != 0]
-sdata = data[data[:,6] != 0]
-
-#  Now we process the required ranks.
-for rank in ranks:
-    print "# Rank", rank
-    data = sdata[sdata[:,0] == rank]
-
-    #  Recover the start and end time
-    full_step = data[0,:]
-    tic_step = int(full_step[5])
-    toc_step = int(full_step[6])
-    data = data[1:,:]
-
-    #  Avoid start and end times of zero.
-    data = data[data[:,5] != 0]
-    data = data[data[:,6] != 0]
-
-    #  Calculate the time range.
-    total_t = (toc_step - tic_step)/ CPU_CLOCK
-    print "# Data range: ", total_t, "ms"
-    print
-
-    #  Correct times to relative values.
-    start_t = float(tic_step)
-    data[:,5] -= start_t
-    data[:,6] -= start_t
-    end_t = (toc_step - start_t) / CPU_CLOCK
-
-    tasks = {}
-    tasks[-1] = []
-    for i in range(maxthread):
-        tasks[i] = []
-
-    #  Gather into by thread data.
-    num_lines = pl.shape(data)[0]
-    for line in range(num_lines):
-        thread = int(data[line,1])
-        tic = int(data[line,5]) / CPU_CLOCK
-        toc = int(data[line,6]) / CPU_CLOCK
-        tasktype = int(data[line,2])
-        subtype = int(data[line,3])
-        sid = int(data[line, -1])
-
-        tasks[thread].append([tic,toc,tasktype,subtype, sid])
-
-    #  Sort by tic and gather used threads.
-    threadids = []
-    for i in range(maxthread):
-        tasks[i] = sorted(tasks[i], key=lambda task: task[0])
-        threadids.append(i)
-
-    #  Times per task.
-    print "# Task times:"
-    print "# -----------"
-    print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
-          .format("type/subtype", "count","minimum", "maximum",
-                  "sum", "mean", "percent")
-
-    alltasktimes = {}
-    sidtimes = {}
-    for i in threadids:
-        tasktimes = {}
-        for task in tasks[i]:
-            key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]]
-            dt = task[1] - task[0]
-            if not key in tasktimes:
-                tasktimes[key] = []
-            tasktimes[key].append(dt)
-
-            if not key in alltasktimes:
-                alltasktimes[key] = []
-            alltasktimes[key].append(dt)
-            
-            my_sid = task[4]
-            if my_sid > -1:
-                if not my_sid in sidtimes:
-                    sidtimes[my_sid] = []
-                sidtimes[my_sid].append(dt)
-
-        print "# Thread : ", i
-        for key in sorted(tasktimes.keys()):
-            taskmin = min(tasktimes[key])
-            taskmax = max(tasktimes[key])
-            tasksum = sum(tasktimes[key])
-            print "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
-                  .format(key, len(tasktimes[key]), taskmin, taskmax, tasksum,
-                          tasksum / len(tasktimes[key]), tasksum / total_t * 100.0)
-        print
-
-    print "# All threads : "
-    for key in sorted(alltasktimes.keys()):
-        taskmin = min(alltasktimes[key])
-        taskmax = max(alltasktimes[key])
-        tasksum = sum(alltasktimes[key])
-        print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
-              .format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum,
-                      tasksum / len(alltasktimes[key]),
-                      tasksum / (len(threadids) * total_t) * 100.0)
-    print
-
-    # For pairs, show stuf sorted by SID
-    print "# By SID (all threads): "
-    print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
-        .format("Pair/Sub-pair SID", "count","minimum", "maximum",
-                "sum", "mean", "percent")
-
-    for sid in range(0,13):
-        if sid in sidtimes:
-            sidmin = min(sidtimes[sid])
-            sidmax = max(sidtimes[sid])
-            sidsum = sum(sidtimes[sid])
-            sidcount = len(sidtimes[sid])
-            sidmean = sidsum / sidcount
-        else:
-            sidmin = 0.
-            sidmax = 0.
-            sidsum = 0.
-            sidcount = 0
-            sidmean = 0.
-        print "{0:3d} {1:15s}: {2:7d} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.4f} {7:9.2f}"\
-            .format(sid, SIDS[sid], sidcount, sidmin, sidmax, sidsum,
-                    sidmean, sidsum / (len(threadids) * total_t) * 100.0)   
-    print
-
-    #  Dead times.
-    print "# Times not in tasks (deadtimes)"
-    print "# ------------------------------"
-    print "# Time before first task:"
-    print "# no.    : {0:>9s} {1:>9s}".format("value", "percent")
-    predeadtimes = []
-    for i in threadids:
-        if len(tasks[i]) > 0:
-            predeadtime = tasks[i][0][0]
-            print "thread {0:2d}: {1:9.4f} {2:9.4f}"\
-                  .format(i, predeadtime, predeadtime / total_t * 100.0)
-            predeadtimes.append(predeadtime)
-        else:
-            predeadtimes.append(0.0)
-
-    predeadmin = min(predeadtimes)
-    predeadmax = max(predeadtimes)
-    predeadsum = sum(predeadtimes)
-    print "#        : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
-          .format("count", "minimum", "maximum", "sum", "mean", "percent")
-    print "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
-          .format(len(predeadtimes), predeadmin, predeadmax, predeadsum,
-                  predeadsum / len(predeadtimes),
-                  predeadsum / (len(threadids) * total_t ) * 100.0)
-    print
-
-    print "# Time after last task:"
-    print "# no.    : {0:>9s} {1:>9s}".format("value", "percent")
-    postdeadtimes = []
-    for i in threadids:
-        if len(tasks[i]) > 0:
-            postdeadtime = total_t - tasks[i][-1][1]
-            print "thread {0:2d}: {1:9.4f} {2:9.4f}"\
-                  .format(i, postdeadtime, postdeadtime / total_t * 100.0)
-            postdeadtimes.append(postdeadtime)
-        else:
-            postdeadtimes.append(0.0)
-
-    postdeadmin = min(postdeadtimes)
-    postdeadmax = max(postdeadtimes)
-    postdeadsum = sum(postdeadtimes)
-    print "#        : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
-          .format("count", "minimum", "maximum", "sum", "mean", "percent")
-    print "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
-          .format(len(postdeadtimes), postdeadmin, postdeadmax, postdeadsum,
-                  postdeadsum / len(postdeadtimes),
-                  postdeadsum / (len(threadids) * total_t ) * 100.0)
-    print
-
-    #  Time in engine, i.e. from first to last tasks.
-    print "# Time between tasks (engine deadtime):"
-    print "# no.    : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
-          .format("count", "minimum", "maximum", "sum", "mean", "percent")
-    enginedeadtimes = []
-    for i in threadids:
-        deadtimes = []
-        if len(tasks[i]) > 0:
-            last = tasks[i][0][0]
-        else:
-            last = 0.0
-        for task in tasks[i]:
-            dt = task[0] - last
-            deadtimes.append(dt)
-            last = task[1]
-
-        #  Drop first value, last value already gone.
-        if len(deadtimes) > 1:
-            deadtimes = deadtimes[1:]
-        else:
-            #  Only one or fewer tasks, so no deadtime by definition.
-            deadtimes = [0.0]
-
-        deadmin = min(deadtimes)
-        deadmax = max(deadtimes)
-        deadsum = sum(deadtimes)
-        print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
-              .format(i, len(deadtimes), deadmin, deadmax, deadsum,
-                      deadsum / len(deadtimes), deadsum / total_t * 100.0)
-        enginedeadtimes.extend(deadtimes)
-
-    deadmin = min(enginedeadtimes)
-    deadmax = max(enginedeadtimes)
-    deadsum = sum(enginedeadtimes)
-    print "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
-          .format(len(enginedeadtimes), deadmin, deadmax, deadsum,
-                  deadsum / len(enginedeadtimes),
-                  deadsum / (len(threadids) * total_t ) * 100.0)
-    print
-
-    #  All times in step.
-    print "# All deadtimes:"
-    print "# no.    : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
-          .format("count", "minimum", "maximum", "sum", "mean", "percent")
-    alldeadtimes = []
-    for i in threadids:
-        deadtimes = []
-        last = 0
-        for task in tasks[i]:
-            dt = task[0] - last
-            deadtimes.append(dt)
-            last = task[1]
-        dt = total_t - last
-        deadtimes.append(dt)
-
-        deadmin = min(deadtimes)
-        deadmax = max(deadtimes)
-        deadsum = sum(deadtimes)
-        print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
-              .format(i, len(deadtimes), deadmin, deadmax, deadsum,
-                  deadsum / len(deadtimes), deadsum / total_t * 100.0)
-        alldeadtimes.extend(deadtimes)
-
-    deadmin = min(alldeadtimes)
-    deadmax = max(alldeadtimes)
-    deadsum = sum(alldeadtimes)
-    print "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
-          .format(len(alldeadtimes), deadmin, deadmax, deadsum,
-                  deadsum / len(alldeadtimes),
-                  deadsum / (len(threadids) * total_t ) * 100.0)
-    print
-
-sys.exit(0)
--- a/examples/plot_tasks.py
+++ b/examples/plot_tasks.py
 #!/usr/bin/env python
 """
 Usage:
-    plot_tasks.py [options] input.dat output.png
+    plot_tasks.py [options] input.dat png-output-prefix

 where input.dat is a thread info file for a step.  Use the '-y interval' flag
-of the swift command to create these. The output plot will be called
-'output.png'. The --limit option can be used to produce plots with the same
-time span and the --expand option to expand each thread line into '*expand'
-lines, so that adjacent tasks of the same type can be distinguished. Other
-options can be seen using the --help flag.
+of the swift or swift_mpi commands to create these (both commands need to be
+built with the --enable-task-debugging configure option). The output plot will
+be called 'png-output-prefix.png' or 'png-output-prefix<mpi-rank>.png',
+depending on whether the input thread info file is generated by the swift or
+swift_mpi command. When swift_mpi is used each rank has a separate plot.
+
+The --limit option can be used to produce plots with the same time
+span and the --expand option to expand each thread line into '*expand' lines,
+so that adjacent tasks of the same type can be distinguished. Other options
+can be seen using the --help flag.
+
+See the command 'process_plot_tasks' to efficiently wrap this command to
+process a number of thread info files and create an HTML file to view them.

 This file is part of SWIFT.
-Copyright (c) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
+
+Copyright (C) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
                   Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
                   Matthieu Schaller (matthieu.schaller@durham.ac.uk)
-          (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
+          (C) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
+All Rights Reserved.

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Lesser General Public License as published
@@ -42,7 +52,7 @@ import argparse
 parser = argparse.ArgumentParser(description="Plot task graphs")

 parser.add_argument("input", help="Thread data file (-y output)")
-parser.add_argument("outpng", help="Name for output graphic file (PNG)")
+parser.add_argument("outbase", help="Base name for output graphic files (PNG)")
 parser.add_argument("-l", "--limit", dest="limit",
                    help="Upper time limit in millisecs (def: depends on data)",
                    default=0, type=int)
@@ -64,7 +74,7 @@ parser.add_argument("-v", "--verbose", dest="verbose",

 args = parser.parse_args()
 infile = args.input
-outpng = args.outpng
+outbase = args.outbase
 delta_t = args.limit
 expand = args.expand

@@ -78,7 +88,7 @@ PLOT_PARAMS = {"axes.labelsize": 10,
               "figure.figsize" : (args.width, args.height),
               "figure.subplot.left" : 0.03,
               "figure.subplot.right" : 0.995,
-               "figure.subplot.bottom" : 0.09,
+               "figure.subplot.bottom" : 0.1,
               "figure.subplot.top" : 0.99,
               "figure.subplot.wspace" : 0.,
               "figure.subplot.hspace" : 0.,
@@ -89,20 +99,19 @@ pl.rcParams.update(PLOT_PARAMS)

 #  Tasks and subtypes. Indexed as in tasks.h.
 TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
-             "init_grav", "ghost", "extra_ghost", "drift_part",
-             "drift_gpart", "kick1", "kick2", "timestep", "send", "recv",
-             "grav_top_level", "grav_long_range", "grav_ghost", "grav_mm",
-             "grav_down", "cooling", "sourceterms", "count"]
+             "init_grav", "ghost", "extra_ghost", "drift_part", "drift_gpart",
+             "kick1", "kick2", "timestep", "send", "recv", "grav_top_level",
+             "grav_long_range", "grav_mm", "grav_down", "cooling",
+             "sourceterms", "count"]

 SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
            "tend", "xv", "rho", "gpart", "multipole", "spart", "count"]

 #  Task/subtypes of interest.
 FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force",
-             "sub_self/density", "sub_self/grav", "pair/force", "pair/density",
-             "pair/grav", "sub_pair/force",
-             "sub_pair/density", "sub_pair/grav", "recv/xv", "send/xv",
-             "recv/rho", "send/rho",
+             "sub_self/density", "pair/force", "pair/density", "pair/grav",
+             "sub_pair/force",
+             "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho",
             "recv/tend", "send/tend"]

 #  A number of colours for the various types. Recycled when there are
@@ -110,7 +119,7 @@ FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force",
 colours = ["cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue",
           "sienna", "aquamarine", "bisque", "blue", "green", "lightgreen",
           "brown", "purple", "moccasin", "olivedrab", "chartreuse",
+           #  NOTE(review): "steelblue" kept, "darksage" looks to be rejected
+           #  as a colour name by matplotlib 2.0 and later -- confirm.
           "steelblue", "darkgreen", "green", "mediumseagreen",
           "mediumaquamarine", "darkslategrey", "mediumturquoise",
           "black", "cadetblue", "skyblue", "red", "slategray", "gold",
           "slateblue", "blueviolet", "mediumorchid", "firebrick",
@@ -144,34 +153,85 @@ if args.verbose:
 #  Read input.
 data = pl.loadtxt( infile )

-nthread = int(max(data[:,0])) + 1
-print "Number of threads:", nthread
-
-#  Recover the start and end time
+#  Do we have an MPI file?
 full_step = data[0,:]
-tic_step = int(full_step[4])
-toc_step = int(full_step[5])
+if full_step.size == 13:
+    print "MPI mode"
+    mpimode = True
+    nranks = int(max(data[:,0])) + 1
+    print "Number of ranks:", nranks
+    rankcol = 0
+    threadscol = 1
+    taskcol = 2
+    subtaskcol = 3
+    ticcol = 5
+    toccol = 6
+else:
+    print "non MPI mode"
+    nranks = 1
+    mpimode = False
+    rankcol = -1
+    threadscol = 0
+    taskcol = 1
+    subtaskcol = 2
+    ticcol = 4
+    toccol = 5
+
+#  Get CPU_CLOCK to convert ticks into milliseconds.
 CPU_CLOCK = float(full_step[-1]) / 1000.0
-data = data[1:,:]
 if args.verbose:
    print "CPU frequency:", CPU_CLOCK * 1000.0

+nthread = int(max(data[:,threadscol])) + 1
+print "Number of threads:", nthread
+
 # Avoid start and end times of zero.
-data = data[data[:,4] != 0]
-data = data[data[:,5] != 0]
+sdata = data[data[:,ticcol] != 0]
+sdata = sdata[sdata[:,toccol] != 0]

-#  Calculate the time range, if not given.
+# Each rank can have different clocks (compute node), but we want to use the
+# same delta times range for comparisons, so we suck it up and take the hit of
+# precalculating this, unless the user knows better.
 delta_t = delta_t * CPU_CLOCK
 if delta_t == 0:
+    for rank in range(nranks):
+        if mpimode:
+            data = sdata[sdata[:,rankcol] == rank]
+            full_step = data[0,:]
+        tic_step = int(full_step[ticcol])
+        toc_step = int(full_step[toccol])
        dt = toc_step - tic_step
        if dt > delta_t:
            delta_t = dt
    print "Data range: ", delta_t / CPU_CLOCK, "ms"

 # Once more doing the real gather and plots this time.
+for rank in range(nranks):
+    #  Always work from the rows without zero start or end times. MPI files
+    #  also need the rows, and the step line, of the current rank picking out.
+    if mpimode:
+        data = sdata[sdata[:,rankcol] == rank]
+        full_step = data[0,:]
+    else:
+        data = sdata
+
+    #  Start and end times for this rank.
+    tic_step = int(full_step[ticcol])
+    toc_step = int(full_step[toccol])
+    data = data[1:,:]
+    typesseen = []
+    nethread = 0
+
+    #  Dummy image for ranks that have no tasks.
+    if data.size == 0:
+        print "rank ", rank, " has no tasks"
+        fig = pl.figure()
+        ax = fig.add_subplot(1,1,1)
+        ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK)
+        ax.set_ylim(0, nthread*expand)
+        start_t = tic_step
+        end_t = (toc_step - start_t) / CPU_CLOCK
+    else:
+
        start_t = float(tic_step)
-data[:,4] -= start_t
-data[:,5] -= start_t
+        data[:,ticcol] -= start_t
+        data[:,toccol] -= start_t
        end_t = (toc_step - start_t) / CPU_CLOCK

        tasks = {}
@@ -184,9 +244,9 @@ ecounter = []
        for i in range(nthread):
            ecounter.append(0)

-num_lines = pl.size(data) / pl.size(full_step)
+        num_lines = pl.shape(data)[0]
        for line in range(num_lines):
-    thread = int(data[line,0])
+            thread = int(data[line, threadscol])

            # Expand to cover extra lines if expanding.
            ethread = thread * expand + (ecounter[thread] % expand)
@@ -194,15 +254,15 @@ for line in range(num_lines):
            thread = ethread

            tasks[thread].append({})
-    tasktype = TASKTYPES[int(data[line,1])]
-    subtype = SUBTYPES[int(data[line,2])]
+            tasktype = TASKTYPES[int(data[line,taskcol])]
+            subtype = SUBTYPES[int(data[line,subtaskcol])]
            tasks[thread][-1]["type"] = tasktype
            tasks[thread][-1]["subtype"] = subtype
-    tic = int(data[line,4]) / CPU_CLOCK
-    toc = int(data[line,5]) / CPU_CLOCK
+            tic = int(data[line,ticcol]) / CPU_CLOCK
+            toc = int(data[line,toccol]) / CPU_CLOCK
            tasks[thread][-1]["tic"] = tic
            tasks[thread][-1]["toc"] = toc
-    if "self" in tasktype or "pair" in tasktype:
+            if "self" in tasktype or "pair" in tasktype or "recv" in tasktype or "send" in tasktype:
                fulltype = tasktype + "/" + subtype
                if fulltype in SUBCOLOURS:
                    tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype]
@@ -212,14 +272,14 @@ for line in range(num_lines):
                tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype]

        # Use expanded threads from now on.
-nthread = nthread * expand
+        nethread = nthread * expand

        typesseen = []
        fig = pl.figure()
        ax = fig.add_subplot(1,1,1)
        ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK)
-ax.set_ylim(0, nthread)
-for i in range(nthread):
+        ax.set_ylim(0, nethread)
+        for i in range(nethread):

            #  Collect ranges and colours into arrays.
            tictocs = []
@@ -234,6 +294,7 @@ for i in range(nthread):
                    qtask = task["type"] + "/" + task["subtype"]
                else:
                    qtask = task["type"]
+
                if qtask not in typesseen:
                    pl.plot([], [], color=task["colour"], label=qtask)
                    typesseen.append(qtask)
@@ -241,31 +302,37 @@ for i in range(nthread):
            #  Now plot.
            ax.broken_barh(tictocs, [i+0.05,0.90], facecolors = colours, linewidth=0)

+
    #  Legend and room for it.
    nrow = len(typesseen) / 5
-if not args.nolegend:
-    ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white")
-    ax.set_ylim(0, nthread + 0.5)
+    ax.fill_between([0, 0], nethread+0.5, nethread + nrow + 0.5, facecolor="white")
+    ax.set_ylim(0, nethread + 0.5)
+    if data.size > 0:
        ax.legend(loc=1, shadow=True, bbox_to_anchor=(0., 1.05 ,1., 0.2), mode="expand", ncol=5)
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width, box.height*0.8])

    # Start and end of time-step
-ax.plot([0, 0], [0, nthread + nrow + 1], 'k--', linewidth=1)
-ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1)
+    ax.plot([0, 0], [0, nethread + nrow + 1], 'k--', linewidth=1)
+    ax.plot([end_t, end_t], [0, nethread + nrow + 1], 'k--', linewidth=1)
+
+    ax.set_xlabel("Wall clock time [ms]")

-ax.set_xlabel("Wall clock time [ms]", labelpad=0.)
    if expand == 1:
-    ax.set_ylabel("Thread ID", labelpad=0 )
+        ax.set_ylabel("Thread ID" )
    else:
-    ax.set_ylabel("Thread ID * " + str(expand), labelpad=0 )
-ax.set_yticks(pl.array(range(nthread)), True)
+        ax.set_ylabel("Thread ID * " + str(expand) )
+    ax.set_yticks(pl.array(range(nethread)), True)

    loc = plticker.MultipleLocator(base=expand)
    ax.yaxis.set_major_locator(loc)
    ax.grid(True, which='major', axis="y", linestyle="-")

    pl.show()
+    if mpimode:
+        outpng = outbase + str(rank) + ".png"
+    else:
+        outpng = outbase + ".png"
    pl.savefig(outpng)
    print "Graphics done, output written to", outpng



--- a/examples/plot_tasks_MPI.py
+++ b/examples/plot_tasks_MPI.py
-#!/usr/bin/env python
-"""
-Usage:
-    plot_tasks_MPI.py [options] input.dat png-output-prefix
-
-where input.dat is a thread info file for a step.  Use the '-y interval' flag
-of the swift MPI command to create these. The output plot will be called
-'png-output-prefix<mpi-rank>.png', i.e. one each for all the threads in each
-MPI rank.  The --limit option can be used to produce plots with the same time
-span and the --expand option to expand each thread line into '*expand' lines,
-so that adjacent tasks of the same type can be distinguished. Other options
-can be seen using the --help flag.
-
-See the command 'process_plot_tasks_MPI' to efficiently wrap this command to
-process a number of thread info files and create an HTML file to view them.
-
-This file is part of SWIFT.
-
-Copyright (C) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
-                   Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
-                   Matthieu Schaller (matthieu.schaller@durham.ac.uk)
-          (C) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
-All Rights Reserved.
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published
-by the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with this program.  If not, see <http://www.gnu.org/licenses/>.
-"""
-
-import matplotlib
-matplotlib.use("Agg")
-import matplotlib.collections as collections
-import matplotlib.ticker as plticker
-import pylab as pl
-import sys
-import argparse
-
-#  Handle the command line.
-parser = argparse.ArgumentParser(description="Plot task graphs")
-
-parser.add_argument("input", help="Thread data file (-y output)")
-parser.add_argument("outbase", help="Base name for output graphic files (PNG)")
-parser.add_argument("-l", "--limit", dest="limit",
-                    help="Upper time limit in millisecs (def: depends on data)",
-                    default=0, type=int)
-parser.add_argument("-e", "--expand", dest="expand",
-                    help="Thread expansion factor (def: 1)",
-                    default=1, type=int)
-parser.add_argument("--height", dest="height",
-                    help="Height of plot in inches (def: 4)",
-                    default=4., type=float)
-parser.add_argument("--width", dest="width",
-                    help="Width of plot in inches (def: 16)",
-                    default=16., type=float)
-parser.add_argument("--nolegend", dest="nolegend",
-                    help="Whether to show the legend (def: False)",
-                    default=False, action="store_true")
-parser.add_argument("-v", "--verbose", dest="verbose",
-                    help="Show colour assignments and other details (def: False)",
-                    default=False, action="store_true")
-
-args = parser.parse_args()
-infile = args.input
-outbase = args.outbase
-delta_t = args.limit
-expand = args.expand
-
-#  Basic plot configuration.
-PLOT_PARAMS = {"axes.labelsize": 10,
-               "axes.titlesize": 10,
-               "font.size": 12,
-               "legend.fontsize": 12,
-               "xtick.labelsize": 10,
-               "ytick.labelsize": 10,
-               "figure.figsize" : (args.width, args.height),
-               "figure.subplot.left" : 0.03,
-               "figure.subplot.right" : 0.995,
-               "figure.subplot.bottom" : 0.1,
-               "figure.subplot.top" : 0.99,
-               "figure.subplot.wspace" : 0.,
-               "figure.subplot.hspace" : 0.,
-               "lines.markersize" : 6,
-               "lines.linewidth" : 3.
-               }
-pl.rcParams.update(PLOT_PARAMS)
-
-#  Tasks and subtypes. Indexed as in tasks.h.
-TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
-             "init_grav", "ghost", "extra_ghost", "drift_part", "drift_gpart",
-             "kick1", "kick2", "timestep", "send", "recv", "grav_top_level",
-             "grav_long_range", "grav_mm", "grav_down", "cooling",
-             "sourceterms", "count"]
-
-SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
-            "tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
-
-#  Task/subtypes of interest.
-FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force",
-             "sub_self/density", "pair/force", "pair/density", "pair/grav",
-             "sub_pair/force",
-             "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho",
-             "recv/tend", "send/tend"]
-
-#  A number of colours for the various types. Recycled when there are
-#  more task types than colours...
-colours = ["cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue",
-           "sienna", "aquamarine", "bisque", "blue", "green", "lightgreen",
-           "brown", "purple", "moccasin", "olivedrab", "chartreuse",
-           "darksage", "darkgreen", "green", "mediumseagreen",
-           "mediumaquamarine", "darkslategrey", "mediumturquoise",
-           "black", "cadetblue", "skyblue", "red", "slategray", "gold",
-           "slateblue", "blueviolet", "mediumorchid", "firebrick",
-           "magenta", "hotpink", "pink", "orange", "lightgreen"]
-maxcolours = len(colours)
-
-#  Set colours of task/subtype.
-TASKCOLOURS = {}
-ncolours = 0
-for task in TASKTYPES:
-    TASKCOLOURS[task] = colours[ncolours]
-    ncolours = (ncolours + 1) % maxcolours
-
-SUBCOLOURS = {}
-for task in FULLTYPES:
-    SUBCOLOURS[task] = colours[ncolours]
-    ncolours = (ncolours + 1) % maxcolours
-
-for task in SUBTYPES:
-    SUBCOLOURS[task] = colours[ncolours]
-    ncolours = (ncolours + 1) % maxcolours
-
-#  For fiddling with colours...
-if args.verbose:
-    print "#Selected colours:"
-    for task in sorted(TASKCOLOURS.keys()):
-        print "# " + task + ": " + TASKCOLOURS[task]
-    for task in sorted(SUBCOLOURS.keys()):
-        print "# " + task + ": " + SUBCOLOURS[task]
-
-#  Read input.
-data = pl.loadtxt( infile )
-
-#  Get CPU_CLOCK to convert ticks into milliseconds.
-full_step = data[0,:]
-CPU_CLOCK = float(full_step[-1]) / 1000.0
-if args.verbose:
-    print "CPU frequency:", CPU_CLOCK * 1000.0
-
-nranks = int(max(data[:,0])) + 1
-print "Number of ranks:", nranks
-nthread = int(max(data[:,1])) + 1
-print "Number of threads:", nthread
-
-# Avoid start and end times of zero.
-sdata = data[data[:,5] != 0]
-sdata = sdata[sdata[:,6] != 0]
-
-# Each rank can have different clock (compute node), but we want to use the
-# same delta times range for comparisons, so we suck it up and take the hit of
-# precalculating this, unless the user knows better.
-delta_t = delta_t * CPU_CLOCK
-if delta_t == 0:
-    for rank in range(nranks):
-        data = sdata[sdata[:,0] == rank]
-        full_step = data[0,:]
-        tic_step = int(full_step[5])
-        toc_step = int(full_step[6])
-        dt = toc_step - tic_step
-        if dt > delta_t:
-            delta_t = dt
-    print "Data range: ", delta_t / CPU_CLOCK, "ms"
-
-# Once more doing the real gather and plots this time.
-for rank in range(nranks):
-    data = sdata[sdata[:,0] == rank]
-
-    #  Start and end times for this rank.
-    full_step = data[0,:]
-    tic_step = int(full_step[5])
-    toc_step = int(full_step[6])
-    data = data[1:,:]
-    typesseen = []
-    nethread = 0
-
-    #  Dummy image for ranks that have no tasks.
-    if data.size == 0:
-        print "rank ", rank, " has no tasks"
-        fig = pl.figure()
-        ax = fig.add_subplot(1,1,1)
-        ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK)
-        ax.set_ylim(0, nthread*expand)
-        start_t = tic_step
-        end_t = (toc_step - start_t) / CPU_CLOCK
-    else:
-
-        start_t = float(tic_step)
-        data[:,5] -= start_t
-        data[:,6] -= start_t
-        end_t = (toc_step - start_t) / CPU_CLOCK
-
-        tasks = {}
-        tasks[-1] = []
-        for i in range(nthread*expand):
-            tasks[i] = []
-
-        # Counters for each thread when expanding.
-        ecounter = []
-        for i in range(nthread):
-            ecounter.append(0)
-
-        num_lines = pl.shape(data)[0]
-        for line in range(num_lines):
-            thread = int(data[line,1])
-
-            # Expand to cover extra lines if expanding.
-            ethread = thread * expand + (ecounter[thread] % expand)
-            ecounter[thread] = ecounter[thread] + 1
-            thread = ethread
-
-            tasks[thread].append({})
-            tasktype = TASKTYPES[int(data[line,2])]
-            subtype = SUBTYPES[int(data[line,3])]
-            tasks[thread][-1]["type"] = tasktype
-            tasks[thread][-1]["subtype"] = subtype
-            tic = int(data[line,5]) / CPU_CLOCK
-            toc = int(data[line,6]) / CPU_CLOCK
-            tasks[thread][-1]["tic"] = tic
-            tasks[thread][-1]["toc"] = toc
-            if "self" in tasktype or "pair" in tasktype or "recv" in tasktype or "send" in tasktype:
-                fulltype = tasktype + "/" + subtype
-                if fulltype in SUBCOLOURS:
-                    tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype]
-                else:
-                    tasks[thread][-1]["colour"] = SUBCOLOURS[subtype]
-            else:
-                tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype]
-
-        # Use expanded threads from now on.
-        nethread = nthread * expand
-
-        typesseen = []
-        fig = pl.figure()
-        ax = fig.add_subplot(1,1,1)
-        ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK)
-        ax.set_ylim(0, nethread)
-        for i in range(nethread):
-
-            #  Collect ranges and colours into arrays.
-            tictocs = []
-            colours = []
-            j = 0
-            for task in tasks[i]:
-                tictocs.append((task["tic"], task["toc"] - task["tic"]))
-                colours.append(task["colour"])
-
-                #  Legend support, collections don't add to this.
-                if task["subtype"] != "none":
-                    qtask = task["type"] + "/" + task["subtype"]
-                else:
-                    qtask = task["type"]
-
-                if qtask not in typesseen:
-                    pl.plot([], [], color=task["colour"], label=qtask)
-                    typesseen.append(qtask)
-
-            #  Now plot.
-            ax.broken_barh(tictocs, [i+0.05,0.90], facecolors = colours, linewidth=0)
-
-
-    #  Legend and room for it.
-    nrow = len(typesseen) / 5
-    ax.fill_between([0, 0], nethread+0.5, nethread + nrow + 0.5, facecolor="white")
-    ax.set_ylim(0, nethread + 0.5)
-    if data.size > 0:
-        ax.legend(loc=1, shadow=True, bbox_to_anchor=(0., 1.05 ,1., 0.2), mode="expand", ncol=5)
-        box = ax.get_position()
-        ax.set_position([box.x0, box.y0, box.width, box.height*0.8])
-
-    # Start and end of time-step
-    ax.plot([0, 0], [0, nethread + nrow + 1], 'k--', linewidth=1)
-    ax.plot([end_t, end_t], [0, nethread + nrow + 1], 'k--', linewidth=1)
-
-    ax.set_xlabel("Wall clock time [ms]")
-
-    if expand == 1:
-        ax.set_ylabel("Thread ID" )
-    else:
-        ax.set_ylabel("Thread ID * " + str(expand) )
-    ax.set_yticks(pl.array(range(nethread)), True)
-
-    loc = plticker.MultipleLocator(base=expand)
-    ax.yaxis.set_major_locator(loc)
-    ax.grid(True, which='major', axis="y", linestyle="-")
-
-    pl.show()
-    outpng = outbase + str(rank) + ".png"
-    pl.savefig(outpng)
-    print "Graphics done, output written to", outpng
-
-sys.exit(0)