Commit 7d0da067 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'script-cleanup' into 'master'

Clean up task and threadpool plotting scripts

Merges MPI and non-MPI task dump plotting and analysis scripts into single
versions that deal with both outputs.

Various tweaks so that we can generate and display task and threadpool
plots together for comparison (non-MPI only).

See #337.

See merge request !425
parents 7a595a27 70f1da48
This diff is collapsed.
#!/usr/bin/env python
"""
Usage:
analyse_tasks_MPI.py [options] input.dat
where input.dat is a thread info file for an MPI step. Use the '-y interval'
flag of the swift command to create these.
The output is an analysis of the task timings, including deadtime per thread
and step, total amount of time spent for each task type, for the whole step
and per thread and the minimum and maximum times spent per task type.
This file is part of SWIFT.
Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import matplotlib
matplotlib.use("Agg")
import matplotlib.collections as collections
import matplotlib.ticker as plticker
import pylab as pl
import sys
import argparse
# Command-line interface: one positional thread data file plus two optional
# flags, one for verbose output and one selecting the rank to process.
parser = argparse.ArgumentParser(description="Analyse task dumps")
parser.add_argument("input",
                    help="Thread data file (-y output)")
parser.add_argument("-v", "--verbose",
                    action="store_true",
                    default=False,
                    dest="verbose",
                    help="Verbose output (default: False)")
parser.add_argument("-r", "--rank",
                    action="store",
                    default="all",
                    dest="rank",
                    help="Rank to process (default: all)")

args = parser.parse_args()
infile = args.input
# Names of the task types, in the same order as tasks.h, so that the integer
# type stored in the dump can be used directly as an index.
TASKTYPES = [
    "none",             # 0
    "sort",             # 1
    "self",             # 2
    "pair",             # 3
    "sub_self",         # 4
    "sub_pair",         # 5
    "init_grav",        # 6
    "ghost",            # 7
    "extra_ghost",      # 8
    "drift_part",       # 9
    "drift_gpart",      # 10
    "kick1",            # 11
    "kick2",            # 12
    "timestep",         # 13
    "send",             # 14
    "recv",             # 15
    "grav_top_level",   # 16
    "grav_long_range",  # 17
    "grav_mm",          # 18
    "grav_down",        # 19
    "cooling",          # 20
    "sourceterms",      # 21
    "count",            # 22
]

# Names of the task subtypes, indexed by the integer subtype in the dump.
SUBTYPES = [
    "none",             # 0
    "density",          # 1
    "gradient",         # 2
    "force",            # 3
    "grav",             # 4
    "external_grav",    # 5
    "tend",             # 6
    "xv",               # 7
    "rho",              # 8
    "gpart",            # 9
    "multipole",        # 10
    "spart",            # 11
    "count",            # 12
]

# Labels printed for the sort ids 0 to 12 in the "By SID" table.
SIDS = [
    "(-1,-1,-1)",       # 0
    "(-1,-1, 0)",       # 1
    "(-1,-1, 1)",       # 2
    "(-1, 0,-1)",       # 3
    "(-1, 0, 0)",       # 4
    "(-1, 0, 1)",       # 5
    "(-1, 1,-1)",       # 6
    "(-1, 1, 0)",       # 7
    "(-1, 1, 1)",       # 8
    "( 0,-1,-1)",       # 9
    "( 0,-1, 0)",       # 10
    "( 0,-1, 1)",       # 11
    "( 0, 0,-1)",       # 12
]
# Read input.
data = pl.loadtxt( infile )
# Get the CPU clock to convert ticks into milliseconds.
full_step = data[0,:]
updates = int(full_step[7])
g_updates = int(full_step[8])
s_updates = int(full_step[9])
CPU_CLOCK = float(full_step[-1]) / 1000.0
if args.verbose:
print "# CPU frequency:", CPU_CLOCK * 1000.0
print "# updates:", updates
print "# g_updates:", g_updates
print "# s_updates:", s_updates
nranks = int(max(data[:,0])) + 1
print "# Number of ranks:", nranks
if args.rank == "all":
ranks = range(nranks)
else:
ranks = [int(args.rank)]
if ranks[0] >= nranks:
print "Error: maximum rank is " + str(nranks - 1)
sys.exit(1)
maxthread = int(max(data[:,1])) + 1
print "# Maximum thread id:", maxthread
# Avoid start and end times of zero.
sdata = data[data[:,5] != 0]
sdata = data[data[:,6] != 0]
# Now we process the required ranks.
for rank in ranks:
print "# Rank", rank
data = sdata[sdata[:,0] == rank]
# Recover the start and end time
full_step = data[0,:]
tic_step = int(full_step[5])
toc_step = int(full_step[6])
data = data[1:,:]
# Avoid start and end times of zero.
data = data[data[:,5] != 0]
data = data[data[:,6] != 0]
# Calculate the time range.
total_t = (toc_step - tic_step)/ CPU_CLOCK
print "# Data range: ", total_t, "ms"
print
# Correct times to relative values.
start_t = float(tic_step)
data[:,5] -= start_t
data[:,6] -= start_t
end_t = (toc_step - start_t) / CPU_CLOCK
tasks = {}
tasks[-1] = []
for i in range(maxthread):
tasks[i] = []
# Gather into by thread data.
num_lines = pl.shape(data)[0]
for line in range(num_lines):
thread = int(data[line,1])
tic = int(data[line,5]) / CPU_CLOCK
toc = int(data[line,6]) / CPU_CLOCK
tasktype = int(data[line,2])
subtype = int(data[line,3])
sid = int(data[line, -1])
tasks[thread].append([tic,toc,tasktype,subtype, sid])
# Sort by tic and gather used threads.
threadids = []
for i in range(maxthread):
tasks[i] = sorted(tasks[i], key=lambda task: task[0])
threadids.append(i)
# Times per task.
print "# Task times:"
print "# -----------"
print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
.format("type/subtype", "count","minimum", "maximum",
"sum", "mean", "percent")
alltasktimes = {}
sidtimes = {}
for i in threadids:
tasktimes = {}
for task in tasks[i]:
key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]]
dt = task[1] - task[0]
if not key in tasktimes:
tasktimes[key] = []
tasktimes[key].append(dt)
if not key in alltasktimes:
alltasktimes[key] = []
alltasktimes[key].append(dt)
my_sid = task[4]
if my_sid > -1:
if not my_sid in sidtimes:
sidtimes[my_sid] = []
sidtimes[my_sid].append(dt)
print "# Thread : ", i
for key in sorted(tasktimes.keys()):
taskmin = min(tasktimes[key])
taskmax = max(tasktimes[key])
tasksum = sum(tasktimes[key])
print "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
.format(key, len(tasktimes[key]), taskmin, taskmax, tasksum,
tasksum / len(tasktimes[key]), tasksum / total_t * 100.0)
print
print "# All threads : "
for key in sorted(alltasktimes.keys()):
taskmin = min(alltasktimes[key])
taskmax = max(alltasktimes[key])
tasksum = sum(alltasktimes[key])
print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
.format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum,
tasksum / len(alltasktimes[key]),
tasksum / (len(threadids) * total_t) * 100.0)
print
# For pairs, show stuf sorted by SID
print "# By SID (all threads): "
print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
.format("Pair/Sub-pair SID", "count","minimum", "maximum",
"sum", "mean", "percent")
for sid in range(0,13):
if sid in sidtimes:
sidmin = min(sidtimes[sid])
sidmax = max(sidtimes[sid])
sidsum = sum(sidtimes[sid])
sidcount = len(sidtimes[sid])
sidmean = sidsum / sidcount
else:
sidmin = 0.
sidmax = 0.
sidsum = 0.
sidcount = 0
sidmean = 0.
print "{0:3d} {1:15s}: {2:7d} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.4f} {7:9.2f}"\
.format(sid, SIDS[sid], sidcount, sidmin, sidmax, sidsum,
sidmean, sidsum / (len(threadids) * total_t) * 100.0)
print
# Dead times.
print "# Times not in tasks (deadtimes)"
print "# ------------------------------"
print "# Time before first task:"
print "# no. : {0:>9s} {1:>9s}".format("value", "percent")
predeadtimes = []
for i in threadids:
if len(tasks[i]) > 0:
predeadtime = tasks[i][0][0]
print "thread {0:2d}: {1:9.4f} {2:9.4f}"\
.format(i, predeadtime, predeadtime / total_t * 100.0)
predeadtimes.append(predeadtime)
else:
predeadtimes.append(0.0)
predeadmin = min(predeadtimes)
predeadmax = max(predeadtimes)
predeadsum = sum(predeadtimes)
print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
.format("count", "minimum", "maximum", "sum", "mean", "percent")
print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
.format(len(predeadtimes), predeadmin, predeadmax, predeadsum,
predeadsum / len(predeadtimes),
predeadsum / (len(threadids) * total_t ) * 100.0)
print
print "# Time after last task:"
print "# no. : {0:>9s} {1:>9s}".format("value", "percent")
postdeadtimes = []
for i in threadids:
if len(tasks[i]) > 0:
postdeadtime = total_t - tasks[i][-1][1]
print "thread {0:2d}: {1:9.4f} {2:9.4f}"\
.format(i, postdeadtime, postdeadtime / total_t * 100.0)
postdeadtimes.append(postdeadtime)
else:
postdeadtimes.append(0.0)
postdeadmin = min(postdeadtimes)
postdeadmax = max(postdeadtimes)
postdeadsum = sum(postdeadtimes)
print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
.format("count", "minimum", "maximum", "sum", "mean", "percent")
print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
.format(len(postdeadtimes), postdeadmin, postdeadmax, postdeadsum,
postdeadsum / len(postdeadtimes),
postdeadsum / (len(threadids) * total_t ) * 100.0)
print
# Time in engine, i.e. from first to last tasks.
print "# Time between tasks (engine deadtime):"
print "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
.format("count", "minimum", "maximum", "sum", "mean", "percent")
enginedeadtimes = []
for i in threadids:
deadtimes = []
if len(tasks[i]) > 0:
last = tasks[i][0][0]
else:
last = 0.0
for task in tasks[i]:
dt = task[0] - last
deadtimes.append(dt)
last = task[1]
# Drop first value, last value already gone.
if len(deadtimes) > 1:
deadtimes = deadtimes[1:]
else:
# Only one or fewer tasks, so no deadtime by definition.
deadtimes = [0.0]
deadmin = min(deadtimes)
deadmax = max(deadtimes)
deadsum = sum(deadtimes)
print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
.format(i, len(deadtimes), deadmin, deadmax, deadsum,
deadsum / len(deadtimes), deadsum / total_t * 100.0)
enginedeadtimes.extend(deadtimes)
deadmin = min(enginedeadtimes)
deadmax = max(enginedeadtimes)
deadsum = sum(enginedeadtimes)
print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
.format(len(enginedeadtimes), deadmin, deadmax, deadsum,
deadsum / len(enginedeadtimes),
deadsum / (len(threadids) * total_t ) * 100.0)
print
# All times in step.
print "# All deadtimes:"
print "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
.format("count", "minimum", "maximum", "sum", "mean", "percent")
alldeadtimes = []
for i in threadids:
deadtimes = []
last = 0
for task in tasks[i]:
dt = task[0] - last
deadtimes.append(dt)
last = task[1]
dt = total_t - last
deadtimes.append(dt)
deadmin = min(deadtimes)
deadmax = max(deadtimes)
deadsum = sum(deadtimes)
print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
.format(i, len(deadtimes), deadmin, deadmax, deadsum,
deadsum / len(deadtimes), deadsum / total_t * 100.0)
alldeadtimes.extend(deadtimes)
deadmin = min(alldeadtimes)
deadmax = max(alldeadtimes)
deadsum = sum(alldeadtimes)
print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
.format(len(alldeadtimes), deadmin, deadmax, deadsum,
deadsum / len(alldeadtimes),
deadsum / (len(threadids) * total_t ) * 100.0)
print
sys.exit(0)
......@@ -791,10 +791,10 @@ int main(int argc, char *argv[]) {
if (dump_threadpool && (dump_threadpool == 1 || j % dump_threadpool == 1)) {
char dumpfile[40];
#ifdef WITH_MPI
snprintf(dumpfile, 30, "threadpool_info-rank%d-step%d.dat", engine_rank,
snprintf(dumpfile, 40, "threadpool_info-rank%d-step%d.dat", engine_rank,
j + 1);
#else
snprintf(dumpfile, 30, "threadpool_info-step%d.dat", j + 1);
snprintf(dumpfile, 40, "threadpool_info-step%d.dat", j + 1);
#endif // WITH_MPI
threadpool_dump_log(&e.threadpool, dumpfile, 1);
} else {
......
#!/usr/bin/env python
"""
Usage:
plot_tasks.py [options] input.dat output.png
plot_tasks.py [options] input.dat png-output-prefix
where input.dat is a thread info file for a step. Use the '-y interval' flag
of the swift command to create these. The output plot will be called
'output.png'. The --limit option can be used to produce plots with the same
time span and the --expand option to expand each thread line into '*expand'
lines, so that adjacent tasks of the same type can be distinguished. Other
options can be seen using the --help flag.
of the swift or swift_mpi commands to create these (these will need to be
built with the --enable-task-debugging configure option). The output plot will
be called 'png-output-prefix.png' or 'png-output-prefix<mpi-rank>.png',
depending on whether the input thread info file is generated by the swift or
swift_mpi command. If swift_mpi each rank has a separate plot.
The --limit option can be used to produce plots with the same time
span and the --expand option to expand each thread line into '*expand' lines,
so that adjacent tasks of the same type can be distinguished. Other options
can be seen using the --help flag.
See the command 'process_plot_tasks' to efficiently wrap this command to
process a number of thread info files and create an HTML file to view them.
This file is part of SWIFT.
Copyright (c) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
Copyright (C) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
Matthieu Schaller (matthieu.schaller@durham.ac.uk)
(c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
(C) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
All Rights Reserved.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
......@@ -42,10 +52,10 @@ import argparse
parser = argparse.ArgumentParser(description="Plot task graphs")
parser.add_argument("input", help="Thread data file (-y output)")
parser.add_argument("outpng", help="Name for output graphic file (PNG)")
parser.add_argument("outbase", help="Base name for output graphic files (PNG)")
parser.add_argument("-l", "--limit", dest="limit",
help="Upper time limit in millisecs (def: depends on data)",
default=0, type=int)
default=0, type=float)
parser.add_argument("-e", "--expand", dest="expand",
help="Thread expansion factor (def: 1)",
default=1, type=int)
......@@ -61,12 +71,23 @@ parser.add_argument("--nolegend", dest="nolegend",
parser.add_argument("-v", "--verbose", dest="verbose",
help="Show colour assignments and other details (def: False)",
default=False, action="store_true")
parser.add_argument("-r", "--ranks", dest="ranks",
help="Comma delimited list of ranks to process, if MPI in effect",
default=None, type=str)
parser.add_argument("-m", "--mintic", dest="mintic",
help="Value of the smallest tic (def: least in input file)",
default=-1, type=int)
args = parser.parse_args()
infile = args.input
outpng = args.outpng
outbase = args.outbase
delta_t = args.limit
expand = args.expand
mintic = args.mintic
if args.ranks != None:
ranks = [int(item) for item in args.ranks.split(',')]
else:
ranks = None
# Basic plot configuration.
PLOT_PARAMS = {"axes.labelsize": 10,
......@@ -78,7 +99,7 @@ PLOT_PARAMS = {"axes.labelsize": 10,
"figure.figsize" : (args.width, args.height),
"figure.subplot.left" : 0.03,
"figure.subplot.right" : 0.995,
"figure.subplot.bottom" : 0.09,
"figure.subplot.bottom" : 0.1,
"figure.subplot.top" : 0.99,
"figure.subplot.wspace" : 0.,
"figure.subplot.hspace" : 0.,
......@@ -89,20 +110,19 @@ pl.rcParams.update(PLOT_PARAMS)
# Tasks and subtypes. Indexed as in tasks.h.
TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
"init_grav", "ghost", "extra_ghost", "drift_part",
"drift_gpart", "kick1", "kick2", "timestep", "send", "recv",
"grav_top_level", "grav_long_range", "grav_ghost", "grav_mm",
"grav_down", "cooling", "sourceterms", "count"]
"init_grav", "ghost", "extra_ghost", "drift_part", "drift_gpart",
"kick1", "kick2", "timestep", "send", "recv", "grav_top_level",
"grav_long_range", "grav_mm", "grav_down", "cooling",
"sourceterms", "count"]
SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
"tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
# Task/subtypes of interest.
FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force",
"sub_self/density", "sub_self/grav", "pair/force", "pair/density",
"pair/grav", "sub_pair/force",
"sub_pair/density", "sub_pair/grav", "recv/xv", "send/xv",
"recv/rho", "send/rho",
"sub_self/density", "pair/force", "pair/density", "pair/grav",
"sub_pair/force",
"sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho",
"recv/tend", "send/tend"]
# A number of colours for the various types. Recycled when there are
......@@ -110,7 +130,7 @@ FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force",
colours = ["cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue",
"sienna", "aquamarine", "bisque", "blue", "green", "lightgreen",
"brown", "purple", "moccasin", "olivedrab", "chartreuse",
"steelblue", "darkgreen", "green", "mediumseagreen",
"darksage", "darkgreen", "green", "mediumseagreen",
"mediumaquamarine", "darkslategrey", "mediumturquoise",
"black", "cadetblue", "skyblue", "red", "slategray", "gold",
"slateblue", "blueviolet", "mediumorchid", "firebrick",
......@@ -144,129 +164,207 @@ if args.verbose:
# Read input.
data = pl.loadtxt( infile )
nthread = int(max(data[:,0])) + 1
print "Number of threads:", nthread
# Recover the start and end time
# Do we have an MPI file?
full_step = data[0,:]
tic_step = int(full_step[4])
toc_step = int(full_step[5])
if full_step.size == 13:
print "# MPI mode"
mpimode = True
if ranks == None:
ranks = range(int(max(data[:,0])) + 1)
print "# Number of ranks:", len(ranks)
rankcol = 0
threadscol = 1
taskcol = 2
subtaskcol = 3
ticcol = 5
toccol = 6
else:
print "# non MPI mode"
ranks = [0]
mpimode = False
rankcol = -1
threadscol = 0
taskcol = 1
subtaskcol = 2
ticcol = 4
toccol = 5
# Get CPU_CLOCK to convert ticks into milliseconds.
CPU_CLOCK = float(full_step[-1]) / 1000.0
data = data[1:,:]
if args.verbose:
print "CPU frequency:", CPU_CLOCK * 1000.0
print "# CPU frequency:", CPU_CLOCK * 1000.0
# Avoid start and end times of zero.
data = data[data[:,4] != 0]
data = data[data[:,5] != 0]
nthread = int(max(data[:,threadscol])) + 1
print "# Number of threads:", nthread
# Calculate the time range, if not given.
# Avoid start and end times of zero.
sdata = data[data[:,ticcol] != 0]
sdata = sdata[sdata[:,toccol] != 0]
# Each rank can have different clocks (compute node), but we want to use the
# same delta times range for comparisons, so we suck it up and take the hit of
# precalculating this, unless the user knows better.
delta_t = delta_t * CPU_CLOCK
if delta_t == 0:
dt = toc_step - tic_step
if dt > delta_t:
delta_t = dt
print "Data range: ", delta_t / CPU_CLOCK, "ms"
# Once more doing the real gather and plots this time.
start_t = float(tic_step)
data[:,4] -= start_t
data[:,5] -= start_t
end_t = (toc_step - start_t) / CPU_CLOCK
tasks = {}
tasks[-1] = []
for i in range(nthread*expand):
tasks[i] = []
# Counters for each thread when expanding.
ecounter = []
for i in range(nthread):
ecounter.append(0)
num_lines = pl.size(data) / pl.size(full_step)
for line in range(num_lines):
thread = int(data[line,0])
# Expand to cover extra lines if expanding.
ethread = thread * expand + (ecounter[thread] % expand)
ecounter[thread] = ecounter[thread] + 1
thread = ethread
tasks[thread].append({})
tasktype = TASKTYPES[int(data[line,1])]
subtype = SUBTYPES[int(data[line,2])]
tasks[thread][-1]["type"] = tasktype
tasks[thread][-1]["subtype"] = subtype
tic = int(data[line,4]) / CPU_CLOCK
toc = int(data[line,5]) / CPU_CLOCK
tasks[thread][-1]["tic"] = tic
tasks[thread][-1]["toc"] = toc
if "self" in tasktype or "pair" in tasktype:
fulltype = tasktype + "/" + subtype
if fulltype in SUBCOLOURS:
tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype]
for rank in ranks:
if mpimode:
data = sdata[sdata[:,rankcol] == rank]
full_step = data[0,:]
# Start and end times for this rank. Can be changed using the mintic
# option. This moves our zero time to other time. Useful for
# comparing to other plots.
if mintic < 0:
tic_step = int(full_step[ticcol])
else:
tic_step = mintic
toc_step = int(full_step[toccol])
dt = toc_step - tic_step
if dt > delta_t:
delta_t = dt
print "# Data range: ", delta_t / CPU_CLOCK, "ms"
# Once more doing the real gather and plots this time.
for rank in ranks:
print "# Processing rank: ", rank
if mpimode:
data = sdata[sdata[:,rankcol] == rank]
full_step = data[0,:]