Skip to content
Snippets Groups Projects
Commit 24132c48 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge MPI and non-MPI task dump plotting and analysis scripts.

Keep maintenance down.
parent 637853ce
No related branches found
No related tags found
1 merge request!425Clean up task and threadpool plotting scripts
......@@ -3,8 +3,9 @@
Usage:
analyse_tasks.py [options] input.dat
where input.dat is a thread info file for a step. Use the '-y interval' flag
of the swift command to create these.
where input.dat is a thread info file for a step (MPI or non-MPI). Use the
'-y interval' flag of the swift and swift_mpi commands to create these
(you will also need to configure with the --enable-task-debugging option).
The output is an analysis of the task timings, including deadtime per thread
and step, total amount of time spent for each task type, for the whole step
......@@ -42,6 +43,9 @@ parser.add_argument("input", help="Thread data file (-y output)")
parser.add_argument("-v", "--verbose", dest="verbose",
help="Verbose output (default: False)",
default=False, action="store_true")
parser.add_argument("-r", "--rank", dest="rank",
help="Rank to process (default: all)",
default="all", action="store")
args = parser.parse_args()
infile = args.input
......@@ -50,8 +54,8 @@ infile = args.input
TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
"init_grav", "ghost", "extra_ghost", "drift_part",
"drift_gpart", "kick1", "kick2", "timestep", "send", "recv",
"grav_top_level", "grav_long_range", "grav_ghost", "grav_mm",
"grav_down", "cooling", "sourceterms", "count"]
"grav_top_level", "grav_long_range", "grav_mm", "grav_down",
"cooling", "sourceterms", "count"]
SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
"tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
......@@ -63,28 +67,81 @@ SIDS = ["(-1,-1,-1)", "(-1,-1, 0)", "(-1,-1, 1)", "(-1, 0,-1)",
# Read input.
data = pl.loadtxt( infile )
full_step = data[0,:]
maxthread = int(max(data[:,0])) + 1
print "# Maximum thread id:", maxthread
# Recover the start and end time
# Do we have an MPI file?
full_step = data[0,:]
tic_step = int(full_step[4])
toc_step = int(full_step[5])
if full_step.size == 13:
print "# MPI mode"
mpimode = True
nranks = int(max(data[:,0])) + 1
print "# Number of ranks:", nranks
rankcol = 0
threadscol = 1
taskcol = 2
subtaskcol = 3
ticcol = 5
toccol = 6
updates = int(full_step[7])
g_updates = int(full_step[8])
s_updates = int(full_step[9])
else:
print "# non MPI mode"
nranks = 1
mpimode = False
rankcol = -1
threadscol = 0
taskcol = 1
subtaskcol = 2
ticcol = 4
toccol = 5
updates = int(full_step[6])
g_updates = int(full_step[7])
s_updates = int(full_step[8])
# Get the CPU clock to convert ticks into milliseconds.
CPU_CLOCK = float(full_step[-1]) / 1000.0
data = data[1:,:]
if args.verbose:
print "# CPU frequency:", CPU_CLOCK * 1000.0
print "# updates:", updates
print "# g_updates:", g_updates
print "# s_updates:", s_updates
if mpimode:
if args.rank == "all":
ranks = range(nranks)
else:
ranks = [int(args.rank)]
if ranks[0] >= nranks:
print "Error: maximum rank is " + str(nranks - 1)
sys.exit(1)
else:
ranks = [1]
maxthread = int(max(data[:,threadscol])) + 1
print "# Maximum thread id:", maxthread
# Avoid start and end times of zero.
data = data[data[:,4] != 0]
data = data[data[:,5] != 0]
# Chain the two filters: the toc test has to run on the rows that already
# passed the tic test, otherwise the tic filter is thrown away.
sdata = data[data[:,ticcol] != 0]
sdata = sdata[sdata[:,toccol] != 0]
# Now we process the required ranks.
for rank in ranks:
if mpimode:
print "# Rank", rank
data = sdata[sdata[:,rankcol] == rank]
full_step = data[0,:]
else:
data = sdata
# Recover the start and end time
tic_step = int(full_step[ticcol])
toc_step = int(full_step[toccol])
data = data[1:,:]
# Avoid start and end times of zero.
data = data[data[:,ticcol] != 0]
data = data[data[:,toccol] != 0]
# Calculate the time range.
total_t = (toc_step - tic_step)/ CPU_CLOCK
......@@ -93,8 +150,9 @@ print
# Correct times to relative values.
start_t = float(tic_step)
data[:,4] -= start_t
data[:,5] -= start_t
data[:,ticcol] -= start_t
data[:,toccol] -= start_t
end_t = (toc_step - start_t) / CPU_CLOCK
tasks = {}
tasks[-1] = []
......@@ -102,21 +160,20 @@ for i in range(maxthread):
tasks[i] = []
# Gather into by thread data.
num_lines = pl.size(data) / pl.size(full_step)
num_lines = pl.shape(data)[0]
for line in range(num_lines):
thread = int(data[line,0])
tic = int(data[line,4]) / CPU_CLOCK
toc = int(data[line,5]) / CPU_CLOCK
tasktype = int(data[line,1])
subtype = int(data[line,2])
thread = int(data[line,threadscol])
tic = int(data[line,ticcol]) / CPU_CLOCK
toc = int(data[line,toccol]) / CPU_CLOCK
tasktype = int(data[line,taskcol])
subtype = int(data[line,subtaskcol])
sid = int(data[line, -1])
tasks[thread].append([tic,toc,tasktype,subtype, sid])
# Sort by tic and gather used thread ids.
# Sort by tic and gather used threads.
threadids = []
for i in range(maxthread):
if len(tasks[i]) > 0:
tasks[i] = sorted(tasks[i], key=lambda task: task[0])
threadids.append(i)
......@@ -126,6 +183,7 @@ print "# -----------"
print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
.format("type/subtype", "count","minimum", "maximum",
"sum", "mean", "percent")
alltasktimes = {}
sidtimes = {}
for i in threadids:
......@@ -147,7 +205,6 @@ for i in threadids:
sidtimes[my_sid] = []
sidtimes[my_sid].append(dt)
print "# Thread : ", i
for key in sorted(tasktimes.keys()):
taskmin = min(tasktimes[key])
......@@ -163,13 +220,13 @@ for key in sorted(alltasktimes.keys()):
taskmin = min(alltasktimes[key])
taskmax = max(alltasktimes[key])
tasksum = sum(alltasktimes[key])
print "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
.format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum,
tasksum / len(alltasktimes[key]),
tasksum / (len(threadids) * total_t) * 100.0)
print
# For pairs, show stuf sorted by SID
# For pairs, show stuff sorted by SID
print "# By SID (all threads): "
print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
.format("Pair/Sub-pair SID", "count","minimum", "maximum",
......@@ -200,10 +257,13 @@ print "# Time before first task:"
print "# no. : {0:>9s} {1:>9s}".format("value", "percent")
predeadtimes = []
for i in threadids:
if len(tasks[i]) > 0:
predeadtime = tasks[i][0][0]
print "thread {0:2d}: {1:9.4f} {2:9.4f}"\
.format(i, predeadtime, predeadtime / total_t * 100.0)
predeadtimes.append(predeadtime)
else:
predeadtimes.append(0.0)
predeadmin = min(predeadtimes)
predeadmax = max(predeadtimes)
......@@ -220,10 +280,13 @@ print "# Time after last task:"
print "# no. : {0:>9s} {1:>9s}".format("value", "percent")
postdeadtimes = []
for i in threadids:
if len(tasks[i]) > 0:
postdeadtime = total_t - tasks[i][-1][1]
print "thread {0:2d}: {1:9.4f} {2:9.4f}"\
.format(i, postdeadtime, postdeadtime / total_t * 100.0)
postdeadtimes.append(postdeadtime)
else:
postdeadtimes.append(0.0)
postdeadmin = min(postdeadtimes)
postdeadmax = max(postdeadtimes)
......@@ -243,7 +306,10 @@ print "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
enginedeadtimes = []
for i in threadids:
deadtimes = []
if len(tasks[i]) > 0:
last = tasks[i][0][0]
else:
last = 0.0
for task in tasks[i]:
dt = task[0] - last
deadtimes.append(dt)
......@@ -253,7 +319,7 @@ for i in threadids:
if len(deadtimes) > 1:
deadtimes = deadtimes[1:]
else:
# Only one task, so no deadtime by definition.
# Only one or fewer tasks, so no deadtime by definition.
deadtimes = [0.0]
deadmin = min(deadtimes)
......
......
#!/usr/bin/env python
"""
Usage:
analyse_tasks_MPI.py [options] input.dat
where input.dat is a thread info file for an MPI step. Use the '-y interval'
flag of the swift command to create these.
The output is an analysis of the task timings, including deadtime per thread
and step, total amount of time spent for each task type, for the whole step
and per thread and the minimum and maximum times spent per task type.
This file is part of SWIFT.
Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import matplotlib
matplotlib.use("Agg")
import matplotlib.collections as collections
import matplotlib.ticker as plticker
import pylab as pl
import sys
import argparse
# Command-line interface: one positional thread-data file plus two optional
# switches.
parser = argparse.ArgumentParser(description="Analyse task dumps")
parser.add_argument("input", help="Thread data file (-y output)")

# -v/--verbose: boolean switch, off unless given.
parser.add_argument(
    "-v", "--verbose",
    dest="verbose",
    action="store_true",
    default=False,
    help="Verbose output (default: False)")

# -r/--rank: stored as a string so that the literal "all" can be the default.
parser.add_argument(
    "-r", "--rank",
    dest="rank",
    action="store",
    default="all",
    help="Rank to process (default: all)")

args = parser.parse_args()
infile = args.input
# Task type and subtype names. A name's position in its list is the numeric
# code found in the data file, indexed as in tasks.h.
TASKTYPES = [
    "none", "sort", "self", "pair",
    "sub_self", "sub_pair", "init_grav", "ghost",
    "extra_ghost", "drift_part", "drift_gpart", "kick1",
    "kick2", "timestep", "send", "recv",
    "grav_top_level", "grav_long_range", "grav_mm", "grav_down",
    "cooling", "sourceterms", "count",
]

SUBTYPES = [
    "none", "density", "gradient", "force",
    "grav", "external_grav", "tend", "xv",
    "rho", "gpart", "multipole", "spart",
    "count",
]

# Display labels for sid values 0..12, used when printing the by-SID table.
# NOTE(review): these look like (x, y, z) direction offsets -- confirm.
SIDS = [
    "(-1,-1,-1)", "(-1,-1, 0)", "(-1,-1, 1)",
    "(-1, 0,-1)", "(-1, 0, 0)", "(-1, 0, 1)",
    "(-1, 1,-1)", "(-1, 1, 0)", "(-1, 1, 1)",
    "( 0,-1,-1)", "( 0,-1, 0)", "( 0,-1, 1)",
    "( 0, 0,-1)",
]
# Read input.
# NOTE(review): leading indentation was stripped from this copy of the
# script, so the nesting of the statements below cannot be read off the
# layout -- restore it from the repository version before running.
data = pl.loadtxt( infile )
# Get the CPU clock to convert ticks into milliseconds.
# The first row of the file is used as a header for the whole step: columns
# 7, 8 and 9 are read as the update counters and the last column as the CPU
# clock, which is divided by 1000 to give ticks per millisecond.
full_step = data[0,:]
updates = int(full_step[7])
g_updates = int(full_step[8])
s_updates = int(full_step[9])
CPU_CLOCK = float(full_step[-1]) / 1000.0
# Optional report of the header values.
# NOTE(review): presumably all four prints belong to the verbose branch --
# confirm.
if args.verbose:
print "# CPU frequency:", CPU_CLOCK * 1000.0
print "# updates:", updates
print "# g_updates:", g_updates
print "# s_updates:", s_updates
# Column 0 is read as the MPI rank; the rank count is the largest value
# found plus one.
nranks = int(max(data[:,0])) + 1
print "# Number of ranks:", nranks
# Select the ranks to analyse: every rank by default, or the single rank
# given with -r/--rank.
if args.rank == "all":
ranks = range(nranks)
else:
ranks = [int(args.rank)]
# A requested rank beyond the last one present is reported and the script
# exits with status 1.
if ranks[0] >= nranks:
print "Error: maximum rank is " + str(nranks - 1)
sys.exit(1)
# Column 1 is read as the thread id; taken over all ranks.
maxthread = int(max(data[:,1])) + 1
print "# Maximum thread id:", maxthread
# Avoid start and end times of zero: keep only rows whose tic (column 5) and
# toc (column 6) are both non-zero. The second filter is applied to the
# result of the first; filtering `data` again would discard the tic test.
sdata = data[data[:,5] != 0]
sdata = sdata[sdata[:,6] != 0]
# Now we process the required ranks.
# NOTE(review): indentation was lost in this copy; the statements from here
# down to the final sys.exit(0) presumably form the body of this loop --
# confirm against the repository version.
for rank in ranks:
print "# Rank", rank
# Rows belonging to this rank (column 0 is compared with the rank).
data = sdata[sdata[:,0] == rank]
# Recover the start and end time
# The first row for the rank is taken as the whole-step record; its columns
# 5 and 6 give the tic and toc of the step.
full_step = data[0,:]
tic_step = int(full_step[5])
toc_step = int(full_step[6])
# Drop that record so that only the remaining rows are analysed as tasks.
data = data[1:,:]
# Avoid start and end times of zero.
data = data[data[:,5] != 0]
data = data[data[:,6] != 0]
# Calculate the time range.
# CPU_CLOCK is in ticks per millisecond, so total_t is in milliseconds.
total_t = (toc_step - tic_step)/ CPU_CLOCK
print "# Data range: ", total_t, "ms"
print
# Correct times to relative values.
# After this, tic and toc are measured in ticks from the start of the step.
start_t = float(tic_step)
data[:,5] -= start_t
data[:,6] -= start_t
# NOTE(review): end_t is not read again in the rest of this script.
end_t = (toc_step - start_t) / CPU_CLOCK
# One task list per thread id below maxthread, plus an extra list under
# key -1.
tasks = {}
tasks[-1] = []
for i in range(maxthread):
tasks[i] = []
# Gather into by thread data.
# Each row becomes [tic, toc, tasktype, subtype, sid]: tic and toc are
# converted from ticks to milliseconds, and the sid is read from the last
# column.
num_lines = pl.shape(data)[0]
for line in range(num_lines):
thread = int(data[line,1])
tic = int(data[line,5]) / CPU_CLOCK
toc = int(data[line,6]) / CPU_CLOCK
tasktype = int(data[line,2])
subtype = int(data[line,3])
sid = int(data[line, -1])
tasks[thread].append([tic,toc,tasktype,subtype, sid])
# Sort by tic and gather used threads.
# Every thread id below maxthread is recorded here, whether or not it has
# any tasks; the later sections test len(tasks[i]) where that matters.
threadids = []
for i in range(maxthread):
tasks[i] = sorted(tasks[i], key=lambda task: task[0])
threadids.append(i)
# Times per task.
# Prints, for each thread and then for all threads together, the count,
# minimum, maximum, sum and mean duration (toc - tic, in milliseconds) of
# every "type/subtype" combination, plus the sum as a percentage.
print "# Task times:"
print "# -----------"
print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
.format("type/subtype", "count","minimum", "maximum",
"sum", "mean", "percent")
# Durations keyed by "type/subtype" over all threads, and durations keyed
# by sid.
alltasktimes = {}
sidtimes = {}
for i in threadids:
# Durations keyed by "type/subtype" for this thread only.
tasktimes = {}
for task in tasks[i]:
# task is [tic, toc, tasktype, subtype, sid].
key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]]
dt = task[1] - task[0]
if not key in tasktimes:
tasktimes[key] = []
tasktimes[key].append(dt)
if not key in alltasktimes:
alltasktimes[key] = []
alltasktimes[key].append(dt)
# Only sids greater than -1 are collected for the by-SID table.
my_sid = task[4]
if my_sid > -1:
if not my_sid in sidtimes:
sidtimes[my_sid] = []
sidtimes[my_sid].append(dt)
print "# Thread : ", i
for key in sorted(tasktimes.keys()):
taskmin = min(tasktimes[key])
taskmax = max(tasktimes[key])
tasksum = sum(tasktimes[key])
# The percentage here is relative to the step length, total_t.
print "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
.format(key, len(tasktimes[key]), taskmin, taskmax, tasksum,
tasksum / len(tasktimes[key]), tasksum / total_t * 100.0)
print
print "# All threads : "
for key in sorted(alltasktimes.keys()):
taskmin = min(alltasktimes[key])
taskmax = max(alltasktimes[key])
tasksum = sum(alltasktimes[key])
# The percentage here is relative to total_t times the number of threads.
print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
.format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum,
tasksum / len(alltasktimes[key]),
tasksum / (len(threadids) * total_t) * 100.0)
print
# For pairs, show stuff sorted by SID
# One row is printed for each sid from 0 to 12. A sid with no recorded
# durations is reported with a zero count and zero times. The percentage is
# relative to total_t times the number of threads.
print "# By SID (all threads): "
print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
.format("Pair/Sub-pair SID", "count","minimum", "maximum",
"sum", "mean", "percent")
for sid in range(0,13):
if sid in sidtimes:
sidmin = min(sidtimes[sid])
sidmax = max(sidtimes[sid])
sidsum = sum(sidtimes[sid])
sidcount = len(sidtimes[sid])
sidmean = sidsum / sidcount
else:
# Nothing recorded for this sid: report zeros rather than skipping the row.
sidmin = 0.
sidmax = 0.
sidsum = 0.
sidcount = 0
sidmean = 0.
print "{0:3d} {1:15s}: {2:7d} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.4f} {7:9.2f}"\
.format(sid, SIDS[sid], sidcount, sidmin, sidmax, sidsum,
sidmean, sidsum / (len(threadids) * total_t) * 100.0)
print
# Dead times.
# First table: for each thread, the tic of its earliest task. Times were
# made relative to the step start, so this is the idle time before the
# thread's first task. A thread with no tasks contributes 0.0.
print "# Times not in tasks (deadtimes)"
print "# ------------------------------"
print "# Time before first task:"
print "# no. : {0:>9s} {1:>9s}".format("value", "percent")
predeadtimes = []
for i in threadids:
if len(tasks[i]) > 0:
# tasks[i] is sorted by tic, so element 0 is the earliest task.
predeadtime = tasks[i][0][0]
print "thread {0:2d}: {1:9.4f} {2:9.4f}"\
.format(i, predeadtime, predeadtime / total_t * 100.0)
predeadtimes.append(predeadtime)
else:
predeadtimes.append(0.0)
predeadmin = min(predeadtimes)
predeadmax = max(predeadtimes)
predeadsum = sum(predeadtimes)
print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
.format("count", "minimum", "maximum", "sum", "mean", "percent")
print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
.format(len(predeadtimes), predeadmin, predeadmax, predeadsum,
predeadsum / len(predeadtimes),
predeadsum / (len(threadids) * total_t ) * 100.0)
print
# Second table: for each thread, the time from the toc of the last task in
# its tic-sorted list to the end of the step (total_t). A thread with no
# tasks contributes 0.0.
print "# Time after last task:"
print "# no. : {0:>9s} {1:>9s}".format("value", "percent")
postdeadtimes = []
for i in threadids:
if len(tasks[i]) > 0:
postdeadtime = total_t - tasks[i][-1][1]
print "thread {0:2d}: {1:9.4f} {2:9.4f}"\
.format(i, postdeadtime, postdeadtime / total_t * 100.0)
postdeadtimes.append(postdeadtime)
else:
postdeadtimes.append(0.0)
postdeadmin = min(postdeadtimes)
postdeadmax = max(postdeadtimes)
postdeadsum = sum(postdeadtimes)
print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
.format("count", "minimum", "maximum", "sum", "mean", "percent")
print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
.format(len(postdeadtimes), postdeadmin, postdeadmax, postdeadsum,
postdeadsum / len(postdeadtimes),
postdeadsum / (len(threadids) * total_t ) * 100.0)
print
# Time in engine, i.e. from first to last tasks.
# For each thread, collects the gaps between the toc of one task and the
# tic of the next (tasks are sorted by tic), then reports per-thread and
# overall statistics.
print "# Time between tasks (engine deadtime):"
print "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
.format("count", "minimum", "maximum", "sum", "mean", "percent")
enginedeadtimes = []
for i in threadids:
deadtimes = []
# Starting `last` at the first tic makes the first gap zero; that entry is
# dropped below.
if len(tasks[i]) > 0:
last = tasks[i][0][0]
else:
last = 0.0
for task in tasks[i]:
dt = task[0] - last
deadtimes.append(dt)
last = task[1]
# Drop first value, last value already gone.
if len(deadtimes) > 1:
deadtimes = deadtimes[1:]
else:
# Only one or fewer tasks, so no deadtime by definition.
deadtimes = [0.0]
deadmin = min(deadtimes)
deadmax = max(deadtimes)
deadsum = sum(deadtimes)
print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
.format(i, len(deadtimes), deadmin, deadmax, deadsum,
deadsum / len(deadtimes), deadsum / total_t * 100.0)
enginedeadtimes.extend(deadtimes)
# Statistics over the gaps of all threads; the percentage is relative to
# total_t times the number of threads.
deadmin = min(enginedeadtimes)
deadmax = max(enginedeadtimes)
deadsum = sum(enginedeadtimes)
print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
.format(len(enginedeadtimes), deadmin, deadmax, deadsum,
deadsum / len(enginedeadtimes),
deadsum / (len(threadids) * total_t ) * 100.0)
print
# All times in step.
# Like the between-task gaps, but measured from time 0, so the gap before
# the first task is included, and with the gap from the last toc to the end
# of the step (total_t) appended.
print "# All deadtimes:"
print "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
.format("count", "minimum", "maximum", "sum", "mean", "percent")
alldeadtimes = []
for i in threadids:
deadtimes = []
last = 0
for task in tasks[i]:
dt = task[0] - last
deadtimes.append(dt)
last = task[1]
# Trailing gap up to the end of the step; for a thread with no tasks this
# is the whole step.
dt = total_t - last
deadtimes.append(dt)
deadmin = min(deadtimes)
deadmax = max(deadtimes)
deadsum = sum(deadtimes)
print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
.format(i, len(deadtimes), deadmin, deadmax, deadsum,
deadsum / len(deadtimes), deadsum / total_t * 100.0)
alldeadtimes.extend(deadtimes)
# Statistics over the gaps of all threads; the percentage is relative to
# total_t times the number of threads.
deadmin = min(alldeadtimes)
deadmax = max(alldeadtimes)
deadsum = sum(alldeadtimes)
print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
.format(len(alldeadtimes), deadmin, deadmax, deadsum,
deadsum / len(alldeadtimes),
deadsum / (len(threadids) * total_t ) * 100.0)
print
# Explicit successful exit at the end of the script.
sys.exit(0)
#!/usr/bin/env python
"""
Usage:
plot_tasks.py [options] input.dat output.png
plot_tasks.py [options] input.dat png-output-prefix
where input.dat is a thread info file for a step. Use the '-y interval' flag
of the swift command to create these. The output plot will be called
'output.png'. The --limit option can be used to produce plots with the same
time span and the --expand option to expand each thread line into '*expand'
lines, so that adjacent tasks of the same type can be distinguished. Other
options can be seen using the --help flag.
of the swift or swift_mpi commands to create these (these will need to be
built with the --enable-task-debugging configure option). The output plot will
be called 'png-output-prefix.png' or 'png-output-prefix<mpi-rank>.png',
depending on whether the input thread info file is generated by the swift or
swift_mpi command. If swift_mpi each rank has a separate plot.
The --limit option can be used to produce plots with the same time
span and the --expand option to expand each thread line into '*expand' lines,
so that adjacent tasks of the same type can be distinguished. Other options
can be seen using the --help flag.
See the command 'process_plot_tasks' to efficiently wrap this command to
process a number of thread info files and create an HTML file to view them.
This file is part of SWIFT.
Copyright (c) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
Copyright (C) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
Matthieu Schaller (matthieu.schaller@durham.ac.uk)
(c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
(C) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
All Rights Reserved.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
......@@ -42,7 +52,7 @@ import argparse
parser = argparse.ArgumentParser(description="Plot task graphs")
parser.add_argument("input", help="Thread data file (-y output)")
parser.add_argument("outpng", help="Name for output graphic file (PNG)")
parser.add_argument("outbase", help="Base name for output graphic files (PNG)")
parser.add_argument("-l", "--limit", dest="limit",
help="Upper time limit in millisecs (def: depends on data)",
default=0, type=int)
......@@ -64,7 +74,7 @@ parser.add_argument("-v", "--verbose", dest="verbose",
args = parser.parse_args()
infile = args.input
outpng = args.outpng
outbase = args.outbase
delta_t = args.limit
expand = args.expand
......@@ -78,7 +88,7 @@ PLOT_PARAMS = {"axes.labelsize": 10,
"figure.figsize" : (args.width, args.height),
"figure.subplot.left" : 0.03,
"figure.subplot.right" : 0.995,
"figure.subplot.bottom" : 0.09,
"figure.subplot.bottom" : 0.1,
"figure.subplot.top" : 0.99,
"figure.subplot.wspace" : 0.,
"figure.subplot.hspace" : 0.,
......@@ -89,20 +99,19 @@ pl.rcParams.update(PLOT_PARAMS)
# Tasks and subtypes. Indexed as in tasks.h.
TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
"init_grav", "ghost", "extra_ghost", "drift_part",
"drift_gpart", "kick1", "kick2", "timestep", "send", "recv",
"grav_top_level", "grav_long_range", "grav_ghost", "grav_mm",
"grav_down", "cooling", "sourceterms", "count"]
"init_grav", "ghost", "extra_ghost", "drift_part", "drift_gpart",
"kick1", "kick2", "timestep", "send", "recv", "grav_top_level",
"grav_long_range", "grav_mm", "grav_down", "cooling",
"sourceterms", "count"]
SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
"tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
# Task/subtypes of interest.
FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force",
"sub_self/density", "sub_self/grav", "pair/force", "pair/density",
"pair/grav", "sub_pair/force",
"sub_pair/density", "sub_pair/grav", "recv/xv", "send/xv",
"recv/rho", "send/rho",
"sub_self/density", "pair/force", "pair/density", "pair/grav",
"sub_pair/force",
"sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho",
"recv/tend", "send/tend"]
# A number of colours for the various types. Recycled when there are
......@@ -110,7 +119,7 @@ FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force",
colours = ["cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue",
"sienna", "aquamarine", "bisque", "blue", "green", "lightgreen",
"brown", "purple", "moccasin", "olivedrab", "chartreuse",
"steelblue", "darkgreen", "green", "mediumseagreen",
"darksage", "darkgreen", "green", "mediumseagreen",
"mediumaquamarine", "darkslategrey", "mediumturquoise",
"black", "cadetblue", "skyblue", "red", "slategray", "gold",
"slateblue", "blueviolet", "mediumorchid", "firebrick",
......@@ -144,34 +153,85 @@ if args.verbose:
# Read input.
data = pl.loadtxt( infile )
nthread = int(max(data[:,0])) + 1
print "Number of threads:", nthread
# Recover the start and end time
# Do we have an MPI file?
full_step = data[0,:]
tic_step = int(full_step[4])
toc_step = int(full_step[5])
if full_step.size == 13:
print "MPI mode"
mpimode = True
nranks = int(max(data[:,0])) + 1
print "Number of ranks:", nranks
rankcol = 0
threadscol = 1
taskcol = 2
subtaskcol = 3
ticcol = 5
toccol = 6
else:
print "non MPI mode"
nranks = 1
mpimode = False
rankcol = -1
threadscol = 0
taskcol = 1
subtaskcol = 2
ticcol = 4
toccol = 5
# Get CPU_CLOCK to convert ticks into milliseconds.
CPU_CLOCK = float(full_step[-1]) / 1000.0
data = data[1:,:]
if args.verbose:
print "CPU frequency:", CPU_CLOCK * 1000.0
nthread = int(max(data[:,threadscol])) + 1
print "Number of threads:", nthread
# Avoid start and end times of zero.
data = data[data[:,4] != 0]
data = data[data[:,5] != 0]
sdata = data[data[:,ticcol] != 0]
sdata = sdata[sdata[:,toccol] != 0]
# Calculate the time range, if not given.
# Each rank can have different clocks (compute node), but we want to use the
# same delta times range for comparisons, so we suck it up and take the hit of
# precalculating this, unless the user knows better.
delta_t = delta_t * CPU_CLOCK
if delta_t == 0:
for rank in range(nranks):
if mpimode:
data = sdata[sdata[:,rankcol] == rank]
full_step = data[0,:]
tic_step = int(full_step[ticcol])
toc_step = int(full_step[toccol])
dt = toc_step - tic_step
if dt > delta_t:
delta_t = dt
print "Data range: ", delta_t / CPU_CLOCK, "ms"
# Once more doing the real gather and plots this time.
for rank in range(nranks):
if mpimode:
data = sdata[sdata[:,rankcol] == rank]
full_step = data[0,:]
# Start and end times for this rank.
tic_step = int(full_step[ticcol])
toc_step = int(full_step[toccol])
data = data[1:,:]
typesseen = []
nethread = 0
# Dummy image for ranks that have no tasks.
if data.size == 0:
print "rank ", rank, " has no tasks"
fig = pl.figure()
ax = fig.add_subplot(1,1,1)
ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK)
ax.set_ylim(0, nthread*expand)
start_t = tic_step
end_t = (toc_step - start_t) / CPU_CLOCK
else:
start_t = float(tic_step)
data[:,4] -= start_t
data[:,5] -= start_t
data[:,ticcol] -= start_t
data[:,toccol] -= start_t
end_t = (toc_step - start_t) / CPU_CLOCK
tasks = {}
......@@ -184,9 +244,9 @@ ecounter = []
for i in range(nthread):
ecounter.append(0)
num_lines = pl.size(data) / pl.size(full_step)
num_lines = pl.shape(data)[0]
for line in range(num_lines):
thread = int(data[line,0])
thread = int(data[line, threadscol])
# Expand to cover extra lines if expanding.
ethread = thread * expand + (ecounter[thread] % expand)
......@@ -194,15 +254,15 @@ for line in range(num_lines):
thread = ethread
tasks[thread].append({})
tasktype = TASKTYPES[int(data[line,1])]
subtype = SUBTYPES[int(data[line,2])]
tasktype = TASKTYPES[int(data[line,taskcol])]
subtype = SUBTYPES[int(data[line,subtaskcol])]
tasks[thread][-1]["type"] = tasktype
tasks[thread][-1]["subtype"] = subtype
tic = int(data[line,4]) / CPU_CLOCK
toc = int(data[line,5]) / CPU_CLOCK
tic = int(data[line,ticcol]) / CPU_CLOCK
toc = int(data[line,toccol]) / CPU_CLOCK
tasks[thread][-1]["tic"] = tic
tasks[thread][-1]["toc"] = toc
if "self" in tasktype or "pair" in tasktype:
if "self" in tasktype or "pair" in tasktype or "recv" in tasktype or "send" in tasktype:
fulltype = tasktype + "/" + subtype
if fulltype in SUBCOLOURS:
tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype]
......@@ -212,14 +272,14 @@ for line in range(num_lines):
tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype]
# Use expanded threads from now on.
nthread = nthread * expand
nethread = nthread * expand
typesseen = []
fig = pl.figure()
ax = fig.add_subplot(1,1,1)
ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK)
ax.set_ylim(0, nthread)
for i in range(nthread):
ax.set_ylim(0, nethread)
for i in range(nethread):
# Collect ranges and colours into arrays.
tictocs = []
......@@ -234,6 +294,7 @@ for i in range(nthread):
qtask = task["type"] + "/" + task["subtype"]
else:
qtask = task["type"]
if qtask not in typesseen:
pl.plot([], [], color=task["colour"], label=qtask)
typesseen.append(qtask)
......@@ -241,31 +302,37 @@ for i in range(nthread):
# Now plot.
ax.broken_barh(tictocs, [i+0.05,0.90], facecolors = colours, linewidth=0)
# Legend and room for it.
nrow = len(typesseen) / 5
if not args.nolegend:
ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white")
ax.set_ylim(0, nthread + 0.5)
ax.fill_between([0, 0], nethread+0.5, nethread + nrow + 0.5, facecolor="white")
ax.set_ylim(0, nethread + 0.5)
if data.size > 0:
ax.legend(loc=1, shadow=True, bbox_to_anchor=(0., 1.05 ,1., 0.2), mode="expand", ncol=5)
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width, box.height*0.8])
# Start and end of time-step
ax.plot([0, 0], [0, nthread + nrow + 1], 'k--', linewidth=1)
ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1)
ax.plot([0, 0], [0, nethread + nrow + 1], 'k--', linewidth=1)
ax.plot([end_t, end_t], [0, nethread + nrow + 1], 'k--', linewidth=1)
ax.set_xlabel("Wall clock time [ms]")
ax.set_xlabel("Wall clock time [ms]", labelpad=0.)
if expand == 1:
ax.set_ylabel("Thread ID", labelpad=0 )
ax.set_ylabel("Thread ID" )
else:
ax.set_ylabel("Thread ID * " + str(expand), labelpad=0 )
ax.set_yticks(pl.array(range(nthread)), True)
ax.set_ylabel("Thread ID * " + str(expand) )
ax.set_yticks(pl.array(range(nethread)), True)
loc = plticker.MultipleLocator(base=expand)
ax.yaxis.set_major_locator(loc)
ax.grid(True, which='major', axis="y", linestyle="-")
pl.show()
if mpimode:
outpng = outbase + str(rank) + ".png"
else:
outpng = outbase + ".png"
pl.savefig(outpng)
print "Graphics done, output written to", outpng
......
......
#!/usr/bin/env python
"""
Usage:
plot_tasks_MPI.py [options] input.dat png-output-prefix
where input.dat is a thread info file for a step. Use the '-y interval' flag
of the swift MPI command to create these. The output plot will be called
'png-output-prefix<mpi-rank>.png', i.e. one each for all the threads in each
MPI rank. The --limit option can be used to produce plots with the same time
span and the --expand option to expand each thread line into '*expand' lines,
so that adjacent tasks of the same type can be distinguished. Other options
can be seen using the --help flag.
See the command 'process_plot_tasks_MPI' to efficiently wrap this command to
process a number of thread info files and create an HTML file to view them.
This file is part of SWIFT.
Copyright (C) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
Matthieu Schaller (matthieu.schaller@durham.ac.uk)
(C) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
All Rights Reserved.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import matplotlib
matplotlib.use("Agg")
import matplotlib.collections as collections
import matplotlib.ticker as plticker
import pylab as pl
import sys
import argparse
# Handle the command line. Two positional arguments are required: the thread
# info file to read and the prefix used to name the output PNG files.
parser = argparse.ArgumentParser(description="Plot task graphs")

parser.add_argument("input", help="Thread data file (-y output)")
parser.add_argument("outbase", help="Base name for output graphic files (PNG)")
parser.add_argument("-l", "--limit", dest="limit",
                    help="Upper time limit in millisecs (def: depends on data)",
                    default=0, type=int)
parser.add_argument("-e", "--expand", dest="expand",
                    help="Thread expansion factor (def: 1)",
                    default=1, type=int)
parser.add_argument("--height", dest="height",
                    help="Height of plot in inches (def: 4)",
                    default=4., type=float)
parser.add_argument("--width", dest="width",
                    help="Width of plot in inches (def: 16)",
                    default=16., type=float)
# This is a store_true flag: giving it sets args.nolegend to True, so the help
# text describes what happens when the flag is present.
parser.add_argument("--nolegend", dest="nolegend",
                    help="Do not show the legend (def: False)",
                    default=False, action="store_true")
parser.add_argument("-v", "--verbose", dest="verbose",
                    help="Show colour assignments and other details (def: False)",
                    default=False, action="store_true")

args = parser.parse_args()
infile = args.input
outbase = args.outbase

# A limit of 0 (the default) means the time range is derived from the data.
delta_t = args.limit
expand = args.expand
# Basic plot configuration, assembled in themed groups and then pushed into
# the matplotlib rc settings in one go.
PLOT_PARAMS = {}

# Text sizes.
PLOT_PARAMS["axes.labelsize"] = 10
PLOT_PARAMS["axes.titlesize"] = 10
PLOT_PARAMS["font.size"] = 12
PLOT_PARAMS["legend.fontsize"] = 12
PLOT_PARAMS["xtick.labelsize"] = 10
PLOT_PARAMS["ytick.labelsize"] = 10

# Figure size (from the --width and --height options) and subplot margins.
PLOT_PARAMS["figure.figsize"] = (args.width, args.height)
PLOT_PARAMS["figure.subplot.left"] = 0.03
PLOT_PARAMS["figure.subplot.right"] = 0.995
PLOT_PARAMS["figure.subplot.bottom"] = 0.1
PLOT_PARAMS["figure.subplot.top"] = 0.99
PLOT_PARAMS["figure.subplot.wspace"] = 0.
PLOT_PARAMS["figure.subplot.hspace"] = 0.

# Line and marker defaults.
PLOT_PARAMS["lines.markersize"] = 6
PLOT_PARAMS["lines.linewidth"] = 3.

pl.rcParams.update(PLOT_PARAMS)
# Tasks and subtypes. Indexed as in tasks.h.
TASKTYPES = [
    "none", "sort", "self", "pair", "sub_self", "sub_pair", "init_grav",
    "ghost", "extra_ghost", "drift_part", "drift_gpart", "kick1", "kick2",
    "timestep", "send", "recv", "grav_top_level", "grav_long_range",
    "grav_mm", "grav_down", "cooling", "sourceterms", "count",
]

SUBTYPES = [
    "none", "density", "gradient", "force", "grav", "external_grav", "tend",
    "xv", "rho", "gpart", "multipole", "spart", "count",
]

# Task/subtypes of interest.
FULLTYPES = [
    "self/force", "self/density", "self/grav",
    "sub_self/force", "sub_self/density",
    "pair/force", "pair/density", "pair/grav",
    "sub_pair/force", "sub_pair/density",
    "recv/xv", "send/xv", "recv/rho", "send/rho", "recv/tend", "send/tend",
]

# A number of colours for the various types. Recycled when there are
# more task types than colours...
colours = [
    "cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue", "sienna",
    "aquamarine", "bisque", "blue", "green", "lightgreen", "brown", "purple",
    "moccasin", "olivedrab", "chartreuse", "darksage", "darkgreen", "green",
    "mediumseagreen", "mediumaquamarine", "darkslategrey", "mediumturquoise",
    "black", "cadetblue", "skyblue", "red", "slategray", "gold", "slateblue",
    "blueviolet", "mediumorchid", "firebrick", "magenta", "hotpink", "pink",
    "orange", "lightgreen",
]
maxcolours = len(colours)

# Set colours of task/subtype. Colours are handed out in list order, one per
# name, with a single running index that wraps back to the first colour once
# the list is exhausted. Task types go into TASKCOLOURS; the full types and
# then the bare subtypes go into SUBCOLOURS.
TASKCOLOURS = {}
SUBCOLOURS = {}
ncolours = 0
for table, names in ((TASKCOLOURS, TASKTYPES),
                     (SUBCOLOURS, FULLTYPES),
                     (SUBCOLOURS, SUBTYPES)):
    for task in names:
        table[task] = colours[ncolours]
        ncolours = (ncolours + 1) % maxcolours
# For fiddling with colours...
if args.verbose:
print "#Selected colours:"
for task in sorted(TASKCOLOURS.keys()):
print "# " + task + ": " + TASKCOLOURS[task]
for task in sorted(SUBCOLOURS.keys()):
print "# " + task + ": " + SUBCOLOURS[task]
# Read input.
data = pl.loadtxt( infile )
# Get CPU_CLOCK to convert ticks into milliseconds.
full_step = data[0,:]
CPU_CLOCK = float(full_step[-1]) / 1000.0
if args.verbose:
print "CPU frequency:", CPU_CLOCK * 1000.0
nranks = int(max(data[:,0])) + 1
print "Number of ranks:", nranks
nthread = int(max(data[:,1])) + 1
print "Number of threads:", nthread
# Avoid start and end times of zero.
sdata = data[data[:,5] != 0]
sdata = sdata[sdata[:,6] != 0]
# Each rank can have different clock (compute node), but we want to use the
# same delta times range for comparisons, so we suck it up and take the hit of
# precalculating this, unless the user knows better.
delta_t = delta_t * CPU_CLOCK
if delta_t == 0:
for rank in range(nranks):
data = sdata[sdata[:,0] == rank]
full_step = data[0,:]
tic_step = int(full_step[5])
toc_step = int(full_step[6])
dt = toc_step - tic_step
if dt > delta_t:
delta_t = dt
print "Data range: ", delta_t / CPU_CLOCK, "ms"
# Once more doing the real gather and plots this time.
for rank in range(nranks):
data = sdata[sdata[:,0] == rank]
# Start and end times for this rank.
full_step = data[0,:]
tic_step = int(full_step[5])
toc_step = int(full_step[6])
data = data[1:,:]
typesseen = []
nethread = 0
# Dummy image for ranks that have no tasks.
if data.size == 0:
print "rank ", rank, " has no tasks"
fig = pl.figure()
ax = fig.add_subplot(1,1,1)
ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK)
ax.set_ylim(0, nthread*expand)
start_t = tic_step
end_t = (toc_step - start_t) / CPU_CLOCK
else:
start_t = float(tic_step)
data[:,5] -= start_t
data[:,6] -= start_t
end_t = (toc_step - start_t) / CPU_CLOCK
tasks = {}
tasks[-1] = []
for i in range(nthread*expand):
tasks[i] = []
# Counters for each thread when expanding.
ecounter = []
for i in range(nthread):
ecounter.append(0)
num_lines = pl.shape(data)[0]
for line in range(num_lines):
thread = int(data[line,1])
# Expand to cover extra lines if expanding.
ethread = thread * expand + (ecounter[thread] % expand)
ecounter[thread] = ecounter[thread] + 1
thread = ethread
tasks[thread].append({})
tasktype = TASKTYPES[int(data[line,2])]
subtype = SUBTYPES[int(data[line,3])]
tasks[thread][-1]["type"] = tasktype
tasks[thread][-1]["subtype"] = subtype
tic = int(data[line,5]) / CPU_CLOCK
toc = int(data[line,6]) / CPU_CLOCK
tasks[thread][-1]["tic"] = tic
tasks[thread][-1]["toc"] = toc
if "self" in tasktype or "pair" in tasktype or "recv" in tasktype or "send" in tasktype:
fulltype = tasktype + "/" + subtype
if fulltype in SUBCOLOURS:
tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype]
else:
tasks[thread][-1]["colour"] = SUBCOLOURS[subtype]
else:
tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype]
# Use expanded threads from now on.
nethread = nthread * expand
typesseen = []
fig = pl.figure()
ax = fig.add_subplot(1,1,1)
ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK)
ax.set_ylim(0, nethread)
for i in range(nethread):
# Collect ranges and colours into arrays.
tictocs = []
colours = []
j = 0
for task in tasks[i]:
tictocs.append((task["tic"], task["toc"] - task["tic"]))
colours.append(task["colour"])
# Legend support, collections don't add to this.
if task["subtype"] != "none":
qtask = task["type"] + "/" + task["subtype"]
else:
qtask = task["type"]
if qtask not in typesseen:
pl.plot([], [], color=task["colour"], label=qtask)
typesseen.append(qtask)
# Now plot.
ax.broken_barh(tictocs, [i+0.05,0.90], facecolors = colours, linewidth=0)
# Legend and room for it.
nrow = len(typesseen) / 5
ax.fill_between([0, 0], nethread+0.5, nethread + nrow + 0.5, facecolor="white")
ax.set_ylim(0, nethread + 0.5)
if data.size > 0:
ax.legend(loc=1, shadow=True, bbox_to_anchor=(0., 1.05 ,1., 0.2), mode="expand", ncol=5)
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width, box.height*0.8])
# Start and end of time-step
ax.plot([0, 0], [0, nethread + nrow + 1], 'k--', linewidth=1)
ax.plot([end_t, end_t], [0, nethread + nrow + 1], 'k--', linewidth=1)
ax.set_xlabel("Wall clock time [ms]")
if expand == 1:
ax.set_ylabel("Thread ID" )
else:
ax.set_ylabel("Thread ID * " + str(expand) )
ax.set_yticks(pl.array(range(nethread)), True)
loc = plticker.MultipleLocator(base=expand)
ax.yaxis.set_major_locator(loc)
ax.grid(True, which='major', axis="y", linestyle="-")
pl.show()
outpng = outbase + str(rank) + ".png"
pl.savefig(outpng)
print "Graphics done, output written to", outpng
sys.exit(0)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment