Commit e3309772 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Revert "Merge branch 'threadpool_task_plots' into 'master'

This reverts merge request !375
parent 58a6ff20
......@@ -189,19 +189,6 @@ if test "$enable_task_debugging" = "yes"; then
AC_DEFINE([SWIFT_DEBUG_TASKS],1,[Enable task debugging])
fi
# Check if threadpool debugging is on.
AC_ARG_ENABLE([threadpool-debugging],
[AS_HELP_STRING([--enable-threadpool-debugging],
[Store threadpool mapper timing information and generate threadpool dump files @<:@yes/no@:>@]
)],
[enable_threadpool_debugging="$enableval"],
[enable_threadpool_debugging="no"]
)
if test "$enable_threadpool_debugging" = "yes"; then
AC_DEFINE([SWIFT_DEBUG_THREADPOOL],1,[Enable threadpool debugging])
LDFLAGS="$LDFLAGS -rdynamic"
fi
# Check if the general timers are switched on.
AC_ARG_ENABLE([timers],
[AS_HELP_STRING([--enable-timers],
......@@ -910,10 +897,9 @@ AC_MSG_RESULT([
Multipole order : $with_multipole_order
No gravity below ID : $no_gravity_below_id
Individual timers : $enable_timers
Task debugging : $enable_task_debugging
Threadpool debugging : $enable_threadpool_debugging
Debugging checks : $enable_debugging_checks
Gravity checks : $gravity_force_checks
Individual timers : $enable_timers
Task debugging : $enable_task_debugging
Debugging checks : $enable_debugging_checks
Gravity checks : $gravity_force_checks
------------------------])
......@@ -153,8 +153,7 @@ int main(int argc, char *argv[]) {
/* Let's pin the main thread */
#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
if (((ENGINE_POLICY) & engine_policy_setaffinity) ==
engine_policy_setaffinity)
if (((ENGINE_POLICY)&engine_policy_setaffinity) == engine_policy_setaffinity)
engine_pin();
#endif
......@@ -164,7 +163,6 @@ int main(int argc, char *argv[]) {
int with_aff = 0;
int dry_run = 0;
int dump_tasks = 0;
int dump_threadpool = 0;
int nsteps = -2;
int with_cosmology = 0;
int with_external_gravity = 0;
......@@ -186,7 +184,7 @@ int main(int argc, char *argv[]) {
/* Parse the parameters */
int c;
while ((c = getopt(argc, argv, "acCdDef:FgGhMn:P:sSt:Tv:y:Y:")) != -1)
while ((c = getopt(argc, argv, "acCdDef:FgGhMn:P:sSt:Tv:y:")) != -1)
switch (c) {
case 'a':
with_aff = 1;
......@@ -275,21 +273,6 @@ int main(int argc, char *argv[]) {
"Task dumping is only possible if SWIFT was configured with the "
"--enable-task-debugging option.");
}
#endif
break;
case 'Y':
if (sscanf(optarg, "%d", &dump_threadpool) != 1) {
if (myrank == 0) printf("Error parsing dump_threadpool (-Y). \n");
if (myrank == 0) print_help_message();
return 1;
}
#ifndef SWIFT_DEBUG_THREADPOOL
if (dump_threadpool) {
error(
"Threadpool dumping is only possible if SWIFT was configured "
"with the "
"--enable-threadpool-debugging option.");
}
#endif
break;
case '?':
......@@ -770,22 +753,6 @@ int main(int argc, char *argv[]) {
#endif // WITH_MPI
}
#endif // SWIFT_DEBUG_TASKS
#ifdef SWIFT_DEBUG_THREADPOOL
/* Dump the task data using the given frequency. */
if (dump_threadpool && (dump_threadpool == 1 || j % dump_threadpool == 1)) {
char dumpfile[40];
#ifdef WITH_MPI
snprintf(dumpfile, 30, "threadpool_info-rank%d-step%d.dat", engine_rank,
j + 1);
#else
snprintf(dumpfile, 30, "threadpool_info-step%d.dat", j + 1);
#endif // WITH_MPI
threadpool_dump_log(&e.threadpool, dumpfile, 1);
} else {
threadpool_reset_log(&e.threadpool);
}
#endif // SWIFT_DEBUG_THREADPOOL
}
/* Print the values of the runner histogram. */
......
#!/usr/bin/env python
"""
Usage:
plot_threadpool.py [options] input.dat output.png
where input.dat is a threadpool info file for a step. Use the '-Y interval'
flag of the swift command to create these. The output plot will be called
'output.png'. The --limit option can be used to produce plots with the same
time span and the --expand option to expand each thread line into '*expand'
lines, so that adjacent tasks of the same type can be distinguished. Other
options can be seen using the --help flag.
This file is part of SWIFT.
Copyright (c) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
Matthieu Schaller (matthieu.schaller@durham.ac.uk)
(c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import matplotlib
matplotlib.use("Agg")
import matplotlib.collections as collections
import matplotlib.ticker as plticker
import pylab as pl
import sys
import argparse
# Handle the command line.
parser = argparse.ArgumentParser(description="Plot threadpool function graphs")
parser.add_argument("input", help="Threadpool data file (-Y output)")
parser.add_argument("outpng", help="Name for output graphic file (PNG)")
parser.add_argument("-l", "--limit", dest="limit",
help="Upper time limit in millisecs (def: depends on data)",
default=0, type=int)
parser.add_argument("-e", "--expand", dest="expand",
help="Thread expansion factor (def: 1)",
default=1, type=int)
parser.add_argument("--height", dest="height",
help="Height of plot in inches (def: 4)",
default=4., type=float)
parser.add_argument("--width", dest="width",
help="Width of plot in inches (def: 16)",
default=16., type=float)
parser.add_argument("--nolegend", dest="nolegend",
help="Whether to show the legend (def: False)",
default=False, action="store_true")
parser.add_argument("-v", "--verbose", dest="verbose",
help="Show colour assignments and other details (def: False)",
default=False, action="store_true")
args = parser.parse_args()
infile = args.input
outpng = args.outpng
delta_t = args.limit
expand = args.expand
# Basic plot configuration.
PLOT_PARAMS = {"axes.labelsize": 10,
"axes.titlesize": 10,
"font.size": 12,
"legend.fontsize": 12,
"xtick.labelsize": 10,
"ytick.labelsize": 10,
"figure.figsize" : (args.width, args.height),
"figure.subplot.left" : 0.03,
"figure.subplot.right" : 0.995,
"figure.subplot.bottom" : 0.09,
"figure.subplot.top" : 0.99,
"figure.subplot.wspace" : 0.,
"figure.subplot.hspace" : 0.,
"lines.markersize" : 6,
"lines.linewidth" : 3.
}
pl.rcParams.update(PLOT_PARAMS)
# A number of colours for the various types. Recycled when there are
# more task types than colours...
colours = ["cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue",
"sienna", "aquamarine", "bisque", "blue", "green", "lightgreen",
"brown", "purple", "moccasin", "olivedrab", "chartreuse",
"darksage", "darkgreen", "green", "mediumseagreen",
"mediumaquamarine", "darkslategrey", "mediumturquoise",
"black", "cadetblue", "skyblue", "red", "slategray", "gold",
"slateblue", "blueviolet", "mediumorchid", "firebrick",
"magenta", "hotpink", "pink", "orange", "lightgreen"]
maxcolours = len(colours)
# Read header. First two lines.
with open(infile) as infid:
head = [next(infid) for x in xrange(2)]
header = head[1][2:].strip()
header = eval(header)
nthread = int(header['num_threads']) + 1
CPU_CLOCK = float(header['cpufreq']) / 1000.0
print "Number of threads: ", nthread
if args.verbose:
print "CPU frequency:", CPU_CLOCK * 1000.0
# Read input.
data = pl.genfromtxt(infile, dtype=None, delimiter=" ")
# Mixed types, so need to separate.
tics = []
tocs = []
funcs = []
threads = []
chunks = []
for i in data:
if i[0] != "#":
funcs.append(i[0])
if i[1] < 0:
threads.append(nthread-1)
else:
threads.append(i[1])
chunks.append(i[2])
tics.append(i[3])
tocs.append(i[4])
tics = pl.array(tics)
tocs = pl.array(tocs)
funcs = pl.array(funcs)
threads = pl.array(threads)
chunks = pl.array(chunks)
# Recover the start and end time
tic_step = min(tics)
toc_step = max(tocs)
# Not known.
# Calculate the time range, if not given.
delta_t = delta_t * CPU_CLOCK
if delta_t == 0:
dt = toc_step - tic_step
if dt > delta_t:
delta_t = dt
print "Data range: ", delta_t / CPU_CLOCK, "ms"
# Once more doing the real gather and plots this time.
start_t = float(tic_step)
tics -= tic_step
tocs -= tic_step
end_t = (toc_step - start_t) / CPU_CLOCK
# Get all "task" names and assign colours.
TASKTYPES = pl.unique(funcs)
print TASKTYPES
# Set colours of task/subtype.
TASKCOLOURS = {}
ncolours = 0
for task in TASKTYPES:
TASKCOLOURS[task] = colours[ncolours]
ncolours = (ncolours + 1) % maxcolours
# For fiddling with colours...
if args.verbose:
print "#Selected colours:"
for task in sorted(TASKCOLOURS.keys()):
print "# " + task + ": " + TASKCOLOURS[task]
for task in sorted(SUBCOLOURS.keys()):
print "# " + task + ": " + SUBCOLOURS[task]
tasks = {}
tasks[-1] = []
for i in range(nthread*expand):
tasks[i] = []
# Counters for each thread when expanding.
ecounter = []
for i in range(nthread):
ecounter.append(0)
for i in range(len(threads)):
thread = threads[i]
# Expand to cover extra lines if expanding.
ethread = thread * expand + (ecounter[thread] % expand)
ecounter[thread] = ecounter[thread] + 1
thread = ethread
tasks[thread].append({})
tasks[thread][-1]["type"] = funcs[i]
tic = tics[i] / CPU_CLOCK
toc = tocs[i] / CPU_CLOCK
tasks[thread][-1]["tic"] = tic
tasks[thread][-1]["toc"] = toc
tasks[thread][-1]["colour"] = TASKCOLOURS[funcs[i]]
# Use expanded threads from now on.
nthread = nthread * expand
typesseen = []
fig = pl.figure()
ax = fig.add_subplot(1,1,1)
ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK)
ax.set_ylim(0, nthread)
# Fake thread is used to colour the whole range, do that first.
tictocs = []
colours = []
j = 0
for task in tasks[nthread - expand]:
tictocs.append((task["tic"], task["toc"] - task["tic"]))
colours.append(task["colour"])
ax.broken_barh(tictocs, [0,(nthread-1)], facecolors = colours, linewidth=0, alpha=0.15)
# And we don't plot the fake thread.
nthread = nthread - expand
for i in range(nthread):
# Collect ranges and colours into arrays.
tictocs = []
colours = []
j = 0
for task in tasks[i]:
tictocs.append((task["tic"], task["toc"] - task["tic"]))
colours.append(task["colour"])
# Legend support, collections don't add to this.
qtask = task["type"]
if qtask not in typesseen:
pl.plot([], [], color=task["colour"], label=qtask)
typesseen.append(qtask)
# Now plot.
ax.broken_barh(tictocs, [i+0.05,0.90], facecolors = colours, linewidth=0)
# Legend and room for it.
nrow = len(typesseen) / 5
if not args.nolegend:
ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white")
ax.set_ylim(0, nthread + 0.5)
ax.legend(loc=1, shadow=True, bbox_to_anchor=(0., 1.05 ,1., 0.2), mode="expand", ncol=5)
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width, box.height*0.8])
# Start and end of time-step
ax.plot([0, 0], [0, nthread + nrow + 1], 'k--', linewidth=1)
ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1)
ax.set_xlabel("Wall clock time [ms]", labelpad=0.)
if expand == 1:
ax.set_ylabel("Thread ID", labelpad=0 )
else:
ax.set_ylabel("Thread ID * " + str(expand), labelpad=0 )
ax.set_yticks(pl.array(range(nthread)), True)
loc = plticker.MultipleLocator(base=expand)
ax.yaxis.set_major_locator(loc)
ax.grid(True, which='major', axis="y", linestyle="-")
pl.show()
pl.savefig(outpng)
print "Graphics done, output written to", outpng
sys.exit(0)
#!/bin/bash
#
# Usage:
# process_plot_threadpool nprocess time-range-ms
#
# Description:
# Process all the threadpool info files in the current directory
# creating function graphs for steps and threads.
#
# The input files are created by a run using the "-Y interval" flag and
# should be named "threadpool_info-step<n>.dat" in the current directory.
# All located files will be processed using "nprocess" concurrent
# processes and all plots will have the given time range. An output
# HTML file "index.html" will be created to view all the plots.
#
#
# This file is part of SWIFT:
#
# Copyright (C) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
# All Rights Reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Handle command-line
if test "$2" == ""; then
echo "Usage: $0 nprocess time-range-ms"
exit 1
fi
NPROCS=$1
TIMERANGE=$2
# Find all thread info files. Use version sort to get into correct order.
files=$(ls -v threadpool_info-step*.dat)
if test $? != 0; then
echo "Failed to find any threadpool info files"
exit 1
fi
# Construct list of names, the step no and names for the graphics.
list=""
for f in $files; do
s=$(echo $f| sed 's,threadpool_info-step\(.*\).dat,\1,')
list="$list $f $s poolstep${s}r"
done
# And process them,
echo "Processing threadpool info files..."
echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./plot_threadpool.py --expand 3 --limit $TIMERANGE --width 16 --height 4 \$0 \$2 "
echo "Writing output index.html file"
# Construct document - serial.
cat <<EOF > index.html
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>SWIFT task graphs</title>
</head>
<body>
<h1>SWIFT task graphs</h1>
EOF
echo $list | xargs -n 3 | while read f s g; do
cat <<EOF >> index.html
<h2>Step $s</h2>
EOF
cat <<EOF >> index.html
<a href="poolstep${s}r${i}.html"><img src="poolstep${s}r${i}.png" width=400px/></a>
EOF
cat <<EOF > poolstep${s}r${i}.html
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<body>
<img src="poolstep${s}r${i}.png">
<pre>
EOF
done
cat <<EOF >> index.html
</body>
</html>
EOF
echo "Finished"
exit
......@@ -2402,6 +2402,21 @@ void engine_make_gravityrecursive_tasks(struct engine *e) {
/* } */
}
void engine_check_sort_tasks(struct engine *e, struct cell *c) {
/* Find the parent sort task, if any, and copy its flags. */
if (c->sorts != NULL) {
struct cell *parent = c->parent;
while (parent != NULL && parent->sorts == NULL) parent = parent->parent;
if (parent != NULL) c->sorts->flags |= parent->sorts->flags;
}
/* Recurse? */
if (c->split)
for (int k = 0; k < 8; k++)
if (c->progeny[k] != NULL) engine_check_sort_tasks(e, c->progeny[k]);
}
/**
* @brief Fill the #space's task list.
*
......@@ -2480,6 +2495,9 @@ void engine_maketasks(struct engine *e) {
for (int k = 0; k < nr_cells; k++)
engine_make_hierarchical_tasks(e, &cells[k]);
/* Append hierarchical tasks to each cell. */
for (int k = 0; k < nr_cells; k++) engine_check_sort_tasks(e, &cells[k]);
/* Run through the tasks and make force tasks for each density task.
Each force task depends on the cell ghosts and unlocks the kick task
of its super-cell. */
......@@ -2774,7 +2792,7 @@ int engine_marktasks(struct engine *e) {
/* Run through the tasks and mark as skip or not. */
size_t extra_data[3] = {(size_t)e, rebuild_space, (size_t)&e->sched};
threadpool_map(&e->threadpool, engine_marktasks_mapper, s->tasks, s->nr_tasks,
sizeof(struct task), 0, extra_data);
sizeof(struct task), 10000, extra_data);
rebuild_space = extra_data[1];
if (e->verbose)
......@@ -3678,7 +3696,7 @@ void engine_drift_all(struct engine *e) {
#endif
threadpool_map(&e->threadpool, engine_do_drift_all_mapper, e->s->cells_top,
e->s->nr_cells, sizeof(struct cell), 0, e);
e->s->nr_cells, sizeof(struct cell), 1, e);
/* Synchronize particle positions */
space_synchronize_particle_positions(e->s);
......@@ -3730,7 +3748,7 @@ void engine_drift_top_multipoles(struct engine *e) {
const ticks tic = getticks();
threadpool_map(&e->threadpool, engine_do_drift_top_multipoles_mapper,
e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 0, e);
e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 10, e);
#ifdef SWIFT_DEBUG_CHECKS
/* Check that all cells have been drifted to the current time. */
......@@ -3768,7 +3786,7 @@ void engine_reconstruct_multipoles(struct engine *e) {
const ticks tic = getticks();
threadpool_map(&e->threadpool, engine_do_reconstruct_multipoles_mapper,
e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 0, e);
e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 10, e);
if (e->verbose)
message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
......
......@@ -77,7 +77,7 @@ extern const char *engine_policy_names[];
#define engine_queue_scale 1.2
#define engine_maxtaskspercell 96
#define engine_maxproxies 64
#define engine_tasksreweight 1
#define engine_tasksreweight 10
#define engine_parts_size_grow 1.05
#define engine_redistribute_alloc_margin 1.2
#define engine_default_energy_file_name "energy"
......
......@@ -207,7 +207,7 @@ void gravity_exact_force_compute(struct space *s, const struct engine *e) {
data.const_G = e->physical_constants->const_newton_G;
threadpool_map(&s->e->threadpool, gravity_exact_force_compute_mapper,
s->gparts, s->nr_gparts, sizeof(struct gpart), 0, &data);
s->gparts, s->nr_gparts, sizeof(struct gpart), 1000, &data);
message("Computed exact gravity for %d gparts (took %.3f %s). ",
data.counter_global, clocks_from_ticks(getticks() - tic),
......
......@@ -29,7 +29,7 @@
#define queue_sizeinit 100
#define queue_sizegrow 2
#define queue_search_window 8
#define queue_incoming_size 10240
#define queue_incoming_size 1024
#define queue_struct_align 64
/* Counters. */
......
......@@ -759,7 +759,7 @@ void scheduler_splittasks(struct scheduler *s) {
/* Call the mapper on each current task. */
threadpool_map(s->threadpool, scheduler_splittasks_mapper, s->tasks,
s->nr_tasks, sizeof(struct task), 0, s);
s->nr_tasks, sizeof(struct task), 1000, s);
}
/**
......@@ -1174,7 +1174,7 @@ void scheduler_start(struct scheduler *s) {
/* Re-wait the tasks. */
if (s->active_count > 1000) {
threadpool_map(s->threadpool, scheduler_rewait_mapper, s->tid_active,
s->active_count, sizeof(int), 0, s);
s->active_count, sizeof(int), 1000, s);
} else {
scheduler_rewait_mapper(s->tid_active, s->active_count, s);
}
......@@ -1250,7 +1250,7 @@ void scheduler_start(struct scheduler *s) {
/* Loop over the tasks and enqueue whoever is ready. */
if (s->active_count > 1000) {
threadpool_map(s->threadpool, scheduler_enqueue_mapper, s->tid_active,
s->active_count, sizeof(int), 0, s);
s->active_count, sizeof(int), 1000, s);
} else {
scheduler_enqueue_mapper(s->tid_active, s->active_count, s);
}
......
......@@ -378,7 +378,7 @@ void space_regrid(struct space *s, int verbose) {
/* Free the old cells, if they were allocated. */
if (s->cells_top != NULL) {
threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
s->cells_top, s->nr_cells, sizeof(struct cell), 0, s);
s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
free(s->cells_top);
free(s->multipoles_top);
s->maxdepth = 0;
......@@ -491,7 +491,7 @@ void space_regrid(struct space *s, int verbose) {
/* Free the old cells, if they were allocated. */
threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
s->cells_top, s->nr_cells, sizeof(struct cell), 0, s);
s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
s->maxdepth = 0;
}
......@@ -970,7 +970,7 @@ void space_split(struct space *s, struct cell *cells, int nr_cells,
const ticks tic = getticks();
threadpool_map(&s->e->threadpool, space_split_mapper, cells, nr_cells,