diff --git a/examples/analyse_tasks.py b/examples/analyse_tasks.py
index 17816cab62a343711abee14e0c9d6b7e70ce8e5c..d28553173ebb636a99c96082c1eb35ffedca7d6d 100755
--- a/examples/analyse_tasks.py
+++ b/examples/analyse_tasks.py
@@ -56,6 +56,11 @@ TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
"tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
+SIDS = ["(-1,-1,-1)", "(-1,-1, 0)", "(-1,-1, 1)", "(-1, 0,-1)",
+ "(-1, 0, 0)", "(-1, 0, 1)", "(-1, 1,-1)", "(-1, 1, 0)",
+ "(-1, 1, 1)", "( 0,-1,-1)", "( 0,-1, 0)", "( 0,-1, 1)",
+ "( 0, 0,-1)"]
+
# Read input.
data = pl.loadtxt( infile )
@@ -66,11 +71,17 @@ print "# Maximum thread id:", maxthread
full_step = data[0,:]
tic_step = int(full_step[4])
toc_step = int(full_step[5])
+updates = int(full_step[6])
+g_updates = int(full_step[7])
+s_updates = int(full_step[8])
CPU_CLOCK = float(full_step[-1]) / 1000.0
data = data[1:,:]
if args.verbose:
- print "CPU frequency:", CPU_CLOCK * 1000.0
-
+ print "# CPU frequency:", CPU_CLOCK * 1000.0
+print "# updates:", updates
+print "# g_updates:", g_updates
+print "# s_updates:", s_updates
+
# Avoid start and end times of zero.
data = data[data[:,4] != 0]
data = data[data[:,5] != 0]
@@ -78,6 +89,7 @@ data = data[data[:,5] != 0]
# Calculate the time range.
total_t = (toc_step - tic_step)/ CPU_CLOCK
print "# Data range: ", total_t, "ms"
+print
# Correct times to relative values.
start_t = float(tic_step)
@@ -90,15 +102,16 @@ for i in range(maxthread):
tasks[i] = []
# Gather into by thread data.
-num_lines = pl.size(data) / 10
+num_lines = pl.size(data) / pl.size(full_step)
for line in range(num_lines):
thread = int(data[line,0])
tic = int(data[line,4]) / CPU_CLOCK
toc = int(data[line,5]) / CPU_CLOCK
tasktype = int(data[line,1])
subtype = int(data[line,2])
+ sid = int(data[line, -1])
- tasks[thread].append([tic,toc,tasktype,subtype])
+ tasks[thread].append([tic,toc,tasktype,subtype, sid])
# Sort by tic and gather used thread ids.
threadids = []
@@ -110,10 +123,11 @@ for i in range(maxthread):
# Times per task.
print "# Task times:"
print "# -----------"
-print "# {0:<16s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
+print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
.format("type/subtype", "count","minimum", "maximum",
"sum", "mean", "percent")
alltasktimes = {}
+sidtimes = {}
for i in threadids:
tasktimes = {}
for task in tasks[i]:
@@ -127,12 +141,19 @@ for i in threadids:
alltasktimes[key] = []
alltasktimes[key].append(dt)
+ my_sid = task[4]
+ if my_sid > -1:
+ if not my_sid in sidtimes:
+ sidtimes[my_sid] = []
+ sidtimes[my_sid].append(dt)
+
+
print "# Thread : ", i
for key in sorted(tasktimes.keys()):
taskmin = min(tasktimes[key])
taskmax = max(tasktimes[key])
tasksum = sum(tasktimes[key])
- print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
+ print "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
.format(key, len(tasktimes[key]), taskmin, taskmax, tasksum,
tasksum / len(tasktimes[key]), tasksum / total_t * 100.0)
print
@@ -142,12 +163,36 @@ for key in sorted(alltasktimes.keys()):
taskmin = min(alltasktimes[key])
taskmax = max(alltasktimes[key])
tasksum = sum(alltasktimes[key])
- print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
+ print "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
.format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum,
tasksum / len(alltasktimes[key]),
tasksum / (len(threadids) * total_t) * 100.0)
print
+# For pairs, show timing statistics sorted by SID.
+print "# By SID (all threads): "
+print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
+ .format("Pair/Sub-pair SID", "count","minimum", "maximum",
+ "sum", "mean", "percent")
+
+for sid in range(0,13):
+ if sid in sidtimes:
+ sidmin = min(sidtimes[sid])
+ sidmax = max(sidtimes[sid])
+ sidsum = sum(sidtimes[sid])
+ sidcount = len(sidtimes[sid])
+ sidmean = sidsum / sidcount
+ else:
+ sidmin = 0.
+ sidmax = 0.
+ sidsum = 0.
+ sidcount = 0
+ sidmean = 0.
+ print "{0:3d} {1:15s}: {2:7d} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.4f} {7:9.2f}"\
+ .format(sid, SIDS[sid], sidcount, sidmin, sidmax, sidsum,
+ sidmean, sidsum / (len(threadids) * total_t) * 100.0)
+print
+
# Dead times.
print "# Times not in tasks (deadtimes)"
print "# ------------------------------"
diff --git a/examples/analyse_tasks_MPI.py b/examples/analyse_tasks_MPI.py
index c90c8caee6062fd83a8ff3bda3e760eee55b08e1..b78d73e879046b05b8a089f97c4c9c00a5f7bb79 100755
--- a/examples/analyse_tasks_MPI.py
+++ b/examples/analyse_tasks_MPI.py
@@ -42,6 +42,9 @@ parser.add_argument("input", help="Thread data file (-y output)")
parser.add_argument("-v", "--verbose", dest="verbose",
help="Verbose output (default: False)",
default=False, action="store_true")
+parser.add_argument("-r", "--rank", dest="rank",
+ help="Rank to process (default: all)",
+ default="all", action="store")
args = parser.parse_args()
infile = args.input
@@ -56,17 +59,36 @@ TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair",
SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav",
"tend", "xv", "rho", "gpart", "multipole", "spart", "count"]
+SIDS = ["(-1,-1,-1)", "(-1,-1, 0)", "(-1,-1, 1)", "(-1, 0,-1)",
+ "(-1, 0, 0)", "(-1, 0, 1)", "(-1, 1,-1)", "(-1, 1, 0)",
+ "(-1, 1, 1)", "( 0,-1,-1)", "( 0,-1, 0)", "( 0,-1, 1)",
+ "( 0, 0,-1)"]
+
# Read input.
data = pl.loadtxt( infile )
# Get the CPU clock to convert ticks into milliseconds.
full_step = data[0,:]
+updates = int(full_step[7])
+g_updates = int(full_step[8])
+s_updates = int(full_step[9])
CPU_CLOCK = float(full_step[-1]) / 1000.0
if args.verbose:
print "# CPU frequency:", CPU_CLOCK * 1000.0
+print "# updates:", updates
+print "# g_updates:", g_updates
+print "# s_updates:", s_updates
nranks = int(max(data[:,0])) + 1
print "# Number of ranks:", nranks
+if args.rank == "all":
+ ranks = range(nranks)
+else:
+ ranks = [int(args.rank)]
+ if ranks[0] >= nranks:
+ print "Error: maximum rank is " + str(nranks - 1)
+ sys.exit(1)
+
maxthread = int(max(data[:,1])) + 1
print "# Maximum thread id:", maxthread
@@ -74,8 +96,8 @@ print "# Maximum thread id:", maxthread
sdata = data[data[:,5] != 0]
sdata = data[data[:,6] != 0]
-# Now we process all the ranks.
-for rank in range(nranks):
+# Now we process the required ranks.
+for rank in ranks:
print "# Rank", rank
data = sdata[sdata[:,0] == rank]
@@ -92,6 +114,7 @@ for rank in range(nranks):
# Calculate the time range.
total_t = (toc_step - tic_step)/ CPU_CLOCK
print "# Data range: ", total_t, "ms"
+ print
# Correct times to relative values.
start_t = float(tic_step)
@@ -105,15 +128,16 @@ for rank in range(nranks):
tasks[i] = []
# Gather into by thread data.
- num_lines = pl.size(data) / 12
+ num_lines = pl.shape(data)[0]
for line in range(num_lines):
thread = int(data[line,1])
tic = int(data[line,5]) / CPU_CLOCK
toc = int(data[line,6]) / CPU_CLOCK
tasktype = int(data[line,2])
subtype = int(data[line,3])
+ sid = int(data[line, -1])
- tasks[thread].append([tic,toc,tasktype,subtype])
+ tasks[thread].append([tic,toc,tasktype,subtype, sid])
# Sort by tic and gather used threads.
threadids = []
@@ -124,10 +148,12 @@ for rank in range(nranks):
# Times per task.
print "# Task times:"
print "# -----------"
- print "# {0:<16s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
+ print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
.format("type/subtype", "count","minimum", "maximum",
"sum", "mean", "percent")
+
alltasktimes = {}
+ sidtimes = {}
for i in threadids:
tasktimes = {}
for task in tasks[i]:
@@ -140,13 +166,19 @@ for rank in range(nranks):
if not key in alltasktimes:
alltasktimes[key] = []
alltasktimes[key].append(dt)
+
+ my_sid = task[4]
+ if my_sid > -1:
+ if not my_sid in sidtimes:
+ sidtimes[my_sid] = []
+ sidtimes[my_sid].append(dt)
print "# Thread : ", i
for key in sorted(tasktimes.keys()):
taskmin = min(tasktimes[key])
taskmax = max(tasktimes[key])
tasksum = sum(tasktimes[key])
- print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
+ print "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
.format(key, len(tasktimes[key]), taskmin, taskmax, tasksum,
tasksum / len(tasktimes[key]), tasksum / total_t * 100.0)
print
@@ -162,6 +194,30 @@ for rank in range(nranks):
tasksum / (len(threadids) * total_t) * 100.0)
print
+ # For pairs, show timing statistics sorted by SID.
+ print "# By SID (all threads): "
+ print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
+ .format("Pair/Sub-pair SID", "count","minimum", "maximum",
+ "sum", "mean", "percent")
+
+ for sid in range(0,13):
+ if sid in sidtimes:
+ sidmin = min(sidtimes[sid])
+ sidmax = max(sidtimes[sid])
+ sidsum = sum(sidtimes[sid])
+ sidcount = len(sidtimes[sid])
+ sidmean = sidsum / sidcount
+ else:
+ sidmin = 0.
+ sidmax = 0.
+ sidsum = 0.
+ sidcount = 0
+ sidmean = 0.
+ print "{0:3d} {1:15s}: {2:7d} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.4f} {7:9.2f}"\
+ .format(sid, SIDS[sid], sidcount, sidmin, sidmax, sidsum,
+ sidmean, sidsum / (len(threadids) * total_t) * 100.0)
+ print
+
# Dead times.
print "# Times not in tasks (deadtimes)"
print "# ------------------------------"
diff --git a/examples/main.c b/examples/main.c
index 5a4e56ad4f5744345a7c2a61fbe008c8e750685b..4cc3a36d4cd32518902332e387220aeafaefcdfc 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -681,14 +681,16 @@ int main(int argc, char *argv[]) {
/* Open file and position at end. */
file_thread = fopen(dumpfile, "a");
- fprintf(file_thread, " %03i 0 0 0 0 %lli %lli 0 0 0 0 %lli\n", myrank,
- e.tic_step, e.toc_step, cpufreq);
+ fprintf(file_thread, " %03i 0 0 0 0 %lli %lli %zi %zi %zi 0 0 %lli\n",
+ myrank, e.tic_step, e.toc_step, e.updates, e.g_updates,
+ e.s_updates, cpufreq);
int count = 0;
for (int l = 0; l < e.sched.nr_tasks; l++) {
if (!e.sched.tasks[l].implicit && e.sched.tasks[l].toc != 0) {
fprintf(
- file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %i\n",
- myrank, e.sched.tasks[l].rid, e.sched.tasks[l].type,
+ file_thread,
+ " %03i %i %i %i %i %lli %lli %i %i %i %i %i %i\n", myrank,
+ e.sched.tasks[l].rid, e.sched.tasks[l].type,
e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL),
e.sched.tasks[l].tic, e.sched.tasks[l].toc,
(e.sched.tasks[l].ci != NULL) ? e.sched.tasks[l].ci->count
@@ -699,7 +701,7 @@ int main(int argc, char *argv[]) {
: 0,
(e.sched.tasks[l].cj != NULL) ? e.sched.tasks[l].cj->gcount
: 0,
- e.sched.tasks[l].flags);
+ e.sched.tasks[l].flags, e.sched.tasks[l].sid);
}
fflush(stdout);
count++;
@@ -717,19 +719,21 @@ int main(int argc, char *argv[]) {
FILE *file_thread;
file_thread = fopen(dumpfile, "w");
/* Add some information to help with the plots */
- fprintf(file_thread, " %i %i %i %i %lli %lli %i %i %i %lli\n", -2, -1, -1,
- 1, e.tic_step, e.toc_step, 0, 0, 0, cpufreq);
+ fprintf(file_thread, " %i %i %i %i %lli %lli %zi %zi %zi %i %lli\n", -2,
+ -1, -1, 1, e.tic_step, e.toc_step, e.updates, e.g_updates,
+ e.s_updates, 0, cpufreq);
for (int l = 0; l < e.sched.nr_tasks; l++) {
if (!e.sched.tasks[l].implicit && e.sched.tasks[l].toc != 0) {
fprintf(
- file_thread, " %i %i %i %i %lli %lli %i %i %i %i\n",
+ file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n",
e.sched.tasks[l].rid, e.sched.tasks[l].type,
e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL),
e.sched.tasks[l].tic, e.sched.tasks[l].toc,
(e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->count,
(e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->count,
(e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->gcount,
- (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->gcount);
+ (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->gcount,
+ e.sched.tasks[l].sid);
}
}
fclose(file_thread);
diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py
index 88f176687db8116cfd4370970769164985e4d366..e80c3635cf02d409c82960395261179e27cff853 100755
--- a/examples/plot_tasks.py
+++ b/examples/plot_tasks.py
@@ -78,7 +78,7 @@ PLOT_PARAMS = {"axes.labelsize": 10,
"figure.figsize" : (args.width, args.height),
"figure.subplot.left" : 0.03,
"figure.subplot.right" : 0.995,
- "figure.subplot.bottom" : 0.1,
+ "figure.subplot.bottom" : 0.09,
"figure.subplot.top" : 0.99,
"figure.subplot.wspace" : 0.,
"figure.subplot.hspace" : 0.,
@@ -183,7 +183,7 @@ ecounter = []
for i in range(nthread):
ecounter.append(0)
-num_lines = pl.size(data) / 10
+num_lines = pl.size(data) / pl.size(full_step)
for line in range(num_lines):
thread = int(data[line,0])
@@ -243,21 +243,21 @@ for i in range(nthread):
# Legend and room for it.
nrow = len(typesseen) / 5
if not args.nolegend:
- if len(typesseen) * 5 < nrow:
- nrow = nrow + 1
ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white")
- ax.set_ylim(0, nthread + nrow + 1)
- ax.legend(loc=1, shadow=True, mode="expand", ncol=5)
-
+ ax.set_ylim(0, nthread + 0.5)
+ ax.legend(loc=1, shadow=True, bbox_to_anchor=(0., 1.05 ,1., 0.2), mode="expand", ncol=5)
+ box = ax.get_position()
+ ax.set_position([box.x0, box.y0, box.width, box.height*0.8])
+
# Start and end of time-step
ax.plot([0, 0], [0, nthread + nrow + 1], 'k--', linewidth=1)
ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1)
-ax.set_xlabel("Wall clock time [ms]")
+ax.set_xlabel("Wall clock time [ms]", labelpad=0.)
if expand == 1:
- ax.set_ylabel("Thread ID" )
+ ax.set_ylabel("Thread ID", labelpad=0 )
else:
- ax.set_ylabel("Thread ID * " + str(expand) )
+ ax.set_ylabel("Thread ID * " + str(expand), labelpad=0 )
ax.set_yticks(pl.array(range(nthread)), True)
loc = plticker.MultipleLocator(base=expand)
diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py
index 83465aee87e8b641775d760fa4db2f06b125dd8b..85d7c54567a66c9c2151732e0e7a11c6580f958b 100755
--- a/examples/plot_tasks_MPI.py
+++ b/examples/plot_tasks_MPI.py
@@ -278,12 +278,12 @@ for rank in range(nranks):
# Legend and room for it.
nrow = len(typesseen) / 5
- if len(typesseen) * 5 < nrow:
- nrow = nrow + 1
ax.fill_between([0, 0], nethread+0.5, nethread + nrow + 0.5, facecolor="white")
- ax.set_ylim(0, nethread + nrow + 1)
+ ax.set_ylim(0, nethread + 0.5)
if data.size > 0:
- ax.legend(loc=1, shadow=True, mode="expand", ncol=5)
+ ax.legend(loc=1, shadow=True, bbox_to_anchor=(0., 1.05 ,1., 0.2), mode="expand", ncol=5)
+ box = ax.get_position()
+ ax.set_position([box.x0, box.y0, box.width, box.height*0.8])
# Start and end of time-step
ax.plot([0, 0], [0, nethread + nrow + 1], 'k--', linewidth=1)
diff --git a/examples/process_plot_tasks_MPI b/examples/process_plot_tasks_MPI
index b2672b3711823eb87d0bede5b1ffd8945a735f98..691822ebc33b43450d69b06e49c2c95bb0683045 100755
--- a/examples/process_plot_tasks_MPI
+++ b/examples/process_plot_tasks_MPI
@@ -62,7 +62,9 @@ nrank=$(($nrank-1))
# And process them,
echo "Processing thread info files..."
echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./plot_tasks_MPI.py --expand 1 --limit $TIMERANGE \$0 \$2 "
-echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./analyse_tasks_MPI.py \$0 > \$2.stats"
+for i in $(seq 0 $nrank); do
+ echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./analyse_tasks_MPI.py -r $i \$0 > \$2${i}.stats"
+done
echo "Writing output index.html file"
# Construct document - serial.
@@ -93,7 +95,7 @@ EOF2
EOF2
-cat step${s}r.stats >> step${s}r${i}.html
+cat step${s}r${i}.stats >> step${s}r${i}.html
cat <<EOF2 >> step${s}r${i}.html