#!/usr/bin/env python
"""
Usage:
    analyse_tasks.py [options] input.dat

where input.dat is a thread info file for a step (MPI or non-MPI). Use the
'-y interval' flag of the swift and swift_mpi commands to create these
(you will also need to configure with the --enable-task-debugging option).

The output is an analysis of the task timings, including deadtime per thread
and step, total amount of time spent for each task type, for the whole step
and per thread and the minimum and maximum times spent per task type.

This file is part of SWIFT.
Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import matplotlib
32

33
34
35
36
37
38
39
40
41
42
43
matplotlib.use("Agg")
import matplotlib.collections as collections
import matplotlib.ticker as plticker
import pylab as pl
import sys
import argparse

#  Handle the command line.
parser = argparse.ArgumentParser(description="Analyse task dumps")

#  Positional: the thread info file to analyse.
parser.add_argument("input", help="Thread data file (-y output)")

#  Optional switches.
parser.add_argument(
    "-v",
    "--verbose",
    action="store_true",
    default=False,
    dest="verbose",
    help="Verbose output (default: False)",
)
parser.add_argument(
    "-r",
    "--rank",
    action="store",
    default="all",
    dest="rank",
    help="Rank to process (default: all)",
)
parser.add_argument(
    "--html",
    action="store_true",
    default=False,
    dest="html",
    help="Use html titles and anchors in the output (default: False)",
)

#  Parse once and pick out the values used throughout the script.
args = parser.parse_args()
infile, with_html = args.input, args.html

#  Tasks and subtypes. Indexed as in tasks.h.
#  The integer task type read from the dump is used directly as an index
#  into this list, so the order of the entries must not be changed.
TASKTYPES = [
    "none",
    "sort",
    "self",
    "pair",
    "sub_self",
    "sub_pair",
    "init_grav",
    "init_grav_out",
    "ghost_in",
    "ghost",
    "ghost_out",
    "extra_ghost",
    "drift_part",
    "drift_spart",
    "drift_bpart",
    "drift_gpart",
    "drift_gpart_out",
    "hydro_end_force",
    "kick1",
    "kick2",
    "timestep",
    "timestep_limiter",
    "timestep_sync",
    "send",
    "recv",
    "grav_long_range",
    "grav_mm",
    "grav_down_in",
    "grav_down",
    "grav_mesh",
    "grav_end_force",
    "cooling",
    "star_formation",
    "star_formation_in",
    "star_formation_out",
    "logger",
    "stars_in",
    "stars_out",
    "stars_ghost_in",
    "stars_ghost",
    "stars_ghost_out",
    "stars_sort",
    "stars_resort",
    "bh_in",
    "bh_out",
    "bh_ghost",
    "bh_swallow_ghost1",
    "bh_swallow_ghost2",
    "bh_swallow_ghost3",
    "fof_self",
    "fof_pair",
    "count",
]

#  Task subtype names. The integer subtype read from the dump is used
#  directly as an index into this list, so the order must not be changed.
SUBTYPES = [
    "none",
    "density",
    "gradient",
    "force",
    "limiter",
    "grav",
    "external_grav",
    "tend_part",
    "tend_gpart",
    "tend_spart",
    "tend_bpart",
    "xv",
    "rho",
    "part_swallow",
    "bpart_merger",
    "gpart",
    "multipole",
    "spart",
    "stars_density",
    "stars_feedback",
    "sf_counts",
    "bpart_rho",
    "bpart_swallow",
    "bpart_feedback",
    "bh_density",
    "bh_swallow",
    "do_gas_swallow",
    "do_bh_swallow",
    "bh_feedback",
    "count",
]

#  Labels for the 13 SID values shown in the "By SID" table: the first 13
#  offset triples over (-1, 0, 1) in lexicographic order, each component
#  written in a two character wide field.
_OFFSETS = (-1, 0, 1)
SIDS = [
    "({0:2d},{1:2d},{2:2d})".format(dx, dy, dz)
    for dx in _OFFSETS
    for dy in _OFFSETS
    for dz in _OFFSETS
][:13]

#  Read input.
data = pl.loadtxt(infile)

#  Do we have an MPI file? The first row describes the whole step and its
#  width decides which column layout applies.
full_step = data[0, :]
mpimode = full_step.size == 13
if mpimode:
    print("# MPI mode")
    nranks = int(max(data[:, 0])) + 1
    print("# Number of ranks:", nranks)
    rankcol, threadscol, taskcol, subtaskcol = 0, 1, 2, 3
    ticcol, toccol = 5, 6
    updates, g_updates, s_updates = (int(full_step[col]) for col in (7, 8, 9))
else:
    print("# non MPI mode")
    nranks = 1
    #  No rank column in this layout, so every index is one lower.
    rankcol, threadscol, taskcol, subtaskcol = -1, 0, 1, 2
    ticcol, toccol = 4, 5
    updates, g_updates, s_updates = (int(full_step[col]) for col in (6, 7, 8))

#  Get the CPU clock to convert ticks into milliseconds. The last column of
#  the step row holds the frequency; dividing by 1000 gives ticks per
#  millisecond.
CPU_CLOCK = float(full_step[-1]) / 1000.0
if args.verbose:
    print("# CPU frequency:", CPU_CLOCK * 1000.0)
print("#   updates:", updates)
print("# g_updates:", g_updates)
print("# s_updates:", s_updates)

#  Select the ranks to process: all of them, or the single one requested
#  with --rank. Invalid requests stop the script with exit status 1.
if mpimode:
    if args.rank == "all":
        ranks = list(range(nranks))
    else:
        try:
            ranks = [int(args.rank)]
        except ValueError:
            print("Error: rank must be an integer or 'all', not " + args.rank)
            sys.exit(1)
        if ranks[0] < 0:
            #  A negative rank matches no rows and would fail later on.
            print("Error: rank must not be negative")
            sys.exit(1)
        if ranks[0] >= nranks:
            print("Error: maximum rank is " + str(nranks - 1))
            sys.exit(1)
else:
    #  Single pass; the rank value itself is only read in MPI mode.
    ranks = [1]

#  One more than the largest thread index found in the file.
maxthread = int(max(data[:, threadscol])) + 1
print("# Maximum thread id:", maxthread)

#  Avoid start and end times of zero. The second filter must be applied to
#  the result of the first, otherwise the tic filter is thrown away.
sdata = data[data[:, ticcol] != 0]
sdata = sdata[sdata[:, toccol] != 0]

#  Now we process the required ranks.
for rank in ranks:
    if mpimode:
        print("# Rank", rank)
        #  Keep this rank's rows only; the first one is its step summary.
        data = sdata[sdata[:, rankcol] == rank]
        if data.shape[0] == 0:
            #  Nothing recorded for this rank, so there is no step row.
            print("# No data for rank", rank)
            continue
        full_step = data[0, :]
    else:
        data = sdata

    #  Recover the start and end time
    tic_step = int(full_step[ticcol])
    toc_step = int(full_step[toccol])

    #  Drop the step row so that only task rows remain.
    data = data[1:, :]

    #  Avoid start and end times of zero. Boolean indexing returns a copy,
    #  so the in-place shifts below do not modify sdata.
    data = data[data[:, ticcol] != 0]
    data = data[data[:, toccol] != 0]

    #  Calculate the time range.
    total_t = (toc_step - tic_step) / CPU_CLOCK
    print("# Data range: ", total_t, "ms")
    print()

    #  Correct times to relative values.
    start_t = float(tic_step)
    data[:, ticcol] -= start_t
    data[:, toccol] -= start_t
    end_t = (toc_step - start_t) / CPU_CLOCK

    #  One task list per thread index, plus an entry for index -1.
    tasks = {i: [] for i in range(-1, maxthread)}

    #  Gather into by thread data. Each task is stored as
    #  [tic, toc, tasktype, subtype, sid] with tic and toc in milliseconds.
    for row in data:
        thread = int(row[threadscol])
        tic = int(row[ticcol]) / CPU_CLOCK
        toc = int(row[toccol]) / CPU_CLOCK
        tasktype = int(row[taskcol])
        subtype = int(row[subtaskcol])
        sid = int(row[-1])
        tasks[thread].append([tic, toc, tasktype, subtype, sid])

    #  Sort by tic and gather used threads.
    threadids = []
    for i in range(maxthread):
        if tasks[i]:
            tasks[i].sort(key=lambda task: task[0])
            threadids.append(i)

    #  Times per task.
    print("# Task times:")
    print("# -----------")
    print(
        "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}".format(
            "type/subtype", "count", "minimum", "maximum", "sum", "mean", "percent"
        )
    )

    #  alltasktimes and sidtimes accumulate over all threads and are
    #  reported after this loop; tasktimes is reset for every thread.
    alltasktimes = {}
    sidtimes = {}
    for i in threadids:
        tasktimes = {}
        for task in tasks[i]:
            key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]]
            dt = task[1] - task[0]
            tasktimes.setdefault(key, []).append(dt)
            alltasktimes.setdefault(key, []).append(dt)

            #  Only tasks carrying a non-negative sid count towards the
            #  by-SID statistics.
            my_sid = task[4]
            if my_sid > -1:
                sidtimes.setdefault(my_sid, []).append(dt)

        if with_html:
            print('<div id="thread{}"></div>'.format(i))
        print("# Thread : ", i)
        for key in sorted(tasktimes):
            times = tasktimes[key]
            tasksum = sum(times)
            print(
                "{0:24s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format(
                    key,
                    len(times),
                    min(times),
                    max(times),
                    tasksum,
                    tasksum / len(times),
                    tasksum / total_t * 100.0,
                )
            )
        print()

    #  Task times summed over all threads. The percentage is relative to
    #  the time available on all used threads together.
    if with_html:
        print('<div id="all"></div>')
    print("# All threads : ")
    for key in sorted(alltasktimes):
        times = alltasktimes[key]
        tasksum = sum(times)
        print(
            "{0:23s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format(
                key,
                len(times),
                min(times),
                max(times),
                tasksum,
                tasksum / len(times),
                tasksum / (len(threadids) * total_t) * 100.0,
            )
        )
    print()

    # For pairs, show stuff sorted by SID
    if with_html:
        print('<div id="sid"></div>')
    print("# By SID (all threads): ")
    print(
        "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}".format(
            "Pair/Sub-pair SID", "count", "minimum", "maximum", "sum", "mean", "percent"
        )
    )

    #  One row per entry of SIDS; a sid without any task is shown as zeros.
    for sid, label in enumerate(SIDS):
        times = sidtimes.get(sid)
        if times:
            sidmin = min(times)
            sidmax = max(times)
            sidsum = sum(times)
            sidcount = len(times)
            sidmean = sidsum / sidcount
        else:
            sidmin = sidmax = sidsum = sidmean = 0.0
            sidcount = 0
        print(
            "{0:3d} {1:15s}: {2:7d} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.4f} {7:9.2f}".format(
                sid,
                label,
                sidcount,
                sidmin,
                sidmax,
                sidsum,
                sidmean,
                sidsum / (len(threadids) * total_t) * 100.0,
            )
        )
    print()

    #  Dead times.
    print("# Times not in tasks (deadtimes)")
    print("# ------------------------------")
    if with_html:
        print('<div id="before"></div>')
    print("# Time before first task:")
    print("# no.    : {0:>9s} {1:>9s}".format("value", "percent"))

    #  threadids only holds threads that ran at least one task, and times
    #  are relative to the start of the step, so the tic of the first task
    #  is the idle time before it.
    predeadtimes = []
    for i in threadids:
        predeadtime = tasks[i][0][0]
        print(
            "thread {0:2d}: {1:9.4f} {2:9.4f}".format(
                i, predeadtime, predeadtime / total_t * 100.0
            )
        )
        predeadtimes.append(predeadtime)

    predeadsum = sum(predeadtimes)
    print(
        "#        : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format(
            "count", "minimum", "maximum", "sum", "mean", "percent"
        )
    )
    print(
        "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format(
            len(predeadtimes),
            min(predeadtimes),
            max(predeadtimes),
            predeadsum,
            predeadsum / len(predeadtimes),
            predeadsum / (len(threadids) * total_t) * 100.0,
        )
    )
    print()

    #  Idle time of each thread after its last task.
    if with_html:
        print('<div id="after"></div>')
    print("# Time after last task:")
    print("# no.    : {0:>9s} {1:>9s}".format("value", "percent"))

    #  threadids only holds threads that ran at least one task, so the
    #  last task always exists; its toc is relative to the step start.
    postdeadtimes = []
    for i in threadids:
        postdeadtime = total_t - tasks[i][-1][1]
        print(
            "thread {0:2d}: {1:9.4f} {2:9.4f}".format(
                i, postdeadtime, postdeadtime / total_t * 100.0
            )
        )
        postdeadtimes.append(postdeadtime)

    postdeadsum = sum(postdeadtimes)
    print(
        "#        : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format(
            "count", "minimum", "maximum", "sum", "mean", "percent"
        )
    )
    print(
        "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format(
            len(postdeadtimes),
            min(postdeadtimes),
            max(postdeadtimes),
            postdeadsum,
            postdeadsum / len(postdeadtimes),
            postdeadsum / (len(threadids) * total_t) * 100.0,
        )
    )
    print()

    #  Time in engine, i.e. from first to last tasks.
    if with_html:
        print('<div id="between"></div>')
    print("# Time between tasks (engine deadtime):")
    print(
        "# no.    : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format(
            "count", "minimum", "maximum", "sum", "mean", "percent"
        )
    )

    enginedeadtimes = []
    for i in threadids:
        #  Gap between the end of each task and the start of the next one
        #  on the same thread (tasks are already sorted by tic).
        thread_tasks = tasks[i]
        deadtimes = [
            following[0] - previous[1]
            for previous, following in zip(thread_tasks, thread_tasks[1:])
        ]
        if not deadtimes:
            #  Only one or fewer tasks, so no deadtime by definition.
            deadtimes = [0.0]

        deadsum = sum(deadtimes)
        print(
            "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format(
                i,
                len(deadtimes),
                min(deadtimes),
                max(deadtimes),
                deadsum,
                deadsum / len(deadtimes),
                deadsum / total_t * 100.0,
            )
        )
        enginedeadtimes.extend(deadtimes)

    deadsum = sum(enginedeadtimes)
    print(
        "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format(
            len(enginedeadtimes),
            min(enginedeadtimes),
            max(enginedeadtimes),
            deadsum,
            deadsum / len(enginedeadtimes),
            deadsum / (len(threadids) * total_t) * 100.0,
        )
    )
    print()

    #  All times in step.
    if with_html:
        print('<div id="dead"></div>')
    print("# All deadtimes:")
    print(
        "# no.    : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format(
            "count", "minimum", "maximum", "sum", "mean", "percent"
        )
    )

    alldeadtimes = []
    for i in threadids:
        #  Walk the thread's tasks from the start of the step: the wait
        #  before the first task, every gap between tasks, and finally the
        #  time from the last task to the end of the step.
        deadtimes = []
        last = 0
        for task in tasks[i]:
            deadtimes.append(task[0] - last)
            last = task[1]
        deadtimes.append(total_t - last)

        deadsum = sum(deadtimes)
        print(
            "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format(
                i,
                len(deadtimes),
                min(deadtimes),
                max(deadtimes),
                deadsum,
                deadsum / len(deadtimes),
                deadsum / total_t * 100.0,
            )
        )
        alldeadtimes.extend(deadtimes)

    deadsum = sum(alldeadtimes)
    print(
        "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format(
            len(alldeadtimes),
            min(alldeadtimes),
            max(alldeadtimes),
            deadsum,
            deadsum / len(alldeadtimes),
            deadsum / (len(threadids) * total_t) * 100.0,
        )
    )
    print()

#  All requested ranks processed; exit with a success status.
sys.exit(0)