#!/usr/bin/env python
"""
Usage:
    analyse_tasks.py [options] input.dat

where input.dat is a thread info file for a step (MPI or non-MPI). Use the
'-y interval' flag of the swift and swift_mpi commands to create these
(you will also need to configure with the --enable-task-debugging option).

The output is an analysis of the task timings, including deadtime per thread
and step, total amount of time spent for each task type, for the whole step
and per thread and the minimum and maximum times spent per task type.

This file is part of SWIFT.
Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import matplotlib
32

33
34
35
36
37
38
39
40
41
42
43
matplotlib.use("Agg")
import matplotlib.collections as collections
import matplotlib.ticker as plticker
import pylab as pl
import sys
import argparse

#  Command line: one positional input file plus optional switches.
parser = argparse.ArgumentParser(description="Analyse task dumps")
parser.add_argument("input", help="Thread data file (-y output)")
parser.add_argument(
    "-v",
    "--verbose",
    action="store_true",
    default=False,
    dest="verbose",
    help="Verbose output (default: False)",
)
#  The rank is kept as a string so that the special value "all" is accepted.
parser.add_argument(
    "-r",
    "--rank",
    action="store",
    default="all",
    dest="rank",
    help="Rank to process (default: all)",
)
parser.add_argument(
    "--html",
    action="store_true",
    default=False,
    dest="html",
    help="Use html titles and anchors in the output (default: False)",
)

args = parser.parse_args()
infile, with_html = args.input, args.html

#  Tasks and subtypes. Indexed as in tasks.h.
#  The position of each name in the list is the integer task type found in
#  the task column of the input file, so the order must not be changed.
TASKTYPES = [
    "none",
    "sort",
    "self",
    "pair",
    "sub_self",
    "sub_pair",
    "init_grav",
    "init_grav_out",
    "ghost_in",
    "ghost",
    "ghost_out",
    "extra_ghost",
    "drift_part",
    "drift_spart",
    "drift_bpart",
    "drift_gpart",
    "drift_gpart_out",
    "hydro_end_force",
    "kick1",
    "kick2",
    "timestep",
    "timestep_limiter",
    "timestep_sync",
    "send",
    "recv",
    "grav_long_range",
    "grav_mm",
    "grav_down_in",
    "grav_down",
    "grav_mesh",
    "grav_end_force",
    "cooling",
    "cooling_in",
    "cooling_out",
    "star_formation",
    "star_formation_in",
    "star_formation_out",
    "logger",
    "stars_in",
    "stars_out",
    "stars_ghost_in",
    "stars_ghost",
    "stars_ghost_out",
    "stars_sort",
    "stars_resort",
    "bh_in",
    "bh_out",
    "bh_ghost",
    "bh_swallow_ghost1",
    "bh_swallow_ghost2",
    "bh_swallow_ghost3",
    "fof_self",
    "fof_pair",
    "count",
]

#  Task subtype names; the list position is the integer subtype found in the
#  subtype column of the input file, so the order must not be changed.
SUBTYPES = [
    "none",
    "density",
    "gradient",
    "force",
    "limiter",
    "grav",
    "external_grav",
    "tend_part",
    "tend_gpart",
    "tend_spart",
    "tend_bpart",
    "xv",
    "rho",
    "part_swallow",
    "bpart_merger",
    "gpart",
    "multipole",
    "spart",
    "stars_density",
    "stars_feedback",
    "sf_counts",
    "bpart_rho",
    "bpart_swallow",
    "bpart_feedback",
    "bh_density",
    "bh_swallow",
    "do_gas_swallow",
    "do_bh_swallow",
    "bh_feedback",
    "count",
]

#  Labels for the 13 SID values: the first 13 of the 27 triples over
#  (-1, 0, 1) in lexicographic order, each component printed two wide.
SIDS = [
    "({0:2d},{1:2d},{2:2d})".format(a, b, c)
    for a in (-1, 0, 1)
    for b in (-1, 0, 1)
    for c in (-1, 0, 1)
][:13]

#  Read input. The first row describes the whole step, the remaining rows
#  are individual tasks.
data = pl.loadtxt(infile)
full_step = data[0, :]

#  Do we have an MPI file? A 13 column first row marks MPI output, which
#  carries an extra leading rank column, shifting all other columns by one.
if full_step.size == 13:
    print("# MPI mode")
    mpimode = True
    rankcol = 0
    threadscol = 1
    taskcol = 2
    subtaskcol = 3
    ticcol = 5
    toccol = 6
    updates = int(full_step[7])
    g_updates = int(full_step[8])
    s_updates = int(full_step[9])
    nranks = int(max(data[:, rankcol])) + 1
    print("# Number of ranks:", nranks)
else:
    print("# non MPI mode")
    nranks = 1
    mpimode = False
    rankcol = -1
    threadscol = 0
    taskcol = 1
    subtaskcol = 2
    ticcol = 4
    toccol = 5
    updates = int(full_step[6])
    g_updates = int(full_step[7])
    s_updates = int(full_step[8])

#  Get the CPU clock to convert ticks into milliseconds. The clock value is
#  the last entry of the first row.
CPU_CLOCK = float(full_step[-1]) / 1000.0
if args.verbose:
    print("# CPU frequency:", CPU_CLOCK * 1000.0)
print("#   updates:", updates)
print("# g_updates:", g_updates)
print("# s_updates:", s_updates)

#  Decide which ranks to analyse.
if mpimode:
    if args.rank == "all":
        ranks = list(range(nranks))
    else:
        #  A single rank was requested: it must be an integer in [0, nranks).
        try:
            ranks = [int(args.rank)]
        except ValueError:
            print("Error: rank must be an integer or 'all', not " + repr(args.rank))
            sys.exit(1)
        if ranks[0] < 0:
            print("Error: minimum rank is 0")
            sys.exit(1)
        if ranks[0] >= nranks:
            print("Error: maximum rank is " + str(nranks - 1))
            sys.exit(1)
else:
    #  Non-MPI files are processed in a single pass; the rank value itself is
    #  only read in MPI mode, so this is just a one-element placeholder.
    ranks = [1]

#  One more than the largest thread id present in the file.
maxthread = int(max(data[:, threadscol])) + 1
print("# Maximum thread id:", maxthread)

#  Avoid start and end times of zero. The second filter must be applied to
#  the result of the first, otherwise the tic filter is silently discarded.
sdata = data[data[:, ticcol] != 0]
sdata = sdata[sdata[:, toccol] != 0]

#  Now we process the required ranks.
for rank in ranks:
    if mpimode:
        print("# Rank", rank)
        data = sdata[sdata[:, rankcol] == rank]
        if data.shape[0] == 0:
            #  Nothing was recorded for this rank, so there is no step row
            #  to read the time range from: report it and move on.
            print("# No data for rank", rank)
            print()
            continue
        #  The first row of each rank describes the whole step on that rank.
        full_step = data[0, :]
    else:
        data = sdata

    #  Recover the start and end time of the step, then drop the step row so
    #  only task rows remain.
    tic_step = int(full_step[ticcol])
    toc_step = int(full_step[toccol])
    data = data[1:, :]

    #  Avoid start and end times of zero. The boolean indexing also makes a
    #  copy, so the in-place shift below does not modify sdata.
    data = data[data[:, ticcol] != 0]
    data = data[data[:, toccol] != 0]

    #  Calculate the time range, ticks are converted to milliseconds.
    total_t = (toc_step - tic_step) / CPU_CLOCK
    print("# Data range: ", total_t, "ms")
    print()

    #  Correct times to be relative to the start of the step.
    start_t = float(tic_step)
    data[:, ticcol] -= start_t
    data[:, toccol] -= start_t

    #  One task list per thread id, plus one for a thread id of -1.
    tasks = {i: [] for i in range(-1, maxthread)}

    #  Gather into by thread data. Each entry is
    #  [tic, toc, tasktype, subtype, sid] with tic and toc in milliseconds;
    #  the sid is read from the last column of the row.
    for row in data:
        thread = int(row[threadscol])
        tic = int(row[ticcol]) / CPU_CLOCK
        toc = int(row[toccol]) / CPU_CLOCK
        tasktype = int(row[taskcol])
        subtype = int(row[subtaskcol])
        sid = int(row[-1])
        tasks[thread].append([tic, toc, tasktype, subtype, sid])

    #  Sort by tic and gather used threads, i.e. those that ran any tasks.
    threadids = []
    for i in range(maxthread):
        if tasks[i]:
            tasks[i].sort(key=lambda task: task[0])
            threadids.append(i)

    #  Times per task.
    print("# Task times:")
    print("# -----------")
    print(
        "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}".format(
            "type/subtype", "count", "minimum", "maximum", "sum", "mean", "percent"
        )
    )

    #  Task durations over all threads, keyed by "type/subtype" and by SID.
    #  Both are filled here and reported after the per-thread tables.
    alltasktimes = {}
    sidtimes = {}
    for i in threadids:
        #  Durations for this thread only, keyed by "type/subtype".
        tasktimes = {}
        for task in tasks[i]:
            key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]]
            dt = task[1] - task[0]
            tasktimes.setdefault(key, []).append(dt)
            alltasktimes.setdefault(key, []).append(dt)

            #  Only tasks with a non-negative SID are binned by SID.
            my_sid = task[4]
            if my_sid > -1:
                sidtimes.setdefault(my_sid, []).append(dt)

        if with_html:
            print('<div id="thread{}"></div>'.format(i))
        print("# Thread : ", i)
        for key in sorted(tasktimes):
            times = tasktimes[key]
            tasksum = sum(times)
            print(
                "{0:24s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format(
                    key,
                    len(times),
                    min(times),
                    max(times),
                    tasksum,
                    tasksum / len(times),
                    tasksum / total_t * 100.0,
                )
            )
        print()

    #  Task times summed over all threads. The percentage is relative to the
    #  step time multiplied by the number of threads that ran tasks.
    if with_html:
        print('<div id="all"></div>')
    print("# All threads : ")
    for key in sorted(alltasktimes):
        times = alltasktimes[key]
        tasksum = sum(times)
        print(
            "{0:23s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format(
                key,
                len(times),
                min(times),
                max(times),
                tasksum,
                tasksum / len(times),
                tasksum / (len(threadids) * total_t) * 100.0,
            )
        )
    print()

    # For pairs, show stuff sorted by SID
    if with_html:
        print('<div id="sid"></div>')
    print("# By SID (all threads): ")
    print(
        "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}".format(
            "Pair/Sub-pair SID", "count", "minimum", "maximum", "sum", "mean", "percent"
        )
    )

    #  One line per known SID, in order; SIDs without any task get zeros.
    for sid, sidlabel in enumerate(SIDS):
        times = sidtimes.get(sid)
        if times:
            sidmin = min(times)
            sidmax = max(times)
            sidsum = sum(times)
            sidcount = len(times)
            sidmean = sidsum / sidcount
        else:
            sidmin = 0.0
            sidmax = 0.0
            sidsum = 0.0
            sidcount = 0
            sidmean = 0.0
        print(
            "{0:3d} {1:15s}: {2:7d} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.4f} {7:9.2f}".format(
                sid,
                sidlabel,
                sidcount,
                sidmin,
                sidmax,
                sidsum,
                sidmean,
                sidsum / (len(threadids) * total_t) * 100.0,
            )
        )
    print()

    #  Dead times.
    print("# Times not in tasks (deadtimes)")
    print("# ------------------------------")
    if with_html:
        print('<div id="before"></div>')
    print("# Time before first task:")
    print("# no.    : {0:>9s} {1:>9s}".format("value", "percent"))

    #  Task times are relative to the start of the step, so the tic of the
    #  earliest task of a thread is the time that thread waited to start.
    predeadtimes = []
    for i in threadids:
        if tasks[i]:
            predeadtime = tasks[i][0][0]
            print(
                "thread {0:2d}: {1:9.4f} {2:9.4f}".format(
                    i, predeadtime, predeadtime / total_t * 100.0
                )
            )
            predeadtimes.append(predeadtime)
        else:
            predeadtimes.append(0.0)

    predeadmin = min(predeadtimes)
    predeadmax = max(predeadtimes)
    predeadsum = sum(predeadtimes)
    print(
        "#        : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format(
            "count", "minimum", "maximum", "sum", "mean", "percent"
        )
    )
    print(
        "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format(
            len(predeadtimes),
            predeadmin,
            predeadmax,
            predeadsum,
            predeadsum / len(predeadtimes),
            predeadsum / (len(threadids) * total_t) * 100.0,
        )
    )
    print()

    #  Time from the toc of the last task (by start time) of each thread to
    #  the end of the step.
    if with_html:
        print('<div id="after"></div>')
    print("# Time after last task:")
    print("# no.    : {0:>9s} {1:>9s}".format("value", "percent"))
    postdeadtimes = []
    for i in threadids:
        if tasks[i]:
            postdeadtime = total_t - tasks[i][-1][1]
            print(
                "thread {0:2d}: {1:9.4f} {2:9.4f}".format(
                    i, postdeadtime, postdeadtime / total_t * 100.0
                )
            )
            postdeadtimes.append(postdeadtime)
        else:
            postdeadtimes.append(0.0)

    postdeadmin = min(postdeadtimes)
    postdeadmax = max(postdeadtimes)
    postdeadsum = sum(postdeadtimes)
    print(
        "#        : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format(
            "count", "minimum", "maximum", "sum", "mean", "percent"
        )
    )
    print(
        "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format(
            len(postdeadtimes),
            postdeadmin,
            postdeadmax,
            postdeadsum,
            postdeadsum / len(postdeadtimes),
            postdeadsum / (len(threadids) * total_t) * 100.0,
        )
    )
    print()

    #  Time in engine, i.e. from first to last tasks.
    if with_html:
        print('<div id="between"></div>')
    print("# Time between tasks (engine deadtime):")
    print(
        "# no.    : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format(
            "count", "minimum", "maximum", "sum", "mean", "percent"
        )
    )
    enginedeadtimes = []
    for i in threadids:
        #  Gap between the toc of each task and the tic of the next one on
        #  the same thread; time before the first and after the last task
        #  is not included.
        deadtimes = [
            following[0] - previous[1]
            for previous, following in zip(tasks[i], tasks[i][1:])
        ]
        if not deadtimes:
            #  Only one or fewer tasks, so no deadtime by definition.
            deadtimes = [0.0]

        deadmin = min(deadtimes)
        deadmax = max(deadtimes)
        deadsum = sum(deadtimes)
        print(
            "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format(
                i,
                len(deadtimes),
                deadmin,
                deadmax,
                deadsum,
                deadsum / len(deadtimes),
                deadsum / total_t * 100.0,
            )
        )
        enginedeadtimes.extend(deadtimes)

    deadmin = min(enginedeadtimes)
    deadmax = max(enginedeadtimes)
    deadsum = sum(enginedeadtimes)
    print(
        "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format(
            len(enginedeadtimes),
            deadmin,
            deadmax,
            deadsum,
            deadsum / len(enginedeadtimes),
            deadsum / (len(threadids) * total_t) * 100.0,
        )
    )
    print()

    #  All times in step.
    if with_html:
        print('<div id="dead"></div>')
    print("# All deadtimes:")
    print(
        "# no.    : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format(
            "count", "minimum", "maximum", "sum", "mean", "percent"
        )
    )
    alldeadtimes = []
    for i in threadids:
        #  Every gap on this thread: from the start of the step to the first
        #  task, between consecutive tasks, and from the last task to the
        #  end of the step.
        deadtimes = []
        last = 0
        for task in tasks[i]:
            deadtimes.append(task[0] - last)
            last = task[1]
        deadtimes.append(total_t - last)

        deadmin = min(deadtimes)
        deadmax = max(deadtimes)
        deadsum = sum(deadtimes)
        print(
            "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format(
                i,
                len(deadtimes),
                deadmin,
                deadmax,
                deadsum,
                deadsum / len(deadtimes),
                deadsum / total_t * 100.0,
            )
        )
        alldeadtimes.extend(deadtimes)

    deadmin = min(alldeadtimes)
    deadmax = max(alldeadtimes)
    deadsum = sum(alldeadtimes)
    print(
        "all      : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format(
            len(alldeadtimes),
            deadmin,
            deadmax,
            deadsum,
            deadsum / len(alldeadtimes),
            deadsum / (len(threadids) * total_t) * 100.0,
        )
    )
    print()

sys.exit(0)