Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
55b1a619
Commit
55b1a619
authored
Sep 21, 2017
by
Matthieu Schaller
Browse files
Merge branch 'master' of gitlab.cosma.dur.ac.uk:swift/swiftsim
parents
a4100a19
7d0da067
Changes
31
Expand all
Hide whitespace changes
Inline
Side-by-side
configure.ac
View file @
55b1a619
...
...
@@ -602,10 +602,10 @@ if test "$enable_warn" != "no"; then
# We will do this by hand instead and only default to the macro for unknown compilers
case "$ax_cv_c_compiler_vendor" in
gnu | clang)
CFLAGS="$CFLAGS -Wall -Wextra -Wno-unused-parameter"
CFLAGS="$CFLAGS -Wall -Wextra -Wno-unused-parameter
-Wshadow
"
;;
intel)
CFLAGS="$CFLAGS -w2 -Wunused-variable"
CFLAGS="$CFLAGS -w2 -Wunused-variable
-Wshadow
"
;;
*)
AX_CFLAGS_WARN_ALL
...
...
examples/analyse_tasks.py
View file @
55b1a619
This diff is collapsed.
Click to expand it.
examples/analyse_tasks_MPI.py
deleted
100755 → 0
View file @
a4100a19
#!/usr/bin/env python
"""
Usage:
analyse_tasks_MPI.py [options] input.dat
where input.dat is a thread info file for an MPI step. Use the '-y interval'
flag of the swift command to create these.
The output is an analysis of the task timings, including deadtime per thread
and step, total amount of time spent for each task type, for the whole step
and per thread and the minimum and maximum times spent per task type.
This file is part of SWIFT.
Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import
matplotlib
matplotlib
.
use
(
"Agg"
)
import
matplotlib.collections
as
collections
import
matplotlib.ticker
as
plticker
import
pylab
as
pl
import
sys
import
argparse
# Command-line interface: one positional thread-info file plus a verbosity
# switch and an optional rank selector ("all" or a single rank number).
parser = argparse.ArgumentParser(description="Analyse task dumps")
parser.add_argument(
    "input",
    help="Thread data file (-y output)",
)
parser.add_argument(
    "-v", "--verbose",
    action="store_true",
    default=False,
    dest="verbose",
    help="Verbose output (default: False)",
)
parser.add_argument(
    "-r", "--rank",
    action="store",
    default="all",
    dest="rank",
    help="Rank to process (default: all)",
)

# Parse once at start-up; the rest of the script reads "args" and "infile".
args = parser.parse_args()
infile = args.input
# Lookup tables translating the integer codes stored in the dump into names.
# Tasks and subtypes are indexed as in tasks.h, so the order of the entries
# is significant and must not be changed.
TASKTYPES = [
    "none",
    "sort",
    "self",
    "pair",
    "sub_self",
    "sub_pair",
    "init_grav",
    "ghost",
    "extra_ghost",
    "drift_part",
    "drift_gpart",
    "kick1",
    "kick2",
    "timestep",
    "send",
    "recv",
    "grav_top_level",
    "grav_long_range",
    "grav_mm",
    "grav_down",
    "cooling",
    "sourceterms",
    "count",
]

SUBTYPES = [
    "none",
    "density",
    "gradient",
    "force",
    "grav",
    "external_grav",
    "tend",
    "xv",
    "rho",
    "gpart",
    "multipole",
    "spart",
    "count",
]

# Labels for the 13 sort IDs (SIDs), looked up by integer SID when the
# per-SID timing table is printed.
SIDS = [
    "(-1,-1,-1)",
    "(-1,-1, 0)",
    "(-1,-1, 1)",
    "(-1, 0,-1)",
    "(-1, 0, 0)",
    "(-1, 0, 1)",
    "(-1, 1,-1)",
    "(-1, 1, 0)",
    "(-1, 1, 1)",
    "( 0,-1,-1)",
    "( 0,-1, 0)",
    "( 0,-1, 1)",
    "( 0, 0,-1)",
]
# Read input.
data
=
pl
.
loadtxt
(
infile
)
# Get the CPU clock to convert ticks into milliseconds.
full_step
=
data
[
0
,:]
updates
=
int
(
full_step
[
7
])
g_updates
=
int
(
full_step
[
8
])
s_updates
=
int
(
full_step
[
9
])
CPU_CLOCK
=
float
(
full_step
[
-
1
])
/
1000.0
if
args
.
verbose
:
print
"# CPU frequency:"
,
CPU_CLOCK
*
1000.0
print
"# updates:"
,
updates
print
"# g_updates:"
,
g_updates
print
"# s_updates:"
,
s_updates
nranks
=
int
(
max
(
data
[:,
0
]))
+
1
print
"# Number of ranks:"
,
nranks
if
args
.
rank
==
"all"
:
ranks
=
range
(
nranks
)
else
:
ranks
=
[
int
(
args
.
rank
)]
if
ranks
[
0
]
>=
nranks
:
print
"Error: maximum rank is "
+
str
(
nranks
-
1
)
sys
.
exit
(
1
)
maxthread
=
int
(
max
(
data
[:,
1
]))
+
1
print
"# Maximum thread id:"
,
maxthread
# Avoid start and end times of zero: drop every record whose tic (column 5)
# or toc (column 6) is zero. The second filter has to be applied to the
# result of the first one; filtering "data" again would throw the tic
# filter away and keep records with a zero start time.
sdata = data[data[:, 5] != 0]
sdata = sdata[sdata[:, 6] != 0]
# Now we process the required ranks.
#
# For every selected rank this prints, in order: the per-thread and
# all-thread task timing tables, the timings grouped by SID, and the three
# dead time reports (before the first task, after the last task, between
# tasks) followed by the combined dead time table. All times are in
# milliseconds (ticks divided by CPU_CLOCK).
#
# NOTE(review): this block was recovered from a listing that had lost its
# indentation; the nesting below (in particular the placement of the bare
# "print" statements and of the final sys.exit) is reconstructed from the
# statement order and should be confirmed against the repository copy.
for rank in ranks:
    print "# Rank", rank
    # Keep only the records of this rank (column 0 is the rank).
    data = sdata[sdata[:,0] == rank]

    # Recover the start and end time
    # The first record of the rank describes the whole step; its tic/toc
    # (columns 5 and 6) bound the step. It is then removed from the task data.
    full_step = data[0,:]
    tic_step = int(full_step[5])
    toc_step = int(full_step[6])
    data = data[1:,:]

    # Avoid start and end times of zero.
    data = data[data[:,5] != 0]
    data = data[data[:,6] != 0]

    # Calculate the time range.
    total_t = (toc_step - tic_step)/ CPU_CLOCK
    print "# Data range: ", total_t, "ms"
    print

    # Correct times to relative values.
    # After this, tic/toc are measured from the start of the step.
    start_t = float(tic_step)
    data[:,5] -= start_t
    data[:,6] -= start_t
    # NOTE(review): end_t is computed but not read again in this block.
    end_t = (toc_step - start_t) / CPU_CLOCK

    # One task list per thread id; the extra -1 entry is not visited by the
    # reporting loops below, which only iterate over range(maxthread).
    tasks = {}
    tasks[-1] = []
    for i in range(maxthread):
        tasks[i] = []

    # Gather into by thread data.
    # Each stored task is [tic, toc, tasktype, subtype, sid], with tic/toc
    # converted to milliseconds and sid taken from the last column.
    num_lines = pl.shape(data)[0]
    for line in range(num_lines):
        thread = int(data[line,1])
        tic = int(data[line,5]) / CPU_CLOCK
        toc = int(data[line,6]) / CPU_CLOCK
        tasktype = int(data[line,2])
        subtype = int(data[line,3])
        sid = int(data[line, -1])

        tasks[thread].append([tic,toc,tasktype,subtype, sid])

    # Sort by tic and gather used threads.
    # Every thread id below maxthread is listed, including threads that ran
    # no task for this rank.
    threadids = []
    for i in range(maxthread):
        tasks[i] = sorted(tasks[i], key=lambda task: task[0])
        threadids.append(i)

    # Times per task.
    print "# Task times:"
    print "# -----------"
    print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
          .format("type/subtype", "count","minimum", "maximum",
                  "sum", "mean", "percent")
    # Durations keyed by "type/subtype" over all threads, and keyed by SID.
    alltasktimes = {}
    sidtimes = {}
    for i in threadids:
        # Durations keyed by "type/subtype" for this thread only.
        tasktimes = {}
        for task in tasks[i]:
            key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]]
            dt = task[1] - task[0]
            if not key in tasktimes:
                tasktimes[key] = []
            tasktimes[key].append(dt)

            if not key in alltasktimes:
                alltasktimes[key] = []
            alltasktimes[key].append(dt)

            # Only tasks with a non-negative SID contribute to the SID table.
            my_sid = task[4]
            if my_sid > -1:
                if not my_sid in sidtimes:
                    sidtimes[my_sid] = []
                sidtimes[my_sid].append(dt)

        print "# Thread : ", i
        for key in sorted(tasktimes.keys()):
            taskmin = min(tasktimes[key])
            taskmax = max(tasktimes[key])
            tasksum = sum(tasktimes[key])
            # The percentage is relative to the step time of this one thread.
            print "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
                  .format(key, len(tasktimes[key]), taskmin, taskmax, tasksum,
                          tasksum / len(tasktimes[key]), tasksum / total_t * 100.0)
        print

    print "# All threads : "
    for key in sorted(alltasktimes.keys()):
        taskmin = min(alltasktimes[key])
        taskmax = max(alltasktimes[key])
        tasksum = sum(alltasktimes[key])
        # The percentage is relative to the step time summed over all threads.
        print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
              .format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum,
                      tasksum / len(alltasktimes[key]),
                      tasksum / (len(threadids) * total_t) * 100.0)
    print

    # For pairs, show stuff sorted by SID
    print "# By SID (all threads): "
    print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\
          .format("Pair/Sub-pair SID", "count","minimum", "maximum",
                  "sum", "mean", "percent")

    # Always print all 13 SIDs; those without any task get a row of zeros.
    for sid in range(0,13):
        if sid in sidtimes:
            sidmin = min(sidtimes[sid])
            sidmax = max(sidtimes[sid])
            sidsum = sum(sidtimes[sid])
            sidcount = len(sidtimes[sid])
            sidmean = sidsum / sidcount
        else:
            sidmin = 0.
            sidmax = 0.
            sidsum = 0.
            sidcount = 0
            sidmean = 0.
        print "{0:3d} {1:15s}: {2:7d} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.4f} {7:9.2f}"\
              .format(sid, SIDS[sid], sidcount, sidmin, sidmax, sidsum,
                      sidmean, sidsum / (len(threadids) * total_t) * 100.0)
    print

    # Dead times.
    print "# Times not in tasks (deadtimes)"
    print "# ------------------------------"
    print "# Time before first task:"
    print "# no. : {0:>9s} {1:>9s}".format("value", "percent")
    predeadtimes = []
    for i in threadids:
        if len(tasks[i]) > 0:
            # Tasks are sorted by tic, so this is the start of the first task.
            predeadtime = tasks[i][0][0]
            print "thread {0:2d}: {1:9.4f} {2:9.4f}"\
                  .format(i, predeadtime, predeadtime / total_t * 100.0)
            predeadtimes.append(predeadtime)
        else:
            # Threads without tasks are not printed but count as zero.
            predeadtimes.append(0.0)

    predeadmin = min(predeadtimes)
    predeadmax = max(predeadtimes)
    predeadsum = sum(predeadtimes)
    print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
          .format("count", "minimum", "maximum", "sum", "mean", "percent")
    print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
          .format(len(predeadtimes), predeadmin, predeadmax, predeadsum,
                  predeadsum / len(predeadtimes),
                  predeadsum / (len(threadids) * total_t ) * 100.0)
    print

    print "# Time after last task:"
    print "# no. : {0:>9s} {1:>9s}".format("value", "percent")
    postdeadtimes = []
    for i in threadids:
        if len(tasks[i]) > 0:
            # Gap between the toc of the last listed task and the step end.
            postdeadtime = total_t - tasks[i][-1][1]
            print "thread {0:2d}: {1:9.4f} {2:9.4f}"\
                  .format(i, postdeadtime, postdeadtime / total_t * 100.0)
            postdeadtimes.append(postdeadtime)
        else:
            postdeadtimes.append(0.0)

    postdeadmin = min(postdeadtimes)
    postdeadmax = max(postdeadtimes)
    postdeadsum = sum(postdeadtimes)
    print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
          .format("count", "minimum", "maximum", "sum", "mean", "percent")
    print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
          .format(len(postdeadtimes), postdeadmin, postdeadmax, postdeadsum,
                  postdeadsum / len(postdeadtimes),
                  postdeadsum / (len(threadids) * total_t ) * 100.0)
    print

    # Time in engine, i.e. from first to last tasks.
    print "# Time between tasks (engine deadtime):"
    print "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
          .format("count", "minimum", "maximum", "sum", "mean", "percent")
    enginedeadtimes = []
    for i in threadids:
        deadtimes = []
        # Start from the first task's tic so the first gap is zero; that
        # entry is dropped again below.
        if len(tasks[i]) > 0:
            last = tasks[i][0][0]
        else:
            last = 0.0
        for task in tasks[i]:
            dt = task[0] - last
            deadtimes.append(dt)
            last = task[1]

        # Drop first value, last value already gone.
        if len(deadtimes) > 1:
            deadtimes = deadtimes[1:]
        else:
            # Only one or fewer tasks, so no deadtime by definition.
            deadtimes = [0.0]

        deadmin = min(deadtimes)
        deadmax = max(deadtimes)
        deadsum = sum(deadtimes)
        print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
              .format(i, len(deadtimes), deadmin, deadmax, deadsum,
                      deadsum / len(deadtimes), deadsum / total_t * 100.0)
        enginedeadtimes.extend(deadtimes)

    deadmin = min(enginedeadtimes)
    deadmax = max(enginedeadtimes)
    deadsum = sum(enginedeadtimes)
    print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
          .format(len(enginedeadtimes), deadmin, deadmax, deadsum,
                  deadsum / len(enginedeadtimes),
                  deadsum / (len(threadids) * total_t ) * 100.0)
    print

    # All times in step.
    print "# All deadtimes:"
    print "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\
          .format("count", "minimum", "maximum", "sum", "mean", "percent")
    alldeadtimes = []
    for i in threadids:
        deadtimes = []
        # Here the gaps run from the step start (0) to the step end
        # (total_t), so the leading and trailing dead times are included.
        last = 0
        for task in tasks[i]:
            dt = task[0] - last
            deadtimes.append(dt)
            last = task[1]
        dt = total_t - last
        deadtimes.append(dt)

        deadmin = min(deadtimes)
        deadmax = max(deadtimes)
        deadsum = sum(deadtimes)
        print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\
              .format(i, len(deadtimes), deadmin, deadmax, deadsum,
                      deadsum / len(deadtimes), deadsum / total_t * 100.0)
        alldeadtimes.extend(deadtimes)

    deadmin = min(alldeadtimes)
    deadmax = max(alldeadtimes)
    deadsum = sum(alldeadtimes)
    print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\
          .format(len(alldeadtimes), deadmin, deadmax, deadsum,
                  deadsum / len(alldeadtimes),
                  deadsum / (len(threadids) * total_t ) * 100.0)
    print

# All requested ranks have been reported; finish with a success status.
sys.exit(0)
examples/main.c
View file @
55b1a619
...
...
@@ -153,12 +153,6 @@ int main(int argc, char *argv[]) {
#endif
/* Let's pin the main thread */
#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
if
(((
ENGINE_POLICY
)
&
engine_policy_setaffinity
)
==
engine_policy_setaffinity
)
engine_pin
();
#endif
/* Welcome to SWIFT, you made the right choice */
if
(
myrank
==
0
)
greetings
();
...
...
@@ -329,6 +323,12 @@ int main(int argc, char *argv[]) {
return
1
;
}
/* Let's pin the main thread, now we know if affinity will be used. */
#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
if
(
with_aff
&&
((
ENGINE_POLICY
)
&
engine_policy_setaffinity
)
==
engine_policy_setaffinity
)
engine_pin
();
#endif
/* Genesis 1.1: And then, there was time ! */
clocks_set_cpufreq
(
cpufreq
);
...
...
@@ -791,10 +791,10 @@ int main(int argc, char *argv[]) {
if
(
dump_threadpool
&&
(
dump_threadpool
==
1
||
j
%
dump_threadpool
==
1
))
{
char
dumpfile
[
40
];
#ifdef WITH_MPI
snprintf
(
dumpfile
,
3
0
,
"threadpool_info-rank%d-step%d.dat"
,
engine_rank
,
snprintf
(
dumpfile
,
4
0
,
"threadpool_info-rank%d-step%d.dat"
,
engine_rank
,
j
+
1
);
#else
snprintf
(
dumpfile
,
3
0
,
"threadpool_info-step%d.dat"
,
j
+
1
);
snprintf
(
dumpfile
,
4
0
,
"threadpool_info-step%d.dat"
,
j
+
1
);
#endif // WITH_MPI
threadpool_dump_log
(
&
e
.
threadpool
,
dumpfile
,
1
);
}
else
{
...
...
examples/plot_tasks.py
View file @
55b1a619
#!/usr/bin/env python
"""
Usage:
plot_tasks.py [options] input.dat output
.png
plot_tasks.py [options] input.dat
png-
output
-prefix
where input.dat is a thread info file for a step. Use the '-y interval' flag
of the swift command to create these. The output plot will be called
'output.png'. The --limit option can be used to produce plots with the same
time span and the --expand option to expand each thread line into '*expand'
lines, so that adjacent tasks of the same type can be distinguished. Other
options can be seen using the --help flag.
of the swift or swift_mpi commands to create these (these will need to be
built with the --enable-task-debugging configure option). The output plot will
be called 'png-output-prefix.png' or 'png-output-prefix<mpi-rank>.png',
depending on whether the input thread info file is generated by the swift or
swift_mpi command. If swift_mpi each rank has a separate plot.
The --limit option can be used to produce plots with the same time
span and the --expand option to expand each thread line into '*expand' lines,
so that adjacent tasks of the same type can be distinguished. Other options
can be seen using the --help flag.
See the command 'process_plot_tasks' to efficiently wrap this command to
process a number of thread info files and create an HTML file to view them.
This file is part of SWIFT.
Copyright (c) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
Copyright (C) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
Matthieu Schaller (matthieu.schaller@durham.ac.uk)
(c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
(C) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
All Rights Reserved.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
...
...
@@ -42,10 +52,10 @@ import argparse
parser
=
argparse
.
ArgumentParser
(
description
=
"Plot task graphs"
)
parser
.
add_argument
(
"input"
,
help
=
"Thread data file (-y output)"
)
parser
.
add_argument
(
"out
png
"
,
help
=
"
N
ame for output graphic file (PNG)"
)
parser
.
add_argument
(
"out
base
"
,
help
=
"
Base n
ame for output graphic file
s
(PNG)"
)
parser
.
add_argument
(
"-l"
,
"--limit"
,
dest
=
"limit"
,
help
=
"Upper time limit in millisecs (def: depends on data)"
,
default
=
0
,
type
=
in
t
)
default
=
0
,
type
=
floa
t
)
parser
.
add_argument
(
"-e"
,
"--expand"
,
dest
=
"expand"
,
help
=
"Thread expansion factor (def: 1)"
,
default
=
1
,
type
=
int
)
...
...
@@ -61,12 +71,23 @@ parser.add_argument("--nolegend", dest="nolegend",
parser
.
add_argument
(
"-v"
,
"--verbose"
,
dest
=
"verbose"
,
help
=
"Show colour assignments and other details (def: False)"
,
default
=
False
,
action
=
"store_true"
)
parser
.
add_argument
(
"-r"
,
"--ranks"
,
dest
=
"ranks"
,
help
=
"Comma delimited list of ranks to process, if MPI in effect"
,
default
=
None
,
type
=
str
)
parser
.
add_argument
(
"-m"
,
"--mintic"
,
dest
=
"mintic"
,
help
=
"Value of the smallest tic (def: least in input file)"
,
default
=-
1
,
type
=
int
)
args
=
parser
.
parse_args
()
infile
=
args
.
input
out
png
=
args
.
out
png
out
base
=
args
.
out
base
delta_t
=
args
.
limit
expand
=
args
.
expand
mintic
=
args
.
mintic
if
args
.
ranks
!=
None
:
ranks
=
[
int
(
item
)
for
item
in
args
.
ranks
.
split
(
','
)]
else
:
ranks
=
None
# Basic plot configuration.
PLOT_PARAMS
=
{
"axes.labelsize"
:
10
,
...
...
@@ -78,7 +99,7 @@ PLOT_PARAMS = {"axes.labelsize": 10,
"figure.figsize"
:
(
args
.
width
,
args
.
height
),
"figure.subplot.left"
:
0.03
,
"figure.subplot.right"
:
0.995
,
"figure.subplot.bottom"
:
0.
09
,
"figure.subplot.bottom"
:
0.
1
,
"figure.subplot.top"
:
0.99
,
"figure.subplot.wspace"
:
0.
,
"figure.subplot.hspace"
:
0.
,
...
...
@@ -89,20 +110,19 @@ pl.rcParams.update(PLOT_PARAMS)
# Tasks and subtypes. Indexed as in tasks.h.
TASKTYPES
=
[
"none"
,
"sort"
,
"self"
,
"pair"
,
"sub_self"
,
"sub_pair"
,
"init_grav"
,
"ghost"
,
"extra_ghost"
,
"drift_part"
,
"drift_gpart"
,
"kick1"
,
"kick2"
,
"timestep"
,
"send"
,
"recv"
,
"grav_top_level"
,
"grav_long_range"
,
"grav_
ghost
"
,
"grav_
mm
"
,
"grav_down"
,
"cooling"
,
"sourceterms"
,
"count"
]
"init_grav"
,
"ghost"
,
"extra_ghost"
,
"drift_part"
,
"drift_gpart"
,
"kick1"
,
"kick2"
,
"timestep"
,
"send"
,
"recv"
,
"grav_top_level"
,
"grav_long_range"
,
"grav_
mm
"
,
"grav_
down"
,
"cooling
"
,
"sourceterms"
,
"count"
]
SUBTYPES
=
[
"none"
,
"density"
,
"gradient"
,
"force"
,
"grav"
,
"external_grav"
,
"tend"
,
"xv"
,
"rho"
,
"gpart"
,
"multipole"
,
"spart"
,
"count"
]
# Task/subtypes of interest.
FULLTYPES
=
[
"self/force"
,
"self/density"
,
"self/grav"
,
"sub_self/force"
,
"sub_self/density"
,
"sub_self/grav"
,
"pair/force"
,
"pair/density"
,
"pair/grav"
,
"sub_pair/force"
,
"sub_pair/density"
,
"sub_pair/grav"
,
"recv/xv"
,
"send/xv"
,
"recv/rho"
,
"send/rho"
,
"sub_self/density"
,
"pair/force"
,
"pair/density"
,
"pair/grav"
,
"sub_pair/force"
,
"sub_pair/density"
,
"recv/xv"
,
"send/xv"
,
"recv/rho"
,
"send/rho"
,
"recv/tend"
,
"send/tend"
]
# A number of colours for the various types. Recycled when there are
...
...
@@ -110,7 +130,7 @@ FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force",
colours
=
[
"cyan"
,
"lightgray"
,
"darkblue"
,
"yellow"
,
"tan"
,
"dodgerblue"
,
"sienna"
,
"aquamarine"
,
"bisque"
,
"blue"
,
"green"
,
"lightgreen"
,
"brown"
,
"purple"
,
"moccasin"
,
"olivedrab"
,
"chartreuse"
,
"
steelblu
e"
,
"darkgreen"
,
"green"
,
"mediumseagreen"
,
"
darksag
e"
,
"darkgreen"
,
"green"
,
"mediumseagreen"
,
"mediumaquamarine"
,
"darkslategrey"
,
"mediumturquoise"
,
"black"
,
"cadetblue"
,
"skyblue"
,
"red"
,
"slategray"
,
"gold"
,
"slateblue"
,
"blueviolet"
,
"mediumorchid"
,
"firebrick"
,
...
...
@@ -144,129 +164,207 @@ if args.verbose:
# Read input.
data
=
pl
.
loadtxt
(
infile
)
nthread
=
int
(
max
(
data
[:,
0
]))
+
1
print
"Number of threads:"
,
nthread
# Recover the start and end time
# Do we have an MPI file?
full_step
=
data
[
0
,:]
tic_step
=
int
(
full_step
[
4
])
toc_step
=
int
(
full_step
[
5
])
if
full_step
.
size
==
13
:
print
"# MPI mode"
mpimode
=
True
if
ranks
==
None
:
ranks
=
range
(
int
(
max
(
data
[:,
0
]))
+
1
)
print
"# Number of ranks:"
,
len
(
ranks
)
rankcol
=
0
threadscol
=
1
taskcol
=
2
subtaskcol
=
3
ticcol
=
5
toccol
=
6
else
:
print
"# non MPI mode"
ranks
=
[
0
]
mpimode
=
False
rankcol
=
-
1
threadscol
=
0
taskcol
=
1
subtaskcol
=
2
ticcol
=
4
toccol
=
5
# Get CPU_CLOCK to convert ticks into milliseconds.
CPU_CLOCK
=
float
(
full_step
[
-
1
])
/
1000.0
data
=
data
[
1
:,:]
if
args
.
verbose
:
print
"CPU frequency:"
,
CPU_CLOCK
*
1000.0
print
"
#
CPU frequency:"
,
CPU_CLOCK
*
1000.0
# Avoid start and end times of zero.
data
=
data
[
data
[:,
4
]
!=
0
]
data
=
data
[
data
[:,
5
]
!=
0
]
nthread
=
int
(
max
(
data
[:,
threadscol
]))
+
1
print
"# Number of threads:"
,
nthread
# Calculate the time range, if not given.
# Avoid start and end times of zero.
sdata
=
data
[
data
[:,
ticcol
]
!=
0
]
sdata
=
sdata
[
sdata
[:,
toccol
]
!=
0
]
# Each rank can have different clocks (compute node), but we want to use the
# same delta times range for comparisons, so we suck it up and take the hit of
# precalculating this, unless the user knows better.
delta_t
=
delta_t
*
CPU_CLOCK
if
delta_t
==
0
:
dt
=
toc_step
-
tic_step
if
dt
>
delta_t
:
delta_t
=
dt
print
"Data range: "
,
delta_t
/
CPU_CLOCK
,
"ms"
# Once more doing the real gather and plots this time.
start_t
=
float
(
tic_step
)
data
[:,
4
]
-=
start_t
data
[:,
5
]
-=
start_t
end_t
=
(
toc_step
-
start_t
)
/
CPU_CLOCK
tasks
=
{}
tasks
[
-
1
]
=
[]
for
i
in
range
(
nthread
*
expand
):
tasks
[
i
]
=
[]
# Counters for each thread when expanding.
ecounter
=
[]
for
i
in
range
(
nthread
):
ecounter
.
append
(
0
)
num_lines
=
pl
.
size
(
data
)
/
pl
.
size
(
full_step
)
for
line
in
range
(
num_lines
):
thread
=
int
(
data
[
line
,
0
])
# Expand to cover extra lines if expanding.
ethread
=
thread
*
expand
+
(
ecounter
[
thread
]
%
expand
)
ecounter
[
thread
]
=
ecounter
[
thread
]
+
1
thread
=
ethread
tasks
[
thread
].
append
({})
tasktype
=
TASKTYPES
[
int
(
data
[
line
,
1
])]