diff --git a/examples/analyse_runtime.py b/examples/analyse_runtime.py deleted file mode 100644 index 9093dfc2346e7ee5f5707a7085a786672caa2104..0000000000000000000000000000000000000000 --- a/examples/analyse_runtime.py +++ /dev/null @@ -1,153 +0,0 @@ -################################################################################ -# This file is part of SWIFT. -# Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -# -################################################################################ - -import re -import sys -import matplotlib -matplotlib.use("Agg") -from pylab import * - -# Plot parameters -params = {'axes.labelsize': 10, -'axes.titlesize': 10, -'font.size': 12, -'legend.fontsize': 12, -'xtick.labelsize': 10, -'ytick.labelsize': 10, -'text.usetex': True, - 'figure.figsize' : (6.45,6.45), -'figure.subplot.left' : 0.06, -'figure.subplot.right' : 0.99, -'figure.subplot.bottom' : 0.06, -'figure.subplot.top' : 0.99, -'figure.subplot.wspace' : 0.21, -'figure.subplot.hspace' : 0.13, -'lines.markersize' : 6, -'lines.linewidth' : 3., -'text.latex.unicode': True } -rcParams.update(params) - -threshold = 0.008 - -num_files = len(sys.argv) - 1 - -labels = ['Gpart assignment', 'Mesh comunication', 'Forward Fourier transform', 'Green function', 'Backwards Fourier transform', - 'Making gravity tasks', 'Splitting tasks', 'Counting and linking tasks', 'Setting super-pointers', 'Linking gravity tasks', - 'Creating send tasks', 'Exchanging cell tags', 'Creating recv tasks', 'Setting unlocks', 'Ranking the tasks', 'scheduler_reweight:', - 'space_rebuild:', 'engine_drift_all:', 'engine_unskip:', 'engine_collect_end_of_step:', 'engine_launch:', 'writing particle properties', - 'engine_repartition:', 'engine_exchange_cells:', 'Dumping restart files', 'engine_print_stats:', 'engine_marktasks:', - 'Reading initial conditions', 'engine_print_task_counts:', 'engine_drift_top_multipoles:', 'Communicating rebuild flag'] -is_rebuild = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0] -times = np.zeros(len(labels)) -counts = np.zeros(len(labels)) - -cols = ['0.5', '#332288', '#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77', - '#CC6677', '#882255', '#AA4499', '#661100', '#6699CC', '#AA4466', - '#4477AA'] - -total_time = 0 -lastline = '' - -for i in range(num_files): - - filename = sys.argv[i + 1] - print "Analysing", filename - - # Open stdout file - file = open(filename, 'r') - - # Search the different phrases - for line in file: - - # Loop over the possible labels - for i in range(len(labels)): - - # Extract the different blocks - if re.search("%s took"%labels[i], line): - counts[i] += 1. - times[i] += float(re.findall(r'[+-]?((\d+\.?\d*)|(\.\d+))', line)[-1][0]) - # Find the last line with meaningful output (avoid crash report, batch system stuff, ...)
- if re.findall(r'\[[0-9]{4}\][ ]\[*', line) or re.findall(r'^\[[0-9]*[.][0-9]+\][ ]', line): - lastline = line - - # Total run time - total_time += float(re.findall(r'[+-]?([0-9]*[.])?[0-9]+', lastline)[1]) -# Convert to seconds -times /= 1000. - -# Total time -total_measured_time = np.sum(times) -print "\nTotal measured time: %.3f s"%total_measured_time - -print "Total time:", total_time, "s\n" - -# Ratios -time_ratios = times / total_time - -# Better looking labels -for i in range(len(labels)): - labels[i] = labels[i].replace("_", " ") - labels[i] = labels[i].replace(":", "") - labels[i] = labels[i].title() - -times = np.array(times) -time_ratios = np.array(time_ratios) -is_rebuild = np.array(is_rebuild) - -# Sort in order of importance -order = np.argsort(-times) -times = times[order] -time_ratios = time_ratios[order] -is_rebuild = is_rebuild[order] -labels = np.take(labels, order) - -# Keep only the important components -important_times = [0.] -important_ratios = [0.,] -important_labels = ['Others (all below %.1f\%%)'%(threshold*100), ] -important_is_rebuild = [0] -print "Elements in 'Other' category (<%.1f%%):"%(threshold*100) -for i in range(len(labels)): - if time_ratios[i] > threshold: - important_times.append(times[i]) - important_ratios.append(time_ratios[i]) - important_labels.append(labels[i]) - important_is_rebuild.append(is_rebuild[i]) - else: - important_times[0] += times[i] - important_ratios[0] += time_ratios[i] - print " - '%-30s': %.4f%%"%(labels[i], time_ratios[i] * 100) - -print "\nUnaccounted for: %.4f%%\n"%(100 * (total_time - total_measured_time) / total_time) - -important_ratios = np.array(important_ratios) -important_is_rebuild = np.array(important_is_rebuild) - -figure() - -def func(pct): - return "$%4.2f\\%%$"%pct - -pie,_,_ = pie(important_ratios, explode=important_is_rebuild*0.2, autopct=lambda pct:func(pct), textprops=dict(color='0.1', fontsize=14), labeldistance=0.7, pctdistance=0.85, startangle=-15, colors=cols) -legend(pie, important_labels, title="SWIFT operations", loc="upper left") - -savefig("time_pie.pdf", dpi=150) diff --git a/examples/analyse_tasks.py b/examples/analyse_tasks.py deleted file mode 100755 index 48a00a63ea5da5cb80ebd6f10187cc613c1a5ed5..0000000000000000000000000000000000000000 --- a/examples/analyse_tasks.py +++ /dev/null @@ -1,375 +0,0 @@ -#!/usr/bin/env python -""" -Usage: - analyse_tasks.py [options] input.dat - -where input.dat is a thread info file for a step (MPI or non-MPI). Use the -'-y interval' flag of the swift and swift_mpi commands to create these -(you will also need to configure with the --enable-task-debugging option). - -The output is an analysis of the task timings, including the deadtime per thread -and step, the total amount of time spent in each task type for the whole step -and per thread, and the minimum and maximum times spent per task type. - -This file is part of SWIFT. -Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk) - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published -by the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with this program. 
If not, see <http://www.gnu.org/licenses/>. """ - -import matplotlib -matplotlib.use("Agg") -import matplotlib.collections as collections -import matplotlib.ticker as plticker -import pylab as pl -import sys -import argparse - -# Handle the command line. -parser = argparse.ArgumentParser(description="Analyse task dumps") - -parser.add_argument("input", help="Thread data file (-y output)") -parser.add_argument("-v", "--verbose", dest="verbose", - help="Verbose output (default: False)", - default=False, action="store_true") -parser.add_argument("-r", "--rank", dest="rank", - help="Rank to process (default: all)", - default="all", action="store") - -args = parser.parse_args() -infile = args.input - -# Tasks and subtypes. Indexed as in tasks.h. -TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", - "init_grav", "init_grav_out", "ghost_in", "ghost", "ghost_out", "extra_ghost", "drift_part", "drift_gpart", - "end_force", "kick1", "kick2", "timestep", "send", "recv", "grav_long_range", "grav_mm", "grav_down_in", - "grav_down", "grav_mesh", "cooling", "star_formation", "sourceterms", - "stars_ghost_in", "stars_ghost", "stars_ghost_out", - "count"] - -SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", - "tend", "xv", "rho", "gpart", "multipole", "spart", "stars_density", "count"] - -SIDS = ["(-1,-1,-1)", "(-1,-1, 0)", "(-1,-1, 1)", "(-1, 0,-1)", - "(-1, 0, 0)", "(-1, 0, 1)", "(-1, 1,-1)", "(-1, 1, 0)", - "(-1, 1, 1)", "( 0,-1,-1)", "( 0,-1, 0)", "( 0,-1, 1)", - "( 0, 0,-1)"] - -# Read input. -data = pl.loadtxt( infile ) - -# Do we have an MPI file? -full_step = data[0,:] -if full_step.size == 13: - print "# MPI mode" - mpimode = True - nranks = int(max(data[:,0])) + 1 - print "# Number of ranks:", nranks - rankcol = 0 - threadscol = 1 - taskcol = 2 - subtaskcol = 3 - ticcol = 5 - toccol = 6 - updates = int(full_step[7]) - g_updates = int(full_step[8]) - s_updates = int(full_step[9]) -else: - print "# non-MPI mode" - nranks = 1 - mpimode = False - rankcol = -1 - threadscol = 0 - taskcol = 1 - subtaskcol = 2 - ticcol = 4 - toccol = 5 - updates = int(full_step[6]) - g_updates = int(full_step[7]) - s_updates = int(full_step[8]) - -# Get the CPU clock to convert ticks into milliseconds. -CPU_CLOCK = float(full_step[-1]) / 1000.0 -if args.verbose: - print "# CPU frequency:", CPU_CLOCK * 1000.0 -print "# updates:", updates -print "# g_updates:", g_updates -print "# s_updates:", s_updates - -if mpimode: - if args.rank == "all": - ranks = range(nranks) - else: - ranks = [int(args.rank)] - if ranks[0] >= nranks: - print "Error: maximum rank is " + str(nranks - 1) - sys.exit(1) -else: - ranks = [1] - -maxthread = int(max(data[:,threadscol])) + 1 -print "# Maximum thread id:", maxthread - -# Avoid start and end times of zero. -sdata = data[data[:,ticcol] != 0] -sdata = sdata[sdata[:,toccol] != 0] - -# Now we process the required ranks. -for rank in ranks: - if mpimode: - print "# Rank", rank - data = sdata[sdata[:,rankcol] == rank] - full_step = data[0,:] - else: - data = sdata - - # Recover the start and end time - tic_step = int(full_step[ticcol]) - toc_step = int(full_step[toccol]) - data = data[1:,:] - - # Avoid start and end times of zero. - data = data[data[:,ticcol] != 0] - data = data[data[:,toccol] != 0] - - # Calculate the time range. - total_t = (toc_step - tic_step)/ CPU_CLOCK - print "# Data range: ", total_t, "ms" - print - - # Correct times to relative values. 
- start_t = float(tic_step) - data[:,ticcol] -= start_t - data[:,toccol] -= start_t - end_t = (toc_step - start_t) / CPU_CLOCK - - tasks = {} - tasks[-1] = [] - for i in range(maxthread): - tasks[i] = [] - - # Gather the data by thread. - num_lines = pl.shape(data)[0] - for line in range(num_lines): - thread = int(data[line,threadscol]) - tic = int(data[line,ticcol]) / CPU_CLOCK - toc = int(data[line,toccol]) / CPU_CLOCK - tasktype = int(data[line,taskcol]) - subtype = int(data[line,subtaskcol]) - sid = int(data[line, -1]) - - tasks[thread].append([tic,toc,tasktype,subtype, sid]) - - # Sort each thread's tasks by tic and gather the thread ids. - threadids = [] - for i in range(maxthread): - tasks[i] = sorted(tasks[i], key=lambda task: task[0]) - threadids.append(i) - - # Times per task. - print "# Task times:" - print "# -----------" - print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\ - .format("type/subtype", "count","minimum", "maximum", - "sum", "mean", "percent") - - alltasktimes = {} - sidtimes = {} - for i in threadids: - tasktimes = {} - for task in tasks[i]: - key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]] - dt = task[1] - task[0] - if not key in tasktimes: - tasktimes[key] = [] - tasktimes[key].append(dt) - - if not key in alltasktimes: - alltasktimes[key] = [] - alltasktimes[key].append(dt) - - my_sid = task[4] - if my_sid > -1: - if not my_sid in sidtimes: - sidtimes[my_sid] = [] - sidtimes[my_sid].append(dt) - - print "# Thread : ", i - for key in sorted(tasktimes.keys()): - taskmin = min(tasktimes[key]) - taskmax = max(tasktimes[key]) - tasksum = sum(tasktimes[key]) - print "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(key, len(tasktimes[key]), taskmin, taskmax, tasksum, - tasksum / len(tasktimes[key]), tasksum / total_t * 100.0) - print - - print "# All threads : " - for key in sorted(alltasktimes.keys()): - taskmin = min(alltasktimes[key]) - taskmax = max(alltasktimes[key]) - tasksum = sum(alltasktimes[key]) - print "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum, - tasksum / len(alltasktimes[key]), - tasksum / (len(threadids) * total_t) * 100.0) - print - - # For pairs, show timings sorted by SID - print "# By SID (all threads): " - print "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\ - .format("Pair/Sub-pair SID", "count","minimum", "maximum", - "sum", "mean", "percent") - - for sid in range(0,13): - if sid in sidtimes: - sidmin = min(sidtimes[sid]) - sidmax = max(sidtimes[sid]) - sidsum = sum(sidtimes[sid]) - sidcount = len(sidtimes[sid]) - sidmean = sidsum / sidcount - else: - sidmin = 0. - sidmax = 0. - sidsum = 0. - sidcount = 0 - sidmean = 0. - print "{0:3d} {1:15s}: {2:7d} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.4f} {7:9.2f}"\ - .format(sid, SIDS[sid], sidcount, sidmin, sidmax, sidsum, - sidmean, sidsum / (len(threadids) * total_t) * 100.0) - print - - # Dead times. - print "# Times not in tasks (deadtimes)" - print "# ------------------------------" - print "# Time before first task:" - print "# no. 
: {0:>9s} {1:>9s}".format("value", "percent") - predeadtimes = [] - for i in threadids: - if len(tasks[i]) > 0: - predeadtime = tasks[i][0][0] - print "thread {0:2d}: {1:9.4f} {2:9.4f}"\ - .format(i, predeadtime, predeadtime / total_t * 100.0) - predeadtimes.append(predeadtime) - else: - predeadtimes.append(0.0) - - predeadmin = min(predeadtimes) - predeadmax = max(predeadtimes) - predeadsum = sum(predeadtimes) - print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") - print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(predeadtimes), predeadmin, predeadmax, predeadsum, - predeadsum / len(predeadtimes), - predeadsum / (len(threadids) * total_t ) * 100.0) - print - - print "# Time after last task:" - print "# no. : {0:>9s} {1:>9s}".format("value", "percent") - postdeadtimes = [] - for i in threadids: - if len(tasks[i]) > 0: - postdeadtime = total_t - tasks[i][-1][1] - print "thread {0:2d}: {1:9.4f} {2:9.4f}"\ - .format(i, postdeadtime, postdeadtime / total_t * 100.0) - postdeadtimes.append(postdeadtime) - else: - postdeadtimes.append(0.0) - - postdeadmin = min(postdeadtimes) - postdeadmax = max(postdeadtimes) - postdeadsum = sum(postdeadtimes) - print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") - print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(postdeadtimes), postdeadmin, postdeadmax, postdeadsum, - postdeadsum / len(postdeadtimes), - postdeadsum / (len(threadids) * total_t ) * 100.0) - print - - # Time in engine, i.e. from first to last tasks. - print "# Time between tasks (engine deadtime):" - print "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") - enginedeadtimes = [] - for i in threadids: - deadtimes = [] - if len(tasks[i]) > 0: - last = tasks[i][0][0] - else: - last = 0.0 - for task in tasks[i]: - dt = task[0] - last - deadtimes.append(dt) - last = task[1] - - # Drop first value, last value already gone. - if len(deadtimes) > 1: - deadtimes = deadtimes[1:] - else: - # Only one or fewer tasks, so no deadtime by definition. - deadtimes = [0.0] - - deadmin = min(deadtimes) - deadmax = max(deadtimes) - deadsum = sum(deadtimes) - print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(i, len(deadtimes), deadmin, deadmax, deadsum, - deadsum / len(deadtimes), deadsum / total_t * 100.0) - enginedeadtimes.extend(deadtimes) - - deadmin = min(enginedeadtimes) - deadmax = max(enginedeadtimes) - deadsum = sum(enginedeadtimes) - print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(enginedeadtimes), deadmin, deadmax, deadsum, - deadsum / len(enginedeadtimes), - deadsum / (len(threadids) * total_t ) * 100.0) - print - - # All times in step. - print "# All deadtimes:" - print "# no. 
: {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") - alldeadtimes = [] - for i in threadids: - deadtimes = [] - last = 0 - for task in tasks[i]: - dt = task[0] - last - deadtimes.append(dt) - last = task[1] - dt = total_t - last - deadtimes.append(dt) - - deadmin = min(deadtimes) - deadmax = max(deadtimes) - deadsum = sum(deadtimes) - print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(i, len(deadtimes), deadmin, deadmax, deadsum, - deadsum / len(deadtimes), deadsum / total_t * 100.0) - alldeadtimes.extend(deadtimes) - - deadmin = min(alldeadtimes) - deadmax = max(alldeadtimes) - deadsum = sum(alldeadtimes) - print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(alldeadtimes), deadmin, deadmax, deadsum, - deadsum / len(alldeadtimes), - deadsum / (len(threadids) * total_t ) * 100.0) - print - -sys.exit(0) diff --git a/examples/check_ngbs.py b/examples/check_ngbs.py deleted file mode 100644 index a4a07ce7bd6ffb817e8106b74d9895a0edbceca7..0000000000000000000000000000000000000000 --- a/examples/check_ngbs.py +++ /dev/null @@ -1,321 +0,0 @@ -import h5py as h -import numpy as np -import matplotlib -matplotlib.use("Agg") -from pylab import * -import sys -import os.path - -kernel_gamma = 1.825742 -kernel_gamma2 = kernel_gamma * kernel_gamma -kernel_gamma_dim = np.power(kernel_gamma,3) -hydro_dimension_unit_sphere = 4. * np.pi / 3. -kernel_norm = hydro_dimension_unit_sphere * kernel_gamma_dim -error = False - -inputFile1 = "" -inputFile2 = "" - -# Compare the values of two floats -def isclose(a, b, rel_tol=1e-09, abs_tol=0.0): - return abs(a-b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol) - -# Check list of density neighbours and check that they are correct. -def check_density_neighbours(pids, ngb_ids_naive, ngb_ids_sort, mask, pos, - h_naive, h_sort, num_invalid, acc): - - for k in range(0,num_invalid): - - # Filter neighbour lists for valid particle ids - filter_neigh_naive = [i for i in ngb_ids_naive[mask][k] if i > -1] - filter_neigh_sort = [i for i in ngb_ids_sort[mask][k] if i > -1] - - # Check neighbour lists for differences - id_list = set(filter_neigh_naive).symmetric_difference(set(filter_neigh_sort)) - - # Check for duplicate IDs - duplicate_check_naive = len(filter_neigh_naive) != len(set(filter_neigh_naive)) - duplicate_check_sort = len(filter_neigh_sort) != len(set(filter_neigh_sort)) - - if duplicate_check_naive: - print "Duplicate neighbour ID found in: ", inputFile1 - print filter_neigh_naive - return True - - if duplicate_check_sort: - print "Duplicate neighbour ID found in: ", inputFile2 - print filter_neigh_sort - return True - - pid = pids[mask][k] - - # Loop over discrepancies and check if they are actually neighbours - for pjd in id_list: - pi_pos = pos[np.where(pids == pid)] - pj_pos = pos[np.where(pids == pjd)] - - hi = h_naive[np.where(pids == pid)] - - dx = pi_pos[0][0] - pj_pos[0][0] - dy = pi_pos[0][1] - pj_pos[0][1] - dz = pi_pos[0][2] - pj_pos[0][2] - - # Correct for BCs - dx = nearest(dx) - dy = nearest(dy) - dz = nearest(dz) - - r2 = dx*dx + dy*dy + dz*dz - - hig2 = hi*hi*kernel_gamma2 - - diff = abs(r2 - hig2) - - print "Particle {} is missing {}, hig2: {}, r2: {}, |r2 - hig2|: {}".format(pid,pjd,hig2, r2, diff) - - if diff < acc * hig2: - print "Missing interaction due to precision issue will be ignored." - else: - hi_2 = h_sort[np.where(pids == pid)] - - # If a neighbour is missing and the particle has the same h, throw - # an error. 
- if(isclose(hi,hi_2)): - print "Missing interaction found but particle has the same smoothing length (hi_1: %e, hi_2: %e)."%(hi, hi_2) - return True - else: - print "Missing interaction due to different smoothing lengths will be ignored (hi_1: %e, hi_2: %e)."%(hi, hi_2) - - return False - -# Check list of force neighbours and check that they are correct. -def check_force_neighbours(pids, ngb_ids_naive, ngb_ids_sort, mask, pos, - h_naive, h_sort, num_invalid, acc): - - error_val = False - - for k in range(0,num_invalid): - - # Filter neighbour lists for valid particle ids - filter_neigh_naive = [i for i in ngb_ids_naive[mask][k] if i > -1] - filter_neigh_sort = [i for i in ngb_ids_sort[mask][k] if i > -1] - - # Check neighbour lists for differences - id_list = set(filter_neigh_naive).symmetric_difference(set(filter_neigh_sort)) - - pid = pids[mask][k] - - # Loop over discrepancies and check if they are actually neighbours - for pjd in id_list: - pi_pos = pos[np.where(pids == pid)] - pj_pos = pos[np.where(pids == pjd)] - - hi = h_naive[np.where(pids == pid)] - hj = h_naive[np.where(pids == pjd)] - - dx = pi_pos[0][0] - pj_pos[0][0] - dy = pi_pos[0][1] - pj_pos[0][1] - dz = pi_pos[0][2] - pj_pos[0][2] - - # Correct for BCs - dx = nearest(dx) - dy = nearest(dy) - dz = nearest(dz) - - r2 = dx*dx + dy*dy + dz*dz - - hig2 = hi*hi*kernel_gamma2 - hjg2 = hj*hj*kernel_gamma2 - - diff = abs(r2 - max(hig2, hjg2)) - - print "Particle {} is missing {}, hig2: {}, hjg2: {}, r2: {}, |r2 - max(hig2,hjg2)|: {}".format(pid,pjd,hig2, hjg2, r2, diff) - - if diff < acc * max(hig2,hjg2): - print "Missing interaction due to precision issue will be ignored." - else: - hi_2 = h_sort[np.where(pids == pid)] - if(isclose(hi,hi_2)): - print "Missing interaction due to the same smoothing lengths will not be ignored (hi_1: %e, hi_2: %e)."%(hi, hi_2) - error_val = True - else: - print "Missing interaction due to different smoothing lengths will be ignored (hi_1: %e, hi_2: %e)."%(hi, hi_2) - - return error_val - -def nearest(dx): - if(dx > 0.5 * box_size): - return dx - box_size - elif(dx < -0.5 * box_size): - return dx + box_size - else: - return dx - -# Parse command line arguments -if len(sys.argv) < 3: - print "Error: pass input files as arguments" - sys.exit() -else: - inputFile1 = sys.argv[1] - inputFile2 = sys.argv[2] - if os.path.exists(inputFile1) != 1: - print "\n{} does not exist!\n".format(inputFile1) - sys.exit() - if os.path.exists(inputFile2) != 1: - print "\n{} does not exist!\n".format(inputFile2) - sys.exit() - -# Open input files -file_naive = h.File(inputFile1, "r") -file_sort = h.File(inputFile2, "r") - -box_size = file_naive["/Header"].attrs["BoxSize"][0] - -# Read input file fields -ids_naive = file_naive["/PartType0/ParticleIDs"][:] -ids_sort = file_sort["/PartType0/ParticleIDs"][:] - -h_naive = file_naive["/PartType0/SmoothingLength"][:] -h_sort = file_sort["/PartType0/SmoothingLength"][:] - -pos_naive = file_naive["/PartType0/Coordinates"][:,:] -#pos_sort = file_sort["/PartType0/Coordinates"][:,:] - -num_density_naive = file_naive["/PartType0/Num_ngb_density"][:] -num_density_sort = file_sort["/PartType0/Num_ngb_density"][:] - -num_force_naive = file_naive["/PartType0/Num_ngb_force"][:] -num_force_sort = file_sort["/PartType0/Num_ngb_force"][:] - -neighbour_ids_density_naive = file_naive["/PartType0/Ids_ngb_density"][:] -neighbour_ids_density_sort = file_sort["/PartType0/Ids_ngb_density"][:] - -neighbour_ids_force_naive = file_naive["/PartType0/Ids_ngb_force"][:] -neighbour_ids_force_sort = 
file_sort["/PartType0/Ids_ngb_force"][:] - - -#wcount_naive = file_naive["/PartType0/Wcount"][:] -#wcount_sort = file_sort["/PartType0/Wcount"][:] -# -#wcount_naive = wcount_naive * np.power(h_naive,3) * kernel_norm -#wcount_sort = wcount_sort * np.power(h_sort,3) * kernel_norm - -# Cross check -max_density_ngbs_naive = np.max(num_density_naive) -max_density_ngbs_sort = np.max(num_density_sort) -max_force_ngbs_naive = np.max(num_force_naive) -max_force_ngbs_sort = np.max(num_force_sort) - -print " Min Mean Max " -print " ---------------------" -print "Ngbs density naiv: ", np.min(num_density_naive), np.mean(num_density_naive), max_density_ngbs_naive -print "Ngbs density sort: ", np.min(num_density_sort), np.mean(num_density_sort), max_density_ngbs_sort -print "Ngbs force naiv: ", np.min(num_force_naive), np.mean(num_force_naive), max_force_ngbs_naive -print "Ngbs force sort: ", np.min(num_force_sort), np.mean(num_force_sort), max_force_ngbs_sort -#print "Wcount naiv: ", np.min(wcount_naive), np.mean(wcount_naive), np.max(wcount_naive) -#print "Wcount sort: ", np.min(wcount_sort), np.mean(wcount_sort), np.max(wcount_sort) - -# Sort -index_naive = np.argsort(ids_naive) -index_sort = np.argsort(ids_sort) - -num_density_naive = num_density_naive[index_naive] -num_density_sort = num_density_sort[index_sort] -num_force_naive = num_force_naive[index_naive] -num_force_sort = num_force_sort[index_sort] -ids_naive = ids_naive[index_naive] -ids_sort = ids_sort[index_sort] -neighbour_ids_density_naive = neighbour_ids_density_naive[index_naive] -neighbour_ids_density_sort = neighbour_ids_density_sort[index_sort] -neighbour_ids_force_naive = neighbour_ids_force_naive[index_naive] -neighbour_ids_force_sort = neighbour_ids_force_sort[index_sort] -#wcount_naive = wcount_naive[index_naive] -#wcount_sort = wcount_sort[index_sort] -h_naive = h_naive[index_naive] -h_sort = h_sort[index_sort] -pos_naive = pos_naive[index_naive] -#pos_sort = pos_sort[index_sort] - -neighbour_length_naive = len(neighbour_ids_density_naive[0]) -neighbour_length_sort = len(neighbour_ids_density_sort[0]) - -# Check that input files are logging the same number of neighbours -if neighbour_length_naive != neighbour_length_sort: - print "Input files have logged different numbers of neighbour lengths!" - print "{} has logged: {} neighbours".format(inputFile1, neighbour_length_naive) - print "{} has logged: {} neighbours".format(inputFile2, neighbour_length_sort) - exit(1) - -if (max_density_ngbs_naive > neighbour_length_naive or max_force_ngbs_naive > neighbour_length_naive or - max_density_ngbs_sort > neighbour_length_sort or max_force_ngbs_sort > neighbour_length_sort): - print "The number of neighbours has exceeded the number of neighbours logged." - print "Modify NUM_OF_NEIGHBOURS in hydro_part.h to log more neighbours." 
- print "The highest neighbour count is: ", max(max_density_ngbs_naive,max_force_ngbs_naive, max_density_ngbs_sort,max_force_ngbs_sort) - exit(1) - -# First check -print "\n Min Max" -print " ----------" -print "Differences for density: ", min(num_density_naive - num_density_sort), max(num_density_naive - num_density_sort) -print "Differences for force: ", min(num_force_naive - num_force_sort), max(num_force_naive - num_force_sort) - -# Get the IDs that are different -mask_density = num_density_naive != num_density_sort -mask_force = num_force_naive != num_force_sort -num_invalid_density = np.sum(mask_density) -num_invalid_force = np.sum(mask_force) - -print "\nNum non-zero density: ", num_invalid_density -print "Num non-zero force: ", num_invalid_force - -print "\nParticle IDs with incorrect densities" -print "----------------------------------------" -print ids_naive[mask_density] - -# Check density neighbour lists -error += check_density_neighbours(ids_naive, neighbour_ids_density_naive, - neighbour_ids_density_sort, mask_density, pos_naive, h_naive, h_sort, - num_invalid_density, 2e-6) - -print "Num of density interactions", inputFile1 -print num_density_naive[mask_density] - -print "Num of density interactions", inputFile2 -print num_density_sort[mask_density] - -print "\nParticle IDs with incorrect forces" -print "------------------------------------" -print ids_naive[mask_force] - -# Check force neighbour lists -error += check_force_neighbours(ids_naive, neighbour_ids_force_naive, - neighbour_ids_force_sort, mask_force, pos_naive, h_naive, h_sort, - num_invalid_force, 2e-6) - -print "Num of force interactions", inputFile1 -print num_force_naive[mask_force] - -#print "Smoothing lengths", inputFile1 -#print h_naive[mask_force] - -print "Num of force interactions", inputFile2 -print num_force_sort[mask_force] - -#print "Smoothing lengths", inputFile2 -#print h_sort[mask_force] - -# Statistics of h difference -h_relative = (h_naive - h_sort) / h_naive -print "h statistics: {} {} (Min, 1st Percentile)".format(np.min(h_relative), np.percentile(h_relative,1)) -print "h statistics: {} {} (Mean, Median)".format(np.mean(h_relative), np.median(h_relative)) -print "h statistics: {} {} (Max, 99th Percentile)".format(np.max(h_relative), np.percentile(h_relative, 99)) - -if error: - print "\n------------------" - print "Differences found." - print "------------------" - exit(1) -else: - print "\n---------------------" - print "No differences found." 
- print "---------------------" - exit(0) diff --git a/examples/plot_gravity_checks.py b/examples/plot_gravity_checks.py deleted file mode 100755 index 23866ac2a6952ff918dbc80533269c0d2e9bcbc5..0000000000000000000000000000000000000000 --- a/examples/plot_gravity_checks.py +++ /dev/null @@ -1,307 +0,0 @@ -#!/usr/bin/env python - -import sys -import glob -import re -import numpy as np -import matplotlib.pyplot as plt - -params = {'axes.labelsize': 14, -'axes.titlesize': 18, -'font.size': 12, -'legend.fontsize': 12, -'xtick.labelsize': 14, -'ytick.labelsize': 14, -'text.usetex': True, -'figure.figsize': (12, 10), -'figure.subplot.left' : 0.06, -'figure.subplot.right' : 0.99 , -'figure.subplot.bottom' : 0.06 , -'figure.subplot.top' : 0.99 , -'figure.subplot.wspace' : 0.14 , -'figure.subplot.hspace' : 0.14 , -'lines.markersize' : 6, -'lines.linewidth' : 3., -'text.latex.unicode': True -} -plt.rcParams.update(params) -plt.rc('font',**{'family':'sans-serif','sans-serif':['Times']}) - -min_error = 1e-7 -max_error = 3e-1 -num_bins = 64 - -# Construct the bins -bin_edges = np.linspace(np.log10(min_error), np.log10(max_error), num_bins + 1) -bin_size = (np.log10(max_error) - np.log10(min_error)) / num_bins -bins = 0.5*(bin_edges[1:] + bin_edges[:-1]) -bin_edges = 10**bin_edges -bins = 10**bins - -# Colours -cols = ['#332288', '#88CCEE', '#117733', '#DDCC77', '#CC6677'] - -# Time-step to plot -step = int(sys.argv[1]) -periodic = int(sys.argv[2]) - -# Find the files for the different expansion orders -order_list = glob.glob("gravity_checks_swift_step%.4d_order*.dat"%step) -num_order = len(order_list) - -# Get the multipole orders -order = np.zeros(num_order) -for i in range(num_order): - order[i] = int(order_list[i][35]) -order = sorted(order) -order_list = sorted(order_list) - -# Read the exact accelerations first -if periodic: - data = np.loadtxt('gravity_checks_exact_periodic_step%.4d.dat'%step) -else: - data = np.loadtxt('gravity_checks_exact_step%.4d.dat'%step) -exact_ids = data[:,0] -exact_pos = data[:,1:4] -exact_a = data[:,4:7] -exact_pot = data[:,7] -# Sort stuff -sort_index = np.argsort(exact_ids) -exact_ids = exact_ids[sort_index] -exact_pos = exact_pos[sort_index, :] -exact_a = exact_a[sort_index, :] -exact_pot = exact_pot[sort_index] -exact_a_norm = np.sqrt(exact_a[:,0]**2 + exact_a[:,1]**2 + exact_a[:,2]**2) - -print "Number of particles tested:", np.size(exact_ids) - -# Start the plot -plt.figure() - -count = 0 - -# Get the Gadget-2 data if existing -if periodic: - gadget2_file_list = glob.glob("forcetest_gadget2_periodic.txt") -else: - gadget2_file_list = glob.glob("forcetest_gadget2.txt") -if len(gadget2_file_list) != 0: - - gadget2_data = np.loadtxt(gadget2_file_list[0]) - gadget2_ids = gadget2_data[:,0] - gadget2_pos = gadget2_data[:,1:4] - gadget2_a_exact = gadget2_data[:,4:7] - gadget2_a_grav = gadget2_data[:, 7:10] - - # Sort stuff - sort_index = np.argsort(gadget2_ids) - gadget2_ids = gadget2_ids[sort_index] - gadget2_pos = gadget2_pos[sort_index, :] - gadget2_a_exact = gadget2_a_exact[sort_index, :] - gadget2_exact_a_norm = np.sqrt(gadget2_a_exact[:,0]**2 + gadget2_a_exact[:,1]**2 + gadget2_a_exact[:,2]**2) - gadget2_a_grav = gadget2_a_grav[sort_index, :] - - # Cross-checks - if not np.array_equal(exact_ids, gadget2_ids): - print "Comparing different IDs !" - - if np.max(np.abs(exact_pos - gadget2_pos)/np.abs(gadget2_pos)) > 1e-6: - print "Comparing different positions ! 
max difference:" - index = np.argmax(exact_pos[:,0]**2 + exact_pos[:,1]**2 + exact_pos[:,2]**2 - gadget2_pos[:,0]**2 - gadget2_pos[:,1]**2 - gadget2_pos[:,2]**2) - print "Gadget2 (id=%d):"%gadget2_ids[index], gadget2_pos[index,:], "exact (id=%d):"%exact_ids[index], exact_pos[index,:], "\n" - - diff = np.abs(exact_a_norm - gadget2_exact_a_norm) / np.abs(gadget2_exact_a_norm) - max_diff = np.max(diff) - if max_diff > 2e-6: - print "Comparing different exact accelerations !" - print "Median=", np.median(diff), "Mean=", np.mean(diff), "99%=", np.percentile(diff, 99) - print "max difference ( relative diff =", max_diff, "):" - #index = np.argmax(exact_a[:,0]**2 + exact_a[:,1]**2 + exact_a[:,2]**2 - gadget2_a_exact[:,0]**2 - gadget2_a_exact[:,1]**2 - gadget2_a_exact[:,2]**2) - index = np.argmax(diff) - print "a_exact --- Gadget2:", gadget2_a_exact[index,:], "exact:", exact_a[index,:] - print "pos --- Gadget2: (id=%d):"%gadget2_ids[index], gadget2_pos[index,:], "exact (id=%d):"%gadget2_ids[index], gadget2_pos[index,:],"\n" - - - # Compute the error norm - diff = gadget2_a_exact - gadget2_a_grav - - norm_diff = np.sqrt(diff[:,0]**2 + diff[:,1]**2 + diff[:,2]**2) - norm_a = np.sqrt(gadget2_a_exact[:,0]**2 + gadget2_a_exact[:,1]**2 + gadget2_a_exact[:,2]**2) - - norm_error = norm_diff / norm_a - error_x = abs(diff[:,0]) / norm_a - error_y = abs(diff[:,1]) / norm_a - error_z = abs(diff[:,2]) / norm_a - - # Bin the error - norm_error_hist,_ = np.histogram(norm_error, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_x_hist,_ = np.histogram(error_x, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_y_hist,_ = np.histogram(error_y, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_z_hist,_ = np.histogram(error_z, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - - norm_median = np.median(norm_error) - median_x = np.median(error_x) - median_y = np.median(error_y) - median_z = np.median(error_z) - - norm_per99 = np.percentile(norm_error,99) - per99_x = np.percentile(error_x,99) - per99_y = np.percentile(error_y,99) - per99_z = np.percentile(error_z,99) - - norm_max = np.max(norm_error) - max_x = np.max(error_x) - max_y = np.max(error_y) - max_z = np.max(error_z) - - print "Gadget-2 ---- " - print "Norm: median= %f 99%%= %f max= %f"%(norm_median, norm_per99, norm_max) - print "X : median= %f 99%%= %f max= %f"%(median_x, per99_x, max_x) - print "Y : median= %f 99%%= %f max= %f"%(median_y, per99_y, max_y) - print "Z : median= %f 99%%= %f max= %f"%(median_z, per99_z, max_z) - print "" - - plt.subplot(231) - plt.text(min_error * 1.5, 1.55, "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(norm_median, norm_per99), ha="left", va="top", alpha=0.8) - plt.semilogx(bins, norm_error_hist, 'k--', label="Gadget-2", alpha=0.8) - plt.subplot(232) - plt.semilogx(bins, error_x_hist, 'k--', label="Gadget-2", alpha=0.8) - plt.text(min_error * 1.5, 1.55, "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_x, per99_x), ha="left", va="top", alpha=0.8) - plt.subplot(233) - plt.semilogx(bins, error_y_hist, 'k--', label="Gadget-2", alpha=0.8) - plt.text(min_error * 1.5, 1.55, "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_y, per99_y), ha="left", va="top", alpha=0.8) - plt.subplot(234) - plt.semilogx(bins, error_z_hist, 'k--', label="Gadget-2", alpha=0.8) - plt.text(min_error * 1.5, 1.55, "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_z, per99_z), ha="left", va="top", alpha=0.8) - - count += 1 - - -# Plot 
the different histograms -for i in range(num_order): - data = np.loadtxt(order_list[i]) - ids = data[:,0] - pos = data[:,1:4] - a_grav = data[:, 4:7] - pot = data[:, 7] - - # Sort stuff - sort_index = np.argsort(ids) - ids = ids[sort_index] - pos = pos[sort_index, :] - a_grav = a_grav[sort_index, :] - pot = pot[sort_index] - - # Cross-checks - if not np.array_equal(exact_ids, ids): - print "Comparing different IDs !" - - if np.max(np.abs(exact_pos - pos)/np.abs(pos)) > 1e-6: - print "Comparing different positions ! max difference:" - index = np.argmax(exact_pos[:,0]**2 + exact_pos[:,1]**2 + exact_pos[:,2]**2 - pos[:,0]**2 - pos[:,1]**2 - pos[:,2]**2) - print "SWIFT (id=%d):"%ids[index], pos[index,:], "exact (id=%d):"%exact_ids[index], exact_pos[index,:], "\n" - - # Compute the error norm - diff = exact_a - a_grav - diff_pot = exact_pot - pot - - # Correct for different normalization of potential - print "Difference in normalization of potential:", np.mean(diff_pot), - print "std_dev=", np.std(diff_pot), "99-percentile:", np.percentile(diff_pot, 99)-np.median(diff_pot), "1-percentile:", np.median(diff_pot) - np.percentile(diff_pot, 1) - - exact_pot -= np.mean(diff_pot) - diff_pot = exact_pot - pot - - norm_diff = np.sqrt(diff[:,0]**2 + diff[:,1]**2 + diff[:,2]**2) - - norm_error = norm_diff / exact_a_norm - error_x = abs(diff[:,0]) / exact_a_norm - error_y = abs(diff[:,1]) / exact_a_norm - error_z = abs(diff[:,2]) / exact_a_norm - error_pot = abs(diff_pot) / abs(exact_pot) - - # Bin the error - norm_error_hist,_ = np.histogram(norm_error, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_x_hist,_ = np.histogram(error_x, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_y_hist,_ = np.histogram(error_y, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_z_hist,_ = np.histogram(error_z, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - error_pot_hist,_ = np.histogram(error_pot, bins=bin_edges, density=False) / (np.size(norm_error) * bin_size) - - norm_median = np.median(norm_error) - median_x = np.median(error_x) - median_y = np.median(error_y) - median_z = np.median(error_z) - median_pot = np.median(error_pot) - - norm_per99 = np.percentile(norm_error,99) - per99_x = np.percentile(error_x,99) - per99_y = np.percentile(error_y,99) - per99_z = np.percentile(error_z,99) - per99_pot = np.percentile(error_pot, 99) - - norm_max = np.max(norm_error) - max_x = np.max(error_x) - max_y = np.max(error_y) - max_z = np.max(error_z) - max_pot = np.max(error_pot) - - print "Order %d ---- "%order[i] - print "Norm: median= %f 99%%= %f max= %f"%(norm_median, norm_per99, norm_max) - print "X : median= %f 99%%= %f max= %f"%(median_x, per99_x, max_x) - print "Y : median= %f 99%%= %f max= %f"%(median_y, per99_y, max_y) - print "Z : median= %f 99%%= %f max= %f"%(median_z, per99_z, max_z) - print "Pot : median= %f 99%%= %f max= %f"%(median_pot, per99_pot, max_pot) - print "" - - plt.subplot(231) - plt.semilogx(bins, error_x_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_x, per99_x), ha="left", va="top", color=cols[i]) - plt.subplot(232) - plt.semilogx(bins, error_y_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_y, per99_y), ha="left", va="top", color=cols[i]) - plt.subplot(233) - 
plt.semilogx(bins, error_z_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_z, per99_z), ha="left", va="top", color=cols[i]) - plt.subplot(234) - plt.semilogx(bins, norm_error_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(norm_median, norm_per99), ha="left", va="top", color=cols[i]) - plt.subplot(235) - plt.semilogx(bins, error_pot_hist, color=cols[i],label="SWIFT m-poles order %d"%order[i]) - plt.text(min_error * 1.5, 1.5 - count/10., "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$"%(median_pot, per99_pot), ha="left", va="top", color=cols[i]) - - count += 1 - -plt.subplot(231) -plt.xlabel("$\delta a_x/|\overrightarrow{a}_{exact}|$") -#plt.ylabel("Density") -plt.xlim(min_error, max_error) -plt.ylim(0,1.75) -#plt.legend(loc="center left") -plt.subplot(232) -plt.xlabel("$\delta a_y/|\overrightarrow{a}_{exact}|$") -#plt.ylabel("Density") -plt.xlim(min_error, max_error) -plt.ylim(0,1.75) -#plt.legend(loc="center left") -plt.subplot(233) -plt.xlabel("$\delta a_z/|\overrightarrow{a}_{exact}|$") -#plt.ylabel("Density") -plt.xlim(min_error, max_error) -plt.ylim(0,1.75) -plt.subplot(234) -plt.xlabel("$|\delta \overrightarrow{a}|/|\overrightarrow{a}_{exact}|$") -#plt.ylabel("Density") -plt.xlim(min_error, max_error) -plt.ylim(0,2.5) -plt.legend(loc="upper left") -plt.subplot(235) -plt.xlabel("$\delta \phi/\phi_{exact}$") -#plt.ylabel("Density") -plt.xlim(min_error, max_error) -plt.ylim(0,1.75) -#plt.legend(loc="center left") - - - -plt.savefig("gravity_checks_step%.4d.png"%step, dpi=200) -plt.savefig("gravity_checks_step%.4d.pdf"%step, dpi=200) diff --git a/examples/plot_scaling_results.py b/examples/plot_scaling_results.py deleted file mode 100755 index e39f0d2d0c00eecf7680b2f090bd2c0aa29ed8bb..0000000000000000000000000000000000000000 --- a/examples/plot_scaling_results.py +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/env python -# -# Usage: -# python plot_scaling_results.py input-file1-ext input-file2-ext ... -# -# Description: -# Plots speed up, parallel efficiency and time to solution given a "timesteps" output file generated by SWIFT. 
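For reference, the speed-up and parallel-efficiency numbers this script derives follow the usual definitions, S(n) = T(ref)/T(n) and E(n) = S(n)/n, taken relative to the smallest thread count found. A minimal sketch of that arithmetic, with names and numbers invented here for illustration (they are not part of the deleted script):

    # Minimal sketch, assuming `wallclock` maps thread count to the summed
    # per-step wall-clock times read from a "timesteps" file; the values
    # below are illustrative only.
    wallclock = {1: 1000.0, 2: 520.0, 4: 280.0}
    t_ref = wallclock[min(wallclock)]
    for n in sorted(wallclock):
        speedup = t_ref / wallclock[n]   # S(n) = T(ref) / T(n)
        efficiency = speedup / n         # E(n) = S(n) / n
        print(n, speedup, efficiency)
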
-# -# Example: -# python plot_scaling_results.py _threads_cosma_stdout.txt _threads_knl_stdout.txt -# -# The working directory should contain files 1_threads_cosma_stdout.txt - 64_threads_cosma_stdout.txt and 1_threads_knl_stdout.txt - 64_threads_knl_stdout.txt, i.e. wall clock time for each run using a given number of threads - -import sys -import glob -import re -import numpy as np -import matplotlib.pyplot as plt -import scipy.stats -import ntpath - -params = {'axes.labelsize': 14, -'axes.titlesize': 18, -'font.size': 12, -'legend.fontsize': 12, -'xtick.labelsize': 14, -'ytick.labelsize': 14, -'text.usetex': True, -'figure.subplot.left' : 0.055, -'figure.subplot.right' : 0.98 , -'figure.subplot.bottom' : 0.05 , -'figure.subplot.top' : 0.95 , -'figure.subplot.wspace' : 0.14 , -'figure.subplot.hspace' : 0.12 , -'lines.markersize' : 6, -'lines.linewidth' : 3., -'text.latex.unicode': True } -plt.rcParams.update(params) -plt.rc('font',**{'family':'sans-serif','sans-serif':['Times']}) - -version = [] -branch = [] -revision = [] -hydro_scheme = [] -hydro_kernel = [] -hydro_neighbours = [] -hydro_eta = [] -threadList = [] -hexcols = ['#332288', '#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77', - '#CC6677', '#882255', '#AA4499', '#661100', '#6699CC', '#AA4466', - '#4477AA'] -linestyle = (hexcols[0],hexcols[1],hexcols[3],hexcols[5],hexcols[6],hexcols[8],hexcols[2],hexcols[4],hexcols[7],hexcols[9]) -numTimesteps = 0 -legendTitle = ' ' - -inputFileNames = [] - -# Work out how many data series there are -if len(sys.argv) == 1: - print "Please specify an input file in the arguments." - sys.exit() -else: - for fileName in sys.argv[1:]: - inputFileNames.append(fileName) - numOfSeries = int(len(sys.argv) - 1) - -# Get the names of the branch, Git revision, hydro scheme and hydro kernel -def parse_header(inputFile): - with open(inputFile, 'r') as f: - found_end = False - for line in f: - if 'Branch:' in line: - s = line.split() - line = s[2:] - branch.append(" ".join(line)) - elif 'Revision:' in line: - s = line.split() - revision.append(s[2]) - elif 'Hydrodynamic scheme:' in line: - line = line[2:-1] - s = line.split() - line = s[2:] - hydro_scheme.append(" ".join(line)) - elif 'Hydrodynamic kernel:' in line: - line = line[2:-1] - s = line.split() - line = s[2:5] - hydro_kernel.append(" ".join(line)) - elif 'neighbours:' in line: - s = line.split() - hydro_neighbours.append(s[4]) - elif 'Eta:' in line: - s = line.split() - hydro_eta.append(s[2]) - return - -# Parse file and return total time taken, speed up and parallel efficiency -def parse_files(): - - totalTime = [] - sumTotal = [] - speedUp = [] - parallelEff = [] - - for i in range(0,numOfSeries): # Loop over each data series - - # Get path to set of files - path, name = ntpath.split(inputFileNames[i]) - - # Get each file that starts with the cmd line arg - file_list = glob.glob(inputFileNames[i] + "*") - - threadList.append([]) - - # Remove path from file names - for j in range(0,len(file_list)): - p, filename = ntpath.split(file_list[j]) - file_list[j] = filename - - # Create a list of threads using the list of files - for fileName in file_list: - s = re.split(r'[_.]+',fileName) - threadList[i].append(int(s[1])) - - # Re-add path once each file has been found - if len(path) != 0: - for j in range(0,len(file_list)): - file_list[j] = path + '/' + file_list[j] - - # Sort the thread list in ascending order and save the indices - sorted_indices = np.argsort(threadList[i]) - threadList[i].sort() - - # Sort the file list in ascending order according
to the thread number - file_list = [ file_list[j] for j in sorted_indices] - - parse_header(file_list[0]) - - branch[i] = branch[i].replace("_", "\\_") - - #version.append("$\\textrm{%s}$"%str(branch[i]))# + " " + revision[i])# + "\n" + hydro_scheme[i] + -# "\n" + hydro_kernel[i] + r", $N_{ngb}=%d$"%float(hydro_neighbours[i]) + -# r", $\eta=%.3f$"%float(hydro_eta[i])) - totalTime.append([]) - speedUp.append([]) - parallelEff.append([]) - - # Loop over all files for a given series and load the times - for j in range(0,len(file_list)): - times = np.loadtxt(file_list[j],usecols=(9,)) - updates = np.loadtxt(file_list[j],usecols=(6,)) - totalTime[i].append(np.sum(times)) - - sumTotal.append(np.sum(totalTime[i])) - - # Sort the total times in descending order - sorted_indices = np.argsort(sumTotal)[::-1] - - totalTime = [ totalTime[j] for j in sorted_indices] - branchNew = [ branch[j] for j in sorted_indices] - - for i in range(0,numOfSeries): - version.append("$\\textrm{%s}$"%str(branchNew[i])) - - global numTimesteps - numTimesteps = len(times) - - # Find speed-up and parallel efficiency - for i in range(0,numOfSeries): - for j in range(0,len(file_list)): - speedUp[i].append(totalTime[i][0] / totalTime[i][j]) - parallelEff[i].append(speedUp[i][j] / threadList[i][j]) - - return (totalTime,speedUp,parallelEff) - -def print_results(totalTime,parallelEff,version): - - for i in range(0,numOfSeries): - print " " - print "------------------------------------" - print version[i] - print "------------------------------------" - print "Wall clock time for: {} time steps".format(numTimesteps) - print "------------------------------------" - - for j in range(0,len(threadList[i])): - print str(threadList[i][j]) + " threads: {}".format(totalTime[i][j]) - - print " " - print "------------------------------------" - print "Parallel Efficiency for: {} time steps".format(numTimesteps) - print "------------------------------------" - - for j in range(0,len(threadList[i])): - print str(threadList[i][j]) + " threads: {}".format(parallelEff[i][j]) - - return - -# Returns a lighter/darker version of the colour -def color_variant(hex_color, brightness_offset=1): - - rgb_hex = [hex_color[x:x+2] for x in [1, 3, 5]] - new_rgb_int = [int(hex_value, 16) + brightness_offset for hex_value in rgb_hex] - new_rgb_int = [min([255, max([0, i])]) for i in new_rgb_int] # make sure new values are between 0 and 255 - # hex() produces "0x88", we want just "88" - - return "#" + "".join([hex(i)[2:] for i in new_rgb_int]) - -def plot_results(totalTime,speedUp,parallelEff,numSeries): - - fig, axarr = plt.subplots(2, 2, figsize=(10,10), frameon=True) - speedUpPlot = axarr[0, 0] - parallelEffPlot = axarr[0, 1] - totalTimePlot = axarr[1, 0] - emptyPlot = axarr[1, 1] - - # Plot speed up - speedUpPlot.plot(threadList[0],threadList[0], linestyle='--', lw=1.5, color='0.2') - for i in range(0,numSeries): - speedUpPlot.plot(threadList[0],speedUp[i],linestyle[i],label=version[i]) - - speedUpPlot.set_ylabel("${\\rm Speed\\textendash up}$", labelpad=0.) - speedUpPlot.set_xlabel("${\\rm Threads}$", labelpad=0.) 
- speedUpPlot.set_xlim([0.7,threadList[0][-1]+1]) - speedUpPlot.set_ylim([0.7,threadList[0][-1]+1]) - - # Plot parallel efficiency - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [1,1], 'k--', lw=1.5, color='0.2') - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.9,0.9], 'k--', lw=1.5, color='0.2') - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.75,0.75], 'k--', lw=1.5, color='0.2') - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.5,0.5], 'k--', lw=1.5, color='0.2') - for i in range(0,numSeries): - parallelEffPlot.plot(threadList[0],parallelEff[i],linestyle[i]) - - parallelEffPlot.set_xscale('log') - parallelEffPlot.set_ylabel("${\\rm Parallel~efficiency}$", labelpad=0.) - parallelEffPlot.set_xlabel("${\\rm Threads}$", labelpad=0.) - parallelEffPlot.set_ylim([0,1.1]) - parallelEffPlot.set_xlim([0.9,10**(np.floor(np.log10(threadList[0][-1]))+0.5)]) - - # Plot time to solution - for i in range(0,numOfSeries): - pts = [1, 10**np.floor(np.log10(threadList[i][-1])+1)] - totalTimePlot.loglog(pts,totalTime[i][0]/pts, 'k--', lw=1., color='0.2') - totalTimePlot.loglog(threadList[i],totalTime[i],linestyle[i],label=version[i]) - - y_min = 10**np.floor(np.log10(np.min(totalTime[:][0])*0.6)) - y_max = 1.0*10**np.floor(np.log10(np.max(totalTime[:][0]) * 1.5)+1) - totalTimePlot.set_xscale('log') - totalTimePlot.set_xlabel("${\\rm Threads}$", labelpad=0.) - totalTimePlot.set_ylabel("${\\rm Time~to~solution}~[{\\rm ms}]$", labelpad=0.) - totalTimePlot.set_xlim([0.9, 10**(np.floor(np.log10(threadList[0][-1]))+0.5)]) - totalTimePlot.set_ylim(y_min, y_max) - - totalTimePlot.legend(bbox_to_anchor=(1.21, 0.97), loc=2, borderaxespad=0.,prop={'size':12}, frameon=False,title=legendTitle) - emptyPlot.axis('off') - - for i, txt in enumerate(threadList[0]): - if 2**np.floor(np.log2(threadList[0][i])) == threadList[0][i]: # only powers of 2 - speedUpPlot.annotate("$%s$"%txt, (threadList[0][i],speedUp[0][i]), (threadList[0][i],speedUp[0][i] + 0.3), color=hexcols[0]) - parallelEffPlot.annotate("$%s$"%txt, (threadList[0][i],parallelEff[0][i]), (threadList[0][i], parallelEff[0][i]+0.02), color=hexcols[0]) - totalTimePlot.annotate("$%s$"%txt, (threadList[0][i],totalTime[0][i]), (threadList[0][i], totalTime[0][i]*1.1), color=hexcols[0]) - - #fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps of Cosmo Volume\n Cmd Line: {}, Platform: {}".format(numTimesteps),cmdLine,platform)) - fig.suptitle("${\\rm Speed\\textendash up,~parallel~efficiency~and~time~to~solution~for}~%d~{\\rm time\\textendash steps}$"%numTimesteps, fontsize=16) - - return - -# Calculate results -(totalTime,speedUp,parallelEff) = parse_files() - -legendTitle = version[0] - -plot_results(totalTime,speedUp,parallelEff,numOfSeries) - -print_results(totalTime,parallelEff,version) - -# And plot -plt.show() diff --git a/examples/plot_scaling_results_breakdown.py b/examples/plot_scaling_results_breakdown.py deleted file mode 100755 index 6a87e42bcd393d543187e768e31a15bc56f1ae6a..0000000000000000000000000000000000000000 --- a/examples/plot_scaling_results_breakdown.py +++ /dev/null @@ -1,289 +0,0 @@ -#!/usr/bin/env python -# -# Usage: -# python plot_scaling_results.py input-file1-ext input-file2-ext ... -# -# Description: -# Plots speed up, parallel efficiency and time to solution given a "timesteps" output file generated by SWIFT. 
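The breakdown variant that follows differs from plot_scaling_results.py mainly in its third panel: instead of raw time to solution it plots time multiplied by core count and stacks the series on top of each other so that fill_between can shade the bands. A minimal sketch of that transformation, with illustrative names and data (invented here, not taken from the script):

    # Minimal sketch: rescale each series to time x cores, then stack each
    # series on the previous one for the shaded breakdown plot.
    threads = [1, 2, 4]
    times = [[100.0, 60.0, 40.0], [50.0, 30.0, 20.0]]  # illustrative series
    for i in range(len(times)):
        for j, n in enumerate(threads):
            times[i][j] *= n                    # time x no. of cores
            if i > 0:
                times[i][j] += times[i - 1][j]  # cumulative stack
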
-# -# Example: -# python plot_scaling_results.py _threads_cosma_stdout.txt _threads_knl_stdout.txt -# -# The working directory should contain files 1_threads_cosma_stdout.txt - 64_threads_cosma_stdout.txt and 1_threads_knl_stdout.txt - 64_threads_knl_stdout.txt, i.e. wall clock time for each run using a given number of threads - -import sys -import glob -import re -import numpy as np -import matplotlib.pyplot as plt -import scipy.stats -import ntpath - -params = {'axes.labelsize': 14, -'axes.titlesize': 18, -'font.size': 12, -'legend.fontsize': 12, -'xtick.labelsize': 14, -'ytick.labelsize': 14, -'text.usetex': True, -'figure.subplot.left' : 0.055, -'figure.subplot.right' : 0.98 , -'figure.subplot.bottom' : 0.05 , -'figure.subplot.top' : 0.95 , -'figure.subplot.wspace' : 0.14 , -'figure.subplot.hspace' : 0.12 , -'lines.markersize' : 6, -'lines.linewidth' : 3., -'text.latex.unicode': True } -plt.rcParams.update(params) -plt.rc('font',**{'family':'sans-serif','sans-serif':['Times']}) - -version = [] -branch = [] -revision = [] -hydro_scheme = [] -hydro_kernel = [] -hydro_neighbours = [] -hydro_eta = [] -threadList = [] -hexcols = ['#332288', '#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77', - '#CC6677', '#882255', '#AA4499', '#661100', '#6699CC', '#AA4466', - '#4477AA'] -linestyle = (hexcols[0],hexcols[1],hexcols[3],hexcols[5],hexcols[6],hexcols[8],hexcols[2],hexcols[4],hexcols[7],hexcols[9]) -numTimesteps = 0 -legendTitle = ' ' - -inputFileNames = [] - -# Work out how many data series there are -if len(sys.argv) == 1: - print "Please specify an input file in the arguments." - sys.exit() -else: - for fileName in sys.argv[1:]: - inputFileNames.append(fileName) - numOfSeries = int(len(sys.argv) - 1) - -# Get the names of the branch, Git revision, hydro scheme and hydro kernel -def parse_header(inputFile): - with open(inputFile, 'r') as f: - found_end = False - for line in f: - if 'Branch:' in line: - s = line.split() - line = s[2:] - branch.append(" ".join(line)) - elif 'Revision:' in line: - s = line.split() - revision.append(s[2]) - elif 'Hydrodynamic scheme:' in line: - line = line[2:-1] - s = line.split() - line = s[2:] - hydro_scheme.append(" ".join(line)) - elif 'Hydrodynamic kernel:' in line: - line = line[2:-1] - s = line.split() - line = s[2:5] - hydro_kernel.append(" ".join(line)) - elif 'neighbours:' in line: - s = line.split() - hydro_neighbours.append(s[4]) - elif 'Eta:' in line: - s = line.split() - hydro_eta.append(s[2]) - return - -# Parse file and return total time taken, speed up and parallel efficiency -def parse_files(): - - totalTime = [] - sumTotal = [] - speedUp = [] - parallelEff = [] - - for i in range(0,numOfSeries): # Loop over each data series - - # Get path to set of files - path, name = ntpath.split(inputFileNames[i]) - - # Get each file that starts with the cmd line arg - file_list = glob.glob(inputFileNames[i] + "*") - - threadList.append([]) - - # Remove path from file names - for j in range(0,len(file_list)): - p, filename = ntpath.split(file_list[j]) - file_list[j] = filename - - # Create a list of threads using the list of files - for fileName in file_list: - s = re.split(r'[_.]+',fileName) - threadList[i].append(int(s[1])) - - # Re-add path once each file has been found - if len(path) != 0: - for j in range(0,len(file_list)): - file_list[j] = path + '/' + file_list[j] - - # Sort the thread list in ascending order and save the indices - sorted_indices = np.argsort(threadList[i]) - threadList[i].sort() - - # Sort the file list in ascending order according
to the thread number - file_list = [ file_list[j] for j in sorted_indices] - - parse_header(file_list[0]) - - branch[i] = branch[i].replace("_", "\\_") - - - #version.append("$\\textrm{%s}$"%str(branch[i]))# + " " + revision[i])# + "\n" + hydro_scheme[i] + -# "\n" + hydro_kernel[i] + r", $N_{ngb}=%d$"%float(hydro_neighbours[i]) + -# r", $\eta=%.3f$"%float(hydro_eta[i])) - totalTime.append([]) - speedUp.append([]) - parallelEff.append([]) - - # Loop over all files for a given series and load the times - for j in range(0,len(file_list)): - times = np.loadtxt(file_list[j],usecols=(9,)) - updates = np.loadtxt(file_list[j],usecols=(6,)) - totalTime[i].append(np.sum(times)) - - sumTotal.append(np.sum(totalTime[i])) - - # Sort the total times in descending order - sorted_indices = np.argsort(sumTotal)[::-1] - - totalTime = [ totalTime[j] for j in sorted_indices] - branchNew = [ branch[j] for j in sorted_indices] - - for i in range(0,numOfSeries): - version.append("$\\textrm{%s}$"%str(branchNew[i])) - - global numTimesteps - numTimesteps = len(times) - - # Find speed-up and parallel efficiency - for i in range(0,numOfSeries): - for j in range(0,len(file_list)): - speedUp[i].append(totalTime[i][0] / totalTime[i][j]) - parallelEff[i].append(speedUp[i][j] / threadList[i][j]) - - return (totalTime,speedUp,parallelEff) - -def print_results(totalTime,parallelEff,version): - - for i in range(0,numOfSeries): - print " " - print "------------------------------------" - print version[i] - print "------------------------------------" - print "Wall clock time for: {} time steps".format(numTimesteps) - print "------------------------------------" - - for j in range(0,len(threadList[i])): - print str(threadList[i][j]) + " threads: {}".format(totalTime[i][j]) - - print " " - print "------------------------------------" - print "Parallel Efficiency for: {} time steps".format(numTimesteps) - print "------------------------------------" - - for j in range(0,len(threadList[i])): - print str(threadList[i][j]) + " threads: {}".format(parallelEff[i][j]) - - return - -# Returns a lighter/darker version of the colour -def color_variant(hex_color, brightness_offset=1): - - rgb_hex = [hex_color[x:x+2] for x in [1, 3, 5]] - new_rgb_int = [int(hex_value, 16) + brightness_offset for hex_value in rgb_hex] - new_rgb_int = [min([255, max([0, i])]) for i in new_rgb_int] # make sure new values are between 0 and 255 - # hex() produces "0x88", we want just "88" - - return "#" + "".join([hex(i)[2:] for i in new_rgb_int]) - -def plot_results(totalTime,speedUp,parallelEff,numSeries): - - fig, axarr = plt.subplots(2, 2, figsize=(10,10), frameon=True) - speedUpPlot = axarr[0, 0] - parallelEffPlot = axarr[0, 1] - totalTimePlot = axarr[1, 0] - emptyPlot = axarr[1, 1] - - # Plot speed up - speedUpPlot.plot(threadList[0],threadList[0], linestyle='--', lw=1.5, color='0.2') - for i in range(0,numSeries): - speedUpPlot.plot(threadList[0],speedUp[i],linestyle[i],label=version[i]) - - speedUpPlot.set_ylabel("${\\rm Speed\\textendash up}$", labelpad=0.) - speedUpPlot.set_xlabel("${\\rm Threads}$", labelpad=0.) 
- speedUpPlot.set_xlim([0.7,threadList[0][-1]+1]) - speedUpPlot.set_ylim([0.7,threadList[0][-1]+1]) - - # Plot parallel efficiency - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [1,1], 'k--', lw=1.5, color='0.2') - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.9,0.9], 'k--', lw=1.5, color='0.2') - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.75,0.75], 'k--', lw=1.5, color='0.2') - parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.5,0.5], 'k--', lw=1.5, color='0.2') - for i in range(0,numSeries): - parallelEffPlot.plot(threadList[0],parallelEff[i],linestyle[i]) - - parallelEffPlot.set_xscale('log') - parallelEffPlot.set_ylabel("${\\rm Parallel~efficiency}$", labelpad=0.) - parallelEffPlot.set_xlabel("${\\rm Threads}$", labelpad=0.) - parallelEffPlot.set_ylim([0,1.1]) - parallelEffPlot.set_xlim([0.9,10**(np.floor(np.log10(threadList[0][-1]))+0.5)]) - - # Plot time to solution - for i in range(0,numSeries): - for j in range(0,len(threadList[0])): - totalTime[i][j] = totalTime[i][j] * threadList[i][j] - if i > 1: - totalTime[i][j] = totalTime[i][j] + totalTime[i-1][j] - totalTimePlot.plot(threadList[0],totalTime[i],linestyle[i],label=version[i]) - - if i > 1: - colour = color_variant(linestyle[i],100) - totalTimePlot.fill_between(threadList[0],np.array(totalTime[i]),np.array(totalTime[i-1]), facecolor=colour) - elif i==1: - colour = color_variant(linestyle[i],100) - totalTimePlot.fill_between(threadList[0], totalTime[i],facecolor=colour) - - totalTimePlot.set_xscale('log') - totalTimePlot.ticklabel_format(style='sci', axis='y', scilimits=(0,0)) - totalTimePlot.set_xlabel("${\\rm Threads}$", labelpad=0.) - totalTimePlot.set_ylabel("${\\rm Time~to~solution~x~No.~of~cores}~[{\\rm ms}]$", labelpad=0.) 
- totalTimePlot.set_xlim([0.9, 10**(np.floor(np.log10(threadList[0][-1]))+0.5)]) - #totalTimePlot.set_ylim(y_min, y_max) - - totalTimePlot.legend(bbox_to_anchor=(1.21, 0.97), loc=2, borderaxespad=0.,prop={'size':12}, frameon=False,title=legendTitle) - emptyPlot.axis('off') - - for i, txt in enumerate(threadList[0]): - if 2**np.floor(np.log2(threadList[0][i])) == threadList[0][i]: # only powers of 2 - speedUpPlot.annotate("$%s$"%txt, (threadList[0][i],speedUp[0][i]), (threadList[0][i],speedUp[0][i] + 0.3), color=hexcols[0]) - parallelEffPlot.annotate("$%s$"%txt, (threadList[0][i],parallelEff[0][i]), (threadList[0][i], parallelEff[0][i]+0.02), color=hexcols[0]) - totalTimePlot.annotate("$%s$"%txt, (threadList[0][i],totalTime[0][i]), (threadList[0][i], totalTime[0][i]*1.1), color=hexcols[0]) - - #fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps of Cosmo Volume\n Cmd Line: {}, Platform: {}".format(numTimesteps),cmdLine,platform)) - fig.suptitle("${\\rm Speed\\textendash up,~parallel~efficiency~and~time~to~solution~x~no.~of~cores~for}~%d~{\\rm time\\textendash steps}$"%numTimesteps, fontsize=16) - - return - -# Calculate results -(totalTime,speedUp,parallelEff) = parse_files() - -legendTitle = version[0] - -plot_results(totalTime,speedUp,parallelEff,numOfSeries) - -print_results(totalTime,parallelEff,version) - -# And plot -plt.show() diff --git a/examples/analyse_dump_cells.py b/tools/analyse_dump_cells.py similarity index 85% rename from examples/analyse_dump_cells.py rename to tools/analyse_dump_cells.py index 2adfaf319e9c0da33f86a6158da68e6620c47361..2216b5f5fe6aa0c0d9dcc29a8abf0f263d2c3cc4 100755 --- a/examples/analyse_dump_cells.py +++ b/tools/analyse_dump_cells.py @@ -47,13 +47,13 @@ mpicol = 20 # Command-line arguments. if len(sys.argv) < 5: - print "usage: ", sys.argv[0], " nx ny nz cell1.dat cell2.dat ..." + print("usage: ", sys.argv[0], " nx ny nz cell1.dat cell2.dat ...") sys.exit(1) nx = int(sys.argv[1]) ny = int(sys.argv[2]) nz = int(sys.argv[3]) -print "# x y z onedge" +print("# x y z onedge") allactives = [] onedge = 0 tcount = 0 @@ -65,28 +65,28 @@ for i in range(4, len(sys.argv)): continue # Select cells that are on the current rank and are top-level cells. - rdata = data[data[:,localcol] == 1] - tdata = rdata[rdata[:,topcol] == 1] + rdata = data[data[:, localcol] == 1] + tdata = rdata[rdata[:, topcol] == 1] # Separation of the cells is in data. - xwidth = tdata[0,xwcol] - ywidth = tdata[0,ywcol] - zwidth = tdata[0,zwcol] + xwidth = tdata[0, xwcol] + ywidth = tdata[0, ywcol] + zwidth = tdata[0, zwcol] # Fill space nx, ny,n nz with all toplevel cells and flag their active # state. - space = np.zeros((nx,ny,nz)) + space = np.zeros((nx, ny, nz)) actives = [] for line in tdata: ix = int(np.rint(line[xcol] / xwidth)) iy = int(np.rint(line[ycol] / ywidth)) iz = int(np.rint(line[zcol] / zwidth)) active = int(line[activecol]) - space[ix,iy,iz] = 1 + active + space[ix, iy, iz] = 1 + active tcount = tcount + 1 if active == 1: actives.append([ix, iy, iz, line]) - + # Report all active cells and flag any without 26 neighbours. These are # on the edge of the partition volume and will have foreign neighbour # cells. 
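For reference, the edge test applied in the next hunk boils down to counting occupied cells in the 3x3x3 stencil around each active top-level cell: fewer than 27 occupied positions (the cell itself plus 26 neighbours) means the cell lies on the edge of the partition. A minimal standalone sketch on a toy occupancy grid (the grid contents and helper name are illustrative, not part of the tool):

import numpy as np

# Toy occupancy grid: 1 where a top-level cell exists on this rank.
space = np.zeros((4, 4, 4))
space[1:3, 1:3, 1:3] = 1  # a 2x2x2 block of "local" cells

def has_26_neighbours(space, ix, iy, iz):
    # Count the cell itself plus every occupied position in the surrounding
    # 3x3x3 stencil; 27 hits means the cell is fully surrounded.
    count = 0
    for dx in (-1, 0, 1):
        for dy in (-1, 0, 1):
            for dz in (-1, 0, 1):
                x, y, z = ix + dx, iy + dy, iz + dz
                if (
                    0 <= x < space.shape[0]
                    and 0 <= y < space.shape[1]
                    and 0 <= z < space.shape[2]
                    and space[x, y, z] > 0
                ):
                    count += 1
    return count == 27

print(has_26_neighbours(space, 1, 1, 1))  # False: this cell sits on the block's edge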
@@ -116,13 +116,12 @@ for i in range(4, len(sys.argv)): count = count + 1 if count < 27: onedge = onedge + 1 - print active[3][0], active[3][1], active[3][2], 1 + print(active[3][0], active[3][1], active[3][2], 1) else: - print active[3][0], active[3][1], active[3][2], 0 + print(active[3][0], active[3][1], active[3][2], 0) allactives.extend(actives) -print "# top cells: ", tcount, " active: ", len(allactives), " on edge: ", onedge +print("# top cells: ", tcount, " active: ", len(allactives), " on edge: ", onedge) sys.exit(0) - diff --git a/tools/analyse_runtime.py b/tools/analyse_runtime.py new file mode 100755 index 0000000000000000000000000000000000000000..5cf2bbce7f44d5ddca6c6f6c2f372ead835bd569 --- /dev/null +++ b/tools/analyse_runtime.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python + +################################################################################ +# This file is part of SWIFT. +# Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +################################################################################ + +import re +import sys +import matplotlib + +matplotlib.use("Agg") +from pylab import * + +# Plot parameters +params = { + "axes.labelsize": 10, + "axes.titlesize": 10, + "font.size": 12, + "legend.fontsize": 12, + "xtick.labelsize": 10, + "ytick.labelsize": 10, + "text.usetex": True, + "figure.figsize": (6.45, 6.45), + "figure.subplot.left": 0.06, + "figure.subplot.right": 0.99, + "figure.subplot.bottom": 0.06, + "figure.subplot.top": 0.99, + "figure.subplot.wspace": 0.21, + "figure.subplot.hspace": 0.13, + "lines.markersize": 6, + "lines.linewidth": 3.0, + "text.latex.unicode": True, +} +rcParams.update(params) + +threshold = 0.008 + +num_files = len(sys.argv) - 1 + +labels = [ + "Gpart assignment", + "Mesh comunication", + "Forward Fourier transform", + "Green function", + "Backwards Fourier transform", + "engine_recompute_displacement_constraint:", + "engine_exchange_top_multipoles:", + "updating particle counts", + "Making gravity tasks", + "Making hydro tasks", + "Splitting tasks", + "Counting and linking tasks", + "Setting super-pointers", + "Making extra hydroloop tasks", + "Linking gravity tasks", + "Creating send tasks", + "Exchanging cell tags", + "Creating recv tasks", + "Setting unlocks", + "Ranking the tasks", + "scheduler_reweight:", + "space_list_useful_top_level_cells:", + "space_rebuild:", + "engine_drift_all:", + "engine_unskip:", + "engine_collect_end_of_step:", + "engine_launch:", + "writing particle properties", + "engine_repartition:", + "engine_exchange_cells:", + "Dumping restart files", + "engine_print_stats:", + "engine_marktasks:", + "Reading initial conditions", + "engine_print_task_counts:", + "engine_drift_top_multipoles:", + "Communicating rebuild flag", + "engine_split:", + "space_init", + "engine_init", + "engine_repartition_trigger" +] +is_rebuild = [ + 1, + 
1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    0,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0
+]
+times = np.zeros(len(labels))
+counts = np.zeros(len(labels))
+
+cols = [
+    "0.5",
+    "#332288",
+    "#88CCEE",
+    "#44AA99",
+    "#117733",
+    "#999933",
+    "#DDCC77",
+    "#CC6677",
+    "#882255",
+    "#AA4499",
+    "#661100",
+    "#6699CC",
+    "#AA4466",
+    "#4477AA",
+]
+
+total_time = 0
+lastline = ""
+
+for i in range(num_files):
+
+    filename = sys.argv[i + 1]
+    print("Analysing %s" % filename)
+
+    # Open stdout file
+    file = open(filename, "r")
+
+    # Search the different phrases
+    for line in file:
+
+        # Loop over the possible labels
+        for i in range(len(labels)):
+
+            # Extract the different blocks
+            if re.search("%s took" % labels[i], line):
+                counts[i] += 1.0
+                times[i] += float(
+                    re.findall(r"[+-]?((\d+\.?\d*)|(\.\d+))", line)[-1][0]
+                )
+
+        # Find the last line with meaningful output (avoid crash report, batch system stuff, ...)
+        if re.findall(r"\[[0-9]{4}\][ ]\[*", line) or re.findall(
+            r"^\[[0-9]*[.][0-9]+\][ ]", line
+        ):
+            lastline = line
+
+    # Total run time
+    total_time += float(re.findall(r"[+-]?([0-9]*[.])?[0-9]+", lastline)[1])
+
+# Convert to seconds
+times /= 1000.0
+
+# Total time
+total_measured_time = np.sum(times)
+print("\nTotal measured time: %.3f s" % total_measured_time)
+
+print("Total time: %f s\n" % total_time)
+
+# Ratios
+time_ratios = times / total_time
+
+# Better looking labels
+for i in range(len(labels)):
+    labels[i] = labels[i].replace("_", " ")
+    labels[i] = labels[i].replace(":", "")
+    labels[i] = labels[i].title()
+
+times = np.array(times)
+time_ratios = np.array(time_ratios)
+is_rebuild = np.array(is_rebuild)
+
+# Sort in order of importance
+order = np.argsort(-times)
+times = times[order]
+counts = counts[order]
+time_ratios = time_ratios[order]
+is_rebuild = is_rebuild[order]
+labels = np.take(labels, order)
+
+# Keep only the important components
+important_times = [0.0]
+important_ratios = [0.0]
+important_labels = ["Others (all below %.1f\%%)" % (threshold * 100)]
+important_is_rebuild = [0]
+need_print = True
+print("Time spent in the different code sections:")
+for i in range(len(labels)):
+    if time_ratios[i] > threshold:
+        important_times.append(times[i])
+        important_ratios.append(time_ratios[i])
+        important_labels.append(labels[i])
+        important_is_rebuild.append(is_rebuild[i])
+    else:
+        if need_print:
+            print("Elements in 'Other' category (<%.1f%%):" % (threshold * 100))
+            need_print = False
+        important_times[0] += times[i]
+        important_ratios[0] += time_ratios[i]
+
+    print(" - '%-40s' (%5d calls): %.4f%%" % (labels[i], counts[i], time_ratios[i] * 100))
+
+# Anything unaccounted for?
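+# (wall-clock time that none of the labelled phases above claimed; a large
+# value in the line below usually means a phase is missing from 'labels')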
+print( + "\nUnaccounted for: %.4f%%\n" + % (100 * (total_time - total_measured_time) / total_time) +) + +important_ratios = np.array(important_ratios) +important_is_rebuild = np.array(important_is_rebuild) + +figure() + + +def func(pct): + return "$%4.2f\\%%$" % pct + + +pie, _, _ = pie( + important_ratios, + explode=important_is_rebuild * 0.2, + autopct=lambda pct: func(pct), + textprops=dict(color="0.1", fontsize=14), + labeldistance=0.7, + pctdistance=0.85, + startangle=-15, + colors=cols, +) +legend(pie, important_labels, title="SWIFT operations", loc="upper left") + +savefig("time_pie.pdf", dpi=150) diff --git a/examples/check_interactions.sh b/tools/check_interactions.sh similarity index 100% rename from examples/check_interactions.sh rename to tools/check_interactions.sh diff --git a/tools/check_ngbs.py b/tools/check_ngbs.py new file mode 100755 index 0000000000000000000000000000000000000000..648308cb4b2c142fba3ca8e25a024113d2d082f2 --- /dev/null +++ b/tools/check_ngbs.py @@ -0,0 +1,418 @@ +#!/usr/bin/env python + +import h5py as h +import numpy as np +import matplotlib + +matplotlib.use("Agg") +from pylab import * +import os.path + +kernel_gamma = 1.825742 +kernel_gamma2 = kernel_gamma * kernel_gamma +kernel_gamma_dim = np.power(kernel_gamma, 3) +hydro_dimension_unit_sphere = 4.0 * np.pi / 3.0 +kernel_norm = hydro_dimension_unit_sphere * kernel_gamma_dim +error = False + +inputFile1 = "" +inputFile2 = "" + +# Compare the values of two floats +def isclose(a, b, rel_tol=1e-09, abs_tol=0.0): + return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol) + + +# Check list of density neighbours and check that they are correct. +def check_density_neighbours( + pids, ngb_ids_naive, ngb_ids_sort, mask, pos, h_naive, h_sort, num_invalid, acc +): + + for k in range(0, num_invalid): + + # Filter neighbour lists for valid particle ids + filter_neigh_naive = [i for i in ngb_ids_naive[mask][k] if i > -1] + filter_neigh_sort = [i for i in ngb_ids_sort[mask][k] if i > -1] + + # Check neighbour lists for differences + id_list = set(filter_neigh_naive).symmetric_difference(set(filter_neigh_sort)) + + # Check for duplicate IDs + duplicate_check_naive = len(filter_neigh_naive) != len(set(filter_neigh_naive)) + duplicate_check_sort = len(filter_neigh_sort) != len(set(filter_neigh_sort)) + + if duplicate_check_naive: + print("Duplicate neighbour ID found in: ", inputFile1) + print(filter_neigh_naive) + return True + + if duplicate_check_sort: + print("Duplicate neighbour ID found in: ", inputFile2) + print(filter_neigh_sort) + return True + + pid = pids[mask][k] + + # Loop over discrepancies and check if they are actually neighbours + for pjd in id_list: + pi_pos = pos[np.where(pids == pid)] + pj_pos = pos[np.where(pids == pjd)] + + hi = h_naive[np.where(pids == pid)] + + dx = pi_pos[0][0] - pj_pos[0][0] + dy = pi_pos[0][1] - pj_pos[0][1] + dz = pi_pos[0][2] - pj_pos[0][2] + + # Correct for BCs + dx = nearest(dx) + dy = nearest(dy) + dz = nearest(dz) + + r2 = dx * dx + dy * dy + dz * dz + + hig2 = hi * hi * kernel_gamma2 + + diff = abs(r2 - hig2) + + print( + "Particle {} is missing {}, hig2: {}, r2: {}, |r2 - hig2|: {}".format( + pid, pjd, hig2, r2, diff + ) + ) + + if diff < acc * hig2: + print("Missing interaction due to precision issue will be ignored.") + else: + hi_2 = h_sort[np.where(pids == pid)] + + # If a neigbour is missing and the particle has the same h throw + # an error. 
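+                # (equal smoothing lengths imply equal search radii, so the
+                # two runs should have found identical neighbour sets and a
+                # mismatch is a real error; with different smoothing lengths
+                # the runs diverged earlier and the mismatch is expected)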
+ if isclose(hi, hi_2): + print( + "Missing interaction found but particle has the same smoothing length (hi_1: %e, hi_2: %e)." + % (hi, hi_2) + ) + return True + else: + print( + "Missing interaction due to different smoothing lengths will be ignored (hi_1: %e, hi_2: %e)." + % (hi, hi_2) + ) + + return False + + +# Check list of force neighbours and check that they are correct. +def check_force_neighbours( + pids, ngb_ids_naive, ngb_ids_sort, mask, pos, h_naive, h_sort, num_invalid, acc +): + + error_val = False + + for k in range(0, num_invalid): + + # Filter neighbour lists for valid particle ids + filter_neigh_naive = [i for i in ngb_ids_naive[mask][k] if i > -1] + filter_neigh_sort = [i for i in ngb_ids_sort[mask][k] if i > -1] + + # Check neighbour lists for differences + id_list = set(filter_neigh_naive).symmetric_difference(set(filter_neigh_sort)) + + pid = pids[mask][k] + + # Loop over discrepancies and check if they are actually neighbours + for pjd in id_list: + pi_pos = pos[np.where(pids == pid)] + pj_pos = pos[np.where(pids == pjd)] + + hi = h_naive[np.where(pids == pid)] + hj = h_naive[np.where(pids == pjd)] + + dx = pi_pos[0][0] - pj_pos[0][0] + dy = pi_pos[0][1] - pj_pos[0][1] + dz = pi_pos[0][2] - pj_pos[0][2] + + # Correct for BCs + dx = nearest(dx) + dy = nearest(dy) + dz = nearest(dz) + + r2 = dx * dx + dy * dy + dz * dz + + hig2 = hi * hi * kernel_gamma2 + hjg2 = hj * hj * kernel_gamma2 + + diff = abs(r2 - max(hig2, hjg2)) + + print( + "Particle {} is missing {}, hig2: {}, hjg2: {}, r2: {}, |r2 - max(hig2,hjg2)|: {}".format( + pid, pjd, hig2, hjg2, r2, diff + ) + ) + + if diff < acc * max(hig2, hjg2): + print("Missing interaction due to precision issue will be ignored.") + else: + hi_2 = h_sort[np.where(pids == pid)] + if isclose(hi, hi_2): + print( + "Missing interaction due to the same smoothing lengths will not be ignored (hi_1: %e, hi_2: %e)." + % (hi, hi_2) + ) + error_val = True + else: + print( + "Missing interaction due to different smoothing lengths will be ignored (hi_1: %e, hi_2: %e)." 
+ % (hi, hi_2) + ) + + return error_val + + +def nearest(dx): + if dx > 0.5 * box_size: + return dx - box_size + elif dx < -0.5 * box_size: + return dx + box_size + else: + return dx + + +# Parse command line arguments +if len(sys.argv) < 3: + print("Error: pass input files as arguments") + sys.exit() +else: + inputFile1 = sys.argv[1] + inputFile2 = sys.argv[2] + if os.path.exists(inputFile1) != 1: + print("\n{} does not exist!\n".format(inputFile1)) + sys.exit() + if os.path.exists(inputFile2) != 1: + print("\n{} does not exist!\n".format(inputFile2)) + sys.exit() + +# Open input files +file_naive = h.File(inputFile1, "r") +file_sort = h.File(inputFile2, "r") + +box_size = file_naive["/Header"].attrs["BoxSize"][0] + +# Read input file fields +ids_naive = file_naive["/PartType0/ParticleIDs"][:] +ids_sort = file_sort["/PartType0/ParticleIDs"][:] + +h_naive = file_naive["/PartType0/SmoothingLength"][:] +h_sort = file_sort["/PartType0/SmoothingLength"][:] + +pos_naive = file_naive["/PartType0/Coordinates"][:, :] +# pos_sort = file_sort["/PartType0/Coordinates"][:,:] + +num_density_naive = file_naive["/PartType0/Num_ngb_density"][:] +num_density_sort = file_sort["/PartType0/Num_ngb_density"][:] + +num_force_naive = file_naive["/PartType0/Num_ngb_force"][:] +num_force_sort = file_sort["/PartType0/Num_ngb_force"][:] + +neighbour_ids_density_naive = file_naive["/PartType0/Ids_ngb_density"][:] +neighbour_ids_density_sort = file_sort["/PartType0/Ids_ngb_density"][:] + +neighbour_ids_force_naive = file_naive["/PartType0/Ids_ngb_force"][:] +neighbour_ids_force_sort = file_sort["/PartType0/Ids_ngb_force"][:] + + +# wcount_naive = file_naive["/PartType0/Wcount"][:] +# wcount_sort = file_sort["/PartType0/Wcount"][:] +# +# wcount_naive = wcount_naive * np.power(h_naive,3) * kernel_norm +# wcount_sort = wcount_sort * np.power(h_sort,3) * kernel_norm + +# Cross check +max_density_ngbs_naive = np.max(num_density_naive) +max_density_ngbs_sort = np.max(num_density_sort) +max_force_ngbs_naive = np.max(num_force_naive) +max_force_ngbs_sort = np.max(num_force_sort) + +print(" Min Mean Max ") +print(" ---------------------") +print( + "Ngbs density naiv: ", + np.min(num_density_naive), + np.mean(num_density_naive), + max_density_ngbs_naive, +) +print( + "Ngbs density sort: ", + np.min(num_density_sort), + np.mean(num_density_sort), + max_density_ngbs_sort, +) +print( + "Ngbs force naiv: ", + np.min(num_force_naive), + np.mean(num_force_naive), + max_force_ngbs_naive, +) +print( + "Ngbs force sort: ", + np.min(num_force_sort), + np.mean(num_force_sort), + max_force_ngbs_sort, +) +# print "Wcount naiv: ", np.min(wcount_naive), np.mean(wcount_naive), np.max(wcount_naive) +# print "Wcount sort: ", np.min(wcount_sort), np.mean(wcount_sort), np.max(wcount_sort) + +# Sort +index_naive = np.argsort(ids_naive) +index_sort = np.argsort(ids_sort) + +num_density_naive = num_density_naive[index_naive] +num_density_sort = num_density_sort[index_sort] +num_force_naive = num_force_naive[index_naive] +num_force_sort = num_force_sort[index_sort] +ids_naive = ids_naive[index_naive] +ids_sort = ids_sort[index_sort] +neighbour_ids_density_naive = neighbour_ids_density_naive[index_naive] +neighbour_ids_density_sort = neighbour_ids_density_sort[index_sort] +neighbour_ids_force_naive = neighbour_ids_force_naive[index_naive] +neighbour_ids_force_sort = neighbour_ids_force_sort[index_sort] +# wcount_naive = wcount_naive[index_naive] +# wcount_sort = wcount_sort[index_sort] +h_naive = h_naive[index_naive] +h_sort = h_sort[index_sort] 
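+# (every per-particle array is reordered by its file's particle-ID sort so
+# that row k of the naive arrays and row k of the sorted arrays describe the
+# same particle in both outputs)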
+pos_naive = pos_naive[index_naive] +# pos_sort = pos_sort[index_sort] + +neighbour_length_naive = len(neighbour_ids_density_naive[0]) +neighbour_length_sort = len(neighbour_ids_density_sort[0]) + +# Check that input files are logging the same number of neighbours +if neighbour_length_naive != neighbour_length_sort: + print("Input files have logged different numbers of neighbour lengths!") + print("{} has logged: {} neighbours".format(inputFile1, neighbour_length_naive)) + print("{} has logged: {} neighbours".format(inputFile2, neighbour_length_sort)) + exit(1) + +if ( + max_density_ngbs_naive > neighbour_length_naive + or max_force_ngbs_naive > neighbour_length_naive + or max_density_ngbs_sort > neighbour_length_sort + or max_force_ngbs_sort > neighbour_length_sort +): + print("The number of neighbours has exceeded the number of neighbours logged.") + print("Modify NUM_OF_NEIGHBOURS in hydro_part.h to log more neighbours.") + print( + "The highest neighbour count is: ", + max( + max_density_ngbs_naive, + max_force_ngbs_naive, + max_density_ngbs_sort, + max_force_ngbs_sort, + ), + ) + exit(1) + +# First check +print("\n Min Max") +print(" ----------") +print( + "Differences for density: ", + min(num_density_naive - num_density_sort), + max(num_density_naive - num_density_sort), +) +print( + "Differences for force: ", + min(num_force_naive - num_force_sort), + max(num_force_naive - num_force_sort), +) + +# Get the IDs that are different +mask_density = num_density_naive != num_density_sort +mask_force = num_force_naive != num_force_sort +num_invalid_density = np.sum(mask_density) +num_invalid_force = np.sum(mask_force) + +print("\nNum non-zero density: ", num_invalid_density) +print("Num non-zero force: ", num_invalid_force) + +print("\nParticle IDs with incorrect densities") +print("----------------------------------------") +print(ids_naive[mask_density]) + +# Check density neighbour lists +error += check_density_neighbours( + ids_naive, + neighbour_ids_density_naive, + neighbour_ids_density_sort, + mask_density, + pos_naive, + h_naive, + h_sort, + num_invalid_density, + 2e-6, +) + +print("Num of density interactions", inputFile1) +print(num_density_naive[mask_density]) + +print("Num of density interactions", inputFile2) +print(num_density_sort[mask_density]) + +print("\nParticle IDs with incorrect forces") +print("------------------------------------") +print(ids_naive[mask_force]) + +# Check force neighbour lists +error += check_force_neighbours( + ids_naive, + neighbour_ids_force_naive, + neighbour_ids_force_sort, + mask_force, + pos_naive, + h_naive, + h_sort, + num_invalid_force, + 2e-6, +) + +print("Num of force interactions", inputFile1) +print(num_force_naive[mask_force]) + +# print "Smoothing lengths", inputFile1 +# print h_naive[mask_force] + +print("Num of force interactions", inputFile2) +print(num_force_sort[mask_force]) + +# print "Smoothing lengths", inputFile2 +# print h_sort[mask_force] + +# Statistics of h difference +h_relative = (h_naive - h_sort) / h_naive +print( + "h statistics: {} {} (Min, 1st Percentile)".format( + np.min(h_relative), np.percentile(h_relative, 1) + ) +) +print( + "h statistics: {} {} (Mean, Median)".format( + np.mean(h_relative), np.median(h_relative) + ) +) +print( + "h statistics: {} {} (Max, 99th Percentile)".format( + np.max(h_relative), np.percentile(h_relative, 99) + ) +) + +if error: + print("\n------------------") + print("Differences found.") + print("------------------") + exit(1) +else: + print("\n---------------------") + print("No 
differences found.") + print("---------------------") + exit(0) diff --git a/tools/combine_ics.py b/tools/combine_ics.py new file mode 100755 index 0000000000000000000000000000000000000000..447af1cc3d6d9c09e6322648a0dc5035382e857c --- /dev/null +++ b/tools/combine_ics.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python +""" +Usage: + combine_ics.py input_file.0.hdf5 merged_file.hdf5 + +This file combines Gadget-2 type 2 (i.e. hdf5) initial condition files +into a single file that can be digested by SWIFT. +This has mainly be tested for DM-only (parttype1) files but also works +smoothly for ICs including gas. The special case of a mass-table for +the DM particles is handled. No unit conversions are applied nor are +any scale-factors or h-factors changed. +The script applies some compression and checksum filters to the output +to save disk space. + +This file is part of SWIFT. +Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + +All Rights Reserved. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +""" + +import sys +import h5py as h5 +import numpy as np + +# First, we need to collect some information from the master file +main_file_name = str(sys.argv[1])[:-7] +print("Merging snapshots files with name", main_file_name) +master_file_name = main_file_name + ".0.hdf5" +print("Reading master information from", master_file_name) +master_file = h5.File(master_file_name, "r") +grp_header = master_file["/Header"] + +num_files = grp_header.attrs["NumFilesPerSnapshot"] +tot_num_parts = grp_header.attrs["NumPart_Total"] +tot_num_parts_high_word = grp_header.attrs["NumPart_Total"] +entropy_flag = grp_header.attrs["Flag_Entropy_ICs"] +box_size = grp_header.attrs["BoxSize"] +time = grp_header.attrs["Time"] + +# Combine the low- and high-words +for i in range(6): + tot_num_parts[i] += np.int64(tot_num_parts_high_word[i]) << 32 + +# Some basic information +print("Reading", tot_num_parts, "particles from", num_files, "files.") + + +# Check whether there is a mass table +DM_mass = 0.0 +mtable = grp_header.attrs.get("MassTable") +if mtable is not None: + DM_mass = grp_header.attrs["MassTable"][1] +if DM_mass != 0.0: + print("DM mass set to", DM_mass, "from the header mass table.") +else: + print("Reading DM mass from the particles.") + + +# Create the empty file +output_file_name = sys.argv[2] +output_file = h5.File(output_file_name, "w-") + + +# Header +grp = output_file.create_group("/Header") +grp.attrs["NumFilesPerSnapshot"] = 1 +grp.attrs["NumPart_Total"] = tot_num_parts +grp.attrs["NumPart_Total_HighWord"] = [0, 0, 0, 0, 0, 0] +grp.attrs["NumPart_ThisFile"] = tot_num_parts +grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +grp.attrs["BoxSize"] = box_size +grp.attrs["Flag_Entropy_ICs"] = entropy_flag +grp.attrs["Time"] = time + +# Create the particle groups +if tot_num_parts[0] > 0: + grp0 = output_file.create_group("/PartType0") +if tot_num_parts[1] > 0: + grp1 = output_file.create_group("/PartType1") +if 
tot_num_parts[4] > 0: + grp4 = output_file.create_group("/PartType4") +if tot_num_parts[5] > 0: + grp5 = output_file.create_group("/PartType5") + + +# Helper function to create the datasets we need +def create_set(grp, name, size, dim, dtype): + if dim == 1: + grp.create_dataset( + name, + (size,), + dtype=dtype, + chunks=True, + compression="gzip", + compression_opts=4, + shuffle=True, + fletcher32=True, + maxshape=(size,), + ) + else: + grp.create_dataset( + name, + (size, dim), + dtype=dtype, + chunks=True, + compression="gzip", + compression_opts=4, + shuffle=True, + fletcher32=True, + maxshape=(size, dim), + ) + + +# Create the required datasets +if tot_num_parts[0] > 0: + create_set(grp0, "Coordinates", tot_num_parts[0], 3, "d") + create_set(grp0, "Velocities", tot_num_parts[0], 3, "f") + create_set(grp0, "Masses", tot_num_parts[0], 1, "f") + create_set(grp0, "ParticleIDs", tot_num_parts[0], 1, "l") + create_set(grp0, "InternalEnergy", tot_num_parts[0], 1, "f") + create_set(grp0, "SmoothingLength", tot_num_parts[0], 1, "f") + +if tot_num_parts[1] > 0: + create_set(grp1, "Coordinates", tot_num_parts[1], 3, "d") + create_set(grp1, "Velocities", tot_num_parts[1], 3, "f") + create_set(grp1, "Masses", tot_num_parts[1], 1, "f") + create_set(grp1, "ParticleIDs", tot_num_parts[1], 1, "l") + +if tot_num_parts[4] > 0: + create_set(grp4, "Coordinates", tot_num_parts[4], 3, "d") + create_set(grp4, "Velocities", tot_num_parts[4], 3, "f") + create_set(grp4, "Masses", tot_num_parts[4], 1, "f") + create_set(grp4, "ParticleIDs", tot_num_parts[4], 1, "l") + +if tot_num_parts[5] > 0: + create_set(grp5, "Coordinates", tot_num_parts[5], 3, "d") + create_set(grp5, "Velocities", tot_num_parts[5], 3, "f") + create_set(grp5, "Masses", tot_num_parts[5], 1, "f") + create_set(grp5, "ParticleIDs", tot_num_parts[5], 1, "l") + +# Heavy-lifting ahead. Leave a last message. +print("Datasets created in output file") + + +# Special case of the non-zero mass table +if DM_mass != 0.0: + masses = np.ones(tot_num_parts[1], dtype=np.float) * DM_mass + grp1["Masses"][:] = masses + + +# Cumulative number of particles read/written +cumul_parts = [0, 0, 0, 0, 0, 0] + +# Loop over all the files that are part of the snapshots +for f in range(num_files): + + file_name = main_file_name + "." 
+ str(f) + ".hdf5" + file = h5.File(file_name, "r") + file_header = file["/Header"] + num_parts = file_header.attrs["NumPart_ThisFile"] + + print( + "Copying data from file", + f, + "/", + num_files, + ": num_parts = [", + num_parts[0], + num_parts[1], + num_parts[4], + num_parts[5], + "]", + ) + sys.stdout.flush() + + # Helper function to copy data + def copy_grp(name_new, name_old, ptype): + full_name_new = "/PartType" + str(ptype) + "/" + name_new + full_name_old = "/PartType" + str(ptype) + "/" + name_old + output_file[full_name_new][ + cumul_parts[ptype] : cumul_parts[ptype] + num_parts[ptype] + ] = file[full_name_old] + + def copy_grp_same_name(name, ptype): + copy_grp(name, name, ptype) + + if num_parts[0] > 0: + copy_grp_same_name("Coordinates", 0) + copy_grp_same_name("Velocities", 0) + copy_grp_same_name("Masses", 0) + copy_grp_same_name("ParticleIDs", 0) + copy_grp_same_name("InternalEnergy", 0) + copy_grp_same_name("SmoothingLength", 0) + + if num_parts[1] > 0: + copy_grp_same_name("Coordinates", 1) + copy_grp_same_name("Velocities", 1) + copy_grp_same_name("ParticleIDs", 1) + if DM_mass == 0.0: # Do not overwrite values if there was a mass table + copy_grp_same_name("Masses", 1) + + if num_parts[4] > 0: + copy_grp_same_name("Coordinates", 4) + copy_grp_same_name("Velocities", 4) + copy_grp_same_name("Masses", 4) + copy_grp_same_name("ParticleIDs", 4) + + if num_parts[5] > 0: + copy_grp_same_name("Coordinates", 5) + copy_grp_same_name("Velocities", 5) + copy_grp_same_name("Masses", 5) + copy_grp_same_name("ParticleIDs", 5) + + cumul_parts[0] += num_parts[0] + cumul_parts[1] += num_parts[1] + cumul_parts[4] += num_parts[4] + cumul_parts[5] += num_parts[5] + file.close() + +print("All done! SWIFT is waiting.") diff --git a/tools/plot_gravity_checks.py b/tools/plot_gravity_checks.py new file mode 100755 index 0000000000000000000000000000000000000000..cef81b86e9663bc7c9df2c9affaeb258501f57e6 --- /dev/null +++ b/tools/plot_gravity_checks.py @@ -0,0 +1,456 @@ +#!/usr/bin/env python + +import sys +import glob +import re +import numpy as np +import matplotlib.pyplot as plt + +params = { + "axes.labelsize": 14, + "axes.titlesize": 18, + "font.size": 12, + "legend.fontsize": 12, + "xtick.labelsize": 14, + "ytick.labelsize": 14, + "text.usetex": True, + "figure.figsize": (12, 10), + "figure.subplot.left": 0.06, + "figure.subplot.right": 0.99, + "figure.subplot.bottom": 0.06, + "figure.subplot.top": 0.99, + "figure.subplot.wspace": 0.14, + "figure.subplot.hspace": 0.14, + "lines.markersize": 6, + "lines.linewidth": 3.0, + "text.latex.unicode": True, +} +plt.rcParams.update(params) +plt.rc("font", **{"family": "sans-serif", "sans-serif": ["Times"]}) + +min_error = 1e-7 +max_error = 3e-1 +num_bins = 64 + +# Construct the bins +bin_edges = np.linspace(np.log10(min_error), np.log10(max_error), num_bins + 1) +bin_size = (np.log10(max_error) - np.log10(min_error)) / num_bins +bins = 0.5 * (bin_edges[1:] + bin_edges[:-1]) +bin_edges = 10 ** bin_edges +bins = 10 ** bins + +# Colours +cols = ["#332288", "#88CCEE", "#117733", "#DDCC77", "#CC6677"] + +# Time-step to plot +step = int(sys.argv[1]) +periodic = int(sys.argv[2]) + +# Find the files for the different expansion orders +order_list = glob.glob("gravity_checks_swift_step%.4d_order*.dat" % step) +num_order = len(order_list) + +# Get the multipole orders +order = np.zeros(num_order) +for i in range(num_order): + order[i] = int(order_list[i][35]) +order = sorted(order) +order_list = sorted(order_list) + +# Read the exact accelerations first +if 
periodic: + data = np.loadtxt("gravity_checks_exact_periodic_step%.4d.dat" % step) +else: + data = np.loadtxt("gravity_checks_exact_step%.4d.dat" % step) +exact_ids = data[:, 0] +exact_pos = data[:, 1:4] +exact_a = data[:, 4:7] +exact_pot = data[:, 7] +# Sort stuff +sort_index = np.argsort(exact_ids) +exact_ids = exact_ids[sort_index] +exact_pos = exact_pos[sort_index, :] +exact_a = exact_a[sort_index, :] +exact_pot = exact_pot[sort_index] +exact_a_norm = np.sqrt(exact_a[:, 0] ** 2 + exact_a[:, 1] ** 2 + exact_a[:, 2] ** 2) + +print("Number of particles tested:", np.size(exact_ids)) + +# Start the plot +plt.figure() + +count = 0 + +# Get the Gadget-2 data if existing +if periodic: + gadget2_file_list = glob.glob("forcetest_gadget2_periodic.txt") +else: + gadget2_file_list = glob.glob("forcetest_gadget2.txt") +if len(gadget2_file_list) != 0: + + gadget2_data = np.loadtxt(gadget2_file_list[0]) + gadget2_ids = gadget2_data[:, 0] + gadget2_pos = gadget2_data[:, 1:4] + gadget2_a_exact = gadget2_data[:, 4:7] + gadget2_a_grav = gadget2_data[:, 7:10] + + # Sort stuff + sort_index = np.argsort(gadget2_ids) + gadget2_ids = gadget2_ids[sort_index] + gadget2_pos = gadget2_pos[sort_index, :] + gadget2_a_exact = gadget2_a_exact[sort_index, :] + gadget2_exact_a_norm = np.sqrt( + gadget2_a_exact[:, 0] ** 2 + + gadget2_a_exact[:, 1] ** 2 + + gadget2_a_exact[:, 2] ** 2 + ) + gadget2_a_grav = gadget2_a_grav[sort_index, :] + + # Cross-checks + if not np.array_equal(exact_ids, gadget2_ids): + print("Comparing different IDs !") + + if np.max(np.abs(exact_pos - gadget2_pos) / np.abs(gadget2_pos)) > 1e-6: + print("Comparing different positions ! max difference:") + index = np.argmax( + exact_pos[:, 0] ** 2 + + exact_pos[:, 1] ** 2 + + exact_pos[:, 2] ** 2 + - gadget2_pos[:, 0] ** 2 + - gadget2_pos[:, 1] ** 2 + - gadget2_pos[:, 2] ** 2 + ) + print( + "Gadget2 (id=%d):" % gadget2_ids[index], + gadget2_pos[index, :], + "exact (id=%d):" % exact_ids[index], + exact_pos[index, :], + "\n", + ) + + diff = np.abs(exact_a_norm - gadget2_exact_a_norm) / np.abs(gadget2_exact_a_norm) + max_diff = np.max(diff) + if max_diff > 2e-6: + print("Comparing different exact accelerations !") + print( + "Median=", + np.median(diff), + "Mean=", + np.mean(diff), + "99%=", + np.percentile(diff, 99), + ) + print("max difference ( relative diff =", max_diff, "):") + # index = np.argmax(exact_a[:,0]**2 + exact_a[:,1]**2 + exact_a[:,2]**2 - gadget2_a_exact[:,0]**2 - gadget2_a_exact[:,1]**2 - gadget2_a_exact[:,2]**2) + index = np.argmax(diff) + print( + "a_exact --- Gadget2:", + gadget2_a_exact[index, :], + "exact:", + exact_a[index, :], + ) + print( + "pos --- Gadget2: (id=%d):" % gadget2_ids[index], + gadget2_pos[index, :], + "exact (id=%d):" % gadget2_ids[index], + gadget2_pos[index, :], + "\n", + ) + + # Compute the error norm + diff = gadget2_a_exact - gadget2_a_grav + + norm_diff = np.sqrt(diff[:, 0] ** 2 + diff[:, 1] ** 2 + diff[:, 2] ** 2) + norm_a = np.sqrt( + gadget2_a_exact[:, 0] ** 2 + + gadget2_a_exact[:, 1] ** 2 + + gadget2_a_exact[:, 2] ** 2 + ) + + norm_error = norm_diff / norm_a + error_x = abs(diff[:, 0]) / norm_a + error_y = abs(diff[:, 1]) / norm_a + error_z = abs(diff[:, 2]) / norm_a + + # Bin the error + norm_error_hist, _ = np.histogram(norm_error, bins=bin_edges, density=False) / ( + np.size(norm_error) * bin_size + ) + error_x_hist, _ = np.histogram(error_x, bins=bin_edges, density=False) / ( + np.size(norm_error) * bin_size + ) + error_y_hist, _ = np.histogram(error_y, bins=bin_edges, density=False) / ( + 
np.size(norm_error) * bin_size + ) + error_z_hist, _ = np.histogram(error_z, bins=bin_edges, density=False) / ( + np.size(norm_error) * bin_size + ) + + norm_median = np.median(norm_error) + median_x = np.median(error_x) + median_y = np.median(error_y) + median_z = np.median(error_z) + + norm_per99 = np.percentile(norm_error, 99) + per99_x = np.percentile(error_x, 99) + per99_y = np.percentile(error_y, 99) + per99_z = np.percentile(error_z, 99) + + norm_max = np.max(norm_error) + max_x = np.max(error_x) + max_y = np.max(error_y) + max_z = np.max(error_z) + + print("Gadget-2 ---- ") + print("Norm: median= %f 99%%= %f max= %f" % (norm_median, norm_per99, norm_max)) + print("X : median= %f 99%%= %f max= %f" % (median_x, per99_x, max_x)) + print("Y : median= %f 99%%= %f max= %f" % (median_y, per99_y, max_y)) + print("Z : median= %f 99%%= %f max= %f" % (median_z, per99_z, max_z)) + print("") + + plt.subplot(231) + plt.text( + min_error * 1.5, + 1.55, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (norm_median, norm_per99), + ha="left", + va="top", + alpha=0.8, + ) + plt.semilogx(bins, norm_error_hist, "k--", label="Gadget-2", alpha=0.8) + plt.subplot(232) + plt.semilogx(bins, error_x_hist, "k--", label="Gadget-2", alpha=0.8) + plt.text( + min_error * 1.5, + 1.55, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_x, per99_x), + ha="left", + va="top", + alpha=0.8, + ) + plt.subplot(233) + plt.semilogx(bins, error_y_hist, "k--", label="Gadget-2", alpha=0.8) + plt.text( + min_error * 1.5, + 1.55, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_y, per99_y), + ha="left", + va="top", + alpha=0.8, + ) + plt.subplot(234) + plt.semilogx(bins, error_z_hist, "k--", label="Gadget-2", alpha=0.8) + plt.text( + min_error * 1.5, + 1.55, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_z, per99_z), + ha="left", + va="top", + alpha=0.8, + ) + + count += 1 + + +# Plot the different histograms +for i in range(num_order): + data = np.loadtxt(order_list[i]) + ids = data[:, 0] + pos = data[:, 1:4] + a_grav = data[:, 4:7] + pot = data[:, 7] + + # Sort stuff + sort_index = np.argsort(ids) + ids = ids[sort_index] + pos = pos[sort_index, :] + a_grav = a_grav[sort_index, :] + pot = pot[sort_index] + + # Cross-checks + if not np.array_equal(exact_ids, ids): + print("Comparing different IDs !") + + if np.max(np.abs(exact_pos - pos) / np.abs(pos)) > 1e-6: + print("Comparing different positions ! 
max difference:") + index = np.argmax( + exact_pos[:, 0] ** 2 + + exact_pos[:, 1] ** 2 + + exact_pos[:, 2] ** 2 + - pos[:, 0] ** 2 + - pos[:, 1] ** 2 + - pos[:, 2] ** 2 + ) + print( + "SWIFT (id=%d):" % ids[index], + pos[index, :], + "exact (id=%d):" % exact_ids[index], + exact_pos[index, :], + "\n", + ) + + # Compute the error norm + diff = exact_a - a_grav + diff_pot = exact_pot - pot + + # Correct for different normalization of potential + print("Difference in normalization of potential:", np.mean(diff_pot), end=" ") + print( + "std_dev=", + np.std(diff_pot), + "99-percentile:", + np.percentile(diff_pot, 99) - np.median(diff_pot), + "1-percentile:", + np.median(diff_pot) - np.percentile(diff_pot, 1), + ) + + exact_pot -= np.mean(diff_pot) + diff_pot = exact_pot - pot + + norm_diff = np.sqrt(diff[:, 0] ** 2 + diff[:, 1] ** 2 + diff[:, 2] ** 2) + + norm_error = norm_diff / exact_a_norm + error_x = abs(diff[:, 0]) / exact_a_norm + error_y = abs(diff[:, 1]) / exact_a_norm + error_z = abs(diff[:, 2]) / exact_a_norm + error_pot = abs(diff_pot) / abs(exact_pot) + + # Bin the error + norm_error_hist, _ = np.histogram(norm_error, bins=bin_edges, density=False) / ( + np.size(norm_error) * bin_size + ) + error_x_hist, _ = np.histogram(error_x, bins=bin_edges, density=False) / ( + np.size(norm_error) * bin_size + ) + error_y_hist, _ = np.histogram(error_y, bins=bin_edges, density=False) / ( + np.size(norm_error) * bin_size + ) + error_z_hist, _ = np.histogram(error_z, bins=bin_edges, density=False) / ( + np.size(norm_error) * bin_size + ) + error_pot_hist, _ = np.histogram(error_pot, bins=bin_edges, density=False) / ( + np.size(norm_error) * bin_size + ) + + norm_median = np.median(norm_error) + median_x = np.median(error_x) + median_y = np.median(error_y) + median_z = np.median(error_z) + median_pot = np.median(error_pot) + + norm_per99 = np.percentile(norm_error, 99) + per99_x = np.percentile(error_x, 99) + per99_y = np.percentile(error_y, 99) + per99_z = np.percentile(error_z, 99) + per99_pot = np.percentile(error_pot, 99) + + norm_max = np.max(norm_error) + max_x = np.max(error_x) + max_y = np.max(error_y) + max_z = np.max(error_z) + max_pot = np.max(error_pot) + + print("Order %d ---- " % order[i]) + print("Norm: median= %f 99%%= %f max= %f" % (norm_median, norm_per99, norm_max)) + print("X : median= %f 99%%= %f max= %f" % (median_x, per99_x, max_x)) + print("Y : median= %f 99%%= %f max= %f" % (median_y, per99_y, max_y)) + print("Z : median= %f 99%%= %f max= %f" % (median_z, per99_z, max_z)) + print("Pot : median= %f 99%%= %f max= %f" % (median_pot, per99_pot, max_pot)) + print("") + + plt.subplot(231) + plt.semilogx( + bins, error_x_hist, color=cols[i], label="SWIFT m-poles order %d" % order[i] + ) + plt.text( + min_error * 1.5, + 1.5 - count / 10.0, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_x, per99_x), + ha="left", + va="top", + color=cols[i], + ) + plt.subplot(232) + plt.semilogx( + bins, error_y_hist, color=cols[i], label="SWIFT m-poles order %d" % order[i] + ) + plt.text( + min_error * 1.5, + 1.5 - count / 10.0, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_y, per99_y), + ha="left", + va="top", + color=cols[i], + ) + plt.subplot(233) + plt.semilogx( + bins, error_z_hist, color=cols[i], label="SWIFT m-poles order %d" % order[i] + ) + plt.text( + min_error * 1.5, + 1.5 - count / 10.0, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_z, per99_z), + ha="left", + va="top", + color=cols[i], + ) + plt.subplot(234) + plt.semilogx( + bins, 
norm_error_hist, color=cols[i], label="SWIFT m-poles order %d" % order[i] + ) + plt.text( + min_error * 1.5, + 1.5 - count / 10.0, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (norm_median, norm_per99), + ha="left", + va="top", + color=cols[i], + ) + plt.subplot(235) + plt.semilogx( + bins, error_pot_hist, color=cols[i], label="SWIFT m-poles order %d" % order[i] + ) + plt.text( + min_error * 1.5, + 1.5 - count / 10.0, + "$50\\%%\\rightarrow%.4f~~ 99\\%%\\rightarrow%.4f$" % (median_pot, per99_pot), + ha="left", + va="top", + color=cols[i], + ) + + count += 1 + +plt.subplot(231) +plt.xlabel("$\delta a_x/|\overrightarrow{a}_{exact}|$") +# plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0, 1.75) +# plt.legend(loc="center left") +plt.subplot(232) +plt.xlabel("$\delta a_y/|\overrightarrow{a}_{exact}|$") +# plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0, 1.75) +# plt.legend(loc="center left") +plt.subplot(233) +plt.xlabel("$\delta a_z/|\overrightarrow{a}_{exact}|$") +# plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0, 1.75) +plt.subplot(234) +plt.xlabel("$|\delta \overrightarrow{a}|/|\overrightarrow{a}_{exact}|$") +# plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0, 2.5) +plt.legend(loc="upper left") +plt.subplot(235) +plt.xlabel("$\delta \phi/\phi_{exact}$") +# plt.ylabel("Density") +plt.xlim(min_error, max_error) +plt.ylim(0, 1.75) +# plt.legend(loc="center left") + + +plt.savefig("gravity_checks_step%.4d.png" % step, dpi=200) +plt.savefig("gravity_checks_step%.4d.pdf" % step, dpi=200) diff --git a/tools/plot_scaling_results.py b/tools/plot_scaling_results.py new file mode 100755 index 0000000000000000000000000000000000000000..2c29d93f88cbe4bde83f3473e9f738a526480c1c --- /dev/null +++ b/tools/plot_scaling_results.py @@ -0,0 +1,365 @@ +#!/usr/bin/env python +# +# Usage: +# python plot_scaling_results.py input-file1-ext input-file2-ext ... +# +# Description: +# Plots speed up, parallel efficiency and time to solution given a "timesteps" output file generated by SWIFT. 
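+# Each "timesteps" file is read with np.loadtxt: column 9 is taken as the
+# wall-clock time per step [ms] and column 6 as the number of particle
+# updates (see the usecols arguments in parse_files below). Speed-up and
+# parallel efficiency are then S(n) = T(1)/T(n) and E(n) = S(n)/n for n
+# threads, e.g. T(1) = 100 s and T(4) = 30 s give S(4) = 3.33, E(4) = 0.83.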
+#
+# Example:
+# python plot_scaling_results.py _threads_cosma_stdout.txt _threads_knl_stdout.txt
+#
+# The working directory should contain files 1_threads_cosma_stdout.txt - 64_threads_cosma_stdout.txt and 1_threads_knl_stdout.txt - 64_threads_knl_stdout.txt, i.e. wall clock time for each run using a given number of threads
+
+import sys
+import glob
+import re
+import numpy as np
+import matplotlib.pyplot as plt
+import scipy.stats
+import ntpath
+
+params = {
+    "axes.labelsize": 14,
+    "axes.titlesize": 18,
+    "font.size": 12,
+    "legend.fontsize": 12,
+    "xtick.labelsize": 14,
+    "ytick.labelsize": 14,
+    "text.usetex": True,
+    "figure.subplot.left": 0.055,
+    "figure.subplot.right": 0.98,
+    "figure.subplot.bottom": 0.05,
+    "figure.subplot.top": 0.95,
+    "figure.subplot.wspace": 0.14,
+    "figure.subplot.hspace": 0.12,
+    "lines.markersize": 6,
+    "lines.linewidth": 3.0,
+    "text.latex.unicode": True,
+}
+plt.rcParams.update(params)
+plt.rc("font", **{"family": "sans-serif", "sans-serif": ["Times"]})
+
+version = []
+branch = []
+revision = []
+hydro_scheme = []
+hydro_kernel = []
+hydro_neighbours = []
+hydro_eta = []
+threadList = []
+hexcols = [
+    "#332288",
+    "#88CCEE",
+    "#44AA99",
+    "#117733",
+    "#999933",
+    "#DDCC77",
+    "#CC6677",
+    "#882255",
+    "#AA4499",
+    "#661100",
+    "#6699CC",
+    "#AA4466",
+    "#4477AA",
+]
+linestyle = (
+    hexcols[0],
+    hexcols[1],
+    hexcols[3],
+    hexcols[5],
+    hexcols[6],
+    hexcols[8],
+    hexcols[2],
+    hexcols[4],
+    hexcols[7],
+    hexcols[9],
+)
+numTimesteps = 0
+legendTitle = " "
+
+inputFileNames = []
+
+# Work out how many data series there are
+if len(sys.argv) == 1:
+    print("Please specify an input file in the arguments.")
+    sys.exit()
+else:
+    for fileName in sys.argv[1:]:
+        inputFileNames.append(fileName)
+    numOfSeries = int(len(sys.argv) - 1)
+
+# Get the names of the branch, Git revision, hydro scheme and hydro kernel
+def parse_header(inputFile):
+    with open(inputFile, "r") as f:
+        found_end = False
+        for line in f:
+            if "Branch:" in line:
+                s = line.split()
+                line = s[2:]
+                branch.append(" ".join(line))
+            elif "Revision:" in line:
+                s = line.split()
+                revision.append(s[2])
+            elif "Hydrodynamic scheme:" in line:
+                line = line[2:-1]
+                s = line.split()
+                line = s[2:]
+                hydro_scheme.append(" ".join(line))
+            elif "Hydrodynamic kernel:" in line:
+                line = line[2:-1]
+                s = line.split()
+                line = s[2:5]
+                hydro_kernel.append(" ".join(line))
+            elif "neighbours:" in line:
+                s = line.split()
+                hydro_neighbours.append(s[4])
+            elif "Eta:" in line:
+                s = line.split()
+                hydro_eta.append(s[2])
+    return
+
+
+# Parse file and return total time taken, speed up and parallel efficiency
+def parse_files():
+
+    totalTime = []
+    sumTotal = []
+    speedUp = []
+    parallelEff = []
+
+    for i in range(0, numOfSeries):  # Loop over each data series
+
+        # Get path to set of files
+        path, name = ntpath.split(inputFileNames[i])
+
+        # Get each file that starts with the cmd line arg
+        file_list = glob.glob(inputFileNames[i] + "*")
+
+        threadList.append([])
+
+        # Remove path from file names
+        for j in range(0, len(file_list)):
+            p, filename = ntpath.split(file_list[j])
+            file_list[j] = filename
+
+        # Create a list of threads using the list of files
+        for fileName in file_list:
+            s = re.split(r"[_.]+", fileName)
+            threadList[i].append(int(s[1]))
+
+        # Re-add path once each file has been found
+        if len(path) != 0:
+            for j in range(0, len(file_list)):
+                file_list[j] = path + "/" + file_list[j]
+
+        # Sort the thread list in ascending order and save the indices
+        sorted_indices = np.argsort(threadList[i])
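+        # For example, hypothetical files ["4_threads.txt", "1_threads.txt",
+        # "16_threads.txt"] give threadList[i] = [4, 1, 16], so sorted_indices
+        # is [1, 0, 2] and is used below to put file_list in the same
+        # ascending-thread order.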
+ threadList[i].sort() + + # Sort the file list in ascending order acording to the thread number + file_list = [file_list[j] for j in sorted_indices] + + parse_header(file_list[0]) + + branch[i] = branch[i].replace("_", "\\_") + + # version.append("$\\textrm{%s}$"%str(branch[i]))# + " " + revision[i])# + "\n" + hydro_scheme[i] + + # "\n" + hydro_kernel[i] + r", $N_{ngb}=%d$"%float(hydro_neighbours[i]) + + # r", $\eta=%.3f$"%float(hydro_eta[i])) + totalTime.append([]) + speedUp.append([]) + parallelEff.append([]) + + # Loop over all files for a given series and load the times + for j in range(0, len(file_list)): + times = np.loadtxt(file_list[j], usecols=(9,)) + updates = np.loadtxt(file_list[j], usecols=(6,)) + totalTime[i].append(np.sum(times)) + + sumTotal.append(np.sum(totalTime[i])) + + # Sort the total times in descending order + sorted_indices = np.argsort(sumTotal)[::-1] + + totalTime = [totalTime[j] for j in sorted_indices] + branchNew = [branch[j] for j in sorted_indices] + + for i in range(0, numOfSeries): + version.append("$\\textrm{%s}$" % str(branchNew[i])) + + global numTimesteps + numTimesteps = len(times) + + # Find speed-up and parallel efficiency + for i in range(0, numOfSeries): + for j in range(0, len(file_list)): + speedUp[i].append(totalTime[i][0] / totalTime[i][j]) + parallelEff[i].append(speedUp[i][j] / threadList[i][j]) + + return (totalTime, speedUp, parallelEff) + + +def print_results(totalTime, parallelEff, version): + + for i in range(0, numOfSeries): + print(" ") + print("------------------------------------") + print(version[i]) + print("------------------------------------") + print("Wall clock time for: {} time steps".format(numTimesteps)) + print("------------------------------------") + + for j in range(0, len(threadList[i])): + print(str(threadList[i][j]) + " threads: {}".format(totalTime[i][j])) + + print(" ") + print("------------------------------------") + print("Parallel Efficiency for: {} time steps".format(numTimesteps)) + print("------------------------------------") + + for j in range(0, len(threadList[i])): + print(str(threadList[i][j]) + " threads: {}".format(parallelEff[i][j])) + + return + + +# Returns a lighter/darker version of the colour +def color_variant(hex_color, brightness_offset=1): + + rgb_hex = [hex_color[x : x + 2] for x in [1, 3, 5]] + new_rgb_int = [int(hex_value, 16) + brightness_offset for hex_value in rgb_hex] + new_rgb_int = [ + min([255, max([0, i])]) for i in new_rgb_int + ] # make sure new values are between 0 and 255 + # hex() produces "0x88", we want just "88" + + return "#" + "".join([hex(i)[2:] for i in new_rgb_int]) + + +def plot_results(totalTime, speedUp, parallelEff, numSeries): + + fig, axarr = plt.subplots(2, 2, figsize=(10, 10), frameon=True) + speedUpPlot = axarr[0, 0] + parallelEffPlot = axarr[0, 1] + totalTimePlot = axarr[1, 0] + emptyPlot = axarr[1, 1] + + # Plot speed up + speedUpPlot.plot(threadList[0], threadList[0], linestyle="--", lw=1.5, color="0.2") + for i in range(0, numSeries): + speedUpPlot.plot(threadList[0], speedUp[i], linestyle[i], label=version[i]) + + speedUpPlot.set_ylabel("${\\rm Speed\\textendash up}$", labelpad=0.0) + speedUpPlot.set_xlabel("${\\rm Threads}$", labelpad=0.0) + speedUpPlot.set_xlim([0.7, threadList[0][-1] + 1]) + speedUpPlot.set_ylim([0.7, threadList[0][-1] + 1]) + + # Plot parallel efficiency + parallelEffPlot.plot( + [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [1, 1], + "k--", + lw=1.5, + color="0.2", + ) + parallelEffPlot.plot( + 
[threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [0.9, 0.9], + "k--", + lw=1.5, + color="0.2", + ) + parallelEffPlot.plot( + [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [0.75, 0.75], + "k--", + lw=1.5, + color="0.2", + ) + parallelEffPlot.plot( + [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [0.5, 0.5], + "k--", + lw=1.5, + color="0.2", + ) + for i in range(0, numSeries): + parallelEffPlot.plot(threadList[0], parallelEff[i], linestyle[i]) + + parallelEffPlot.set_xscale("log") + parallelEffPlot.set_ylabel("${\\rm Parallel~efficiency}$", labelpad=0.0) + parallelEffPlot.set_xlabel("${\\rm Threads}$", labelpad=0.0) + parallelEffPlot.set_ylim([0, 1.1]) + parallelEffPlot.set_xlim([0.9, 10 ** (np.floor(np.log10(threadList[0][-1])) + 0.5)]) + + # Plot time to solution + for i in range(0, numOfSeries): + pts = [1, 10 ** np.floor(np.log10(threadList[i][-1]) + 1)] + totalTimePlot.loglog(pts, totalTime[i][0] / pts, "k--", lw=1.0, color="0.2") + totalTimePlot.loglog( + threadList[i], totalTime[i], linestyle[i], label=version[i] + ) + + y_min = 10 ** np.floor(np.log10(np.min(totalTime[:][0]) * 0.6)) + y_max = 1.0 * 10 ** np.floor(np.log10(np.max(totalTime[:][0]) * 1.5) + 1) + totalTimePlot.set_xscale("log") + totalTimePlot.set_xlabel("${\\rm Threads}$", labelpad=0.0) + totalTimePlot.set_ylabel("${\\rm Time~to~solution}~[{\\rm ms}]$", labelpad=0.0) + totalTimePlot.set_xlim([0.9, 10 ** (np.floor(np.log10(threadList[0][-1])) + 0.5)]) + totalTimePlot.set_ylim(y_min, y_max) + + totalTimePlot.legend( + bbox_to_anchor=(1.21, 0.97), + loc=2, + borderaxespad=0.0, + prop={"size": 12}, + frameon=False, + title=legendTitle, + ) + emptyPlot.axis("off") + + for i, txt in enumerate(threadList[0]): + if ( + 2 ** np.floor(np.log2(threadList[0][i])) == threadList[0][i] + ): # only powers of 2 + speedUpPlot.annotate( + "$%s$" % txt, + (threadList[0][i], speedUp[0][i]), + (threadList[0][i], speedUp[0][i] + 0.3), + color=hexcols[0], + ) + parallelEffPlot.annotate( + "$%s$" % txt, + (threadList[0][i], parallelEff[0][i]), + (threadList[0][i], parallelEff[0][i] + 0.02), + color=hexcols[0], + ) + totalTimePlot.annotate( + "$%s$" % txt, + (threadList[0][i], totalTime[0][i]), + (threadList[0][i], totalTime[0][i] * 1.1), + color=hexcols[0], + ) + + # fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps of Cosmo Volume\n Cmd Line: {}, Platform: {}".format(numTimesteps),cmdLine,platform)) + fig.suptitle( + "${\\rm Speed\\textendash up,~parallel~efficiency~and~time~to~solution~for}~%d~{\\rm time\\textendash steps}$" + % numTimesteps, + fontsize=16, + ) + + return + + +# Calculate results +(totalTime, speedUp, parallelEff) = parse_files() + +legendTitle = version[0] + +plot_results(totalTime, speedUp, parallelEff, numOfSeries) + +print_results(totalTime, parallelEff, version) + +# And plot +plt.show() diff --git a/tools/plot_scaling_results_breakdown.py b/tools/plot_scaling_results_breakdown.py new file mode 100755 index 0000000000000000000000000000000000000000..570ec37ee908dbbe51bfc12fa2c3af59d2d8800a --- /dev/null +++ b/tools/plot_scaling_results_breakdown.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python +# +# Usage: +# python plot_scaling_results.py input-file1-ext input-file2-ext ... +# +# Description: +# Plots speed up, parallel efficiency and time to solution given a "timesteps" output file generated by SWIFT. 
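+# Unlike plot_scaling_results.py, the bottom panel here shows time to
+# solution multiplied by the number of cores, with each series stacked on
+# top of the previous one (fill_between), i.e. a cumulative breakdown of
+# core-time rather than raw run time.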
+#
+# Example:
+# python plot_scaling_results.py _threads_cosma_stdout.txt _threads_knl_stdout.txt
+#
+# The working directory should contain files 1_threads_cosma_stdout.txt - 64_threads_cosma_stdout.txt and 1_threads_knl_stdout.txt - 64_threads_knl_stdout.txt, i.e. wall clock time for each run using a given number of threads
+
+import sys
+import glob
+import re
+import numpy as np
+import matplotlib.pyplot as plt
+import scipy.stats
+import ntpath
+
+params = {
+    "axes.labelsize": 14,
+    "axes.titlesize": 18,
+    "font.size": 12,
+    "legend.fontsize": 12,
+    "xtick.labelsize": 14,
+    "ytick.labelsize": 14,
+    "text.usetex": True,
+    "figure.subplot.left": 0.055,
+    "figure.subplot.right": 0.98,
+    "figure.subplot.bottom": 0.05,
+    "figure.subplot.top": 0.95,
+    "figure.subplot.wspace": 0.14,
+    "figure.subplot.hspace": 0.12,
+    "lines.markersize": 6,
+    "lines.linewidth": 3.0,
+    "text.latex.unicode": True,
+}
+plt.rcParams.update(params)
+plt.rc("font", **{"family": "sans-serif", "sans-serif": ["Times"]})
+
+version = []
+branch = []
+revision = []
+hydro_scheme = []
+hydro_kernel = []
+hydro_neighbours = []
+hydro_eta = []
+threadList = []
+hexcols = [
+    "#332288",
+    "#88CCEE",
+    "#44AA99",
+    "#117733",
+    "#999933",
+    "#DDCC77",
+    "#CC6677",
+    "#882255",
+    "#AA4499",
+    "#661100",
+    "#6699CC",
+    "#AA4466",
+    "#4477AA",
+]
+linestyle = (
+    hexcols[0],
+    hexcols[1],
+    hexcols[3],
+    hexcols[5],
+    hexcols[6],
+    hexcols[8],
+    hexcols[2],
+    hexcols[4],
+    hexcols[7],
+    hexcols[9],
+)
+numTimesteps = 0
+legendTitle = " "
+
+inputFileNames = []
+
+# Work out how many data series there are
+if len(sys.argv) == 1:
+    print("Please specify an input file in the arguments.")
+    sys.exit()
+else:
+    for fileName in sys.argv[1:]:
+        inputFileNames.append(fileName)
+    numOfSeries = int(len(sys.argv) - 1)
+
+# Get the names of the branch, Git revision, hydro scheme and hydro kernel
+def parse_header(inputFile):
+    with open(inputFile, "r") as f:
+        found_end = False
+        for line in f:
+            if "Branch:" in line:
+                s = line.split()
+                line = s[2:]
+                branch.append(" ".join(line))
+            elif "Revision:" in line:
+                s = line.split()
+                revision.append(s[2])
+            elif "Hydrodynamic scheme:" in line:
+                line = line[2:-1]
+                s = line.split()
+                line = s[2:]
+                hydro_scheme.append(" ".join(line))
+            elif "Hydrodynamic kernel:" in line:
+                line = line[2:-1]
+                s = line.split()
+                line = s[2:5]
+                hydro_kernel.append(" ".join(line))
+            elif "neighbours:" in line:
+                s = line.split()
+                hydro_neighbours.append(s[4])
+            elif "Eta:" in line:
+                s = line.split()
+                hydro_eta.append(s[2])
+    return
+
+
+# Parse file and return total time taken, speed up and parallel efficiency
+def parse_files():
+
+    totalTime = []
+    sumTotal = []
+    speedUp = []
+    parallelEff = []
+
+    for i in range(0, numOfSeries):  # Loop over each data series
+
+        # Get path to set of files
+        path, name = ntpath.split(inputFileNames[i])
+
+        # Get each file that starts with the cmd line arg
+        file_list = glob.glob(inputFileNames[i] + "*")
+
+        threadList.append([])
+
+        # Remove path from file names
+        for j in range(0, len(file_list)):
+            p, filename = ntpath.split(file_list[j])
+            file_list[j] = filename
+
+        # Create a list of threads using the list of files
+        for fileName in file_list:
+            s = re.split(r"[_.]+", fileName)
+            threadList[i].append(int(s[1]))
+
+        # Re-add path once each file has been found
+        if len(path) != 0:
+            for j in range(0, len(file_list)):
+                file_list[j] = path + "/" + file_list[j]
+
+        # Sort the thread list in ascending order and save the indices
+        sorted_indices = np.argsort(threadList[i])
+        threadList[i].sort()
+
+        # Sort the file list in ascending order according to the thread number
+        file_list = [file_list[j] for j in sorted_indices]
+
+        parse_header(file_list[0])
+
+        branch[i] = branch[i].replace("_", "\\_")
+
+        # version.append("$\\textrm{%s}$"%str(branch[i]))# + " " + revision[i])# + "\n" + hydro_scheme[i] +
+        # "\n" + hydro_kernel[i] + r", $N_{ngb}=%d$"%float(hydro_neighbours[i]) +
+        # r", $\eta=%.3f$"%float(hydro_eta[i]))
+        totalTime.append([])
+        speedUp.append([])
+        parallelEff.append([])
+
+        # Loop over all files for a given series and load the times
+        for j in range(0, len(file_list)):
+            times = np.loadtxt(file_list[j], usecols=(9,))
+            updates = np.loadtxt(file_list[j], usecols=(6,))  # read but not used below
+            totalTime[i].append(np.sum(times))
+
+        sumTotal.append(np.sum(totalTime[i]))
+
+    # Sort the total times in descending order
+    sorted_indices = np.argsort(sumTotal)[::-1]
+
+    totalTime = [totalTime[j] for j in sorted_indices]
+    branchNew = [branch[j] for j in sorted_indices]
+
+    for i in range(0, numOfSeries):
+        version.append("$\\textrm{%s}$" % str(branchNew[i]))
+
+    global numTimesteps
+    numTimesteps = len(times)  # number of steps, taken from the last file read
+
+    # Find speed-up and parallel efficiency
+    for i in range(0, numOfSeries):
+        for j in range(0, len(file_list)):
+            speedUp[i].append(totalTime[i][0] / totalTime[i][j])
+            parallelEff[i].append(speedUp[i][j] / threadList[i][j])
+
+    return (totalTime, speedUp, parallelEff)
+
+
+def print_results(totalTime, parallelEff, version):
+
+    for i in range(0, numOfSeries):
+        print(" ")
+        print("------------------------------------")
+        print(version[i])
+        print("------------------------------------")
+        print("Wall clock time for: {} time steps".format(numTimesteps))
+        print("------------------------------------")
+
+        for j in range(0, len(threadList[i])):
+            print(str(threadList[i][j]) + " threads: {}".format(totalTime[i][j]))
+
+        print(" ")
+        print("------------------------------------")
+        print("Parallel Efficiency for: {} time steps".format(numTimesteps))
+        print("------------------------------------")
+
+        for j in range(0, len(threadList[i])):
+            print(str(threadList[i][j]) + " threads: {}".format(parallelEff[i][j]))
+
+    return
+
+
+# Returns a lighter/darker version of the colour
+def color_variant(hex_color, brightness_offset=1):
+
+    rgb_hex = [hex_color[x : x + 2] for x in [1, 3, 5]]
+    new_rgb_int = [int(hex_value, 16) + brightness_offset for hex_value in rgb_hex]
+    new_rgb_int = [
+        min([255, max([0, i])]) for i in new_rgb_int
+    ]  # make sure new values are between 0 and 255
+    # format each channel as two hex digits (hex() would drop a leading zero
+    # for values below 16)
+
+    return "#" + "".join(["{:02x}".format(i) for i in new_rgb_int])
+
+
+def plot_results(totalTime, speedUp, parallelEff, numSeries):
+
+    fig, axarr = plt.subplots(2, 2, figsize=(10, 10), frameon=True)
+    speedUpPlot = axarr[0, 0]
+    parallelEffPlot = axarr[0, 1]
+    totalTimePlot = axarr[1, 0]
+    emptyPlot = axarr[1, 1]
+
+    # Plot speed up
+    speedUpPlot.plot(threadList[0], threadList[0], linestyle="--", lw=1.5, color="0.2")
+    for i in range(0, numSeries):
+        speedUpPlot.plot(threadList[0], speedUp[i], linestyle[i], label=version[i])
+
+    speedUpPlot.set_ylabel("${\\rm Speed\\textendash up}$", labelpad=0.0)
+    speedUpPlot.set_xlabel("${\\rm Threads}$", labelpad=0.0)
+    speedUpPlot.set_xlim([0.7, threadList[0][-1] + 1])
+    speedUpPlot.set_ylim([0.7, threadList[0][-1] + 1])
+
+    # Plot parallel efficiency
+    parallelEffPlot.plot(
+        [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)],
+        [1, 1],
+        "k--",
+        lw=1.5,
+        color="0.2",
+    )
+    parallelEffPlot.plot(
+
[threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [0.9, 0.9], + "k--", + lw=1.5, + color="0.2", + ) + parallelEffPlot.plot( + [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [0.75, 0.75], + "k--", + lw=1.5, + color="0.2", + ) + parallelEffPlot.plot( + [threadList[0][0], 10 ** np.floor(np.log10(threadList[0][-1]) + 1)], + [0.5, 0.5], + "k--", + lw=1.5, + color="0.2", + ) + for i in range(0, numSeries): + parallelEffPlot.plot(threadList[0], parallelEff[i], linestyle[i]) + + parallelEffPlot.set_xscale("log") + parallelEffPlot.set_ylabel("${\\rm Parallel~efficiency}$", labelpad=0.0) + parallelEffPlot.set_xlabel("${\\rm Threads}$", labelpad=0.0) + parallelEffPlot.set_ylim([0, 1.1]) + parallelEffPlot.set_xlim([0.9, 10 ** (np.floor(np.log10(threadList[0][-1])) + 0.5)]) + + # Plot time to solution + for i in range(0, numSeries): + for j in range(0, len(threadList[0])): + totalTime[i][j] = totalTime[i][j] * threadList[i][j] + if i > 1: + totalTime[i][j] = totalTime[i][j] + totalTime[i - 1][j] + totalTimePlot.plot(threadList[0], totalTime[i], linestyle[i], label=version[i]) + + if i > 1: + colour = color_variant(linestyle[i], 100) + totalTimePlot.fill_between( + threadList[0], + np.array(totalTime[i]), + np.array(totalTime[i - 1]), + facecolor=colour, + ) + elif i == 1: + colour = color_variant(linestyle[i], 100) + totalTimePlot.fill_between(threadList[0], totalTime[i], facecolor=colour) + + totalTimePlot.set_xscale("log") + totalTimePlot.ticklabel_format(style="sci", axis="y", scilimits=(0, 0)) + totalTimePlot.set_xlabel("${\\rm Threads}$", labelpad=0.0) + totalTimePlot.set_ylabel( + "${\\rm Time~to~solution~x~No.~of~cores}~[{\\rm ms}]$", labelpad=0.0 + ) + totalTimePlot.set_xlim([0.9, 10 ** (np.floor(np.log10(threadList[0][-1])) + 0.5)]) + # totalTimePlot.set_ylim(y_min, y_max) + + totalTimePlot.legend( + bbox_to_anchor=(1.21, 0.97), + loc=2, + borderaxespad=0.0, + prop={"size": 12}, + frameon=False, + title=legendTitle, + ) + emptyPlot.axis("off") + + for i, txt in enumerate(threadList[0]): + if ( + 2 ** np.floor(np.log2(threadList[0][i])) == threadList[0][i] + ): # only powers of 2 + speedUpPlot.annotate( + "$%s$" % txt, + (threadList[0][i], speedUp[0][i]), + (threadList[0][i], speedUp[0][i] + 0.3), + color=hexcols[0], + ) + parallelEffPlot.annotate( + "$%s$" % txt, + (threadList[0][i], parallelEff[0][i]), + (threadList[0][i], parallelEff[0][i] + 0.02), + color=hexcols[0], + ) + totalTimePlot.annotate( + "$%s$" % txt, + (threadList[0][i], totalTime[0][i]), + (threadList[0][i], totalTime[0][i] * 1.1), + color=hexcols[0], + ) + + # fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps of Cosmo Volume\n Cmd Line: {}, Platform: {}".format(numTimesteps),cmdLine,platform)) + fig.suptitle( + "${\\rm Speed\\textendash up,~parallel~efficiency~and~time~to~solution~x~no.~of~cores~for}~%d~{\\rm time\\textendash steps}$" + % numTimesteps, + fontsize=16, + ) + + return + + +# Calculate results +(totalTime, speedUp, parallelEff) = parse_files() + +legendTitle = version[0] + +plot_results(totalTime, speedUp, parallelEff, numOfSeries) + +print_results(totalTime, parallelEff, version) + +# And plot +plt.show() diff --git a/examples/plot_task_dependencies.sh b/tools/plot_task_dependencies.sh similarity index 100% rename from examples/plot_task_dependencies.sh rename to tools/plot_task_dependencies.sh diff --git a/examples/plot_task_level.py b/tools/plot_task_level.py old mode 100644 new mode 100755 similarity index 76% rename from 
examples/plot_task_level.py rename to tools/plot_task_level.py index 2d51f4829692bc71826ec6c4b93c025f7106828f..23e3ec878a2b8ef0f4d3c56d91ef75026e012de8 --- a/examples/plot_task_level.py +++ b/tools/plot_task_level.py @@ -19,7 +19,7 @@ filename = sys.argv[-1] names = ["type", "subtype", "depth", "count"] # read file -data = pd.read_csv(filename, sep=' ', comment="#", names=names) +data = pd.read_csv(filename, sep=" ", comment="#", names=names) # generate color map cmap = plt.get_cmap("hsv") @@ -30,9 +30,13 @@ for i in range(data["depth"].max()): ind = data["depth"] == i label = "depth = %i" % i c = cmap(i / N) - plt.plot(data["type"][ind] + "_" + data["subtype"][ind], - data["count"][ind], ".", label=label, - color=c) + plt.plot( + data["type"][ind] + "_" + data["subtype"][ind], + data["count"][ind], + ".", + label=label, + color=c, + ) # modify figure parameters and show it plt.gca().set_yscale("log") diff --git a/examples/process_cells b/tools/process_cells similarity index 100% rename from examples/process_cells rename to tools/process_cells diff --git a/examples/process_cells_helper b/tools/process_cells_helper similarity index 100% rename from examples/process_cells_helper rename to tools/process_cells_helper diff --git a/tools/task_plots/analyse_tasks.py b/tools/task_plots/analyse_tasks.py new file mode 100755 index 0000000000000000000000000000000000000000..603417a3c2d6b79ac1534d011e03b0907774f4ab --- /dev/null +++ b/tools/task_plots/analyse_tasks.py @@ -0,0 +1,525 @@ +#!/usr/bin/env python +""" +Usage: + analyse_tasks.py [options] input.dat + +where input.dat is a thread info file for a step (MPI or non-MPI). Use the +'-y interval' flag of the swift and swift_mpi commands to create these +(you will also need to configure with the --enable-task-debugging option). + +The output is an analysis of the task timings, including deadtime per thread +and step, total amount of time spent for each task type, for the whole step +and per thread and the minimum and maximum times spent per task type. + +This file is part of SWIFT. +Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk) + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +""" + +import matplotlib + +matplotlib.use("Agg") +import matplotlib.collections as collections +import matplotlib.ticker as plticker +import pylab as pl +import sys +import argparse + +# Handle the command line. +parser = argparse.ArgumentParser(description="Analyse task dumps") + +parser.add_argument("input", help="Thread data file (-y output)") +parser.add_argument( + "-v", + "--verbose", + dest="verbose", + help="Verbose output (default: False)", + default=False, + action="store_true", +) +parser.add_argument( + "-r", + "--rank", + dest="rank", + help="Rank to process (default: all)", + default="all", + action="store", +) + +args = parser.parse_args() +infile = args.input + +# Tasks and subtypes. Indexed as in tasks.h. 
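+# (these lists mirror the task and subtype enumerations in tasks.h and must be
+# kept in sync with them when task types change)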
+TASKTYPES = [
+    "none",
+    "sort",
+    "self",
+    "pair",
+    "sub_self",
+    "sub_pair",
+    "init_grav",
+    "init_grav_out",
+    "ghost_in",
+    "ghost",
+    "ghost_out",
+    "extra_ghost",
+    "drift_part",
+    "drift_gpart",
+    "end_force",
+    "kick1",
+    "kick2",
+    "timestep",
+    "send",
+    "recv",
+    "grav_long_range",
+    "grav_mm",
+    "grav_down_in",
+    "grav_down",
+    "grav_mesh",
+    "cooling",
+    "star_formation",
+    "sourceterms",
+    "stars_ghost_in",
+    "stars_ghost",
+    "stars_ghost_out",
+    "count",
+]
+
+SUBTYPES = [
+    "none",
+    "density",
+    "gradient",
+    "force",
+    "grav",
+    "external_grav",
+    "tend",
+    "xv",
+    "rho",
+    "gpart",
+    "multipole",
+    "spart",
+    "stars_density",
+    "count",
+]
+
+SIDS = [
+    "(-1,-1,-1)",
+    "(-1,-1, 0)",
+    "(-1,-1, 1)",
+    "(-1, 0,-1)",
+    "(-1, 0, 0)",
+    "(-1, 0, 1)",
+    "(-1, 1,-1)",
+    "(-1, 1, 0)",
+    "(-1, 1, 1)",
+    "( 0,-1,-1)",
+    "( 0,-1, 0)",
+    "( 0,-1, 1)",
+    "( 0, 0,-1)",
+]
+
+# Read input.
+data = pl.loadtxt(infile)
+
+# Do we have an MPI file?
+full_step = data[0, :]
+if full_step.size == 13:
+    print("# MPI mode")
+    mpimode = True
+    nranks = int(max(data[:, 0])) + 1
+    print("# Number of ranks:", nranks)
+    rankcol = 0
+    threadscol = 1
+    taskcol = 2
+    subtaskcol = 3
+    ticcol = 5
+    toccol = 6
+    updates = int(full_step[7])
+    g_updates = int(full_step[8])
+    s_updates = int(full_step[9])
+else:
+    print("# non MPI mode")
+    nranks = 1
+    mpimode = False
+    rankcol = -1
+    threadscol = 0
+    taskcol = 1
+    subtaskcol = 2
+    ticcol = 4
+    toccol = 5
+    updates = int(full_step[6])
+    g_updates = int(full_step[7])
+    s_updates = int(full_step[8])
+
+# Get the CPU clock to convert ticks into milliseconds.
+CPU_CLOCK = float(full_step[-1]) / 1000.0
+if args.verbose:
+    print("# CPU frequency:", CPU_CLOCK * 1000.0)
+print("# updates:", updates)
+print("# g_updates:", g_updates)
+print("# s_updates:", s_updates)
+
+if mpimode:
+    if args.rank == "all":
+        ranks = list(range(nranks))
+    else:
+        ranks = [int(args.rank)]
+        if ranks[0] >= nranks:
+            print("Error: maximum rank is " + str(nranks - 1))
+            sys.exit(1)
+else:
+    ranks = [1]
+
+maxthread = int(max(data[:, threadscol])) + 1
+print("# Maximum thread id:", maxthread)
+
+# Avoid start and end times of zero.
+sdata = data[data[:, ticcol] != 0]
+sdata = sdata[sdata[:, toccol] != 0]
+
+# Now we process the required ranks.
+for rank in ranks:
+    if mpimode:
+        print("# Rank", rank)
+        data = sdata[sdata[:, rankcol] == rank]
+        full_step = data[0, :]
+    else:
+        data = sdata
+
+    # Recover the start and end time
+    tic_step = int(full_step[ticcol])
+    toc_step = int(full_step[toccol])
+    data = data[1:, :]
+
+    # Avoid start and end times of zero.
+    data = data[data[:, ticcol] != 0]
+    data = data[data[:, toccol] != 0]
+
+    # Calculate the time range.
+    total_t = (toc_step - tic_step) / CPU_CLOCK
+    print("# Data range: ", total_t, "ms")
+    print()
+
+    # Correct times to relative values.
+    start_t = float(tic_step)
+    data[:, ticcol] -= start_t
+    data[:, toccol] -= start_t
+    end_t = (toc_step - start_t) / CPU_CLOCK
+
+    tasks = {}
+    tasks[-1] = []
+    for i in range(maxthread):
+        tasks[i] = []
+
+    # Gather into by thread data.
+    num_lines = pl.shape(data)[0]
+    for line in range(num_lines):
+        thread = int(data[line, threadscol])
+        tic = int(data[line, ticcol]) / CPU_CLOCK
+        toc = int(data[line, toccol]) / CPU_CLOCK
+        tasktype = int(data[line, taskcol])
+        subtype = int(data[line, subtaskcol])
+        sid = int(data[line, -1])
+
+        tasks[thread].append([tic, toc, tasktype, subtype, sid])
+
+    # Sort by tic and gather used threads.
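+    # (within each thread the tasks are ordered by start time, so the deadtime
+    # sections below can measure the gaps between consecutive tasks)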
+ threadids = [] + for i in range(maxthread): + tasks[i] = sorted(tasks[i], key=lambda task: task[0]) + threadids.append(i) + + # Times per task. + print("# Task times:") + print("# -----------") + print( + "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}".format( + "type/subtype", "count", "minimum", "maximum", "sum", "mean", "percent" + ) + ) + + alltasktimes = {} + sidtimes = {} + for i in threadids: + tasktimes = {} + for task in tasks[i]: + key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]] + dt = task[1] - task[0] + if not key in tasktimes: + tasktimes[key] = [] + tasktimes[key].append(dt) + + if not key in alltasktimes: + alltasktimes[key] = [] + alltasktimes[key].append(dt) + + my_sid = task[4] + if my_sid > -1: + if not my_sid in sidtimes: + sidtimes[my_sid] = [] + sidtimes[my_sid].append(dt) + + print("# Thread : ", i) + for key in sorted(tasktimes.keys()): + taskmin = min(tasktimes[key]) + taskmax = max(tasktimes[key]) + tasksum = sum(tasktimes[key]) + print( + "{0:19s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + key, + len(tasktimes[key]), + taskmin, + taskmax, + tasksum, + tasksum / len(tasktimes[key]), + tasksum / total_t * 100.0, + ) + ) + print() + + print("# All threads : ") + for key in sorted(alltasktimes.keys()): + taskmin = min(alltasktimes[key]) + taskmax = max(alltasktimes[key]) + tasksum = sum(alltasktimes[key]) + print( + "{0:18s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + key, + len(alltasktimes[key]), + taskmin, + taskmax, + tasksum, + tasksum / len(alltasktimes[key]), + tasksum / (len(threadids) * total_t) * 100.0, + ) + ) + print() + + # For pairs, show stuff sorted by SID + print("# By SID (all threads): ") + print( + "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}".format( + "Pair/Sub-pair SID", "count", "minimum", "maximum", "sum", "mean", "percent" + ) + ) + + for sid in range(0, 13): + if sid in sidtimes: + sidmin = min(sidtimes[sid]) + sidmax = max(sidtimes[sid]) + sidsum = sum(sidtimes[sid]) + sidcount = len(sidtimes[sid]) + sidmean = sidsum / sidcount + else: + sidmin = 0.0 + sidmax = 0.0 + sidsum = 0.0 + sidcount = 0 + sidmean = 0.0 + print( + "{0:3d} {1:15s}: {2:7d} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.4f} {7:9.2f}".format( + sid, + SIDS[sid], + sidcount, + sidmin, + sidmax, + sidsum, + sidmean, + sidsum / (len(threadids) * total_t) * 100.0, + ) + ) + print() + + # Dead times. + print("# Times not in tasks (deadtimes)") + print("# ------------------------------") + print("# Time before first task:") + print("# no. : {0:>9s} {1:>9s}".format("value", "percent")) + predeadtimes = [] + for i in threadids: + if len(tasks[i]) > 0: + predeadtime = tasks[i][0][0] + print( + "thread {0:2d}: {1:9.4f} {2:9.4f}".format( + i, predeadtime, predeadtime / total_t * 100.0 + ) + ) + predeadtimes.append(predeadtime) + else: + predeadtimes.append(0.0) + + predeadmin = min(predeadtimes) + predeadmax = max(predeadtimes) + predeadsum = sum(predeadtimes) + print( + "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) + ) + print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(predeadtimes), + predeadmin, + predeadmax, + predeadsum, + predeadsum / len(predeadtimes), + predeadsum / (len(threadids) * total_t) * 100.0, + ) + ) + print() + + print("# Time after last task:") + print("# no. 
: {0:>9s} {1:>9s}".format("value", "percent")) + postdeadtimes = [] + for i in threadids: + if len(tasks[i]) > 0: + postdeadtime = total_t - tasks[i][-1][1] + print( + "thread {0:2d}: {1:9.4f} {2:9.4f}".format( + i, postdeadtime, postdeadtime / total_t * 100.0 + ) + ) + postdeadtimes.append(postdeadtime) + else: + postdeadtimes.append(0.0) + + postdeadmin = min(postdeadtimes) + postdeadmax = max(postdeadtimes) + postdeadsum = sum(postdeadtimes) + print( + "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) + ) + print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(postdeadtimes), + postdeadmin, + postdeadmax, + postdeadsum, + postdeadsum / len(postdeadtimes), + postdeadsum / (len(threadids) * total_t) * 100.0, + ) + ) + print() + + # Time in engine, i.e. from first to last tasks. + print("# Time between tasks (engine deadtime):") + print( + "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) + ) + enginedeadtimes = [] + for i in threadids: + deadtimes = [] + if len(tasks[i]) > 0: + last = tasks[i][0][0] + else: + last = 0.0 + for task in tasks[i]: + dt = task[0] - last + deadtimes.append(dt) + last = task[1] + + # Drop first value, last value already gone. + if len(deadtimes) > 1: + deadtimes = deadtimes[1:] + else: + # Only one or fewer tasks, so no deadtime by definition. + deadtimes = [0.0] + + deadmin = min(deadtimes) + deadmax = max(deadtimes) + deadsum = sum(deadtimes) + print( + "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + i, + len(deadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(deadtimes), + deadsum / total_t * 100.0, + ) + ) + enginedeadtimes.extend(deadtimes) + + deadmin = min(enginedeadtimes) + deadmax = max(enginedeadtimes) + deadsum = sum(enginedeadtimes) + print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(enginedeadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(enginedeadtimes), + deadsum / (len(threadids) * total_t) * 100.0, + ) + ) + print() + + # All times in step. + print("# All deadtimes:") + print( + "# no. 
: {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) + ) + alldeadtimes = [] + for i in threadids: + deadtimes = [] + last = 0 + for task in tasks[i]: + dt = task[0] - last + deadtimes.append(dt) + last = task[1] + dt = total_t - last + deadtimes.append(dt) + + deadmin = min(deadtimes) + deadmax = max(deadtimes) + deadsum = sum(deadtimes) + print( + "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + i, + len(deadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(deadtimes), + deadsum / total_t * 100.0, + ) + ) + alldeadtimes.extend(deadtimes) + + deadmin = min(alldeadtimes) + deadmax = max(alldeadtimes) + deadsum = sum(alldeadtimes) + print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(alldeadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(alldeadtimes), + deadsum / (len(threadids) * total_t) * 100.0, + ) + ) + print() + +sys.exit(0) diff --git a/examples/analyse_threadpool_tasks.py b/tools/task_plots/analyse_threadpool_tasks.py similarity index 53% rename from examples/analyse_threadpool_tasks.py rename to tools/task_plots/analyse_threadpool_tasks.py index 609af363b4110e010d6714bef6862d40e5acb278..af8d88dc1d4dc319fe7506d604e550de22a55a81 100755 --- a/examples/analyse_threadpool_tasks.py +++ b/tools/task_plots/analyse_threadpool_tasks.py @@ -29,6 +29,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. """ import matplotlib + matplotlib.use("Agg") import matplotlib.collections as collections import matplotlib.ticker as plticker @@ -40,23 +41,28 @@ import argparse parser = argparse.ArgumentParser(description="Analyse task dumps") parser.add_argument("input", help="Threadpool data file (-y output)") -parser.add_argument("-v", "--verbose", dest="verbose", - help="Verbose output (default: False)", - default=False, action="store_true") +parser.add_argument( + "-v", + "--verbose", + dest="verbose", + help="Verbose output (default: False)", + default=False, + action="store_true", +) args = parser.parse_args() infile = args.input # Read header. First two lines. with open(infile) as infid: - head = [next(infid) for x in xrange(2)] + head = [next(infid) for x in range(2)] header = head[1][2:].strip() header = eval(header) -nthread = int(header['num_threads']) + 1 -CPU_CLOCK = float(header['cpufreq']) / 1000.0 -print "Number of threads: ", nthread - 1 +nthread = int(header["num_threads"]) + 1 +CPU_CLOCK = float(header["cpufreq"]) / 1000.0 +print("Number of threads: ", nthread - 1) if args.verbose: - print "CPU frequency:", CPU_CLOCK * 1000.0 + print("CPU frequency:", CPU_CLOCK * 1000.0) # Read input. data = pl.genfromtxt(infile, dtype=None, delimiter=" ") @@ -71,7 +77,7 @@ for i in data: if i[0] != "#": funcs.append(i[0].replace("_mapper", "")) if i[1] < 0: - threads.append(nthread-1) + threads.append(nthread - 1) else: threads.append(i[1]) chunks.append(i[2]) @@ -88,9 +94,9 @@ tic_step = min(tics) toc_step = max(tocs) # Calculate the time range. -total_t = (toc_step - tic_step)/ CPU_CLOCK -print "# Data range: ", total_t, "ms" -print +total_t = (toc_step - tic_step) / CPU_CLOCK +print("# Data range: ", total_t, "ms") +print() # Correct times to relative millisecs. start_t = float(tic_step) @@ -104,7 +110,7 @@ for i in range(nthread): # Gather into by thread data. 
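+# (each entry records [tic, toc, mapper name] for a single threadpool call)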
for i in range(len(tics)): - tasks[threads[i]].append([tics[i],tocs[i],funcs[i]]) + tasks[threads[i]].append([tics[i], tocs[i], funcs[i]]) # Don't actually process the fake thread. nthread = nthread - 1 @@ -117,11 +123,13 @@ for i in range(nthread): threadids.append(i) # Times per task. -print "# Task times:" -print "# -----------" -print "# {0:<31s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}"\ - .format("type/subtype", "count","minimum", "maximum", - "sum", "mean", "percent") +print("# Task times:") +print("# -----------") +print( + "# {0:<31s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}".format( + "type/subtype", "count", "minimum", "maximum", "sum", "mean", "percent" + ) +) alltasktimes = {} sidtimes = {} for i in threadids: @@ -137,74 +145,116 @@ for i in threadids: alltasktimes[key] = [] alltasktimes[key].append(dt) - print "# Thread : ", i + print("# Thread : ", i) for key in sorted(tasktimes.keys()): taskmin = min(tasktimes[key]) taskmax = max(tasktimes[key]) tasksum = sum(tasktimes[key]) - print "{0:33s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(key, len(tasktimes[key]), taskmin, taskmax, tasksum, - tasksum / len(tasktimes[key]), tasksum / total_t * 100.0) - print - -print "# All threads : " + print( + "{0:33s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + key, + len(tasktimes[key]), + taskmin, + taskmax, + tasksum, + tasksum / len(tasktimes[key]), + tasksum / total_t * 100.0, + ) + ) + print() + +print("# All threads : ") for key in sorted(alltasktimes.keys()): taskmin = min(alltasktimes[key]) taskmax = max(alltasktimes[key]) tasksum = sum(alltasktimes[key]) - print "{0:33s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(key, len(alltasktimes[key]), taskmin, taskmax, tasksum, - tasksum / len(alltasktimes[key]), - tasksum / (len(threadids) * total_t) * 100.0) -print + print( + "{0:33s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + key, + len(alltasktimes[key]), + taskmin, + taskmax, + tasksum, + tasksum / len(alltasktimes[key]), + tasksum / (len(threadids) * total_t) * 100.0, + ) + ) +print() # Dead times. -print "# Times not in tasks (deadtimes)" -print "# ------------------------------" -print "# Time before first task:" -print "# no. : {0:>9s} {1:>9s}".format("value", "percent") +print("# Times not in tasks (deadtimes)") +print("# ------------------------------") +print("# Time before first task:") +print("# no. : {0:>9s} {1:>9s}".format("value", "percent")) predeadtimes = [] for i in threadids: predeadtime = tasks[i][0][0] - print "thread {0:2d}: {1:9.4f} {2:9.4f}"\ - .format(i, predeadtime, predeadtime / total_t * 100.0) + print( + "thread {0:2d}: {1:9.4f} {2:9.4f}".format( + i, predeadtime, predeadtime / total_t * 100.0 + ) + ) predeadtimes.append(predeadtime) predeadmin = min(predeadtimes) predeadmax = max(predeadtimes) predeadsum = sum(predeadtimes) -print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") -print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(predeadtimes), predeadmin, predeadmax, predeadsum, - predeadsum / len(predeadtimes), - predeadsum / (len(threadids) * total_t ) * 100.0) -print - -print "# Time after last task:" -print "# no. 
: {0:>9s} {1:>9s}".format("value", "percent") +print( + "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) +) +print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(predeadtimes), + predeadmin, + predeadmax, + predeadsum, + predeadsum / len(predeadtimes), + predeadsum / (len(threadids) * total_t) * 100.0, + ) +) +print() + +print("# Time after last task:") +print("# no. : {0:>9s} {1:>9s}".format("value", "percent")) postdeadtimes = [] for i in threadids: postdeadtime = total_t - tasks[i][-1][1] - print "thread {0:2d}: {1:9.4f} {2:9.4f}"\ - .format(i, postdeadtime, postdeadtime / total_t * 100.0) + print( + "thread {0:2d}: {1:9.4f} {2:9.4f}".format( + i, postdeadtime, postdeadtime / total_t * 100.0 + ) + ) postdeadtimes.append(postdeadtime) postdeadmin = min(postdeadtimes) postdeadmax = max(postdeadtimes) postdeadsum = sum(postdeadtimes) -print "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") -print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(postdeadtimes), postdeadmin, postdeadmax, postdeadsum, - postdeadsum / len(postdeadtimes), - postdeadsum / (len(threadids) * total_t ) * 100.0) -print +print( + "# : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) +) +print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(postdeadtimes), + postdeadmin, + postdeadmax, + postdeadsum, + postdeadsum / len(postdeadtimes), + postdeadsum / (len(threadids) * total_t) * 100.0, + ) +) +print() # Time in threadpool, i.e. from first to last tasks. -print "# Time between tasks (threadpool deadtime):" -print "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") +print("# Time between tasks (threadpool deadtime):") +print( + "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) +) threadpooldeadtimes = [] for i in threadids: deadtimes = [] @@ -224,24 +274,41 @@ for i in threadids: deadmin = min(deadtimes) deadmax = max(deadtimes) deadsum = sum(deadtimes) - print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(i, len(deadtimes), deadmin, deadmax, deadsum, - deadsum / len(deadtimes), deadsum / total_t * 100.0) + print( + "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + i, + len(deadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(deadtimes), + deadsum / total_t * 100.0, + ) + ) threadpooldeadtimes.extend(deadtimes) deadmin = min(threadpooldeadtimes) deadmax = max(threadpooldeadtimes) deadsum = sum(threadpooldeadtimes) -print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(threadpooldeadtimes), deadmin, deadmax, deadsum, - deadsum / len(threadpooldeadtimes), - deadsum / (len(threadids) * total_t ) * 100.0) -print +print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(threadpooldeadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(threadpooldeadtimes), + deadsum / (len(threadids) * total_t) * 100.0, + ) +) +print() # All times in step. -print "# All deadtimes:" -print "# no. : {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}"\ - .format("count", "minimum", "maximum", "sum", "mean", "percent") +print("# All deadtimes:") +print( + "# no. 
: {0:>9s} {1:>9s} {2:>9s} {3:>9s} {4:>9s} {5:>9s}".format( + "count", "minimum", "maximum", "sum", "mean", "percent" + ) +) alldeadtimes = [] for i in threadids: deadtimes = [] @@ -256,18 +323,32 @@ for i in threadids: deadmin = min(deadtimes) deadmax = max(deadtimes) deadsum = sum(deadtimes) - print "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}"\ - .format(i, len(deadtimes), deadmin, deadmax, deadsum, - deadsum / len(deadtimes), deadsum / total_t * 100.0) + print( + "thread {0:2d}: {1:9d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format( + i, + len(deadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(deadtimes), + deadsum / total_t * 100.0, + ) + ) alldeadtimes.extend(deadtimes) deadmin = min(alldeadtimes) deadmax = max(alldeadtimes) deadsum = sum(alldeadtimes) -print "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}"\ - .format(len(alldeadtimes), deadmin, deadmax, deadsum, - deadsum / len(alldeadtimes), - deadsum / (len(threadids) * total_t ) * 100.0) -print +print( + "all : {0:9d} {1:9.4f} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.2f}".format( + len(alldeadtimes), + deadmin, + deadmax, + deadsum, + deadsum / len(alldeadtimes), + deadsum / (len(threadids) * total_t) * 100.0, + ) +) +print() sys.exit(0) diff --git a/examples/plot_tasks.py b/tools/task_plots/plot_tasks.py similarity index 58% rename from examples/plot_tasks.py rename to tools/task_plots/plot_tasks.py index 76128ac66af21cd5f6d9a7ab0546ed813bde4536..ad26a2965044c07cccff8c5387900ee687211a88 100755 --- a/examples/plot_tasks.py +++ b/tools/task_plots/plot_tasks.py @@ -41,6 +41,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. """ import matplotlib + matplotlib.use("Agg") import matplotlib.collections as collections import matplotlib.ticker as plticker @@ -53,30 +54,67 @@ parser = argparse.ArgumentParser(description="Plot task graphs") parser.add_argument("input", help="Thread data file (-y output)") parser.add_argument("outbase", help="Base name for output graphic files (PNG)") -parser.add_argument("-l", "--limit", dest="limit", - help="Upper time limit in millisecs (def: depends on data)", - default=0, type=float) -parser.add_argument("-e", "--expand", dest="expand", - help="Thread expansion factor (def: 1)", - default=1, type=int) -parser.add_argument("--height", dest="height", - help="Height of plot in inches (def: 4)", - default=4., type=float) -parser.add_argument("--width", dest="width", - help="Width of plot in inches (def: 16)", - default=16., type=float) -parser.add_argument("--nolegend", dest="nolegend", - help="Whether to show the legend (def: False)", - default=False, action="store_true") -parser.add_argument("-v", "--verbose", dest="verbose", - help="Show colour assignments and other details (def: False)", - default=False, action="store_true") -parser.add_argument("-r", "--ranks", dest="ranks", - help="Comma delimited list of ranks to process, if MPI in effect", - default=None, type=str) -parser.add_argument("-m", "--mintic", dest="mintic", - help="Value of the smallest tic (def: least in input file)", - default=-1, type=int) +parser.add_argument( + "-l", + "--limit", + dest="limit", + help="Upper time limit in millisecs (def: depends on data)", + default=0, + type=float, +) +parser.add_argument( + "-e", + "--expand", + dest="expand", + help="Thread expansion factor (def: 1)", + default=1, + type=int, +) +parser.add_argument( + "--height", + dest="height", + help="Height of plot in inches (def: 4)", + default=4.0, + type=float, +) +parser.add_argument( + 
"--width", + dest="width", + help="Width of plot in inches (def: 16)", + default=16.0, + type=float, +) +parser.add_argument( + "--nolegend", + dest="nolegend", + help="Whether to show the legend (def: False)", + default=False, + action="store_true", +) +parser.add_argument( + "-v", + "--verbose", + dest="verbose", + help="Show colour assignments and other details (def: False)", + default=False, + action="store_true", +) +parser.add_argument( + "-r", + "--ranks", + dest="ranks", + help="Comma delimited list of ranks to process, if MPI in effect", + default=None, + type=str, +) +parser.add_argument( + "-m", + "--mintic", + dest="mintic", + help="Value of the smallest tic (def: least in input file)", + default=-1, + type=int, +) args = parser.parse_args() infile = args.input @@ -85,58 +123,152 @@ delta_t = args.limit expand = args.expand mintic = args.mintic if args.ranks != None: - ranks = [int(item) for item in args.ranks.split(',')] + ranks = [int(item) for item in args.ranks.split(",")] else: ranks = None # Basic plot configuration. -PLOT_PARAMS = {"axes.labelsize": 10, - "axes.titlesize": 10, - "font.size": 12, - "legend.fontsize": 12, - "xtick.labelsize": 10, - "ytick.labelsize": 10, - "figure.figsize" : (args.width, args.height), - "figure.subplot.left" : 0.03, - "figure.subplot.right" : 0.995, - "figure.subplot.bottom" : 0.1, - "figure.subplot.top" : 0.99, - "figure.subplot.wspace" : 0., - "figure.subplot.hspace" : 0., - "lines.markersize" : 6, - "lines.linewidth" : 3. - } +PLOT_PARAMS = { + "axes.labelsize": 10, + "axes.titlesize": 10, + "font.size": 12, + "legend.fontsize": 12, + "xtick.labelsize": 10, + "ytick.labelsize": 10, + "figure.figsize": (args.width, args.height), + "figure.subplot.left": 0.03, + "figure.subplot.right": 0.995, + "figure.subplot.bottom": 0.1, + "figure.subplot.top": 0.99, + "figure.subplot.wspace": 0.0, + "figure.subplot.hspace": 0.0, + "lines.markersize": 6, + "lines.linewidth": 3.0, +} pl.rcParams.update(PLOT_PARAMS) # Tasks and subtypes. Indexed as in tasks.h. -TASKTYPES = ["none", "sort", "self", "pair", "sub_self", "sub_pair", - "init_grav", "init_grav_out", "ghost_in", "ghost", "ghost_out", "extra_ghost", "drift_part", "drift_gpart", - "end_force", "kick1", "kick2", "timestep", "send", "recv", "grav_long_range", "grav_mm", "grav_down_in", - "grav_down", "grav_mesh", "cooling", "star_formation", "sourceterms", - "stars_ghost_in", "stars_ghost", "stars_ghost_out", - "count"] - -SUBTYPES = ["none", "density", "gradient", "force", "grav", "external_grav", - "tend", "xv", "rho", "gpart", "multipole", "spart", "stars_density", "count"] +TASKTYPES = [ + "none", + "sort", + "self", + "pair", + "sub_self", + "sub_pair", + "init_grav", + "init_grav_out", + "ghost_in", + "ghost", + "ghost_out", + "extra_ghost", + "drift_part", + "drift_gpart", + "end_force", + "kick1", + "kick2", + "timestep", + "send", + "recv", + "grav_long_range", + "grav_mm", + "grav_down_in", + "grav_down", + "grav_mesh", + "cooling", + "star_formation", + "sourceterms", + "stars_ghost_in", + "stars_ghost", + "stars_ghost_out", + "count", +] + +SUBTYPES = [ + "none", + "density", + "gradient", + "force", + "grav", + "external_grav", + "tend", + "xv", + "rho", + "gpart", + "multipole", + "spart", + "stars_density", + "count", +] # Task/subtypes of interest. 
-FULLTYPES = ["self/force", "self/density", "self/grav", "sub_self/force", - "sub_self/density", "pair/force", "pair/density", "pair/grav", - "sub_pair/force", - "sub_pair/density", "recv/xv", "send/xv", "recv/rho", "send/rho", - "recv/tend", "send/tend", "recv/gpart", "send/gpart", "self/stars_density", - "pair/stars_density", "sub_self/stars_density", "sub_pair/stars_density"] +FULLTYPES = [ + "self/force", + "self/density", + "self/grav", + "sub_self/force", + "sub_self/density", + "pair/force", + "pair/density", + "pair/grav", + "sub_pair/force", + "sub_pair/density", + "recv/xv", + "send/xv", + "recv/rho", + "send/rho", + "recv/tend", + "send/tend", + "recv/gpart", + "send/gpart", + "self/stars_density", + "pair/stars_density", + "sub_self/stars_density", + "sub_pair/stars_density", +] # A number of colours for the various types. Recycled when there are # more task types than colours... -colours = ["cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue", - "sienna", "aquamarine", "bisque", "blue", "green", "lightgreen", - "brown", "purple", "moccasin", "olivedrab", "chartreuse", - "olive", "darkgreen", "green", "mediumseagreen", - "mediumaquamarine", "darkslategrey", "mediumturquoise", - "black", "cadetblue", "skyblue", "red", "slategray", "gold", - "slateblue", "blueviolet", "mediumorchid", "firebrick", - "magenta", "hotpink", "pink", "orange", "lightgreen"] +colours = [ + "cyan", + "lightgray", + "darkblue", + "yellow", + "tan", + "dodgerblue", + "sienna", + "aquamarine", + "bisque", + "blue", + "green", + "lightgreen", + "brown", + "purple", + "moccasin", + "olivedrab", + "chartreuse", + "olive", + "darkgreen", + "green", + "mediumseagreen", + "mediumaquamarine", + "darkslategrey", + "mediumturquoise", + "black", + "cadetblue", + "skyblue", + "red", + "slategray", + "gold", + "slateblue", + "blueviolet", + "mediumorchid", + "firebrick", + "magenta", + "hotpink", + "pink", + "orange", + "lightgreen", +] maxcolours = len(colours) # Set colours of task/subtype. @@ -159,21 +291,21 @@ for task in SUBTYPES: if args.verbose: print("#Selected colours:") for task in sorted(TASKCOLOURS.keys()): - print("# " + task + ": " + TASKCOLOURS[task]) + print(("# " + task + ": " + TASKCOLOURS[task])) for task in sorted(SUBCOLOURS.keys()): - print("# " + task + ": " + SUBCOLOURS[task]) + print(("# " + task + ": " + SUBCOLOURS[task])) # Read input. -data = pl.loadtxt( infile ) +data = pl.loadtxt(infile) # Do we have an MPI file? -full_step = data[0,:] +full_step = data[0, :] if full_step.size == 13: print("# MPI mode") mpimode = True if ranks == None: - ranks = range(int(max(data[:,0])) + 1) - print("# Number of ranks:", len(ranks)) + ranks = list(range(int(max(data[:, 0])) + 1)) + print(("# Number of ranks:", len(ranks))) rankcol = 0 threadscol = 1 taskcol = 2 @@ -194,14 +326,14 @@ else: # Get CPU_CLOCK to convert ticks into milliseconds. CPU_CLOCK = float(full_step[-1]) / 1000.0 if args.verbose: - print("# CPU frequency:", CPU_CLOCK * 1000.0) + print(("# CPU frequency:", CPU_CLOCK * 1000.0)) -nthread = int(max(data[:,threadscol])) + 1 -print("# Number of threads:", nthread) +nthread = int(max(data[:, threadscol])) + 1 +print(("# Number of threads:", nthread)) # Avoid start and end times of zero. 
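+# (entries with a zero tic or toc are presumably tasks that never ran in this step)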
-sdata = data[data[:,ticcol] != 0] -sdata = sdata[sdata[:,toccol] != 0] +sdata = data[data[:, ticcol] != 0] +sdata = sdata[sdata[:, toccol] != 0] # Each rank can have different clocks (compute node), but we want to use the # same delta times range for comparisons, so we suck it up and take the hit of @@ -210,8 +342,8 @@ delta_t = delta_t * CPU_CLOCK if delta_t == 0: for rank in ranks: if mpimode: - data = sdata[sdata[:,rankcol] == rank] - full_step = data[0,:] + data = sdata[sdata[:, rankcol] == rank] + full_step = data[0, :] # Start and end times for this rank. Can be changed using the mintic # option. This moves our zero time to other time. Useful for @@ -224,31 +356,31 @@ if delta_t == 0: dt = toc_step - tic_step if dt > delta_t: delta_t = dt - print("# Data range: ", delta_t / CPU_CLOCK, "ms") + print(("# Data range: ", delta_t / CPU_CLOCK, "ms")) # Once more doing the real gather and plots this time. for rank in ranks: - print("# Processing rank: ", rank) + print(("# Processing rank: ", rank)) if mpimode: - data = sdata[sdata[:,rankcol] == rank] - full_step = data[0,:] + data = sdata[sdata[:, rankcol] == rank] + full_step = data[0, :] tic_step = int(full_step[ticcol]) toc_step = int(full_step[toccol]) - print("# Min tic = ", tic_step) - data = data[1:,:] + print(("# Min tic = ", tic_step)) + data = data[1:, :] typesseen = [] nethread = 0 # Dummy image for ranks that have no tasks. if data.size == 0: - print("# Rank ", rank, " has no tasks") + print(("# Rank ", rank, " has no tasks")) fig = pl.figure() - ax = fig.add_subplot(1,1,1) + ax = fig.add_subplot(1, 1, 1) ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK) if nthread == 0: ax.set_ylim(0, expand) else: - ax.set_ylim(0, nthread*expand) + ax.set_ylim(0, nthread * expand) if mintic < 0: start_t = tic_step else: @@ -260,13 +392,13 @@ for rank in ranks: start_t = float(tic_step) else: start_t = float(mintic) - data[:,ticcol] -= start_t - data[:,toccol] -= start_t + data[:, ticcol] -= start_t + data[:, toccol] -= start_t end_t = (toc_step - start_t) / CPU_CLOCK tasks = {} tasks[-1] = [] - for i in range(nthread*expand): + for i in range(nthread * expand): tasks[i] = [] # Counters for each thread when expanding. @@ -284,15 +416,20 @@ for rank in ranks: thread = ethread tasks[thread].append({}) - tasktype = TASKTYPES[int(data[line,taskcol])] - subtype = SUBTYPES[int(data[line,subtaskcol])] + tasktype = TASKTYPES[int(data[line, taskcol])] + subtype = SUBTYPES[int(data[line, subtaskcol])] tasks[thread][-1]["type"] = tasktype tasks[thread][-1]["subtype"] = subtype - tic = int(data[line,ticcol]) / CPU_CLOCK - toc = int(data[line,toccol]) / CPU_CLOCK + tic = int(data[line, ticcol]) / CPU_CLOCK + toc = int(data[line, toccol]) / CPU_CLOCK tasks[thread][-1]["tic"] = tic tasks[thread][-1]["toc"] = toc - if "self" in tasktype or "pair" in tasktype or "recv" in tasktype or "send" in tasktype: + if ( + "self" in tasktype + or "pair" in tasktype + or "recv" in tasktype + or "send" in tasktype + ): fulltype = tasktype + "/" + subtype if fulltype in SUBCOLOURS: tasks[thread][-1]["colour"] = SUBCOLOURS[fulltype] @@ -306,9 +443,9 @@ for rank in ranks: typesseen = [] fig = pl.figure() - ax = fig.add_subplot(1,1,1) + ax = fig.add_subplot(1, 1, 1) ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK) - ax.set_ylim(0.5, nethread+1.0) + ax.set_ylim(0.5, nethread + 1.0) for i in range(nethread): # Collect ranges and colours into arrays. @@ -330,36 +467,40 @@ for rank in ranks: typesseen.append(qtask) # Now plot. 
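+        # (broken_barh draws one bar per (start, duration) pair along this thread's row)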
- ax.broken_barh(tictocs, [i+0.55,0.9], facecolors = colours, linewidth=0) - + ax.broken_barh(tictocs, [i + 0.55, 0.9], facecolors=colours, linewidth=0) # Legend and room for it. nrow = len(typesseen) / 8 ax.fill_between([0, 0], nethread, nethread + nrow, facecolor="white") if data.size > 0 and not args.nolegend: ax.fill_between([0, 0], nethread, nethread + nrow, facecolor="white") - ax.legend(loc="lower left", shadow=True, - bbox_to_anchor=(0., 1.0, 1., 0.2), mode="expand", ncol=8) + ax.legend( + loc="lower left", + shadow=True, + bbox_to_anchor=(0.0, 1.0, 1.0, 0.2), + mode="expand", + ncol=8, + ) # Start and end of time-step if mintic < 0: - ax.plot([0, 0], [0, nethread + nrow + 1], 'k--', linewidth=1) + ax.plot([0, 0], [0, nethread + nrow + 1], "k--", linewidth=1) else: real_start = tic_step - mintic - ax.plot([real_start, real_start], [0, nethread + nrow + 1], 'k--', linewidth=1) - ax.plot([end_t, end_t], [0, nethread + nrow + 1], 'k--', linewidth=1) + ax.plot([real_start, real_start], [0, nethread + nrow + 1], "k--", linewidth=1) + ax.plot([end_t, end_t], [0, nethread + nrow + 1], "k--", linewidth=1) ax.set_xlabel("Wall clock time [ms]") if expand == 1: - ax.set_ylabel("Thread ID" ) + ax.set_ylabel("Thread ID") else: - ax.set_ylabel("Thread ID * " + str(expand) ) - ax.set_yticks(pl.array(range(nethread)), True) + ax.set_ylabel("Thread ID * " + str(expand)) + ax.set_yticks(pl.array(list(range(nethread))), True) loc = plticker.MultipleLocator(base=expand) ax.yaxis.set_major_locator(loc) - ax.grid(True, which='major', axis="y", linestyle="-") + ax.grid(True, which="major", axis="y", linestyle="-") pl.show() if mpimode: @@ -367,6 +508,6 @@ for rank in ranks: else: outpng = outbase + ".png" pl.savefig(outpng, bbox_inches="tight") - print("Graphics done, output written to", outpng) + print(("Graphics done, output written to", outpng)) sys.exit(0) diff --git a/examples/plot_threadpool.py b/tools/task_plots/plot_threadpool.py similarity index 59% rename from examples/plot_threadpool.py rename to tools/task_plots/plot_threadpool.py index e7553cb0a7375fb38196ae6f6d5413ae34dcc119..2e5521c901d0571665c6c6d7ec3297b0e9e60552 100755 --- a/examples/plot_threadpool.py +++ b/tools/task_plots/plot_threadpool.py @@ -31,6 +31,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
""" import matplotlib + matplotlib.use("Agg") import matplotlib.collections as collections import matplotlib.ticker as plticker @@ -43,27 +44,59 @@ parser = argparse.ArgumentParser(description="Plot threadpool function graphs") parser.add_argument("input", help="Threadpool data file (-Y output)") parser.add_argument("outpng", help="Name for output graphic file (PNG)") -parser.add_argument("-l", "--limit", dest="limit", - help="Upper time limit in millisecs (def: depends on data)", - default=0, type=float) -parser.add_argument("-e", "--expand", dest="expand", - help="Thread expansion factor (def: 1)", - default=1, type=int) -parser.add_argument("--height", dest="height", - help="Height of plot in inches (def: 4)", - default=4., type=float) -parser.add_argument("--width", dest="width", - help="Width of plot in inches (def: 16)", - default=16., type=float) -parser.add_argument("--nolegend", dest="nolegend", - help="Whether to show the legend (def: False)", - default=False, action="store_true") -parser.add_argument("-v", "--verbose", dest="verbose", - help="Show colour assignments and other details (def: False)", - default=False, action="store_true") -parser.add_argument("-m", "--mintic", dest="mintic", - help="Value of the smallest tic (def: least in input file)", - default=-1, type=int) +parser.add_argument( + "-l", + "--limit", + dest="limit", + help="Upper time limit in millisecs (def: depends on data)", + default=0, + type=float, +) +parser.add_argument( + "-e", + "--expand", + dest="expand", + help="Thread expansion factor (def: 1)", + default=1, + type=int, +) +parser.add_argument( + "--height", + dest="height", + help="Height of plot in inches (def: 4)", + default=4.0, + type=float, +) +parser.add_argument( + "--width", + dest="width", + help="Width of plot in inches (def: 16)", + default=16.0, + type=float, +) +parser.add_argument( + "--nolegend", + dest="nolegend", + help="Whether to show the legend (def: False)", + default=False, + action="store_true", +) +parser.add_argument( + "-v", + "--verbose", + dest="verbose", + help="Show colour assignments and other details (def: False)", + default=False, + action="store_true", +) +parser.add_argument( + "-m", + "--mintic", + dest="mintic", + help="Value of the smallest tic (def: least in input file)", + default=-1, + type=int, +) args = parser.parse_args() infile = args.input @@ -73,46 +106,80 @@ expand = args.expand mintic = args.mintic # Basic plot configuration. -PLOT_PARAMS = {"axes.labelsize": 10, - "axes.titlesize": 10, - "font.size": 12, - "legend.fontsize": 12, - "xtick.labelsize": 10, - "ytick.labelsize": 10, - "figure.figsize" : (args.width, args.height), - "figure.subplot.left" : 0.03, - "figure.subplot.right" : 0.995, - "figure.subplot.bottom" : 0.09, - "figure.subplot.top" : 0.99, - "figure.subplot.wspace" : 0., - "figure.subplot.hspace" : 0., - "lines.markersize" : 6, - "lines.linewidth" : 3. - } +PLOT_PARAMS = { + "axes.labelsize": 10, + "axes.titlesize": 10, + "font.size": 12, + "legend.fontsize": 12, + "xtick.labelsize": 10, + "ytick.labelsize": 10, + "figure.figsize": (args.width, args.height), + "figure.subplot.left": 0.03, + "figure.subplot.right": 0.995, + "figure.subplot.bottom": 0.09, + "figure.subplot.top": 0.99, + "figure.subplot.wspace": 0.0, + "figure.subplot.hspace": 0.0, + "lines.markersize": 6, + "lines.linewidth": 3.0, +} pl.rcParams.update(PLOT_PARAMS) # A number of colours for the various types. Recycled when there are # more task types than colours... 
-colours = ["cyan", "lightgray", "darkblue", "yellow", "tan", "dodgerblue", - "sienna", "aquamarine", "bisque", "blue", "green", "lightgreen", - "brown", "purple", "moccasin", "olivedrab", "chartreuse", - "olive", "darkgreen", "green", "mediumseagreen", - "mediumaquamarine", "darkslategrey", "mediumturquoise", - "black", "cadetblue", "skyblue", "red", "slategray", "gold", - "slateblue", "blueviolet", "mediumorchid", "firebrick", - "magenta", "hotpink", "pink", "orange", "lightgreen"] +colours = [ + "cyan", + "lightgray", + "darkblue", + "yellow", + "tan", + "dodgerblue", + "sienna", + "aquamarine", + "bisque", + "blue", + "green", + "lightgreen", + "brown", + "purple", + "moccasin", + "olivedrab", + "chartreuse", + "olive", + "darkgreen", + "green", + "mediumseagreen", + "mediumaquamarine", + "darkslategrey", + "mediumturquoise", + "black", + "cadetblue", + "skyblue", + "red", + "slategray", + "gold", + "slateblue", + "blueviolet", + "mediumorchid", + "firebrick", + "magenta", + "hotpink", + "pink", + "orange", + "lightgreen", +] maxcolours = len(colours) # Read header. First two lines. with open(infile) as infid: - head = [next(infid) for x in xrange(2)] + head = [next(infid) for x in range(2)] header = head[1][2:].strip() header = eval(header) -nthread = int(header['num_threads']) + 1 -CPU_CLOCK = float(header['cpufreq']) / 1000.0 -print "Number of threads: ", nthread +nthread = int(header["num_threads"]) + 1 +CPU_CLOCK = float(header["cpufreq"]) / 1000.0 +print("Number of threads: ", nthread) if args.verbose: - print "CPU frequency:", CPU_CLOCK * 1000.0 + print("CPU frequency:", CPU_CLOCK * 1000.0) # Read input. data = pl.genfromtxt(infile, dtype=None, delimiter=" ") @@ -127,7 +194,7 @@ for i in data: if i[0] != "#": funcs.append(i[0].replace("_mapper", "")) if i[1] < 0: - threads.append(nthread-1) + threads.append(nthread - 1) else: threads.append(i[1]) chunks.append(i[2]) @@ -143,7 +210,7 @@ chunks = pl.array(chunks) mintic_step = min(tics) tic_step = mintic_step toc_step = max(tocs) -print "# Min tic = ", mintic_step +print("# Min tic = ", mintic_step) if mintic > 0: tic_step = mintic @@ -153,7 +220,7 @@ if delta_t == 0: dt = toc_step - tic_step if dt > delta_t: delta_t = dt - print "Data range: ", delta_t / CPU_CLOCK, "ms" + print("Data range: ", delta_t / CPU_CLOCK, "ms") # Once more doing the real gather and plots this time. start_t = float(tic_step) @@ -163,7 +230,7 @@ end_t = (toc_step - start_t) / CPU_CLOCK # Get all "task" names and assign colours. TASKTYPES = pl.unique(funcs) -print TASKTYPES +print(TASKTYPES) # Set colours of task/subtype. TASKCOLOURS = {} @@ -174,15 +241,15 @@ for task in TASKTYPES: # For fiddling with colours... if args.verbose: - print "#Selected colours:" + print("#Selected colours:") for task in sorted(TASKCOLOURS.keys()): - print "# " + task + ": " + TASKCOLOURS[task] + print("# " + task + ": " + TASKCOLOURS[task]) for task in sorted(SUBCOLOURS.keys()): - print "# " + task + ": " + SUBCOLOURS[task] + print("# " + task + ": " + SUBCOLOURS[task]) tasks = {} tasks[-1] = [] -for i in range(nthread*expand): +for i in range(nthread * expand): tasks[i] = [] # Counters for each thread when expanding. 
@@ -211,7 +278,7 @@ nthread = nthread * expand typesseen = [] fig = pl.figure() -ax = fig.add_subplot(1,1,1) +ax = fig.add_subplot(1, 1, 1) ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK) ax.set_ylim(0, nthread) @@ -222,7 +289,7 @@ j = 0 for task in tasks[nthread - expand]: tictocs.append((task["tic"], task["toc"] - task["tic"])) colours.append(task["colour"]) -ax.broken_barh(tictocs, [0,(nthread-1)], facecolors = colours, linewidth=0, alpha=0.15) +ax.broken_barh(tictocs, [0, (nthread - 1)], facecolors=colours, linewidth=0, alpha=0.15) # And we don't plot the fake thread. nthread = nthread - expand @@ -243,36 +310,38 @@ for i in range(nthread): typesseen.append(qtask) # Now plot. - ax.broken_barh(tictocs, [i+0.05,0.90], facecolors = colours, linewidth=0) + ax.broken_barh(tictocs, [i + 0.05, 0.90], facecolors=colours, linewidth=0) # Legend and room for it. nrow = len(typesseen) / 5 if not args.nolegend: - ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white") + ax.fill_between([0, 0], nthread + 0.5, nthread + nrow + 0.5, facecolor="white") ax.set_ylim(0, nthread + 0.5) - ax.legend(loc=1, shadow=True, bbox_to_anchor=(0., 1.05 ,1., 0.2), mode="expand", ncol=5) + ax.legend( + loc=1, shadow=True, bbox_to_anchor=(0.0, 1.05, 1.0, 0.2), mode="expand", ncol=5 + ) box = ax.get_position() - ax.set_position([box.x0, box.y0, box.width, box.height*0.8]) - + ax.set_position([box.x0, box.y0, box.width, box.height * 0.8]) + # Start and end of time-step -real_start_t = (mintic_step - tic_step)/ CPU_CLOCK -ax.plot([real_start_t, real_start_t], [0, nthread + nrow + 1], 'k--', linewidth=1) +real_start_t = (mintic_step - tic_step) / CPU_CLOCK +ax.plot([real_start_t, real_start_t], [0, nthread + nrow + 1], "k--", linewidth=1) -ax.plot([end_t, end_t], [0, nthread + nrow + 1], 'k--', linewidth=1) +ax.plot([end_t, end_t], [0, nthread + nrow + 1], "k--", linewidth=1) -ax.set_xlabel("Wall clock time [ms]", labelpad=0.) 
+ax.set_xlabel("Wall clock time [ms]", labelpad=0.0) if expand == 1: - ax.set_ylabel("Thread ID", labelpad=0 ) + ax.set_ylabel("Thread ID", labelpad=0) else: - ax.set_ylabel("Thread ID * " + str(expand), labelpad=0 ) -ax.set_yticks(pl.array(range(nthread)), True) + ax.set_ylabel("Thread ID * " + str(expand), labelpad=0) +ax.set_yticks(pl.array(list(range(nthread))), True) loc = plticker.MultipleLocator(base=expand) ax.yaxis.set_major_locator(loc) -ax.grid(True, which='major', axis="y", linestyle="-") +ax.grid(True, which="major", axis="y", linestyle="-") pl.show() pl.savefig(outpng) -print "Graphics done, output written to", outpng +print("Graphics done, output written to", outpng) sys.exit(0) diff --git a/examples/process_plot_tasks b/tools/task_plots/process_plot_tasks similarity index 100% rename from examples/process_plot_tasks rename to tools/task_plots/process_plot_tasks diff --git a/examples/process_plot_tasks_MPI b/tools/task_plots/process_plot_tasks_MPI similarity index 100% rename from examples/process_plot_tasks_MPI rename to tools/task_plots/process_plot_tasks_MPI diff --git a/examples/process_plot_taskthreadpools b/tools/task_plots/process_plot_taskthreadpools similarity index 100% rename from examples/process_plot_taskthreadpools rename to tools/task_plots/process_plot_taskthreadpools diff --git a/examples/process_plot_taskthreadpools_helper b/tools/task_plots/process_plot_taskthreadpools_helper similarity index 100% rename from examples/process_plot_taskthreadpools_helper rename to tools/task_plots/process_plot_taskthreadpools_helper diff --git a/examples/process_plot_threadpool b/tools/task_plots/process_plot_threadpool similarity index 100% rename from examples/process_plot_threadpool rename to tools/task_plots/process_plot_threadpool