diff --git a/configure.ac b/configure.ac index 225796677a706183087283af4463474508a6b979..de0e1b35d4da78d103ddf27304ad527361a342c7 100644 --- a/configure.ac +++ b/configure.ac @@ -60,6 +60,7 @@ AC_ARG_ENABLE([mpi], good_mpi="yes" if test "$enable_mpi" = "yes"; then AX_MPI([CC="$MPICC" AC_DEFINE(HAVE_MPI, 1, [Define if you have the MPI library.]) ]) + MPI_LIBRARY="Unknown MPI" # Various MPI implementations require additional libraries when also using # threads. Use mpirun (on PATH) as that seems to be only command with @@ -83,14 +84,17 @@ if test "$enable_mpi" = "yes"; then case "$version" in *Intel*MPI*) MPI_THREAD_LIBS="-mt_mpi" + MPI_LIBRARY="Intel MPI" AC_MSG_RESULT([Intel MPI]) ;; *Platform*) MPI_THREAD_LIBS="-lmtmpi" + MPI_LIBRARY="PLATFORM MPI" AC_MSG_RESULT([PLATFORM MPI]) ;; *"Open MPI"*) MPI_THREAD_LIBS="" + MPI_LIBRARY="Open MPI" AC_MSG_RESULT([Open MPI]) # OpenMPI should be 1.8.6 or later, if not complain. # Version is last word on first line of -version output. @@ -109,6 +113,7 @@ if test "$enable_mpi" = "yes"; then esac AC_SUBST([MPI_THREAD_LIBS]) fi + AC_DEFINE_UNQUOTED([SWIFT_MPI_LIBRARY], ["$MPI_LIBRARY"], [The MPI library name, if known.]) fi AM_CONDITIONAL([HAVEMPI],[test $enable_mpi = "yes"]) @@ -265,6 +270,9 @@ AC_CHECK_FUNC(pthread_setaffinity_np, AC_DEFINE([HAVE_SETAFFINITY],[true], AM_CONDITIONAL(HAVESETAFFINITY, [test "$ac_cv_func_pthread_setaffinity_np" = "yes"]) +# Check for libnuma. +AC_CHECK_LIB([numa], [numa_available]) + # Check for timing functions needed by cycle.h. AC_HEADER_TIME AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h]) diff --git a/examples/main.c b/examples/main.c index 22b1156b63cb148b0a6c974e69bcafa0d0ccdfb6..160e6f97b64b9c380affe80455a425cbfe157a03 100644 --- a/examples/main.c +++ b/examples/main.c @@ -79,6 +79,7 @@ int main(int argc, char *argv[]) { struct engine e; struct UnitSystem us; char ICfileName[200] = ""; + char dumpfile[30]; float dt_max = 0.0f; ticks tic; int nr_nodes = 1, myrank = 0, grid[3] = {1, 1, 1}; @@ -115,11 +116,26 @@ int main(int argc, char *argv[]) { /* Greeting message */ if (myrank == 0) greetings(); +#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE) + /* Ensure the NUMA node on which we initialise (first touch) everything + * doesn't change before engine_init allocates NUMA-local workers. Otherwise, + * we may be scheduled elsewhere between the two times. + */ + cpu_set_t affinity; + CPU_ZERO(&affinity); + CPU_SET(sched_getcpu(), &affinity); + if (sched_setaffinity(0, sizeof(cpu_set_t), &affinity) != 0) { + message("failed to set entry thread's affinity"); + } else { + message("set entry thread's affinity"); + } +#endif + /* Init the space. */ bzero(&s, sizeof(struct space)); /* Parse the options */ - while ((c = getopt(argc, argv, "a:c:d:f:g:m:q:r:s:t:w:yz:")) != -1) + while ((c = getopt(argc, argv, "a:c:d:f:g:m:q:r:s:t:w:y:z:")) != -1) switch (c) { case 'a': if (sscanf(optarg, "%lf", &scaling) != 1) @@ -178,7 +194,8 @@ int main(int argc, char *argv[]) { if (myrank == 0) message("sub size set to %i.", space_subsize); break; case 'y': - dump_tasks = 1; + if(sscanf(optarg, "%d", &dump_tasks) != 1) + error("Error parsing dump_tasks (-y)"); break; case 'z': if (sscanf(optarg, "%d", &space_splitsize) != 1) @@ -414,6 +431,69 @@ int main(int argc, char *argv[]) { #endif } + /* Dump the task data using the given frequency. */ + if (dump_tasks && (dump_tasks == 1 || j % dump_tasks == 1)) { +#ifdef WITH_MPI + + /* Make sure output file is empty, only on one rank. */ + sprintf(dumpfile, "thread_info_MPI-step%d.dat", j); + if (myrank == 0) { + file_thread = fopen(dumpfile, "w"); + fclose(file_thread); + } + MPI_Barrier(MPI_COMM_WORLD); + + for (int i = 0; i < nr_nodes; i++) { + + /* Rank 0 decides the index of writing node, this happens one-by-one. */ + int kk = i; + MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD); + + if (i == myrank) { + + /* Open file and position at end. */ + file_thread = fopen(dumpfile, "a"); + + fprintf(file_thread, " %03i 0 0 0 0 %lli 0 0 0 0\n", myrank, + e.tic_step); + int count = 0; + for (int l = 0; l < e.sched.nr_tasks; l++) + if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit) { + fprintf( + file_thread, " %03i %i %i %i %i %lli %lli %i %i %i\n", myrank, + e.sched.tasks[l].rid, e.sched.tasks[l].type, + e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL), + e.sched.tasks[l].tic, e.sched.tasks[l].toc, + (e.sched.tasks[l].ci != NULL) ? e.sched.tasks[l].ci->count : 0, + (e.sched.tasks[l].cj != NULL) ? e.sched.tasks[l].cj->count : 0, + e.sched.tasks[l].flags); + fflush(stdout); + count++; + } + message("rank %d counted %d tasks", myrank, count); + + fclose(file_thread); + } + + /* And we wait for all to synchronize. */ + MPI_Barrier(MPI_COMM_WORLD); + } + +#else + sprintf(dumpfile, "thread_info-step%d.dat", j); + file_thread = fopen(dumpfile, "w"); + for (int l = 0; l < e.sched.nr_tasks; l++) + if (!e.sched.tasks[l].skip && !e.sched.tasks[l].implicit) + fprintf(file_thread, " %i %i %i %i %lli %lli %i %i\n", + e.sched.tasks[l].rid, e.sched.tasks[l].type, + e.sched.tasks[l].subtype, (e.sched.tasks[l].cj == NULL), + e.sched.tasks[l].tic, e.sched.tasks[l].toc, + (e.sched.tasks[l].ci == NULL) ? 0 : e.sched.tasks[l].ci->count, + (e.sched.tasks[l].cj == NULL) ? 0 : e.sched.tasks[l].cj->count); + fclose(file_thread); +#endif + } + /* Dump a line of aggregate output. */ /* if (myrank == 0) { */ /* printf("%i %e %.16e %.16e %.16e %.3e %.3e %i %.3e %.3e", j, e.time, @@ -447,43 +527,6 @@ int main(int argc, char *argv[]) { (double)runner_hist_bins[k]); #endif - /* Dump the task data. */ - if (dump_tasks) { -#ifdef WITH_MPI - file_thread = fopen("thread_info_MPI.dat", "w"); - for (j = 0; j < nr_nodes; j++) { - MPI_Barrier(MPI_COMM_WORLD); - if (j == myrank) { - fprintf(file_thread, " %03i 0 0 0 0 %lli 0 0 0 0\n", myrank, - e.tic_step); - for (k = 0; k < e.sched.nr_tasks; k++) - if (!e.sched.tasks[k].skip && !e.sched.tasks[k].implicit) - fprintf( - file_thread, " %03i %i %i %i %i %lli %lli %i %i %i\n", myrank, - e.sched.tasks[k].rid, e.sched.tasks[k].type, - e.sched.tasks[k].subtype, (e.sched.tasks[k].cj == NULL), - e.sched.tasks[k].tic, e.sched.tasks[k].toc, - e.sched.tasks[k].ci->count, - (e.sched.tasks[k].cj != NULL) ? e.sched.tasks[k].cj->count : 0, - e.sched.tasks[k].flags); - fflush(stdout); - sleep(1); - } - } - fclose(file_thread); -#else - file_thread = fopen("thread_info.dat", "w"); - for (k = 0; k < e.sched.nr_tasks; k++) - if (!e.sched.tasks[k].skip && !e.sched.tasks[k].implicit) - fprintf(file_thread, " %i %i %i %i %lli %lli %i %i\n", - e.sched.tasks[k].rid, e.sched.tasks[k].type, - e.sched.tasks[k].subtype, (e.sched.tasks[k].cj == NULL), - e.sched.tasks[k].tic, e.sched.tasks[k].toc, - e.sched.tasks[k].ci->count, - (e.sched.tasks[k].cj == NULL) ? 0 : e.sched.tasks[k].cj->count); - fclose(file_thread); -#endif - } if (with_outputs) { /* Write final output. */ diff --git a/examples/plot_tasks.py b/examples/plot_tasks.py index 52f1a90592120e61bd3e789ee0cb8fe7edb63a8d..bf0002664aa04fb0efccf1c1cbeb86158cb17d23 100644 --- a/examples/plot_tasks.py +++ b/examples/plot_tasks.py @@ -3,27 +3,28 @@ # Copyright (c) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk), # Bert Vandenbroucke (bert.vandenbroucke@ugent.be) # Matthieu Schaller (matthieu.schaller@durham.ac.uk) - # + # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published # by the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. - # + # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. - # + # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. - # + # ############################################################################## import matplotlib matplotlib.use('Agg') import pylab as pl -#import numpy as np +import numpy as np +import sys CPU_CLOCK = 2.7e9 @@ -50,18 +51,47 @@ pl.rc('font',**{'family':'sans-serif','sans-serif':['Times']}) -types = {"0": "task_none", "1": "task_sort", "2": "task_self", "3": "task_pair", - "4": "task_pair", "5": "task_sub", "6": "task_ghost", - "7": "task_kick1", "8": "task_kick2", "9": "task_send", - "10": "task_recv", "11": "task_link", "12": "task_grav_pp", - "13": "task_grav_mm", "14": "task_grav_up", "15": "task_grav_down"} - -subtypes = {"0": "subtask_none", "1": "subtask_density", "2": "subtask_force", - "3": "subtask_grav"} - -colors = {"subtask_density": "g", "subtask_force": "b", "subtask_grav": "y", "subtask_none": "k"} -colors2 = {"task_sort": "r", "task_kick1": "m"} - +types = {"0": "task_none", + "1": "task_sort", + "2": "task_self", + "3": "task_pair", + "4": "task_sub", + "5": "task_ghost", + "6": "task_kick1", + "7": "task_kick2", + "8": "task_send", + "9": "task_recv", + "10": "task_grav_pp", + "11": "task_grav_mm", + "12": "task_grav_up", + "13": "task_grav_down", + "14": "task_psort", + "15": "task_split_cell", + "16": "task_rewait", + "17": "task_count"} + +subtypes = {"0": "subtask_none", + "1": "subtask_density", + "2": "subtask_force", + "3": "subtask_grav", + "4": "subtask_count"} + +# Assign colours for all types. +colors = ["red","blue","green","yellow","cyan","magenta","black"] +colors = colors + list(matplotlib.colors.cnames) +index = 0 + +subtypecolors = {} +for key in subtypes: + subtypecolors[subtypes[key]] = colors[index] + print subtypes[key], " = ", colors[index] + index = index + 1 + +taskcolors = {} +for key in types: + taskcolors[types[key]] = colors[index] + print types[key], " = ", colors[index] + index = index + 1 data = pl.loadtxt("thread_info.dat") @@ -86,7 +116,7 @@ for line in range(num_lines): tasks[thread][-1]["subtype"] = subtypes[str(int(data[line,2]))] tasks[thread][-1]["tic"] = int(data[line,4]) / CPU_CLOCK * 1000 tasks[thread][-1]["toc"] = int(data[line,5]) / CPU_CLOCK * 1000 - tasks[thread][-1]["t"] = (tasks[thread][-1]["toc"]+tasks[thread][-1]["tic"]) / 2 + tasks[thread][-1]["t"] = (tasks[thread][-1]["toc"]+tasks[thread][-1]["tic"]) / 2 combtasks = {} combtasks[-1] = [] @@ -107,9 +137,9 @@ for thread in range(nthread): combtasks[thread][-1]["tic"] = task["tic"] combtasks[thread][-1]["toc"] = task["toc"] if task["type"] == 'task_self' or task["type"] == 'task_pair' or task["type"] == 'task_sub': - combtasks[thread][-1]["color"] = colors[task["subtype"]] + combtasks[thread][-1]["color"] = subtypecolors[task["subtype"]] else: - combtasks[thread][-1]["color"] = colors2[task["type"]] + combtasks[thread][-1]["color"] = taskcolors[task["type"]] lasttype = task["type"] else: combtasks[thread][-1]["toc"] = task["toc"] diff --git a/examples/plot_tasks_MPI.py b/examples/plot_tasks_MPI.py new file mode 100755 index 0000000000000000000000000000000000000000..c71d96a4e127f348284b7095a612b5783be73e79 --- /dev/null +++ b/examples/plot_tasks_MPI.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python +""" +Usage: + plot_tasks_MPI.py input.dat png-output-prefix [time-range-ms] + +where input.dat is a thread info file for a step of an MPI run. Use the '-y +interval' flag of the swift MPI commands to create these. The output plots +will be called 'png-output-prefix<mpi-rank>.png', i.e. one each for all the +threads in each MPI rank. Use the time-range-ms in millisecs to produce +plots with the same time span. + +See the command 'process_plot_tasks_MPI' to efficiently wrap this command to +process a number of thread info files and create an HTML file to view them. + +This file is part of SWIFT. + +Copyright (C) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk), + Bert Vandenbroucke (bert.vandenbroucke@ugent.be) + Matthieu Schaller (matthieu.schaller@durham.ac.uk) + Peter W. Draper (p.w.draper@durham.ac.uk) +All Rights Reserved. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +""" + +import matplotlib +import matplotlib.collections as collections +matplotlib.use("Agg") +import pylab as pl +import numpy as np +import sys + +# CPU ticks per second. +CPU_CLOCK = 2.7e9 + +# Basic plot configuration. +PLOT_PARAMS = {"axes.labelsize": 10, + "axes.titlesize": 10, + "font.size": 12, + "legend.fontsize": 12, + "xtick.labelsize": 10, + "ytick.labelsize": 10, + "figure.figsize" : (16., 4.), + "figure.subplot.left" : 0.03, + "figure.subplot.right" : 0.995, + "figure.subplot.bottom" : 0.1, + "figure.subplot.top" : 0.99, + "figure.subplot.wspace" : 0., + "figure.subplot.hspace" : 0., + "lines.markersize" : 6, + "lines.linewidth" : 3. + } +pl.rcParams.update(PLOT_PARAMS) + +# Tasks and subtypes. Indexed as in tasks.h. +TASKTYPES = ["none", "sort", "self", "pair", "sub", "ghost", "kick1", "kick2", + "send", "recv", "grav_pp", "grav_mm", "grav_up", "grav_down", + "psort", "split_cell", "rewait", "count"] + +TASKCOLOURS = {"none": "black", + "sort": "lightblue", + "self": "greenyellow", + "pair": "navy", + "sub": "hotpink", + "ghost": "cyan", + "kick1": "maroon", + "kick2": "green", + "send": "yellow", + "recv": "magenta", + "grav_pp": "mediumorchid", + "grav_mm": "mediumturquoise", + "grav_up": "mediumvioletred", + "grav_down": "mediumnightblue", + "psort": "steelblue", + "split_cell": "seagreen", + "rewait": "olive", + "count": "powerblue"} + +SUBTYPES = ["none", "density", "force", "grav", "count"] + +SUBCOLOURS = {"none": "black", + "density": "red", + "force": "blue", + "grav": "indigo", + "count": "purple"} + +# Show docs if help is requested. +if len( sys.argv ) == 2 and ( sys.argv[1][0:2] == "-h" or sys.argv[1][0:3] == "--h" ): + from pydoc import help + help( "__main__" ) + sys.exit( 0 ) + +# Handle command-line. +if len( sys.argv ) != 3 and len( sys.argv ) != 4: + print "Usage: ", sys.argv[0], "input.dat png-output-prefix [time-range-ms]" + sys.exit(1) + + +infile = sys.argv[1] +outbase = sys.argv[2] +delta_t = 0 +if len( sys.argv ) == 4: + delta_t = int(sys.argv[3]) * CPU_CLOCK / 1000 + +# Read input. +data = pl.loadtxt( infile ) + +nranks = int(max(data[:,0])) + 1 +print "Number of ranks:", nranks +nthread = int(max(data[:,1])) + 1 +print "Number of threads:", nthread + +# Avoid start and end times of zero. +sdata = data[data[:,5] != 0] +sdata = sdata[sdata[:,6] != 0] + +# Each rank can have different clock (compute node), but we want to use the +# same delta times range for comparisons, so we suck it up and take the hit of +# precalculating this, unless the user knows better. +if delta_t == 0: + for rank in range(nranks): + data = sdata[sdata[:,0] == rank] + dt = max(data[:,6]) - min(data[:,5]) + if dt > delta_t: + delta_t = dt + +# Once more doing the real gather and plots this time. +for rank in range(nranks): + data = sdata[sdata[:,0] == rank] + + start_t = min(data[:,5]) + data[:,5] -= start_t + data[:,6] -= start_t + + tasks = {} + tasks[-1] = [] + for i in range(nthread): + tasks[i] = [] + + num_lines = pl.size(data) / 10 + for line in range(num_lines): + thread = int(data[line,1]) + tasks[thread].append({}) + tasks[thread][-1]["type"] = TASKTYPES[int(data[line,2])] + tasks[thread][-1]["subtype"] = SUBTYPES[int(data[line,3])] + tic = int(data[line,5]) / CPU_CLOCK * 1000 + toc = int(data[line,6]) / CPU_CLOCK * 1000 + tasks[thread][-1]["tic"] = tic + tasks[thread][-1]["toc"] = toc + tasks[thread][-1]["t"] = (toc + tic)/ 2 + + combtasks = {} + combtasks[-1] = [] + for i in range(nthread): + combtasks[i] = [] + + for thread in range(nthread): + tasks[thread] = sorted(tasks[thread], key=lambda l: l["t"]) + lasttype = "" + types = [] + for task in tasks[thread]: + if task["type"] not in types: + types.append(task["type"]) + if lasttype == "" or not lasttype == task["type"]: + combtasks[thread].append({}) + combtasks[thread][-1]["type"] = task["type"] + combtasks[thread][-1]["subtype"] = task["subtype"] + combtasks[thread][-1]["tic"] = task["tic"] + combtasks[thread][-1]["toc"] = task["toc"] + if task["type"] == "self" or task["type"] == "pair" or task["type"] == "sub": + combtasks[thread][-1]["colour"] = SUBCOLOURS[task["subtype"]] + else: + combtasks[thread][-1]["colour"] = TASKCOLOURS[task["type"]] + lasttype = task["type"] + else: + combtasks[thread][-1]["toc"] = task["toc"] + + typesseen = [] + fig = pl.figure() + ax = fig.add_subplot(1,1,1) + ax.set_xlim(0, delta_t * 1.03 * 1000 / CPU_CLOCK) + ax.set_ylim(0, nthread) + tictoc = np.zeros(2) + for i in range(nthread): + + # Collect ranges and colours into arrays. + tictocs = np.zeros(len(combtasks[i])*2) + colours = np.empty(len(combtasks[i])*2, dtype='object') + coloursseen = [] + j = 0 + for task in combtasks[i]: + tictocs[j] = task["tic"] + tictocs[j+1] = task["toc"] + colours[j] = task["colour"] + colours[j+1] = task["colour"] + j = j + 2 + if task["colour"] not in coloursseen: + coloursseen.append(task["colour"]) + + # Legend support, collections don't add to this. + if task["subtype"] != "none": + qtask = task["type"] + "/" + task["subtype"] + else: + qtask = task["type"] + if qtask not in typesseen: + pl.plot([], [], color=task["colour"], label=qtask) + typesseen.append(qtask) + + # Now plot each colour, faster to use a mask to select colour ranges. + for colour in coloursseen: + collection = collections.BrokenBarHCollection.span_where(tictocs, ymin=i+0.05, ymax=i+0.95, + where=colours == colour, + facecolor=colour, + linewidths=0) + ax.add_collection(collection) + + + # Legend and room for it. + nrow = len(typesseen) / 5 + if len(typesseen) * 5 < nrow: + nrow = nrow + 1 + ax.fill_between([0, 0], nthread+0.5, nthread + nrow + 0.5, facecolor="white") + ax.set_ylim(0, nthread + nrow + 1) + ax.legend(loc=1, shadow=True, mode="expand", ncol=5) + + ax.set_xlabel("Wall clock time [ms]") + ax.set_ylabel("Thread ID for MPI Rank " + str(rank) ) + ax.set_yticks(pl.array(range(nthread)), True) + + pl.show() + outpng = outbase + str(rank) + ".png" + pl.savefig(outpng) + print "Graphics done, output written to", outpng + +sys.exit(0) diff --git a/examples/process_plot_tasks_MPI b/examples/process_plot_tasks_MPI new file mode 100755 index 0000000000000000000000000000000000000000..d3eb5d4a5fc5918b287cd5d98efcc5881b6f910c --- /dev/null +++ b/examples/process_plot_tasks_MPI @@ -0,0 +1,96 @@ +#!/bin/bash +# +# Usage: +# process_plot_tasks_MPI nprocess time-range-ms +# +# Description: +# Process all the thread info files in the current directory +# creating task graphs for all MPI ranks and all threads. +# +# The input files are created by a run using the "-y interval" flag and +# should be named "thread_info_MPI-step<n>.dat" in the current directory. +# All located files will be processed using "nprocess" concurrent +# processes and all plots will have the given time range. An output +# HTML file "index.html" will be created to view all the plots. +# +# +# This file is part of SWIFT: +# +# Copyright (C) 2015 Peter W. Draper (p.w.draper@durham.ac.uk) +# All Rights Reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Handle command-line +if test "$2" == ""; then + echo "Usage: $0 nprocess time-range-ms" + exit 1 +fi +NPROCS=$1 +TIMERANGE=$2 + +# Find all thread info files. Use version sort to get into correct order. +files=$(ls -v thread_info_MPI-step*.dat) +if test $? != 0; then + echo "Failed to find any thread info files" + exit 1 +fi + +# Construct list of names, the step no and names for the graphics. +list="" +for f in $files; do + s=$(echo $f| sed 's,thread_info_MPI-step\(.*\).dat,\1,') + list="$list $f $s step${s}r" +done + +# Need number of ranks used. +basefile=$(echo $list | awk '{print $1}') +nrank=$(cat $basefile | awk '{print $1}' | uniq | wc -l) +nrank=$(($nrank-1)) + +# And process them, +echo "Processing thread info files..." +echo $list | xargs -P $NPROCS -n 3 /bin/bash -c "./plot_tasks_MPI.py \$0 \$2 $TIMERANGE" + +echo "Writing output index.html file" +# Construct document - serial. +cat <<EOF > index.html + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> + <head> + <title>SWIFT task graphs</title> + </head> + <body> + <h1>SWIFT task graphs</h1> +EOF + +echo $list | xargs -n 3 | while read f s g; do + cat <<EOF >> index.html +<h2>Step $s</h2> +EOF + for i in $(seq 0 $nrank); do + cat <<EOF >> index.html +<a href="step${s}r${i}.png"><img src="step${s}r${i}.png" width=400px/></a> +EOF + done +done + +cat <<EOF >> index.html + </body> +</html> +EOF + +echo "Finished" + +exit diff --git a/src/Makefile.am b/src/Makefile.am index 0fdc51571424e2a9ad91d1d7c714d91446702161..c71cbbbbd1b1463a61e8b138fb85525d74ebbe03 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -41,7 +41,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \ units.c common_io.c single_io.c multipole.c version.c map.c \ - kernel.c tools.c + kernel.c tools.c part.c # Include files for distribution, not installation. noinst_HEADERS = atomic.h cycle.h error.h inline.h kernel.h vector.h \ diff --git a/src/debug.c b/src/debug.c index c4d9b43723386128623fa8cd596cbc6260a70086..49273e6c1c19f82f308a24f2d4158bde8aa656d6 100644 --- a/src/debug.c +++ b/src/debug.c @@ -111,14 +111,14 @@ void printParticle_single(struct part *p) { * @brief Dump the METIS graph in standard format, simple format and weights * only, to a file. * - * @description The standard format output can be read into the METIS + * The standard format output can be read into the METIS * command-line tools. The simple format is just the cell connectivity (this * should not change between calls). The weights format is the standard one, * minus the cell connectivity. * * The output filenames are generated from the prefix and the sequence number - * of calls. So the first is called <prefix>_std_001.dat, <prefix>_simple_001.dat, - * <prefix>_weights_001.dat, etc. + * of calls. So the first is called {prefix}_std_001.dat, {prefix}_simple_001.dat, + * {prefix}_weights_001.dat, etc. * * @param prefix base output filename * @param nvertices the number of vertices diff --git a/src/engine.c b/src/engine.c index 0911f444d14953f73b45c63b0b5de767c79495ba..a000c4eb4157ce2d1b52c767b0e8d14f700aa1ea 100644 --- a/src/engine.c +++ b/src/engine.c @@ -28,6 +28,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <stdbool.h> /* MPI headers. */ #ifdef WITH_MPI @@ -38,6 +39,10 @@ #endif #endif +#ifdef HAVE_LIBNUMA +#include <numa.h> +#endif + /* This object's header. */ #include "engine.h" @@ -47,8 +52,15 @@ #include "cycle.h" #include "debug.h" #include "error.h" +#include "part.h" #include "timers.h" +#ifdef LEGACY_GADGET2_SPH +#include "runner_iact_legacy.h" +#else +#include "runner_iact.h" +#endif + /* Convert cell location to ID. */ #define cell_getid(cdim, i, j, k) \ ((int)(k) + (cdim)[2] * ((int)(j) + (cdim)[1] * (int)(i))) @@ -209,28 +221,24 @@ void engine_redistribute(struct engine *e) { offset_send += counts[ind_send]; offset_recv += counts[ind_recv]; } else { - if (MPI_Isend(&s->parts[offset_send], - sizeof(struct part) * counts[ind_send], MPI_BYTE, k, - 2 * ind_send + 0, MPI_COMM_WORLD, + if (MPI_Isend(&s->parts[offset_send], counts[ind_send], + e->part_mpi_type, k, 2 * ind_send + 0, MPI_COMM_WORLD, &reqs[4 * k]) != MPI_SUCCESS) error("Failed to isend parts to node %i.", k); - if (MPI_Isend(&s->xparts[offset_send], - sizeof(struct xpart) * counts[ind_send], MPI_BYTE, k, - 2 * ind_send + 1, MPI_COMM_WORLD, + if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], + e->xpart_mpi_type, k, 2 * ind_send + 1, MPI_COMM_WORLD, &reqs[4 * k + 1]) != MPI_SUCCESS) error("Failed to isend xparts to node %i.", k); offset_send += counts[ind_send]; } } if (k != nodeID && counts[ind_recv] > 0) { - if (MPI_Irecv(&parts_new[offset_recv], - sizeof(struct part) * counts[ind_recv], MPI_BYTE, k, - 2 * ind_recv + 0, MPI_COMM_WORLD, + if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], e->part_mpi_type, + k, 2 * ind_recv + 0, MPI_COMM_WORLD, &reqs[4 * k + 2]) != MPI_SUCCESS) error("Failed to emit irecv of parts from node %i.", k); - if (MPI_Irecv(&xparts_new[offset_recv], - sizeof(struct xpart) * counts[ind_recv], MPI_BYTE, k, - 2 * ind_recv + 1, MPI_COMM_WORLD, + if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], + e->xpart_mpi_type, k, 2 * ind_recv + 1, MPI_COMM_WORLD, &reqs[4 * k + 3]) != MPI_SUCCESS) error("Failed to emit irecv of parts from node %i.", k); offset_recv += counts[ind_recv]; @@ -1114,7 +1122,7 @@ void engine_maketasks(struct engine *e) { /* Allocate the list of cell-task links. The maximum number of links is the number of cells (s->tot_cells) times the number of neighbours (27) times the number of interaction types (2, density and force). */ - free(e->links); + if (e->links != NULL) free(e->links); e->size_links = s->tot_cells * 27 * 2; if ((e->links = malloc(sizeof(struct link) * e->size_links)) == NULL) error("Failed to allocate cell-task links."); @@ -1825,6 +1833,13 @@ void engine_launch(struct engine *e, int nr_runners, unsigned int mask) { */ void engine_init_particles(struct engine *e) { + int k; + float dt_max = 0.0f, dt_min = FLT_MAX; + double epot = 0.0, ekin = 0.0; + float mom[3] = {0.0, 0.0, 0.0}; + float ang[3] = {0.0, 0.0, 0.0}; + int count = 0; + struct cell *c; struct space *s = e->s; // engine_repartition(e); @@ -2151,6 +2166,26 @@ void engine_split(struct engine *e, int *grid) { s->xparts = xparts_new; } + +#if defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE) + static bool hyperthreads_present(void) { + #ifdef __linux__ + FILE *f = fopen("/sys/devices/system/cpu/cpu0/topology/thread_siblings_list", "r"); + + int c; + while ((c = fgetc(f)) != EOF && c != ','); + fclose(f); + + return c == ','; + #else + return true; // just guess + #endif + } +#endif + + + + /** * @brief init an engine with the given number of threads, queues, and * the given policy. @@ -2171,10 +2206,10 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, int nr_queues, int nr_nodes, int nodeID, int policy, float timeBegin, float timeEnd, float dt_min, float dt_max) { - int k; + int i, k; #if defined(HAVE_SETAFFINITY) int nr_cores = sysconf(_SC_NPROCESSORS_ONLN); - int i, j, cpuid[nr_cores]; + int j, cpuid[nr_cores]; cpu_set_t cpuset; if (policy & engine_policy_cputight) { for (k = 0; k < nr_cores; k++) cpuid[k] = k; @@ -2189,6 +2224,42 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, for (j = maxint / i / 2; j < maxint; j += maxint / i) if (j < nr_cores && j != 0) cpuid[k++] = j; +#if defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE) + /* Ascending NUMA distance. Bubblesort(!) for stable equidistant CPUs. */ + if (numa_available() >= 0) { + if (nodeID == 0) message("prefer NUMA-local CPUs"); + + int home = numa_node_of_cpu(sched_getcpu()), half = nr_cores / 2; + bool done = false, swap_hyperthreads = hyperthreads_present(); + if (swap_hyperthreads) message("prefer physical cores to hyperthreads"); + + while (!done) { + done = true; + for (i = 1; i < nr_cores; i++) { + int node_a = numa_node_of_cpu(cpuid[i-1]); + int node_b = numa_node_of_cpu(cpuid[i]); + + /* Avoid using local hyperthreads over unused remote physical cores. + * Assume two hyperthreads, and that cpuid >= half partitions them. + */ + int thread_a = swap_hyperthreads && cpuid[i-1] >= half; + int thread_b = swap_hyperthreads && cpuid[i] >= half; + + bool swap = thread_a > thread_b; + if (thread_a == thread_b) + swap = numa_distance(home, node_a) > numa_distance(home, node_b); + + if (swap) { + int t = cpuid[i-1]; + cpuid[i-1] = cpuid[i]; + cpuid[i] = t; + done = false; + } + } + } + } +#endif + if (nodeID == 0) { #ifdef WITH_MPI message("engine_init: cpu map is [ "); @@ -2250,11 +2321,13 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, e->barrier_launch = 0; e->barrier_launchcount = 0; - /* Init the scheduler. */ - scheduler_init(&e->sched, e->s, nr_queues, scheduler_flag_steal, e->nodeID); - s->nr_queues = nr_queues; - scheduler_reset(&e->sched, 2 * s->tot_cells + e->nr_threads); + /* Init the scheduler with sufficient tasks for the initial kick1 and sorting + tasks. */ + int nr_tasks = 2 * s->tot_cells + e->nr_threads; + scheduler_init(&e->sched, e->s, nr_tasks, nr_queues, scheduler_flag_steal, + e->nodeID); + s->nr_queues = nr_queues; /* Create the sorting tasks. */ for (i = 0; i < e->nr_threads; i++) diff --git a/src/engine.h b/src/engine.h index eef97bb7ce9d3fed0d1044393cb97163b37ff1af..b70e609325d6abe707c8281d95798adb8191dd0f 100644 --- a/src/engine.h +++ b/src/engine.h @@ -19,6 +19,14 @@ #ifndef SWIFT_ENGINE_H #define SWIFT_ENGINE_H +/* Config parameters. */ +#include "../config.h" + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + /* Some standard headers. */ #include <pthread.h> @@ -131,6 +139,12 @@ struct engine { /* Linked list for cell-task association. */ struct link *links; int nr_links, size_links; + +#ifdef WITH_MPI + /* MPI data type for the particle transfers */ + MPI_Datatype part_mpi_type; + MPI_Datatype xpart_mpi_type; +#endif }; /* Function prototypes. */ diff --git a/src/kernel.h b/src/kernel.h index d55e073479d1585f3677389939fa8e7c53dbe1c9..aead6a95adc35028834d671448223a31a57fc2b6 100644 --- a/src/kernel.h +++ b/src/kernel.h @@ -516,16 +516,17 @@ __attribute__((always_inline)) INLINE static void kernel_eval(float x, #define kernel_name "Wendland C2" #define kernel_degree 5 #define kernel_ivals 1 -#define kernel_gamma 1.f -#define kernel_gamma2 1.f -#define kernel_gamma3 1.f -#define kernel_igamma 1.f +#define kernel_gamma 2.f +#define kernel_gamma2 4.f +#define kernel_gamma3 8.f +#define kernel_igamma 0.5f #define kernel_nwneigh \ (4.0 / 3.0 * M_PI *const_eta_kernel *const_eta_kernel *const_eta_kernel * \ 7.261825f) static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] - __attribute__((aligned(16))) = {4.0f, -15.0f, 20.0f, -10.0f, 0.0f, 1.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; + __attribute__((aligned(16))) = { + 0.05222272f, -0.39167037f, 1.04445431f, -1.04445431f, 0.f, 0.41778173f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; #define kernel_root (kernel_coeffs[kernel_degree]) #define kernel_wroot (4.0 / 3.0 * M_PI *kernel_coeffs[kernel_degree]) @@ -537,7 +538,7 @@ static float kernel_coeffs[(kernel_degree + 1) * (kernel_ivals + 1)] __attribute__((always_inline)) INLINE static void kernel_deval(float x, float *W, float *dW_dx) { - int ind = fminf(x, kernel_ivals); + int ind = fminf(0.5f * x, kernel_ivals); float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; float w = coeffs[0] * x + coeffs[1]; float dw_dx = coeffs[0]; @@ -563,7 +564,7 @@ __attribute__((always_inline)) int j, k; /* Load x and get the interval id. */ - ind.m = vec_ftoi(vec_fmin(x->v, vec_set1((float)kernel_ivals))); + ind.m = vec_ftoi(vec_fmin(0.5f * x->v, vec_set1((float)kernel_ivals))); /* load the coefficients. */ for (k = 0; k < VEC_SIZE; k++) @@ -590,7 +591,7 @@ __attribute__((always_inline)) __attribute__((always_inline)) INLINE static void kernel_eval(float x, float *W) { - int ind = fmin(x, kernel_ivals); + int ind = fmin(0.5f * x, kernel_ivals); float *coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; float w = coeffs[0] * x + coeffs[1]; for (int k = 2; k <= kernel_degree; k++) w = x * w + coeffs[k]; diff --git a/src/part.c b/src/part.c new file mode 100644 index 0000000000000000000000000000000000000000..6d49887ad72d6afdd1f419cfdaca88090e781f3e --- /dev/null +++ b/src/part.c @@ -0,0 +1,68 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* This object's header. */ +#include "part.h" + +#ifdef WITH_MPI +/** + * @brief Registers and returns an MPI type for the particles + * + * @param part_type The type container + */ +void part_create_mpi_type(MPI_Datatype* part_type) { + + /* This is not the recommended way of doing this. + One should define the structure field by field + But as long as we don't do serialization via MPI-IO + we don't really care. + Also we would have to modify this function everytime something + is added to the part structure. */ + MPI_Type_contiguous(sizeof(struct part) / sizeof(unsigned char), MPI_BYTE, + part_type); + MPI_Type_commit(part_type); +} + +/** + * @brief Registers and returns an MPI type for the xparticles + * + * @param xpart_type The type container + */ +void xpart_create_mpi_type(MPI_Datatype* xpart_type) { + + /* This is not the recommended way of doing this. + One should define the structure field by field + But as long as we don't do serialization via MPI-IO + we don't really care. + Also we would have to modify this function everytime something + is added to the part structure. */ + MPI_Type_contiguous(sizeof(struct xpart) / sizeof(unsigned char), MPI_BYTE, + xpart_type); + MPI_Type_commit(xpart_type); +} + +#endif diff --git a/src/part.h b/src/part.h index 02c9f2d8fb95ba7810f04a521e74b7516a274486..f6e3c9c94b93fe88df732cd8a69402a810bbcb2a 100644 --- a/src/part.h +++ b/src/part.h @@ -19,6 +19,18 @@ #ifndef SWIFT_PART_H #define SWIFT_PART_H +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <stdlib.h> + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + +/* Local headers. */ #include "const.h" /* Some constants. */ @@ -72,7 +84,7 @@ struct gpart { size_t id; /* Pointer to corresponding SPH part. */ - struct part *part; + struct part* part; }; } __attribute__((aligned(part_align))); @@ -165,8 +177,13 @@ struct part { unsigned long long id; /* Associated gravitas. */ - struct gpart *gpart; + struct gpart* gpart; } __attribute__((aligned(part_align))); +#ifdef WITH_MPI +void part_create_mpi_type(MPI_Datatype* part_type); +void xpart_create_mpi_type(MPI_Datatype* xpart_type); +#endif + #endif /* SWIFT_PART_H */ diff --git a/src/queue.c b/src/queue.c index 4e5330518c3d067a3e8a8ce7806e3560a41cc8ef..af84c4b1f5e7e80fac05e5b122703fa718b7fead 100644 --- a/src/queue.c +++ b/src/queue.c @@ -126,15 +126,14 @@ void queue_init(struct queue *q, struct task *tasks) { * @brief Get a task free of dependencies and conflicts. * * @param q The task #queue. - * @param super The super-cell tat might conflict with the #queue + * @param prev The previous #task extracted from this #queue. * @param blocking Block until access to the queue is granted. */ -struct task *queue_gettask(struct queue *q, struct cell *super, int blocking) { +struct task *queue_gettask(struct queue *q, const struct task *prev, int blocking) { - int k, qcount, *qtid, gotcha; lock_type *qlock = &q->lock; - struct task *qtasks, *res = NULL; + struct task *res = NULL; /* If there are no tasks, leave immediately. */ if (q->count == 0) return NULL; @@ -147,49 +146,78 @@ struct task *queue_gettask(struct queue *q, struct cell *super, int blocking) { } /* Set some pointers we will use often. */ - qtid = q->tid; - qtasks = q->tasks; - qcount = q->count; - gotcha = 0; - - /* Loop over the task IDs looking for tasks with the same super-cell. */ - if (super != NULL) { - for (k = 0; k < qcount && k < queue_maxsuper; k++) { - - /* Put a finger on the task. */ - res = &qtasks[qtid[k]]; - - /* Try to lock the task and exit if successful. */ - if ((res->ci->super == super || - (res->cj != NULL && res->cj->super == super)) && - task_lock(res)) { - gotcha = 1; + int *qtid = q->tid; + struct task *qtasks = q->tasks; + const int qcount = q->count; + + /* Data for the sliding window in which to try the task with the + best overlap with the previous task. */ + struct { + int ind, tid; + float score; + } window[queue_search_window]; + int window_count = 0; + int tid = -1; + int ind = -1; + + /* Loop over the queue entries. */ + for (int k = 0; k < qcount; k++) { + if (k < queue_search_window) { + window[window_count].ind = k; + window[window_count].tid = qtid[k]; + window[window_count].score = task_overlap(prev, &qtasks[qtid[k]]); + window_count += 1; + } else { + /* Find the task with the largest overlap. */ + int ind_max = 0; + for (int i = 1; i < window_count; i++) + if (window[i].score > window[ind_max].score) ind_max = i; + + /* Try to lock that task. */ + if (task_lock(&qtasks[window[ind_max].tid])) { + tid = window[ind_max].tid; + ind = window[ind_max].ind; + // message("best task has overlap %f.", window[ind_max].score); break; - } - } /* loop over the task IDs. */ + /* Otherwise, replace it with a new one from the queue. */ + } else { + window[ind_max].ind = k; + window[ind_max].tid = qtid[k]; + window[ind_max].score = task_overlap(prev, &qtasks[qtid[k]]); + } + } } - /* Loop over the task IDs again if nothing was found, take anything. */ - if (!gotcha) { - for (k = 0; k < qcount; k++) { - - /* Put a finger on the task. */ - res = &qtasks[qtid[k]]; - - /* Try to lock the task and exit if successful. */ - if (task_lock(res)) break; - - } /* loop over the task IDs. */ + /* If we didn't get a task, loop through whatever is left in the window. */ + if (tid < 0) { + while (window_count > 0) { + int ind_max = 0; + for (int i = 1; i < window_count; i++) + if (window[i].score > window[ind_max].score) ind_max = i; + if (task_lock(&qtasks[window[ind_max].tid])) { + tid = window[ind_max].tid; + ind = window[ind_max].ind; + // message("best task has overlap %f.", window[ind_max].score); + break; + } else { + window_count -= 1; + window[ind_max] = window[window_count]; + } + } } /* Did we get a task? */ - if (k < qcount) { + if (ind >= 0) { /* Another one bites the dust. */ - qcount = q->count -= 1; + const int qcount = q->count -= 1; + + /* Get a pointer on the task that we want to return. */ + res = &qtasks[tid]; /* Swap this task with the last task and re-heap. */ + int k = ind; if (k < qcount) { qtid[k] = qtid[qcount]; int w = qtasks[qtid[k]].weight; diff --git a/src/queue.h b/src/queue.h index d81627b4c4a709498c31c6c6152d3650388020e2..7f6d13c425f80cac20125bf422fe9da1ed06361f 100644 --- a/src/queue.h +++ b/src/queue.h @@ -28,6 +28,7 @@ #define queue_maxsuper 50 #define queue_sizeinit 100 #define queue_sizegrow 2 +#define queue_search_window 8 /* Counters. */ enum { @@ -54,7 +55,7 @@ struct queue { } __attribute__((aligned(64))); /* Function prototypes. */ -struct task *queue_gettask(struct queue *q, struct cell *super, int blocking); +struct task *queue_gettask(struct queue *q, const struct task *prev, int blocking); void queue_init(struct queue *q, struct task *tasks); void queue_insert(struct queue *q, struct task *t); diff --git a/src/runner.c b/src/runner.c index 30777c446fc40e1ae7cda0bcb4d3b9087965aa09..4440c3ac7d0ac7ffc3436947ab445073cd788629 100644 --- a/src/runner.c +++ b/src/runner.c @@ -44,9 +44,6 @@ #include "timers.h" #include "timestep.h" -//struct task *store = NULL; - - /* Include the right variant of the SPH interactions */ #ifdef LEGACY_GADGET2_SPH #include "runner_iact_legacy.h" @@ -55,25 +52,7 @@ #endif #include "runner_iact_grav.h" -#define PRINT_PART \ - if (p->id == 1000) { \ - message( \ - "p->id=%lld p->h=%3.2f p->N_ngb=%3.2f p->rho=%3.2f p->t_beg=%3.2f p->t_end=%3.2f pos=[%3.2f %3.2f %3.2f] a=[%3.2f %3.2f %3.2f]", \ - p->id, p->h, p->density.wcount, p->rho, p->t_begin, p->t_end, p->x[0], p->x[1], p->x[2], p->a[0], p->a[1], p->a[2]); \ - } - - - -/* Convert cell location to ID. */ -#define cell_getid(cdim, i, j, k) \ - ((int)(k) + (cdim)[2] * ((int)(j) + (cdim)[1] * (int)(i))) - -/* Histograms bins. */ -long long int runner_hist_bins[runner_hist_N]; - -/* The counters. */ -int runner_counter[runner_counter_count]; - +/* Orientation of the cell pairs */ const float runner_shift[13 * 3] = { 5.773502691896258e-01, 5.773502691896258e-01, 5.773502691896258e-01, 7.071067811865475e-01, 7.071067811865475e-01, 0.0, @@ -88,6 +67,8 @@ const float runner_shift[13 * 3] = { 0.0, 1.0, 0.0, 0.0, 7.071067811865475e-01, -7.071067811865475e-01, 0.0, 0.0, 1.0, }; + +/* Does the axis need flipping ? */ const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; @@ -205,8 +186,6 @@ void runner_dosort(struct runner *r, struct cell *c, int flags, int clock) { TIMER_TIC - // message("sort!"); - /* Clean-up the flags, i.e. filter out what's already been sorted. */ flags &= ~c->sorted; if (flags == 0) return; @@ -547,7 +526,7 @@ void runner_doinit(struct runner *r, struct cell *c) { for (k = 0; k < 3; k++) p->density.curl_v[k] = 0.0; } - PRINT_PART; + ; } } @@ -621,7 +600,7 @@ void runner_doghost(struct runner *r, struct cell *c) { wcount_dh = p->density.wcount_dh * ih * (4.0f / 3.0 * M_PI * kernel_gamma3); - PRINT_PART; + ; // if(p->id==1000) // message("wcount_dh=%f", wcount_dh); @@ -723,8 +702,6 @@ void runner_doghost(struct runner *r, struct cell *c) { /* Run through this cell's density interactions. */ for (struct link *l = finger->density; l != NULL; l = l->next) { - // message("link: %p next: %p", l, l->next); fflush(stdout); - /* Self-interaction? */ if (l->t->type == task_type_self) runner_doself_subset_density(r, finger, parts, pid, count); @@ -732,8 +709,6 @@ void runner_doghost(struct runner *r, struct cell *c) { /* Otherwise, pair interaction? */ else if (l->t->type == task_type_pair) { - // message("pair"); - /* Left or right? */ if (l->t->ci == finger) runner_dopair_subset_density(r, finger, parts, pid, count, @@ -747,8 +722,6 @@ void runner_doghost(struct runner *r, struct cell *c) { /* Otherwise, sub interaction? */ else if (l->t->type == task_type_sub) { - // message("sub"); - /* Left or right? */ if (l->t->ci == finger) runner_dosub_subset_density(r, finger, parts, pid, count, @@ -758,7 +731,6 @@ void runner_doghost(struct runner *r, struct cell *c) { l->t->ci, -1, 1); } } - // error("done"); } } } @@ -791,6 +763,7 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { struct xpart *restrict xp, *restrict xparts = c->xparts; TIMER_TIC + /* No children? */ if (!c->split) { @@ -805,7 +778,7 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { h = p->h; ih = 1.0f / h; - PRINT_PART; + ; /* Drift... */ p->x[0] += xp->v_full[0] * dt; @@ -907,6 +880,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { /* Get a handle on the part. */ p = &parts[k]; xp = &xparts[k]; + m = p->mass; x[0] = p->x[0], x[1] = p->x[1], x[2] = p->x[2]; @@ -948,7 +922,6 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { p->t_begin = p->t_end; p->t_end = p->t_begin + dt; - PRINT_PART /* Kick particles in momentum space */ xp->v_full[0] += p->a[0] * dt; @@ -990,7 +963,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { } - /* Otherwise, agregate data from children. */ + /* Otherwise, aggregate data from children. */ else { /* Init everything. */ @@ -1074,7 +1047,7 @@ void *runner_main(void *data) { struct engine *e = r->e; struct scheduler *sched = &e->sched; struct task *t = NULL; - struct cell *ci, *cj, *super; + struct cell *ci, *cj; struct part *parts; int k, nr_parts; @@ -1084,8 +1057,8 @@ void *runner_main(void *data) { /* Wait at the barrier. */ engine_barrier(e, r->id); - /* Re-set the pointer to the previous super cell. */ - super = NULL; + /* Re-set the pointer to the previous task, as there is none. */ + struct task* prev = NULL; /* Loop while there are tasks... */ while (1) { @@ -1095,11 +1068,9 @@ void *runner_main(void *data) { /* Get the task. */ TIMER_TIC - t = scheduler_gettask(sched, r->qid, super); + t = scheduler_gettask(sched, r->qid, prev); TIMER_TOC(timer_gettask); - // message("Got task %p", t->type); fflush(stdout); - /* Did I get anything? */ if (t == NULL) break; } @@ -1109,18 +1080,9 @@ void *runner_main(void *data) { cj = t->cj; t->rid = r->cpuid; - /* Set super to the first cell that I own. */ - if (t->type != task_type_rewait && t->type != task_type_psort) { - if (ci->super != NULL && ci->super->owner == r->qid) - super = ci->super; - else if (cj != NULL && cj->super != NULL && cj->super->owner == r->qid) - super = cj->super; - } - /* Different types of tasks... */ switch (t->type) { case task_type_self: - //message("self"); if (t->subtype == task_subtype_density) runner_doself1_density(r, ci); else if (t->subtype == task_subtype_force) @@ -1129,7 +1091,6 @@ void *runner_main(void *data) { error("Unknown task subtype."); break; case task_type_pair: - //message("pair unlocking %d tasks", t->nr_unlock_tasks); if (t->subtype == task_subtype_density) runner_dopair1_density(r, ci, cj); else if (t->subtype == task_subtype_force) @@ -1155,11 +1116,9 @@ void *runner_main(void *data) { runner_doinit(r, ci); break; case task_type_ghost: - //message("ghost"); runner_doghost(r, ci); break; case task_type_drift: - runner_dodrift(r, ci, 1); break; case task_type_kick: @@ -1195,13 +1154,14 @@ void *runner_main(void *data) { space_split(e->s, t->ci); break; case task_type_rewait: - task_do_rewait(t); + scheduler_do_rewait((struct task *)t->ci, (struct task *)t->cj, t->flags); break; default: error("Unknown task type."); } /* We're done with this task, see if we get a next one. */ + prev = t; t = scheduler_done(sched, t); } /* main loop. */ diff --git a/src/runner.h b/src/runner.h index ce4b8405bcde69d19990fbcfee257ddafc717b6e..366c4f7af6ec94c0b366da85b21713600d559dc0 100644 --- a/src/runner.h +++ b/src/runner.h @@ -19,53 +19,12 @@ #ifndef SWIFT_RUNNER_H #define SWIFT_RUNNER_H -/* Some standard headers. */ -#include <pthread.h> - /* Includes. */ #include "cell.h" #include "inline.h" -/* Forward-declare the engine type to avoid cyclic header dependencies. */ -struct engine; - -/* Some constants/flags. */ -#define runner_prefetch 0 - -/* SID stuff. */ -extern const char runner_flip[]; - -/* Counters. */ -enum runner_counters { - runner_counter_swap = 0, - runner_counter_stall, - runner_counter_steal_stall, - runner_counter_steal_empty, - runner_counter_keep, - runner_counter_iact, - runner_counter_count, -}; -extern int runner_counter[runner_counter_count]; - -/* Counter macros. */ -#ifdef COUNTER -#define COUNT(c) (__sync_add_and_fetch(&runner_counter[c], 1)) -#else -#define COUNT(c) -#endif - -/* Histogram functions. */ -#define runner_hist_a 1.0 -#define runner_hist_b 100.0 -#define runner_hist_N 99 -extern long long int runner_hist_bins[runner_hist_N]; -#define runner_hist_hit(x) \ - __sync_add_and_fetch( \ - &runner_hist_bins[(int)fmax( \ - 0.0, fmin(runner_hist_N - 1, ((x) - runner_hist_a) / \ - (runner_hist_b - runner_hist_a) * \ - runner_hist_N))], \ - 1) +extern const float runner_shift[13 * 3]; +extern const char runner_flip[27]; /* A struct representing a runner's thread and its data. */ struct runner { diff --git a/src/scheduler.c b/src/scheduler.c index 726f71c6ede452ca79bc6fe45611005d9538c812..41826ce22181ebb3ed625aac07e327bb49461640 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -1095,8 +1095,8 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_recv: #ifdef WITH_MPI - if ((err = MPI_Irecv(t->ci->parts, sizeof(struct part) * t->ci->count, - MPI_BYTE, t->ci->nodeID, t->flags, MPI_COMM_WORLD, + if ((err = MPI_Irecv(t->ci->parts, t->ci->count, s->part_mpi_type, + t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req)) != MPI_SUCCESS) { char buff[MPI_MAX_ERROR_STRING]; int len; @@ -1113,9 +1113,9 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_send: #ifdef WITH_MPI - if ((err = MPI_Isend(t->ci->parts, sizeof(struct part) * t->ci->count, - MPI_BYTE, t->cj->nodeID, t->flags, MPI_COMM_WORLD, - &t->req)) != MPI_SUCCESS) { + if ((err = MPI_Isend(t->ci->parts, t->ci->count, s->part_mpi_type, + t->cj->nodeID, t->flags, MPI_COMM_WORLD, + &t->req)) != MPI_SUCCESS) { char buff[MPI_MAX_ERROR_STRING]; int len; MPI_Error_string(err, buff, &len); @@ -1242,13 +1242,13 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) { * * @param s The #scheduler. * @param qid The ID of the preferred #queue. - * @param super the super-cell + * @param prev the previous task that was run. * * @return A pointer to a #task or @c NULL if there are no available tasks. */ struct task *scheduler_gettask(struct scheduler *s, int qid, - struct cell *super) { + const struct task *prev) { struct task *res = NULL; int k, nr_queues = s->nr_queues; @@ -1267,7 +1267,7 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, /* Try to get a task from the suggested queue. */ if (s->queues[qid].count > 0) { TIMER_TIC - res = queue_gettask(&s->queues[qid], super, 0); + res = queue_gettask(&s->queues[qid], prev, 0); TIMER_TOC(timer_qget); if (res != NULL) break; } @@ -1280,7 +1280,7 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, for (k = 0; k < scheduler_maxsteal && count > 0; k++) { int ind = rand_r(&seed) % count; TIMER_TIC - res = queue_gettask(&s->queues[qids[ind]], super, 0); + res = queue_gettask(&s->queues[qids[ind]], prev, 0); TIMER_TOC(timer_qsteal); if (res != NULL) break; @@ -1318,15 +1318,14 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, * * @param s The #scheduler. * @param space The #space we are working with + * @param nr_tasks The number of tasks to allocate initially. * @param nr_queues The number of queues in this scheduler. * @param flags The #scheduler flags. * @param nodeID The MPI rank */ -void scheduler_init(struct scheduler *s, struct space *space, int nr_queues, - unsigned int flags, int nodeID) { - - int k; +void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, + int nr_queues, unsigned int flags, int nodeID) { /* Init the lock. */ lock_init(&s->lock); @@ -1337,7 +1336,7 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_queues, error("Failed to allocate queues."); /* Initialize each queue. */ - for (k = 0; k < nr_queues; k++) queue_init(&s->queues[k], NULL); + for (int k = 0; k < nr_queues; k++) queue_init(&s->queues[k], NULL); /* Init the sleep mutex and cond. */ if (pthread_cond_init(&s->sleep_cond, NULL) != 0 || @@ -1359,13 +1358,17 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_queues, s->space = space; s->nodeID = nodeID; - /* Init other values. */ + /* Init the tasks array. */ + s->size = 0; s->tasks = NULL; s->tasks_ind = NULL; - s->waiting = 0; - s->size = 0; - s->nr_tasks = 0; - s->tasks_next = 0; + scheduler_reset(s, nr_tasks); + +/* Construct types for MPI communications */ +#ifdef WITH_MPI + part_create_mpi_type(&s->part_mpi_type); + xpart_create_mpi_type(&s->xpart_mpi_type); +#endif } /** @@ -1392,3 +1395,31 @@ void scheduler_print_tasks(struct scheduler *s, char *fileName) { fclose(file); } + +/** + * @brief Sets the waits of the dependants of a range of task + * + * @param t_begin Beginning of the #task range + * @param t_end End of the #task range + * @param mask The scheduler mask + */ +void scheduler_do_rewait(struct task *t_begin, struct task *t_end, + unsigned int mask) { + for (struct task *t2 = t_begin; t2 != t_end; t2++) { + + if (t2->skip) continue; + + /* Skip tasks not in the mask */ + if (!((1 << t2->type) & mask)) continue; + + /* Skip sort tasks that have already been performed */ + if (t2->type == task_type_sort && t2->flags == 0) continue; + + /* Sets the waits of the dependances */ + for (int k = 0; k < t2->nr_unlock_tasks; k++) { + struct task *t3 = t2->unlock_tasks[k]; + atomic_inc(&t3->wait); + } + } +} + diff --git a/src/scheduler.h b/src/scheduler.h index f87734210689dc4ecb7f89572c3494614ee826a3..fe2a979450f9af7cf4b2450225bdce364543a237 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -20,6 +20,14 @@ #ifndef SWIFT_SCHEDULER_H #define SWIFT_SCHEDULER_H +/* Config parameters. */ +#include "../config.h" + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#endif + /* Some standard headers. */ #include <pthread.h> @@ -89,13 +97,19 @@ struct scheduler { /* The node we are working on. */ int nodeID; + +#ifdef WITH_MPI + /* MPI data type for the particle transfers */ + MPI_Datatype part_mpi_type; + MPI_Datatype xpart_mpi_type; +#endif }; /* Function prototypes. */ -void scheduler_init(struct scheduler *s, struct space *space, int nr_queues, - unsigned int flags, int nodeID); +void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, + int nr_queues, unsigned int flags, int nodeID); struct task *scheduler_gettask(struct scheduler *s, int qid, - struct cell *super); + const struct task* prev); void scheduler_enqueue(struct scheduler *s, struct task *t); void scheduler_start(struct scheduler *s, unsigned int mask); void scheduler_reset(struct scheduler *s, int nr_tasks); @@ -111,5 +125,7 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta, struct task *tb); void scheduler_set_unlocks(struct scheduler *s); void scheduler_dump_queue(struct scheduler *s); void scheduler_print_tasks(struct scheduler *s, char *fileName); +void scheduler_do_rewait(struct task *t_begin, struct task *t_end, + unsigned int mask); #endif /* SWIFT_SCHEDULER_H */ diff --git a/src/task.c b/src/task.c index a388cb69a3b145ab14b274d3470ce7c192e1aa3f..452ffd0e721fa2e8467e906f6b19f1d02db29fd5 100644 --- a/src/task.c +++ b/src/task.c @@ -41,8 +41,6 @@ #include "error.h" #include "lock.h" -//struct task *store; - /* Task type names. */ const char *taskID_names[task_type_count] = { "none", "sort", "self", "pair", "sub", "init", @@ -53,6 +51,56 @@ const char *taskID_names[task_type_count] = { const char *subtaskID_names[task_type_count] = {"none", "density", "force", "grav"}; + +/** + * @brief Computes the overlap between the parts array of two given cells. + */ + +size_t task_cell_overlap(const struct cell *ci, const struct cell *cj) { + if (ci == NULL || cj == NULL) return 0; + if (ci->parts <= cj->parts && + ci->parts + ci->count >= cj->parts + cj->count) { + return cj->count; + } else if (cj->parts <= ci->parts && + cj->parts + cj->count >= ci->parts + ci->count) { + return ci->count; + } + return 0; +} + +/** + * @brief Compute the Jaccard similarity of the data used by two + * different tasks. + * + * @param ta The first #task. + * @param tb The second #task. + */ + +float task_overlap(const struct task *ta, const struct task *tb) { + /* First check if any of the two tasks are of a type that don't + use cells. */ + if (ta == NULL || tb == NULL || ta->type == task_type_none || + ta->type == task_type_psort || ta->type == task_type_split_cell || + ta->type == task_type_rewait || tb->type == task_type_none || + tb->type == task_type_psort || tb->type == task_type_split_cell || + tb->type == task_type_rewait) + return 0.0f; + + /* Compute the union of the cell data. */ + size_t size_union = 0; + if (ta->ci != NULL) size_union += ta->ci->count; + if (ta->cj != NULL) size_union += ta->cj->count; + if (tb->ci != NULL) size_union += tb->ci->count; + if (tb->cj != NULL) size_union += tb->cj->count; + + /* Compute the intersection of the cell data. */ + const size_t size_intersect = + task_cell_overlap(ta->ci, tb->ci) + task_cell_overlap(ta->ci, tb->cj) + + task_cell_overlap(ta->cj, tb->ci) + task_cell_overlap(ta->cj, tb->cj); + + return ((float)size_intersect) / (size_union - size_intersect); +} + /** * @brief Unlock the cell held by this task. * @@ -258,48 +306,3 @@ void task_addunlock_old(struct task *ta, struct task *tb) { lock_unlock_blind(&ta->lock); } - - -void task_print_mask(unsigned int mask) { - - int k; - - printf("task_print_mask: The tasks to run are ["); - for (k = 1; k < task_type_count; k++) - printf(" %s=%s", taskID_names[k], (mask & (1 << k)) ? "yes" : "no"); - printf(" ]\n"); -} - - -void task_do_rewait(struct task *t) { - - const unsigned int mask = t->flags; - - for (struct task *t2 = (struct task *)t->ci; t2 != (struct task *)t->cj; t2++) { - - if( t2->skip ) continue; - - /* Skip tasks not in the mask */ - if( !((1<<t2->type) & mask) ) continue; - - /* Skip sort tasks that have already been */ - if(t2->type == task_type_sort && t2->flags == 0) continue; - - /* if(store == NULL && t2->type==task_type_pair && t2->subtype==task_subtype_density) { */ - /* message("\n"); */ - /* message("Checking task %s-%s address: %p", taskID_names[t2->type], subtaskID_names[t2->subtype], t2); */ - /* store = t2; */ - /* } */ - - for (int k = 0; k < t2->nr_unlock_tasks; k++) { - - struct task *t3=t2->unlock_tasks[k]; - - atomic_inc(&t3->wait); - - /* if (t3 == store) { */ - /* message("Unlocked by task %s-%s address: %p" , taskID_names[t2->type], subtaskID_names[t2->subtype], t2); */ - /* } */ - } - } -} diff --git a/src/task.h b/src/task.h index 8f633a640ef7457a28713fe3fa3f182232d137de..1203a954c9b9b5314683f61428995d47fdea7dc9 100644 --- a/src/task.h +++ b/src/task.h @@ -93,6 +93,7 @@ void task_rmunlock_blind(struct task *ta, struct task *tb); void task_cleanunlock(struct task *t, int type); void task_addunlock(struct task *ta, struct task *tb); void task_unlock(struct task *t); +float task_overlap(const struct task *ta, const struct task *tb); int task_lock(struct task *t); void task_print_mask(unsigned int mask); void task_do_rewait(struct task *t); diff --git a/src/version.c b/src/version.c index 7370c061d28a1594213015c3ba8b2108fa4b7a37..4624f036a0178f5b31b1aa24b1607507914444a7 100644 --- a/src/version.c +++ b/src/version.c @@ -18,6 +18,21 @@ * ******************************************************************************/ +/* Config parameters. */ +#include "../config.h" + +/* MPI headers. */ +#ifdef WITH_MPI +#include <mpi.h> +#ifdef HAVE_METIS +#include <metis.h> +#endif +#endif + +#ifdef HAVE_HDF5 +#include <hdf5.h> +#endif + /* Some standard headers. */ #include <stdio.h> #include <string.h> @@ -28,26 +43,50 @@ /** * @brief Return the source code git revision * - * @details The SHA of the code checked out when the library was last built. + * The SHA of the code checked out when the library was last built. * Will include -dirty if they are local modifications. + * + * @result the git version. */ const char *git_revision(void) { + static char buf[256]; + static int initialised = 0; static const char *revision = GIT_REVISION; - return revision; + if (!initialised) { + if (strlen(revision) == 0) + sprintf(buf, "%s", "unknown"); + else + sprintf(buf, "%s", revision); + initialised = 1; + } + return buf; } /** * @brief Return the source code git branch * - * @details The name of the current branch when the code was last built. + * The name of the current branch when the code was last built. + * + * @result git branch */ const char *git_branch(void) { + static char buf[256]; + static int initialised = 0; static const char *branch = GIT_BRANCH; - return branch; + if (!initialised) { + if (strlen(branch) == 0) + sprintf(buf, "%s", "unknown"); + else + sprintf(buf, "%s", branch); + initialised = 1; + } + return buf; } /** * @brief The version of SWIFT + * + * @result the package version */ const char *package_version(void) { static const char *version = PACKAGE_VERSION; @@ -56,6 +95,8 @@ const char *package_version(void) { /** * @brief A description of the package version and code status. + * + * @result description of the package version */ const char *package_description(void) { static char buf[256]; @@ -68,6 +109,11 @@ const char *package_description(void) { return buf; } +/** + * @brief return the name of the compiler used to build SWIFT. + * + * @result description of the compiler. + */ const char *compiler_name(void) { static char compiler[256] = {0}; #if defined(__INTEL_COMPILER) @@ -83,6 +129,11 @@ const char *compiler_name(void) { return compiler; } +/** + * @brief return compiler version used to build SWIFT. + * + * @result description of the compiler. + */ const char *compiler_version(void) { static char version[256] = {0}; #if defined(__INTEL_COMPILER) @@ -104,6 +155,87 @@ const char *compiler_version(void) { return version; } + +/** + * @brief return the MPI version, runtime if possible otherwise that used when + * built. + * + * @result description of the MPI version. + */ +const char *mpi_version(void) { + static char version[80] = {0}; + +#ifdef WITH_MPI + int std_version, std_subversion; + + /* Check that the library implements the version string routine */ +#ifdef MPI_MAX_LIBRARY_VERSION_STRING + static char lib_version[MPI_MAX_LIBRARY_VERSION_STRING] = {0}; + int len; + MPI_Get_library_version(lib_version, &len); + + /* Find first \n and truncate string to this length, can get many lines from + * some MPIs (MPICH). */ + char *ptr = strchr(lib_version, '\n'); + if (ptr != NULL) *ptr = '\0'; + + /* Also arbitrarily truncate to keep down to one line, Open MPI, + * check for last comma and keep to ~60 chars max. */ + strcpy(lib_version+60, "..."); + ptr = strrchr(lib_version, ','); + if (ptr != NULL) *ptr = '\0'; + +#else + /* Use autoconf guessed value. */ + static char lib_version[60] = {0}; + snprintf(lib_version, 60, "%s", SWIFT_MPI_LIBRARY); +#endif + + /* Numeric version. */ + MPI_Get_version(&std_version, &std_subversion); + snprintf(version, 80, "%s (MPI std v%i.%i)", lib_version, + std_version, std_subversion); +#else + sprintf(version, "Code was not compiled with MPI support"); +#endif + return version; +} + +/** + * @brief return the HDF5 version in use at runtime. + * + * @result description of the current HDF5 version. + */ +const char *hdf5_version(void) { + + static char version[256] = {0}; +#ifdef HAVE_HDF5 + unsigned int majnum, minnum, relnum; + H5get_libversion(&majnum, &minnum, &relnum); + sprintf(version, "%i.%i.%i", majnum, minnum, relnum); +#else + sprintf(version, "Unknown version"); +#endif + return version; +} + +/** + * @brief return the METIS version used when SWIFT was built. + * + * @result description of the METIS version. + */ +const char *metis_version(void) { + + static char version[256] = {0}; +#if defined(WITH_MPI) && defined(HAVE_METIS) + sprintf(version, "%i.%i.%i", METIS_VER_MAJOR, METIS_VER_MINOR, + METIS_VER_SUBMINOR); +#else + sprintf(version, "Unknown version"); +#endif + return version; +} + /** * @brief Prints a greeting message to the standard output containing code * version and revision number @@ -120,6 +252,16 @@ void greetings(void) { printf(" Version : %s\n", package_version()); printf(" Revision: %s, Branch: %s\n", git_revision(), git_branch()); - printf(" Webpage : www.swiftsim.com\n"); - printf(" Compiler: %s, Version: %s\n\n", compiler_name(), compiler_version()); + printf(" Webpage : www.swiftsim.com\n\n"); + printf(" Compiler: %s, Version: %s\n", compiler_name(), compiler_version()); +#ifdef HAVE_HDF5 + printf(" HDF5 library version: %s\n", hdf5_version()); +#endif +#ifdef WITH_MPI + printf(" MPI library: %s\n", mpi_version()); +#ifdef HAVE_METIS + printf(" METIS library version: %s\n", metis_version()); +#endif +#endif + printf("\n"); } diff --git a/src/version.h.in b/src/version.h.in index d29ab6d43784c0f64b37a9c0d1a99781c82770bd..7824e06b996dfbb4178b57f1b72365a1e2d24484 100644 --- a/src/version.h.in +++ b/src/version.h.in @@ -35,6 +35,9 @@ const char* git_revision(void); const char* git_branch(void); const char* compiler_name(void); const char* compiler_version(void); +const char* mpi_version(void); +const char *hdf5_version(void); +const char *metis_version(void); void greetings(void); #endif /* SWIFT_VERSION_H */ diff --git a/theory/kernel/kernels.py b/theory/kernel/kernels.py new file mode 100644 index 0000000000000000000000000000000000000000..d7bdbe2bf9ba49a30f4c8a2ae136c4843ce5c2cf --- /dev/null +++ b/theory/kernel/kernels.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +import matplotlib +matplotlib.use("Agg") +from pylab import * +from scipy import integrate +import distinct_colours as colours +from scipy.optimize import curve_fit +from scipy.optimize import fsolve +from matplotlib.font_manager import FontProperties +import numpy + +params = { + 'axes.labelsize': 8, + 'axes.titlesize': 8, + 'font.size': 8, + 'legend.fontsize': 9, + 'xtick.labelsize': 8, + 'ytick.labelsize': 8, + 'xtick.major.pad': 2.5, + 'ytick.major.pad': 2.5, + 'text.usetex': True, +'figure.figsize' : (3.15,3.15), +'figure.subplot.left' : 0.12, +'figure.subplot.right' : 0.99 , +'figure.subplot.bottom' : 0.08 , +'figure.subplot.top' : 0.99 , +'figure.subplot.wspace' : 0. , +'figure.subplot.hspace' : 0. , +'lines.markersize' : 6, +'lines.linewidth' : 2., +'text.latex.unicode': True +} +rcParams.update(params) +rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + + +#Parameters +eta = 1.2349 +h = 2.1 + +#Constants +PI = math.pi + +#Cubic Spline +cubic_kernel_degree = 3 +cubic_kernel_ivals = 2 +cubic_kernel_gamma = 2. +cubic_kernel_ngb = 4.0 / 3.0 * PI * eta**3 * 6.0858 +cubic_kernel_coeffs = array([[3./(4.*PI) , -3./(2.*PI), 0., 1./PI], + [-1./(4.*PI), 3./(2.*PI), -3./PI, 2./PI], + [0., 0., 0., 0.]]) +def cubic_W(x): + if size(x) == 1: + x = array([x]) + ind = (minimum(x, cubic_kernel_ivals)).astype(int) + coeffs = cubic_kernel_coeffs[ind,:] + w = coeffs[:,0] * x + coeffs[:,1] + for k in range(2, cubic_kernel_degree+1): + w = x * w + coeffs[:,k] + return w + + +#Quartic Spline +quartic_kernel_degree = 4 +quartic_kernel_ivals = 3 +quartic_kernel_gamma = 2.5 +quartic_kernel_ngb = 4.0 / 3.0 * PI * eta**3 * 8.2293 +quartic_kernel_coeffs = array([[3./(10.*PI) , 0., -3./(4.*PI) , 0. , 23./(32.*PI)], + [-1./(5.*PI) , 1./PI , -3./(2.*PI) ,1./(4.*PI) , 11./(16.*PI)], + [1./(20.*PI) , -1./(2.*PI) , 15./(8.*PI) , -25./(8.*PI), 125./(64.*PI)], + [ 0. , 0., 0., 0., 0.]]) +def quartic_W(x): + if size(x) == 1: + x = array([x]) + ind = (minimum(x+0.5, quartic_kernel_ivals)).astype(int) + coeffs = quartic_kernel_coeffs[ind,:] + w = coeffs[:,0] * x + coeffs[:,1] + for k in range(2, quartic_kernel_degree+1): + w = x * w + coeffs[:,k] + return w + + +# Wendland kernel +wendland2_kernel_degree = 5 +wendland2_kernel_ivals = 1 +wendland2_kernel_gamma = 2 +wendland2_kernel_ngb = 4.0 / 3.0 * PI * eta**3 * 7.261825 +wendland2_kernel_coeffs = 3.342253804929802 * array([[1./8, -30./32, 80./32, -80./32., 0., 1.], + [ 0. , 0., 0., 0., 0., 0.]]) / 8. + +print wendland2_kernel_coeffs +def wendland2_W(x): + if size(x) == 1: + x = array([x]) + ind = (minimum(0.5*x, wendland2_kernel_ivals)).astype(int) + coeffs = wendland2_kernel_coeffs[ind,:] + w = coeffs[:,0] * x + coeffs[:,1] + for k in range(2, wendland2_kernel_degree+1): + w = x * w + coeffs[:,k] + return w + +#def wendland2_W(x): +# if size(x) == 1: +# x = array([x]) +# x /= 1.936492 +# x[x>1.] = 1. +# oneminusu = 1.-x +# oneminusu4 = oneminusu * oneminusu * oneminusu * oneminusu +# return 3.342253804929802 * oneminusu4 * (1. + 4.*x) / h**3 + + +#Find H +r = arange(0, 3.5*h, 1./1000.) +xi = r/h +cubic_Ws = cubic_W(xi) +quartic_Ws = quartic_W(xi) +wendland2_Ws = wendland2_W(xi) +for j in range(size(r)): + if cubic_Ws[j] == 0: + cubic_H = r[j] + break +for j in range(size(r)): + if quartic_Ws[j] == 0: + quartic_H = r[j] + break +for j in range(size(r)): + if wendland2_Ws[j] == 0: + wendland2_H = r[j] + break + + +print "H=", cubic_H +print "H=", quartic_H +print "H=", wendland2_H + + +# Compute sigma ----------------------------------------- +cubic_norm = 4.*PI*integrate.quad(lambda x: x**2*cubic_W(x), 0, cubic_H)[0] +quartic_norm = 4.*PI*integrate.quad(lambda x: x**2*quartic_W(x), 0, quartic_H)[0] +wendland2_norm = 4.*PI*integrate.quad(lambda x: x**2*wendland2_W(x), 0, wendland2_H)[0] + +print cubic_norm +print quartic_norm +print wendland2_norm + + +# Plot kernels ------------------------------------------ +r = arange(0, 3.5*h, 1./100.) +xi = r/h + +cubic_Ws = cubic_W(xi) +quartic_Ws = quartic_W(xi) +wendland2_Ws = wendland2_W(xi) + + + +figure() + +text(h-0.1, cubic_Ws[0]/20., "h", ha="right",va="center") +arrow(h, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='k', ec='k', length_includes_head=True, head_length=cubic_Ws[0]/30., head_width=0.1) + + +plot(r,cubic_Ws, 'b-' ,label="Cubic") +plot(r, quartic_Ws, 'r-', label="Quartic") +plot(r, wendland2_Ws, 'g-', label="Wendland C2") + +text(cubic_H-0.1, cubic_Ws[0]/20., "H", ha="right",va="center", color='b') +arrow(cubic_H, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='b', ec='b', length_includes_head=True, head_length=cubic_Ws[0]/30., head_width=0.1) + +text(quartic_H-0.1, cubic_Ws[0]/20., "H", ha="right",va="center", color='r') +arrow(quartic_H, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='r', ec='r', length_includes_head=True, head_length=quartic_Ws[0]/30., head_width=0.1) + +text(wendland2_H-0.1, cubic_Ws[0]/20., "H", ha="right",va="center", color='r') +arrow(wendland2_H, cubic_Ws[0]/10., 0., -cubic_Ws[0]/10., fc='g', ec='g', length_includes_head=True, head_length=wendland2_Ws[0]/30., head_width=0.1) + + +xlabel("r", labelpad=0) +ylabel("W(r,h)", labelpad=0) +legend(loc="upper right") +savefig("kernel.pdf") +