Commit 869f9305 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'master' into io_unit_conversion

Conflicts:
        src/Makefile.am
	src/parallel_io.c
	src/serial_io.c
	src/single_io.c
parents 13fe6915 489556aa
......@@ -52,6 +52,13 @@ tests/testSPHStep
tests/testKernel
tests/testParser
tests/parser_output.yml
tests/test27cells.sh
tests/test27cellsPerturbed.sh
tests/testPair.sh
tests/testPairPerturbed.sh
tests/testParser.sh
tests/testReading.sh
theory/latex/swift.pdf
theory/kernel/kernels.pdf
......
......@@ -39,7 +39,7 @@ SWIFT has been successfully built and tested with the following compilers:
- clang 3.4.x
More recent versions and slightly older ones should also be able to
built the software.
build the software.
By default an attempt to choose a suitable set of optimizing compiler flags
will be made, targeted for the host machine of the build. If this doesn't
......@@ -67,6 +67,14 @@ for instance. GCC address sanitizer flags can be included using the
option. Note this requires a GCC compiler version of at least 4.8.
By default vectorization is switched on. The highest instruction set
available on the platform will be automatically used. However, not all
implementations of SPH available in the code have vectorized
routines. Vectorization will have to be switched off for these. It can
also be switched off for benchmarking purposes. To do so, you can use:
./configure --disable-vec
Dependencies
============
......
......@@ -168,6 +168,20 @@ AC_ARG_ENABLE([optimization],
[enable_opt="yes"]
)
# Disable vectorisation for known compilers. This switches off optimizations
# that could be enabled above, so in general these flags should be appended.
# The implementation is slightly odd: we want to describe the option as
# --disable-vec, but the macro is AC_ARG_ENABLE (there is no enable action).
AC_ARG_ENABLE([vec],
[AS_HELP_STRING([--disable-vec],
[Disable vectorization]
)],
[enable_vec="$enableval"],
[enable_vec="yes"]
)
HAVEVECTORIZATION=0
if test "$enable_opt" = "yes" ; then
# Add code optimisation flags and tuning to host. This is a funny macro
......@@ -188,7 +202,28 @@ if test "$enable_opt" = "yes" ; then
CFLAGS="$CFLAGS $SIMD_FLAGS"
fi
fi
if test "$enable_vec" = "no"; then
if test "$ax_cv_c_compiler_vendor" = "intel"; then
CFLAGS="$CFLAGS -no-vec -no-simd"
AC_MSG_RESULT([disabled Intel vectorization])
elif test "$ax_cv_c_compiler_vendor" = "gnu"; then
CFLAGS="$CFLAGS -fno-tree-vectorize"
AC_MSG_RESULT([disabled GCC vectorization])
elif test "$ax_cv_c_compiler_vendor" = "clang"; then
CFLAGS="$CFLAGS -fno-vectorize -fno-slp-vectorize"
AC_MSG_RESULT([disabled clang vectorization])
else
AC_MSG_WARN([Do not know how to disable vectorization for this compiler])
fi
else
AC_DEFINE([WITH_VECTORIZATION],1,[Enable vectorization])
HAVEVECTORIZATION=1
fi
fi
# HAVEVECTORIZATION is always set to either 0 or 1 above, so compare its value:
# a non-empty test would succeed in both cases and the conditional would
# never be false.
AM_CONDITIONAL([HAVEVECTORIZATION],[test "$HAVEVECTORIZATION" = "1"])
# Add address sanitizer options to flags, if requested. Only useful for GCC
# version 4.8 and later and clang.
......@@ -216,31 +251,6 @@ if test "$enable_san" = "yes"; then
fi
fi
# Disable vectorisation for known compilers. This switches off optimizations
# that could be enabled above, so in general should be appended. Slightly odd
# implementation as want to describe as --disable-vec, but macro is enable
# (there is no enable action).
AC_ARG_ENABLE([vec],
[AS_HELP_STRING([--disable-vec],
[Disable vectorization]
)],
[enable_vec="$enableval"],
[enable_vec="yes"]
)
if test "$enable_vec" = "no"; then
if test "$ax_cv_c_compiler_vendor" = "intel"; then
CFLAGS="$CFLAGS -no-vec -no-simd"
AC_MSG_RESULT([disabled Intel vectorization])
elif test "$ax_cv_c_compiler_vendor" = "gnu"; then
CFLAGS="$CFLAGS -fno-tree-vectorize"
AC_MSG_RESULT([disabled GCC vectorization])
elif test "$ax_cv_c_compiler_vendor" = "clang"; then
CFLAGS="$CFLAGS -fno-vectorize -fno-slp-vectorize"
AC_MSG_RESULT([disabled clang vectorization])
else
AC_MSG_WARN([Do not know how to disable vectorization for this compiler])
fi
fi
# Autoconf stuff.
AC_PROG_INSTALL
......@@ -461,6 +471,12 @@ AM_CONDITIONAL([HAVE_DOXYGEN], [test "$ac_cv_path_ac_pt_DX_DOXYGEN" != ""])
# Handle .in files.
AC_CONFIG_FILES([Makefile src/Makefile examples/Makefile doc/Makefile doc/Doxyfile tests/Makefile])
AC_CONFIG_FILES([tests/testReading.sh], [chmod +x tests/testReading.sh])
AC_CONFIG_FILES([tests/testPair.sh], [chmod +x tests/testPair.sh])
AC_CONFIG_FILES([tests/testPairPerturbed.sh], [chmod +x tests/testPairPerturbed.sh])
AC_CONFIG_FILES([tests/test27cells.sh], [chmod +x tests/test27cells.sh])
AC_CONFIG_FILES([tests/test27cellsPerturbed.sh], [chmod +x tests/test27cellsPerturbed.sh])
AC_CONFIG_FILES([tests/testParser.sh], [chmod +x tests/testParser.sh])
# Report general configuration.
AC_MSG_RESULT([
......
......@@ -1428,7 +1428,7 @@ FORMULA_TRANSPARENT = YES
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.
USE_MATHJAX = NO
USE_MATHJAX = YES
# When MathJax is enabled you can set the default output format to be used for
# the MathJax output. See the MathJax site (see:
......
......@@ -19,7 +19,7 @@
MYFLAGS = -DTIMER
# Add the source directory and debug to CFLAGS
AM_CFLAGS = -I../src $(HDF5_CPPFLAGS)
AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS)
AM_LDFLAGS = $(HDF5_LDFLAGS)
......
......@@ -7,4 +7,4 @@ then
python makeIC.py 50 60
fi
../swift -s -t 16 multiTypes.yml
../swift -s -g -t 16 multiTypes.yml
......@@ -266,6 +266,24 @@ int main(int argc, char *argv[]) {
message("sizeof(struct cell) is %4zi bytes.", sizeof(struct cell));
}
/* Temporary abort to handle absence of vectorized functions */
#ifdef WITH_VECTORIZATION
#ifdef GADGET2_SPH
error(
"Vectorized version of Gadget SPH routines not implemented yet. "
"Reconfigure with --disable-vec and recompile or use DEFAULT_SPH.");
#endif
#ifdef MINIMAL_SPH
error(
"Vectorized version of Minimal SPH routines not implemented yet. "
"Reconfigure with --disable-vec and recompile or use DEFAULT_SPH.");
#endif
#endif
/* End temporary fix */
/* How vocal are we ? */
const int talking = (verbose == 1 && myrank == 0) || (verbose == 2);
......
......@@ -33,7 +33,9 @@ Snapshots:
# Parameters governing the conserved quantities statistics
Statistics:
delta_time: 1e-2 # Time between statistics output
delta_time: 1e-2 # Time between statistics output
energy_file_name: energy # (Optional) File name for energy output
timestep_file_name: timesteps # (Optional) File name for timing information output. Note: No underscores "_" allowed in file name
# Parameters for the hydrodynamics scheme
SPH:
......
#!/usr/bin/env python
#
# Usage:
# python plot_scaling_results.py input-file1-ext input-file2-ext ...
#
# Description:
# Plots speed up, parallel efficiency and time to solution given a "timesteps" output file generated by SWIFT.
#
# Example:
# python plot_scaling_results.py _threads_cosma_stdout.txt _threads_knl_stdout.txt
#
# The working directory should contain files 1_threads_cosma_stdout.txt - 64_threads_cosma_stdout.txt and 1_threads_knl_stdout.txt - 64_threads_knl_stdout.txt, i.e. the wall clock time for each run using a given number of threads.
import sys
import glob
import re
import numpy as np
import matplotlib.pyplot as plt
# Header fields read by parse_header(); entry i of each list belongs to data
# series i.
branch, revision = [], []
hydro_scheme, hydro_kernel = [], []
hydro_neighbours, hydro_eta = [], []

# Legend label assembled for each data series from the header fields above
version = []

# threadList[i] holds the thread counts of the runs in data series i
threadList = []

# One matplotlib format string per data series
linestyle = ('ro-', 'bo-', 'go-', 'yo-', 'mo-')

#cmdLine = './swift_fixdt -s -t 16 cosmoVolume.yml'
#platform = 'KNL'
# Work out how many data series there are: one per command line argument.
# Every series needs its own entry in linestyle, which caps how many series
# can be plotted at once.
inputFileNames = tuple(sys.argv[1:])
numOfSeries = len(inputFileNames)

# Stop with a usage message instead of failing later on an undefined name
# (no arguments) or on a missing line style (too many arguments).
if numOfSeries < 1 or numOfSeries > len(linestyle):
    sys.exit("Usage: python plot_scaling_results.py input-file1-ext "
             "[input-file2-ext ...] (between 1 and {} "
             "inputs)".format(len(linestyle)))
# Get the names of the branch, Git revision, hydro scheme and hydro kernel
def parse_header(inputFile):
    """Scan inputFile and record the run description found in its header.

    Every line of the file is inspected. When a line contains one of the
    known labels, the matching value is appended to the module-level list
    of the same name (branch, revision, hydro_scheme, hydro_kernel,
    hydro_neighbours, hydro_eta).

    inputFile -- path of the file to read.

    Returns None. Raises IndexError if a labelled line has fewer
    whitespace-separated fields than expected.
    """
    with open(inputFile, 'r') as f:
        for line in f:
            if 'Branch:' in line:
                branch.append(line.split()[2])
            elif 'Revision:' in line:
                revision.append(line.split()[2])
            elif 'Hydrodynamic scheme:' in line:
                # Drop the first two characters and the last one, then keep
                # every field after the two-word label.
                fields = line[2:-1].split()
                hydro_scheme.append(" ".join(fields[2:]))
            elif 'Hydrodynamic kernel:' in line:
                # Same trimming as above, but keep at most three fields
                # after the label.
                fields = line[2:-1].split()
                hydro_kernel.append(" ".join(fields[2:5]))
            elif 'neighbours:' in line:
                hydro_neighbours.append(line.split()[4])
            elif 'Eta:' in line:
                hydro_eta.append(line.split()[2])
# Parse file and return total time taken, speed up and parallel efficiency
def parse_files():
    """Load the timing files of every data series and derive scaling metrics.

    For each of the numOfSeries entries of inputFileNames, every file whose
    path starts with that entry is loaded. The module-level lists threadList
    and version are extended with one entry per series, and parse_header()
    is called on the first file of each series.

    Returns the tuple (times, totalTime, speedUp, parallelEff). Each is a
    list with one entry per series, holding one entry per file ordered by
    ascending thread count.

    Raises IOError if no file matches one of the inputs.
    """
    times = []
    totalTime = []
    serialTime = []
    speedUp = []
    parallelEff = []

    for i in range(numOfSeries):  # Loop over each data series

        # Get each file that starts with the cmd line arg
        file_list = glob.glob(inputFileNames[i] + "*")

        # Fail with a clear message rather than an IndexError further down
        if not file_list:
            raise IOError(
                "No files found matching '{}*'".format(inputFileNames[i]))

        threadList.append([])

        # Create a list of threads using the list of files. The thread count
        # is taken from the second field of the path once it is split on
        # runs of '_' and '.'.
        for fileName in file_list:
            s = re.split(r'[_.]+', fileName)
            threadList[i].append(int(s[1]))

        # Sort the thread list in ascending order and save the indices
        sorted_indices = np.argsort(threadList[i])
        threadList[i].sort()

        # Sort the file list in ascending order according to the thread number
        file_list = [file_list[j] for j in sorted_indices]

        # The header of the first file describes the whole series
        parse_header(file_list[0])

        version.append(branch[i] + " " + revision[i] + "\n" + hydro_scheme[i] +
                       "\n" + hydro_kernel[i] + r", $N_{neigh}$=" + hydro_neighbours[i] +
                       r", $\eta$=" + hydro_eta[i] + "\n")

        times.append([])
        totalTime.append([])
        speedUp.append([])
        parallelEff.append([])

        # Loop over all files for a given series and load the times, read
        # from column 5 (zero-based) of each file
        for j in range(len(file_list)):
            times[i].append([])
            times[i][j].append(np.loadtxt(file_list[j], usecols=(5,)))
            totalTime[i].append(np.sum(times[i][j]))

        # The reference time is that of the run with the fewest threads
        serialTime.append(totalTime[i][0])

        # Loop over all files for a given series and calculate speed up and
        # parallel efficiency
        for j in range(len(file_list)):
            speedUp[i].append(serialTime[i] / totalTime[i][j])
            parallelEff[i].append(speedUp[i][j] / threadList[i][j])

    return (times, totalTime, speedUp, parallelEff)
def print_results(times,totalTime,parallelEff,version):
    """Print the wall clock time and parallel efficiency of every series.

    times       -- per-series, per-file timing data; the length of
                   times[0][0][0] is reported as the number of time steps.
    totalTime   -- totalTime[i][j] is the total time of file j of series i.
    parallelEff -- parallelEff[i][j] is the matching parallel efficiency.
    version     -- version[i] is the label printed above series i.

    Uses single-argument print(...) calls so the output is the same under
    Python 2 and the function also runs under Python 3.
    """
    separator = "------------------------------------"

    for i in range(numOfSeries):
        numSteps = len(times[0][0][0])

        print(" ")
        print(separator)
        print(version[i])
        print(separator)
        print("Wall clock time for: {} time steps".format(numSteps))
        print(separator)

        for j in range(len(threadList[i])):
            print(str(threadList[i][j]) + " threads: {}".format(totalTime[i][j]))

        print(" ")
        print(separator)
        print("Parallel Efficiency for: {} time steps".format(numSteps))
        print(separator)

        for j in range(len(threadList[i])):
            print(str(threadList[i][j]) + " threads: {}".format(parallelEff[i][j]))
# Draw the speed up, parallel efficiency and time to solution of every data
# series on a 2x2 grid of axes; the fourth cell has its axes switched off.
# Reads the module-level numOfSeries, threadList, linestyle and version.
# NOTE(review): the indentation of this block is not visible in this view, so
# the extent of each "for" body below (in particular whether the dashed
# ideal-scaling line is drawn per series or once) should be confirmed
# against the original file.
def plot_results(times,totalTime,speedUp,parallelEff):
fig, axarr = plt.subplots(2, 2,figsize=(15,15))
speedUpPlot = axarr[0, 0]
parallelEffPlot = axarr[0, 1]
totalTimePlot = axarr[1, 0]
emptyPlot = axarr[1, 1]
# Plot speed up
for i in range(0,numOfSeries):
speedUpPlot.plot(threadList[i],speedUp[i],linestyle[i],label=version[i])
# Dashed black reference line where speed up equals the thread count
speedUpPlot.plot(threadList[i],threadList[i],color='k',linestyle='--')
speedUpPlot.set_ylabel("Speed Up")
speedUpPlot.set_xlabel("No. of Threads")
# Plot parallel efficiency
for i in range(0,numOfSeries):
parallelEffPlot.plot(threadList[i],parallelEff[i],linestyle[i])
parallelEffPlot.set_xscale('log')
parallelEffPlot.set_ylabel("Parallel Efficiency")
parallelEffPlot.set_xlabel("No. of Threads")
parallelEffPlot.set_ylim([0,1.1])
# Plot time to solution
for i in range(0,numOfSeries):
totalTimePlot.loglog(threadList[i],totalTime[i],linestyle[i],label=version[i])
totalTimePlot.set_xscale('log')
totalTimePlot.set_xlabel("No. of Threads")
totalTimePlot.set_ylabel("Time to Solution (ms)")
# Anchor the legend just outside the right edge of the time to solution axes
totalTimePlot.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,prop={'size':14})
emptyPlot.axis('off')
# Label every point of the first data series with its thread count
for i, txt in enumerate(threadList[0]):
speedUpPlot.annotate(txt, (threadList[0][i],speedUp[0][i]))
parallelEffPlot.annotate(txt, (threadList[0][i],parallelEff[0][i]))
totalTimePlot.annotate(txt, (threadList[0][i],totalTime[0][i]))
#fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps of Cosmo Volume\n Cmd Line: {}, Platform: {}".format(len(times[0][0][0]),cmdLine,platform))
# The number of time steps in the title is taken from the first file of the
# first series
fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps".format(len(times[0][0][0])))
return
# Load every data series and derive the scaling metrics
times, totalTime, speedUp, parallelEff = parse_files()

# Build the figure, print the tables, then display the figure
plot_results(times, totalTime, speedUp, parallelEff)
print_results(times, totalTime, parallelEff, version)

plt.show()
......@@ -54,11 +54,11 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
# Include files for distribution, not installation.
nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
vector.h runner_doiact.h runner_doiact_grav.h units.h intrinsics.h minmax.h kick.h \
timestep.h drift.h io_properties.h \
timestep.h drift.h adiabatic_index.h io_properties.h \
gravity.h gravity_io.h \
gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \
gravity/Default/gravity_debug.h gravity/Default/gravity_part.h \
hydro.h hydro_io.h \
hydro.h hydro_io.h \
hydro/Minimal/hydro.h hydro/Minimal/hydro_iact.h hydro/Minimal/hydro_io.h \
hydro/Minimal/hydro_debug.h hydro/Minimal/hydro_part.h \
hydro/Default/hydro.h hydro/Default/hydro_iact.h hydro/Default/hydro_io.h \
......@@ -92,12 +92,12 @@ version_string.h: version_string.h.in $(AM_SOURCES) $(include_HEADERS) $(noinst_
GIT_BRANCH=`$(GIT_CMD) branch | sed -n 's/^\* \(.*\)/\1/p'`; \
sed -e "s,@PACKAGE_VERSION\@,$(PACKAGE_VERSION)," \
-e "s,@GIT_REVISION\@,$${GIT_REVISION}," \
-e "s|@GIT_BRANCH\@|$${GIT_BRANCH}|" version_string.h.in > version_string.h; \
-e "s|@GIT_BRANCH\@|$${GIT_BRANCH}|" $< > version_string.h; \
else \
if test ! -f version_string.h; then \
sed -e "s,@PACKAGE_VERSION\@,$(PACKAGE_VERSION)," \
-e "s,@GIT_REVISION\@,unknown," \
-e "s,@GIT_BRANCH\@,unknown," version_string.h.in > version_string.h; \
-e "s,@GIT_BRANCH\@,unknown," $< > version_string.h; \
fi; \
fi
......
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef SWIFT_ADIABATIC_INDEX_H
#define SWIFT_ADIABATIC_INDEX_H
/* Config parameters. */
#include "../config.h"
/* Some standard headers. */
#include <math.h>
/* Local headers. */
#include "const.h"
#include "debug.h"
#include "inline.h"
/* First define some constants: the adiabatic index gamma, gamma - 1 and
 * 1 / (gamma - 1), for whichever HYDRO_GAMMA_* macro has been selected. */
#if defined(HYDRO_GAMMA_5_3)

#define hydro_gamma 1.66666666666666667f
#define hydro_gamma_minus_one 0.66666666666666667f
#define hydro_one_over_gamma_minus_one 1.5f

#elif defined(HYDRO_GAMMA_4_3)

#define hydro_gamma 1.33333333333333333f
#define hydro_gamma_minus_one 0.33333333333333333f
#define hydro_one_over_gamma_minus_one 3.f

#elif defined(HYDRO_GAMMA_2_1)

#define hydro_gamma 2.f
#define hydro_gamma_minus_one 1.f
#define hydro_one_over_gamma_minus_one 1.f

#else

/* Without a selection none of the constants above exist, so stop the build
 * here with a clear message instead of failing at their first use. */
#error "An adiabatic index needs to be chosen in const.h !"

#endif
/**
 * @brief Raises the argument to the power of the adiabatic index
 *
 * Evaluates \f$x^\gamma\f$ for the adiabatic index selected at compile time
 * through one of the HYDRO_GAMMA_* macros. If none is defined, error() is
 * invoked.
 *
 * @param x The value to raise to the power \f$\gamma\f$.
 * @return \f$x^\gamma\f$.
 */
__attribute__((always_inline)) INLINE static float pow_gamma(float x) {

#if defined(HYDRO_GAMMA_5_3)

  /* x^(5/3) is the cube root of x^5 */
  const float sq = x * x;
  return cbrtf(sq * sq * x);

#elif defined(HYDRO_GAMMA_4_3)

  /* x^(4/3) is the cube root of x^4 */
  const float sq = x * x;
  return cbrtf(sq * sq);

#elif defined(HYDRO_GAMMA_2_1)

  return x * x;

#else

  error("The adiabatic index is not defined !");
  return 0.f;

#endif
}
/**
 * @brief Raises the argument to the power of the adiabatic index minus one
 *
 * Evaluates \f$x^{(\gamma-1)}\f$ for the adiabatic index selected at compile
 * time through one of the HYDRO_GAMMA_* macros. If none is defined, error()
 * is invoked.
 *
 * @param x The value to raise to the power \f$\gamma-1\f$.
 * @return \f$x^{(\gamma-1)}\f$.
 */
__attribute__((always_inline)) INLINE static float pow_gamma_minus_one(
    float x) {

#if defined(HYDRO_GAMMA_5_3)

  /* x^(2/3) is the cube root of x^2 */
  const float sq = x * x;
  return cbrtf(sq);

#elif defined(HYDRO_GAMMA_4_3)

  /* x^(1/3) */
  return cbrtf(x);

#elif defined(HYDRO_GAMMA_2_1)

  /* x^1 */
  return x;

#else

  error("The adiabatic index is not defined !");
  return 0.f;

#endif
}
/**
 * @brief Raises the argument to minus the power of the adiabatic index minus
 * one
 *
 * Evaluates \f$x^{-(\gamma-1)}\f$, the reciprocal of \f$x^{(\gamma-1)}\f$,
 * for the adiabatic index selected at compile time through one of the
 * HYDRO_GAMMA_* macros. If none is defined, error() is invoked.
 *
 * @param x The value to raise to the power \f$-(\gamma-1)\f$.
 * @return \f$x^{-(\gamma-1)}\f$.
 */
__attribute__((always_inline)) INLINE static float pow_minus_gamma_minus_one(
    float x) {

#if defined(HYDRO_GAMMA_5_3)

  /* One over the cube root of x^2 */
  const float root = cbrtf(x * x);
  return 1.f / root;

#elif defined(HYDRO_GAMMA_4_3)

  /* One over the cube root of x */
  const float root = cbrtf(x);
  return 1.f / root;

#elif defined(HYDRO_GAMMA_2_1)

  return 1.f / x;

#else

  error("The adiabatic index is not defined !");
  return 0.f;

#endif
}
#endif /* SWIFT_ADIABATIC_INDEX_H */
......@@ -20,9 +20,6 @@
#ifndef SWIFT_CONST_H
#define SWIFT_CONST_H
/* Hydrodynamical constants. */
#define const_hydro_gamma (5.0f / 3.0f)
/* SPH Viscosity constants. */
#define const_viscosity_alpha 0.8f
#define const_viscosity_alpha_min \
......@@ -44,6 +41,11 @@
#define const_G 6.672e-8f /* Gravitational constant. */
#define const_epsilon 0.0014f /* Gravity blending distance. */
/* Hydrodynamical adiabatic index. */
#define HYDRO_GAMMA_5_3
//#define HYDRO_GAMMA_4_3
//#define HYDRO_GAMMA_2_1
/* Kernel function to use */
#define CUBIC_SPLINE_KERNEL
//#define QUARTIC_SPLINE_KERNEL
......
......@@ -65,6 +65,7 @@
#include "single_io.h"
#include "timers.h"
#include "units.h"
#include "version.h"
const char *engine_policy_names[13] = {"none",
"rand",
......@@ -2392,6 +2393,10 @@ void engine_step(struct engine *e) {
printf(" %6d %14e %14e %10zd %10zd %21.3f\n", e->step, e->time,
e->timeStep, e->updates, e->g_updates, e->wallclock_time);
fflush(stdout);
fprintf(e->file_timesteps, " %6d %14e %14e %10zd %10zd %21.3f\n", e->step,
e->time, e->timeStep, e->updates, e->g_updates, e->wallclock_time);
fflush(e->file_timesteps);
}
/* Save some statistics */
......@@ -2807,6 +2812,7 @@ void engine_init(struct engine *e, struct space *s,
e->dt_min = parser_get_param_double(params, "TimeIntegration:dt_min");
e->dt_max = parser_get_param_double(params, "TimeIntegration:dt_max");
e->file_stats = NULL;
e->file_timesteps = NULL;
e->deltaTimeStatistics =
parser_get_param_double(params, "Statistics:delta_time");