Commit 9c65f704 authored by James Willis's avatar James Willis
Browse files

Merge branch 'master' into debug_interactions

Conflicts:
	src/hydro/Gadget2/hydro_io.h
parents 46cd345e 9ddb0c82
......@@ -26,11 +26,13 @@ examples/*/*.xmf
examples/*/*.hdf5
examples/*/*.png
examples/*/*.txt
examples/*/*.dot
examples/*/used_parameters.yml
examples/*/*/*.xmf
examples/*/*/*.hdf5
examples/*/*/*.png
examples/*/*/*.txt
examples/*/*/*.dot
examples/*/*/used_parameters.yml
examples/*/gravity_checks_*.dat
......
......@@ -75,6 +75,15 @@ also be switched off for benchmarking purposes. To do so, you can use:
./configure --disable-vec
Please note that to build SWIFT on MacOS, you will need to configure
using
./configure --disable-compiler-warnings
due to the incorrect behaviour of the LLVM compiler on this platform
that raises warnings when the pthread flags are passed to the linker.
Dependencies
============
......
......@@ -379,6 +379,19 @@ AX_PTHREAD([LIBS="$PTHREAD_LIBS $LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
or use CPPFLAGS and LDFLAGS if the library is installed in a
non-standard location.]))
# Check whether POSIX thread barriers are implemented (e.g. OSX does not have them)
have_pthread_barrier="no"
AC_CHECK_LIB(pthread, pthread_barrier_init,
have_pthread_barrier="yes",
AC_MSG_WARN(POSIX implementation does not have barriers. SWIFT will use home-made ones.))
if test "x$have_pthread_barrier" == "xyes"; then
AC_DEFINE([HAVE_PTHREAD_BARRIERS], [1], [The posix library implements barriers])
fi
# Check whether POSIX file allocation functions exist (e.g. OSX does not have them)
AC_CHECK_LIB(pthread, posix_fallocate,
AC_DEFINE([HAVE_POSIX_FALLOCATE], [1], [The posix library implements file allocation functions.]),
AC_MSG_WARN(POSIX implementation does not have file allocation functions.))
# Check for METIS. Note AX_LIB_METIS exists, but cannot be configured
# to be default off (i.e. given no option it tries to locate METIS), so we
......@@ -548,6 +561,10 @@ if test "$with_hdf5" = "yes"; then
fi
AM_CONDITIONAL([HAVEPARALLELHDF5],[test "$have_parallel_hdf5" = "yes"])
# Check for floating-point exceptions
AC_CHECK_FUNC(feenableexcept, AC_DEFINE([HAVE_FE_ENABLE_EXCEPT],[1],
[Defined if the floating-point exception can be enabled using non-standard GNU functions.]))
# Check for setaffinity.
AC_CHECK_FUNC(pthread_setaffinity_np, AC_DEFINE([HAVE_SETAFFINITY],[1],
[Defined if pthread_setaffinity_np exists.]) )
......@@ -914,19 +931,20 @@ AC_MSG_RESULT([
$PACKAGE_NAME v.$PACKAGE_VERSION
Compiler : $CC
- vendor : $ax_cv_c_compiler_vendor
- version : $ax_cv_c_compiler_version
- flags : $CFLAGS
MPI enabled : $enable_mpi
HDF5 enabled : $with_hdf5
- parallel : $have_parallel_hdf5
Metis enabled : $have_metis
FFTW3 enabled : $have_fftw3
libNUMA enabled : $have_numa
Using tcmalloc : $have_tcmalloc
Using jemalloc : $have_jemalloc
CPU profiler : $have_profiler
Compiler : $CC
- vendor : $ax_cv_c_compiler_vendor
- version : $ax_cv_c_compiler_version
- flags : $CFLAGS
MPI enabled : $enable_mpi
HDF5 enabled : $with_hdf5
- parallel : $have_parallel_hdf5
Metis enabled : $have_metis
FFTW3 enabled : $have_fftw3
libNUMA enabled : $have_numa
Using tcmalloc : $have_tcmalloc
Using jemalloc : $have_jemalloc
CPU profiler : $have_profiler
Pthread barriers : $have_pthread_barrier
Hydro scheme : $with_hydro
Dimensionality : $with_dimension
......
......@@ -31,6 +31,9 @@ SPH:
max_ghost_iterations: 30 # Maximal number of iterations allowed to converge towards the smoothing length.
h_max: 60. # Maximal smoothing length allowed (in internal units).
EoS:
isothermal_internal_energy: 20.267845
# Parameters related to the initial conditions
InitialConditions:
file_name: Disc-Patch.hdf5 # The file to read
......
......@@ -202,7 +202,5 @@ print "--- Runtime parameters (YAML file): ---"
print "DiscPatchPotential:surface_density: ", surface_density
print "DiscPatchPotential:scale_height: ", scale_height
print "DiscPatchPotential:x_disc: ", 0.5 * boxSize_x
print "EoS:isothermal_internal_energy: %ef"%u_therm
print ""
print "--- Constant parameters: ---"
print "const_isothermal_internal_energy: %ef"%u_therm
#!/usr/bin/env python
"""
Usage:
    analysedumpcells.py nx ny nz cell<1>.dat cell<2>.dat ...

Analyses a number of output files created by calls to the dumpCells() debug
function (presumably called in engine_step()) to output a list of active
top-level cells, identifying those that are on the edges of the volumes being
processed on by various nodes. The point is that these should be minimised to
reduce the MPI communications.

The nx, ny and nz arguments are the number of cells in the complete space,
we need these so that we can wrap around the edge of space correctly.

This file is part of SWIFT.
Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import pylab as pl
import numpy as np
import sys
import pandas  # NOTE(review): pandas appears unused here -- confirm before removing.

#  Column indices into the dumpCells() output: cell centre coordinates ...
xcol = 0
ycol = 1
zcol = 2
#  ... cell widths along each axis ...
xwcol = 3
ywcol = 4
zwcol = 5
#  ... and flags: cell is local to this rank, is a super cell, is active.
localcol = 18
supercol = 15
activecol = 16

#  Command-line arguments: need at least nx, ny, nz and one dump file.
if len(sys.argv) < 5:
    print "usage: ", sys.argv[0], " nx ny nz cell1.dat cell2.dat ..."
    sys.exit(1)
nx = int(sys.argv[1])
ny = int(sys.argv[2])
nz = int(sys.argv[3])

print "# x y z onedge"
allactives = []
onedge = 0
tcount = 0
for i in range(4, len(sys.argv)):

    #  Read the file.
    data = pl.loadtxt(sys.argv[i])
    #print data

    #  Select cells that are on the current rank and are super cells.
    rdata = data[data[:,localcol] == 1]
    tdata = rdata[rdata[:,supercol] == 1]

    #  Separation of the cells is in data (assumes a uniform top-level grid,
    #  so the widths of the first selected cell apply to all of them).
    xwidth = tdata[0,xwcol]
    ywidth = tdata[0,ywcol]
    zwidth = tdata[0,zwcol]

    #  Fill space nx, ny, nz with all toplevel cells and flag their active
    #  state (0 = absent, 1 = present but inactive, 2 = present and active).
    space = np.zeros((nx,ny,nz))
    actives = []
    for line in tdata:
        #  Convert cell centre coordinates into integer grid indices.
        ix = int(np.rint(line[xcol] / xwidth))
        iy = int(np.rint(line[ycol] / ywidth))
        iz = int(np.rint(line[zcol] / zwidth))
        active = int(line[activecol])
        space[ix,iy,iz] = 1 + active
        tcount = tcount + 1
        if active == 1:
            actives.append([ix, iy, iz, line])

    #  Report all active cells and flag any without 26 neighbours. These are
    #  on the edge of the partition volume and will have foreign neighbour
    #  cells. (The 3x3x3 scan below also visits the cell itself, hence the
    #  comparison against 27 rather than 26.)
    for active in actives:
        count = 0
        for ii in [-1, 0, 1]:
            #  NOTE(review): this `i` shadows the file-index loop variable
            #  above; harmless here since `i` is re-bound each outer iteration.
            i = active[0] + ii
            #  Periodic wrap in x.
            if i < 0:
                i = i + nx
            elif i >= nx:
                i = i - nx
            for jj in [-1, 0, 1]:
                j = active[1] + jj
                #  Periodic wrap in y.
                if j < 0:
                    j = j + ny
                elif j >= ny:
                    j = j - ny
                for kk in [-1, 0, 1]:
                    k = active[2] + kk
                    #  Periodic wrap in z.
                    if k < 0:
                        k = k + nz
                    elif k >= nz:
                        k = k - nz
                    #  Count neighbours that exist on this rank.
                    if space[i, j, k] > 0:
                        count = count + 1
        #  Fewer than all 27 present (26 neighbours + self): on the edge.
        if count < 27:
            onedge = onedge + 1
            print active[3][0], active[3][1], active[3][2], 1
        else:
            print active[3][0], active[3][1], active[3][2], 0

    allactives.extend(actives)

#  Summary line: totals over all input files.
print "# top cells: ", tcount, " active: ", len(allactives), " on edge: ", onedge

sys.exit(0)
......@@ -203,7 +203,11 @@ int main(int argc, char *argv[]) {
with_drift_all = 1;
break;
case 'e':
#ifdef HAVE_FE_ENABLE_EXCEPT
with_fp_exceptions = 1;
#else
error("Need support for floating point exception on this platform");
#endif
break;
case 'f':
if (sscanf(optarg, "%llu", &cpufreq) != 1) {
......@@ -379,11 +383,19 @@ int main(int argc, char *argv[]) {
/* Do we choke on FP-exceptions ? */
if (with_fp_exceptions) {
#ifdef HAVE_FE_ENABLE_EXCEPT
feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
#endif
if (myrank == 0)
message("WARNING: Floating point exceptions will be reported.");
}
/* Do we have slow barriers? */
#ifndef HAVE_PTHREAD_BARRIERS
if (myrank == 0)
message("WARNING: Non-optimal thread barriers are being used.");
#endif
/* How large are the parts? */
if (myrank == 0) {
message("sizeof(part) is %4zi bytes.", sizeof(struct part));
......@@ -463,6 +475,7 @@ int main(int argc, char *argv[]) {
/* Initialise the hydro properties */
struct hydro_props hydro_properties;
if (with_hydro) hydro_props_init(&hydro_properties, params);
if (with_hydro) eos_init(&eos, params);
/* Initialise the gravity properties */
struct gravity_props gravity_properties;
......@@ -689,9 +702,9 @@ int main(int argc, char *argv[]) {
/* Legend */
if (myrank == 0)
printf("# %6s %14s %14s %10s %10s %10s %16s [%s]\n", "Step", "Time",
printf("# %6s %14s %14s %12s %12s %12s %16s [%s] %6s\n", "Step", "Time",
"Time-step", "Updates", "g-Updates", "s-Updates", "Wall-clock time",
clocks_getunit());
clocks_getunit(), "Props");
/* File for the timers */
if (with_verbose_timers) timers_open_file(myrank);
......
......@@ -15,6 +15,7 @@ Scheduler:
cell_split_size: 400 # (Optional) Maximal number of particles per cell (this is the default value).
max_top_level_cells: 12 # (Optional) Maximal number of top-level cells in any dimension. The number of top-level cells will be the cube of this (this is the default value).
tasks_per_cell: 0 # (Optional) The average number of tasks per cell. If not large enough the simulation will fail (means guess...).
mpi_message_limit: 4096 # (Optional) Maximum MPI task message size to send non-buffered, KB.
# Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.)
TimeIntegration:
......@@ -50,6 +51,9 @@ SPH:
max_volume_change: 1.4 # (Optional) Maximal allowed change of kernel volume over one time-step.
max_ghost_iterations: 30 # (Optional) Maximal number of iterations allowed to converge towards the smoothing length.
EoS:
isothermal_internal_energy: 20.26784 # Thermal energy per unit mass for the case of isothermal equation of state (in internal units).
# Parameters for the self-gravity scheme
Gravity:
eta: 0.025 # Constant dimensionless multiplier for time integration.
......@@ -76,8 +80,13 @@ DomainDecomposition:
initial_grid_x: 10 # (Optional) Grid size if the "grid" strategy is chosen.
initial_grid_y: 10 # ""
initial_grid_z: 10 # ""
repartition_type: task_weights # (Optional) The re-decomposition strategy: "none", "task_weights", "particle_weights",
# "edge_task_weights" or "hybrid_weights".
repartition_type: costs/costs # (Optional) The re-decomposition strategy, one of:
# "none/none", "costs/costs", "counts/none", "none/costs", "counts/costs",
# "costs/time" or "none/time".
# These are vertex/edge weights with "costs" as task timing, "counts" as
# sum of particles and "time" as the expected time of the next updates
trigger: 0.05 # (Optional) Fractional (<1) CPU time difference between MPI ranks required to trigger a
# new decomposition, or number of steps (>1) between decompositions
minfrac: 0.9 # (Optional) Fractional of all particles that should be updated in previous step when
......@@ -117,7 +126,7 @@ SineWavePotential:
amplitude: 10. # Amplitude of the sine wave (internal units)
timestep_limit: 1. # Time-step dimensionless pre-factor.
growth_time: 0. # (Optional) Time for the potential to grow to its final size.
# Parameters related to cooling function ----------------------------------------------
# Constant du/dt cooling function
......
#!/bin/bash
# Render the task dependency graph "dependency_graph.dot" as a PNG image.
# Requires the graphviz command "dot".

if [ -e dependency_graph.dot ]; then
    dot -Tpng dependency_graph.dot -o task_graph.png
    echo "Output written to task_graph.png"
else
    echo "Missing task-graph output 'dependency_graph.dot'! Cannot generate figure."
fi

exit
#!/bin/bash
#
# Usage:
#  process_cells nx ny nz nprocess
#
# Description:
#  Process all the cell dumps in the current directory.
#
#  Outputs file per rank with the active cells identified and marked as to
#  whether they are near an edge or not. Note requires the numbers of cells
#  per dimension of the space.
#
#  Also outputs a graphic showing the fraction of active cells on edges
#  for each step.

# Handle command-line.
if test "$4" = ""; then
    echo "Usage: $0 nx ny nz nprocess"
    exit 1
fi
NX=$1
NY=$2
NZ=$3
NPROCS=$4

# Locate script.
SCRIPTHOME=$(dirname "$0")

# Find all files. Use version sort to get into correct order.
files=$(ls -v cells_*.dat)
if test $? != 0; then
    echo "Failed to find any cell dump files"
    exit 1
fi

# Construct list of names need the number of ranks.
ranks=$(ls -v cells_*.dat | sed 's,cells_\(.*\)_.*.dat,\1,' | sort | uniq | wc -l)
echo "Number of ranks = $ranks"

# Now construct a list of files ordered by rank, not step.
files=$(ls cells_*.dat | sort -t "_" -k 3,3 -n | xargs -n 4)

# Need number of steps.
nfiles=$(echo $files| wc -w)
echo "Number of files = $nfiles"
steps=$(( $nfiles / $ranks + 1 ))
echo "Number of steps = $steps"

# And process them. BUG FIX: this command was commented out, so the
# step*-active-cells.dat files consumed below were never produced.
echo "Processing cell dumps files..."
echo $files | xargs -P $NPROCS -n 4 /bin/bash -c "${SCRIPTHOME}/process_cells_helper $NX $NY $NZ \$0 \$1 \$2 \$3"

# Create summary.
grep "top cells" step*-active-cells.dat | sort -h > active_cells.log

# And plot of active cells to edge cells.
stilts plot2plane ifmt=ascii in=active_cells.log xmin=-0.1 xmax=1.1 ymin=0 ymax=$steps grid=1 \
       legend=false xpix=600 ypix=500 xlabel="Edge cells/Active cells" ylabel="Step" \
       layer1=mark x1="col9/1.0/col6" y1="index" size1=3 shading1=aux auxmap=rainbow \
       aux=col6 auxfunc=log auxlabel="Active cells" layer2=histogram x2="col9/1.0/col6" \
       color2=grey binsize2=0.01 phase2=0.5 barform2=semi_steps thick2=1 \
       out=active_cells.png

exit
#!/bin/bash
# Helper for process_cells: analyse one rank's set of cell dumps.

# Directory holding this script (and the analysis python script).
SCRIPTHOME=$(dirname "$0")

# Extract the step number from the first dump file name,
# i.e. cells_<rank>_<step>.dat -> <step>.
step=$(echo $4|sed 's,cells_\(.*\)_\(.*\).dat,\2,')

# Run the analysis, logging the command and capturing its output per step.
outfile="step${step}-active-cells.dat"
echo "${SCRIPTHOME}/analyse_dump_cells.py $* > $outfile"
${SCRIPTHOME}/analyse_dump_cells.py $* > $outfile

exit
......@@ -146,6 +146,22 @@ if test "$ac_test_CFLAGS" != "set"; then
fi
;;
clang)
# default optimization flags for clang on all systems
CFLAGS="-O3 -fomit-frame-pointer"
# Always good optimisation to have
AX_CHECK_COMPILE_FLAG(-fstrict-aliasing, CFLAGS="$CFLAGS -fstrict-aliasing")
# note that we enable "unsafe" fp optimization with other compilers, too
AX_CHECK_COMPILE_FLAG(-ffast-math, CFLAGS="$CFLAGS -ffast-math")
# not all codes will benefit from this.
AX_CHECK_COMPILE_FLAG(-funroll-loops, CFLAGS="$CFLAGS -funroll-loops")
AX_GCC_ARCHFLAG($acx_maxopt_portable)
;;
gnu)
# default optimization flags for gcc on all systems
CFLAGS="-O3 -fomit-frame-pointer"
......@@ -155,7 +171,7 @@ if test "$ac_test_CFLAGS" != "set"; then
# -fstrict-aliasing for gcc-2.95+
AX_CHECK_COMPILE_FLAG(-fstrict-aliasing,
CFLAGS="$CFLAGS -fstrict-aliasing")
CFLAGS="$CFLAGS -fstrict-aliasing")
# note that we enable "unsafe" fp optimization with other compilers, too
AX_CHECK_COMPILE_FLAG(-ffast-math, CFLAGS="$CFLAGS -ffast-math")
......
......@@ -108,7 +108,8 @@ case $host_cpu in
*3?6[[ae]]?:*:*:*) ax_gcc_arch="ivybridge core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
*3?6[[cf]]?:*:*:*|*4?6[[56]]?:*:*:*) ax_gcc_arch="haswell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
*3?6d?:*:*:*|*4?6f?:*:*:*) ax_gcc_arch="broadwell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
*9?6[[de]]?:*:*:*) ax_gcc_arch="kabylake core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
*4?6[[de]]?:*:*:*) ax_gcc_arch="skylake core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
*9?6[[de]]?:*:*:*) ax_gcc_arch="kabylake core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
*1?6c?:*:*:*|*2?6[[67]]?:*:*:*|*3?6[[56]]?:*:*:*) ax_gcc_arch="bonnell atom core2 pentium-m pentium3 pentiumpro" ;;
*3?67?:*:*:*|*[[45]]?6[[ad]]?:*:*:*) ax_gcc_arch="silvermont atom core2 pentium-m pentium3 pentiumpro" ;;
*000?f[[012]]?:*:*:*|?f[[012]]?:*:*:*|f[[012]]?:*:*:*) ax_gcc_arch="pentium4 pentiumpro" ;;
......
This diff is collapsed.
......@@ -57,10 +57,10 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
runner_doiact_fft.c threadpool.c cooling.c sourceterms.c \
statistics.c runner_doiact_vec.c profiler.c dump.c logger.c \
part_type.c xmf.c gravity_properties.c gravity.c \
collectgroup.c hydro_space.c
collectgroup.c hydro_space.c equation_of_state.c
# Include files for distribution, not installation.
nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h \
runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \
dimension.h equation_of_state.h part_type.h periodic.h \
......
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2017 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef SWIFT_BARRIER_H
#define SWIFT_BARRIER_H
/**
* @file barrier.h
* @brief Define the thread barriers if the POSIX implementation on this system
* does not.
*
* The pthread barriers are only an option of the POSIX norm and they are not
* necessarily implemented. One example is OSX where all the rest of POSIX
* exists but not the barriers.
* We implement them here in a simple way to allow for SWIFT to run on such
* systems but this may lead to poorer performance.
*
* Note that we only define the three functions we need. This is a textbook
* implementation of a barrier that uses the common POSIX features (mutex,
* conditions and broadcasts).
*
* If the pthread barriers exist (Linux systems), we default to them.
*/
/* Config parameters. */
#include "../config.h"
/* Standard headers */
#include <pthread.h>
/* Does this POSIX implementation provide barriers? */
#ifdef HAVE_PTHREAD_BARRIERS
#define swift_barrier_t pthread_barrier_t
#define swift_barrier_wait pthread_barrier_wait
#define swift_barrier_init pthread_barrier_init
#define swift_barrier_destroy pthread_barrier_destroy
#else
/* Local headers */
#include "error.h"
#include "inline.h"
/**
 * @brief An ersatz of POSIX barriers to be used on systems that don't provide
 * the good ones.
 *
 * Classic mutex + condition-variable barrier: threads increment #count under
 * the mutex and block on #condition until #limit threads have arrived.
 */
typedef struct {

  /*! Barrier mutex protecting #count and guarding #condition */
  pthread_mutex_t mutex;

  /*! Condition to open the barrier (broadcast when the last thread arrives) */
  pthread_cond_t condition;

  /*! Total number of threads that must arrive before the barrier opens */
  int limit;

  /*! Number of threads that reached the barrier so far in this cycle */
  int count;

} swift_barrier_t;
/**
* @brief Initialise a barrier object.
*
* @param barrier The #swift_barrier_t to initialise
* @param unused Unused parameter (NULL) as we don't support barrier attributes.
* @param count The number of threads that will wait at the barrier.
*/
/**
 * @brief Initialise a barrier object.
 *
 * @param barrier The #swift_barrier_t to initialise
 * @param unused Unused parameter (NULL) as we don't support barrier attributes.
 * @param count The number of threads that will wait at the barrier.
 *
 * @return 0 on success (matching the pthread_barrier_init convention).
 */
static INLINE int swift_barrier_init(swift_barrier_t *barrier, void *unused,
                                     unsigned int count) {

  /* Set up the mutex guarding the barrier's internal state. */
  if (pthread_mutex_init(&barrier->mutex, 0) != 0)
    error("Error initializing the barrier mutex");

  /* Set up the condition variable used to release the waiting threads. */
  if (pthread_cond_init(&barrier->condition, 0) != 0)
    error("Error initializing the barrier condition");

  /* Nobody is waiting yet; the barrier opens once `count` threads arrive. */
  barrier->count = 0;
  barrier->limit = count;

  /* All is good */
  return 0;
}
/**
* @brief Make a set of threads wait at the barrier
*
* Note that once all threads have reached the barrier, we also
* reset the barrier to state where it is ready to be re-used
* without calling swift_barrier_init.
*
* @param barrier The (initialised) #swift_barrier_t to wait at.
*/
static INLINE int swift_barrier_wait(swift_barrier_t *barrier) {
/* Start by locking the barrier */
pthread_mutex_lock(&barrier->mutex);
/* One more thread has gone home*/
barrier->count++;
/* Are threads still running? */
if (barrier->count < barrier->limit) {
/* We need to make the thread wait until everyone is back */
pthread_cond_wait(&barrier->condition, &(barrier->mutex));
/* Release the mutex */
pthread_mutex_unlock(&barrier->mutex);
/* Say that this was not the last thread */
return 0;
} else { /* Everybody is home */
/* Open the barrier (i.e. release the threads blocked in the while loop) */
pthread_cond_broadcast(&barrier->condition);
/* Re-initialize the barrier */
barrier->count = 0;
/* Release the mutex */
pthread_mutex_unlock(&barrier->mutex);