diff --git a/INSTALL.swift b/INSTALL.swift
index 90096bc1b88d34ab86c04f623f1d3f04ca5a2997..fd960200117972d72ec5144250c51fa5126f09f4 100644
--- a/INSTALL.swift
+++ b/INSTALL.swift
@@ -75,6 +75,15 @@ also be switched off for benchmarking purposes. To do so, you can use:
 
     ./configure --disable-vec
 
+Please note that to build SWIFT on MacOS, you will need to configure
+using
+
+    ./configure --disable-compiler-warnings
+
+due to the incorrect behaviour of the LLVM compiler on this platform
+that raises warnings when the pthread flags are passed to the linker.
+
+
 Dependencies
 ============
 
diff --git a/configure.ac b/configure.ac
index d7372c660ab2500d6d809fdbbbd5fa6f0b36130f..539dea934771fcc125a0a13e280379e399274ea6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -379,6 +379,19 @@ AX_PTHREAD([LIBS="$PTHREAD_LIBS $LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
     or use CPPFLAGS and LDFLAGS if the library is installed in a
     non-standard location.]))
 
+# Check whether POSIX thread barriers are implemented (e.g. OSX does not have them)
+have_pthread_barrier="no"
+AC_CHECK_LIB(pthread, pthread_barrier_init,
+             have_pthread_barrier="yes",
+             AC_MSG_WARN(POSIX implementation does not have barriers. SWIFT will use home-made ones.))
+if test "x$have_pthread_barrier" = "xyes"; then
+  AC_DEFINE([HAVE_PTHREAD_BARRIERS], [1], [The posix library implements barriers])
+fi
+
+# Check whether POSIX file allocation functions exist (e.g. OSX does not have them)
+AC_CHECK_LIB(pthread, posix_fallocate,
+             AC_DEFINE([HAVE_POSIX_FALLOCATE], [1], [The posix library implements file allocation functions.]),
+             AC_MSG_WARN(POSIX implementation does not have file allocation functions.))
 
 # Check for METIS. Note AX_LIB_METIS exists, but cannot be configured
 # to be default off (i.e. 
given no option it tries to locate METIS), so we @@ -548,6 +561,10 @@ if test "$with_hdf5" = "yes"; then fi AM_CONDITIONAL([HAVEPARALLELHDF5],[test "$have_parallel_hdf5" = "yes"]) +# Check for floating-point exceptions +AC_CHECK_FUNC(feenableexcept, AC_DEFINE([HAVE_FE_ENABLE_EXCEPT],[1], + [Defined if the floating-point exception can be enabled using non-standard GNU functions.])) + # Check for setaffinity. AC_CHECK_FUNC(pthread_setaffinity_np, AC_DEFINE([HAVE_SETAFFINITY],[1], [Defined if pthread_setaffinity_np exists.]) ) @@ -901,19 +918,20 @@ AC_MSG_RESULT([ $PACKAGE_NAME v.$PACKAGE_VERSION - Compiler : $CC - - vendor : $ax_cv_c_compiler_vendor - - version : $ax_cv_c_compiler_version - - flags : $CFLAGS - MPI enabled : $enable_mpi - HDF5 enabled : $with_hdf5 - - parallel : $have_parallel_hdf5 - Metis enabled : $have_metis - FFTW3 enabled : $have_fftw3 - libNUMA enabled : $have_numa - Using tcmalloc : $have_tcmalloc - Using jemalloc : $have_jemalloc - CPU profiler : $have_profiler + Compiler : $CC + - vendor : $ax_cv_c_compiler_vendor + - version : $ax_cv_c_compiler_version + - flags : $CFLAGS + MPI enabled : $enable_mpi + HDF5 enabled : $with_hdf5 + - parallel : $have_parallel_hdf5 + Metis enabled : $have_metis + FFTW3 enabled : $have_fftw3 + libNUMA enabled : $have_numa + Using tcmalloc : $have_tcmalloc + Using jemalloc : $have_jemalloc + CPU profiler : $have_profiler + Pthread barriers : $have_pthread_barrier Hydro scheme : $with_hydro Dimensionality : $with_dimension diff --git a/examples/main.c b/examples/main.c index ea43c35b5f749cc7d5bc219013ee271549d35745..31e6a5db089a919107e0f768f2e23b60a3520dd0 100644 --- a/examples/main.c +++ b/examples/main.c @@ -203,7 +203,11 @@ int main(int argc, char *argv[]) { with_drift_all = 1; break; case 'e': +#ifdef HAVE_FE_ENABLE_EXCEPT with_fp_exceptions = 1; +#else + error("Need support for floating point exception on this platform"); +#endif break; case 'f': if (sscanf(optarg, "%llu", &cpufreq) != 1) { @@ -379,11 +383,19 
@@ int main(int argc, char *argv[]) { /* Do we choke on FP-exceptions ? */ if (with_fp_exceptions) { +#ifdef HAVE_FE_ENABLE_EXCEPT feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif if (myrank == 0) message("WARNING: Floating point exceptions will be reported."); } +/* Do we have slow barriers? */ +#ifndef HAVE_PTHREAD_BARRIERS + if (myrank == 0) + message("WARNING: Non-optimal thread barriers are being used."); +#endif + /* How large are the parts? */ if (myrank == 0) { message("sizeof(part) is %4zi bytes.", sizeof(struct part)); diff --git a/m4/ax_cc_maxopt.m4 b/m4/ax_cc_maxopt.m4 index 93d5d6dcd78ff77c934f77ad0e1e02ef37873a37..0fcef66c7542fbb840e68d259550343a1a5601a1 100644 --- a/m4/ax_cc_maxopt.m4 +++ b/m4/ax_cc_maxopt.m4 @@ -146,6 +146,22 @@ if test "$ac_test_CFLAGS" != "set"; then fi ;; + clang) + # default optimization flags for clang on all systems + CFLAGS="-O3 -fomit-frame-pointer" + + # Always good optimisation to have + AX_CHECK_COMPILE_FLAG(-fstrict-aliasing, CFLAGS="$CFLAGS -fstrict-aliasing") + + # note that we enable "unsafe" fp optimization with other compilers, too + AX_CHECK_COMPILE_FLAG(-ffast-math, CFLAGS="$CFLAGS -ffast-math") + + # not all codes will benefit from this. 
+ AX_CHECK_COMPILE_FLAG(-funroll-loops, CFLAGS="$CFLAGS -funroll-loops") + + AX_GCC_ARCHFLAG($acx_maxopt_portable) + ;; + gnu) # default optimization flags for gcc on all systems CFLAGS="-O3 -fomit-frame-pointer" @@ -155,7 +171,7 @@ if test "$ac_test_CFLAGS" != "set"; then # -fstrict-aliasing for gcc-2.95+ AX_CHECK_COMPILE_FLAG(-fstrict-aliasing, - CFLAGS="$CFLAGS -fstrict-aliasing") + CFLAGS="$CFLAGS -fstrict-aliasing") # note that we enable "unsafe" fp optimization with other compilers, too AX_CHECK_COMPILE_FLAG(-ffast-math, CFLAGS="$CFLAGS -ffast-math") diff --git a/m4/ax_gcc_archflag.m4 b/m4/ax_gcc_archflag.m4 index 298db809e82130b77758f5e66c29c276ceb8266a..6d4feb1e0aab816d0830dcdfdd6d84d61575c8b2 100644 --- a/m4/ax_gcc_archflag.m4 +++ b/m4/ax_gcc_archflag.m4 @@ -108,7 +108,8 @@ case $host_cpu in *3?6[[ae]]?:*:*:*) ax_gcc_arch="ivybridge core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; *3?6[[cf]]?:*:*:*|*4?6[[56]]?:*:*:*) ax_gcc_arch="haswell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; *3?6d?:*:*:*|*4?6f?:*:*:*) ax_gcc_arch="broadwell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; - *9?6[[de]]?:*:*:*) ax_gcc_arch="kabylake core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; + *4?6[[de]]?:*:*:*) ax_gcc_arch="skylake core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; + *9?6[[de]]?:*:*:*) ax_gcc_arch="kabylake core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;; *1?6c?:*:*:*|*2?6[[67]]?:*:*:*|*3?6[[56]]?:*:*:*) ax_gcc_arch="bonnell atom core2 pentium-m pentium3 pentiumpro" ;; *3?67?:*:*:*|*[[45]]?6[[ad]]?:*:*:*) ax_gcc_arch="silvermont atom core2 pentium-m pentium3 pentiumpro" ;; *000?f[[012]]?:*:*:*|?f[[012]]?:*:*:*|f[[012]]?:*:*:*) ax_gcc_arch="pentium4 pentiumpro" ;; diff --git a/m4/ax_pthread.m4 b/m4/ax_pthread.m4 index 
d383ad5c6d6a5061370800bb1dc89b7a334c0638..5fbf9fe0d68616042f87a8365190211cb8ccfbf1 100644 --- a/m4/ax_pthread.m4 +++ b/m4/ax_pthread.m4 @@ -1,5 +1,5 @@ # =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_pthread.html +# https://www.gnu.org/software/autoconf-archive/ax_pthread.html # =========================================================================== # # SYNOPSIS @@ -19,10 +19,10 @@ # is necessary on AIX to use the special cc_r compiler alias.) # # NOTE: You are assumed to not only compile your program with these flags, -# but also link it with them as well. e.g. you should link with +# but also to link with them as well. For example, you might link with # $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS # -# If you are only building threads programs, you may wish to use these +# If you are only building threaded programs, you may wish to use these # variables in your default LIBS, CFLAGS, and CC: # # LIBS="$PTHREAD_LIBS $LIBS" @@ -30,8 +30,8 @@ # CC="$PTHREAD_CC" # # In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant -# has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to that name -# (e.g. PTHREAD_CREATE_UNDETACHED on AIX). +# has a nonstandard name, this macro defines PTHREAD_CREATE_JOINABLE to +# that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). # # Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the # PTHREAD_PRIO_INHERIT symbol is defined when compiling with @@ -67,7 +67,7 @@ # Public License for more details. # # You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. +# with this program. If not, see <https://www.gnu.org/licenses/>. 
# # As a special exception, the respective Autoconf Macro's copyright owner # gives unlimited permission to copy, distribute and modify the configure @@ -82,35 +82,40 @@ # modified version of the Autoconf Macro, you may extend this special # exception to the GPL to apply to your modified version as well. -#serial 21 +#serial 24 AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD]) AC_DEFUN([AX_PTHREAD], [ AC_REQUIRE([AC_CANONICAL_HOST]) +AC_REQUIRE([AC_PROG_CC]) +AC_REQUIRE([AC_PROG_SED]) AC_LANG_PUSH([C]) ax_pthread_ok=no # We used to check for pthread.h first, but this fails if pthread.h -# requires special compiler flags (e.g. on True64 or Sequent). +# requires special compiler flags (e.g. on Tru64 or Sequent). # It gets checked for in the link test anyway. # First of all, check if the user has set any of the PTHREAD_LIBS, # etcetera environment variables, and if threads linking works using # them: -if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then - save_CFLAGS="$CFLAGS" +if test "x$PTHREAD_CFLAGS$PTHREAD_LIBS" != "x"; then + ax_pthread_save_CC="$CC" + ax_pthread_save_CFLAGS="$CFLAGS" + ax_pthread_save_LIBS="$LIBS" + AS_IF([test "x$PTHREAD_CC" != "x"], [CC="$PTHREAD_CC"]) CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - save_LIBS="$LIBS" LIBS="$PTHREAD_LIBS $LIBS" - AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS]) - AC_TRY_LINK_FUNC([pthread_join], [ax_pthread_ok=yes]) + AC_MSG_CHECKING([for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS]) + AC_LINK_IFELSE([AC_LANG_CALL([], [pthread_join])], [ax_pthread_ok=yes]) AC_MSG_RESULT([$ax_pthread_ok]) - if test x"$ax_pthread_ok" = xno; then + if test "x$ax_pthread_ok" = "xno"; then PTHREAD_LIBS="" PTHREAD_CFLAGS="" fi - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" + CC="$ax_pthread_save_CC" + CFLAGS="$ax_pthread_save_CFLAGS" + LIBS="$ax_pthread_save_LIBS" fi # We must check for the threads library under a number of different @@ -123,7 +128,7 @@ fi # which indicates that we try without any flags at all, and 
"pthread-config" # which is a program returning the flags for the Pth emulation library. -ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" +ax_pthread_flags="pthreads none -Kthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" # The ordering *is* (sometimes) important. Some notes on the # individual items follow: @@ -132,82 +137,225 @@ ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mt # none: in case threads are in libc; should be tried before -Kthread and # other compiler flags to prevent continual compiler warnings # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) -# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) -# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) -# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) -# -pthreads: Solaris/gcc -# -mthreads: Mingw32/gcc, Lynx/gcc +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads), Tru64 +# (Note: HP C rejects this with "bad form for `-t' option") +# -pthreads: Solaris/gcc (Note: HP C also rejects) # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it -# doesn't hurt to check since this sometimes defines pthreads too; -# also defines -D_REENTRANT) -# ... 
-mt is also the pthreads flag for HP/aCC +# doesn't hurt to check since this sometimes defines pthreads and +# -D_REENTRANT too), HP C (must be checked before -lpthread, which +# is present but should not be used directly; and before -mthreads, +# because the compiler interprets this as "-mt" + "-hreads") +# -mthreads: Mingw32/gcc, Lynx/gcc # pthread: Linux, etcetera # --thread-safe: KAI C++ # pthread-config: use pthread-config program (for GNU Pth library) -case ${host_os} in +case $host_os in + + freebsd*) + + # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) + # lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) + + ax_pthread_flags="-kthread lthread $ax_pthread_flags" + ;; + + hpux*) + + # From the cc(1) man page: "[-mt] Sets various -D flags to enable + # multi-threading and also sets -lpthread." + + ax_pthread_flags="-mt -pthread pthread $ax_pthread_flags" + ;; + + openedition*) + + # IBM z/OS requires a feature-test macro to be defined in order to + # enable POSIX threads at all, so give the user a hint if this is + # not set. (We don't define these ourselves, as they can affect + # other portions of the system API in unpredictable ways.) + + AC_EGREP_CPP([AX_PTHREAD_ZOS_MISSING], + [ +# if !defined(_OPEN_THREADS) && !defined(_UNIX03_THREADS) + AX_PTHREAD_ZOS_MISSING +# endif + ], + [AC_MSG_WARN([IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support.])]) + ;; + solaris*) # On Solaris (at least, for some versions), libc contains stubbed # (non-functional) versions of the pthreads routines, so link-based - # tests will erroneously succeed. (We need to link with -pthreads/-mt/ - # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather - # a function called by this macro, so we could check for that, but - # who knows whether they'll stub that too in a future libc.) So, - # we'll just look for -pthreads and -lpthread first: + # tests will erroneously succeed. 
(N.B.: The stubs are missing + # pthread_cleanup_push, or rather a function called by this macro, + # so we could check for that, but who knows whether they'll stub + # that too in a future libc.) So we'll check first for the + # standard Solaris way of linking pthreads (-mt -lpthread). + + ax_pthread_flags="-mt,pthread pthread $ax_pthread_flags" + ;; +esac + +# GCC generally uses -pthread, or -pthreads on some platforms (e.g. SPARC) - ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags" +AS_IF([test "x$GCC" = "xyes"], + [ax_pthread_flags="-pthread -pthreads $ax_pthread_flags"]) + +# The presence of a feature test macro requesting re-entrant function +# definitions is, on some systems, a strong hint that pthreads support is +# correctly enabled + +case $host_os in + darwin* | hpux* | linux* | osf* | solaris*) + ax_pthread_check_macro="_REENTRANT" ;; - darwin*) - ax_pthread_flags="-pthread $ax_pthread_flags" + aix*) + ax_pthread_check_macro="_THREAD_SAFE" + ;; + + *) + ax_pthread_check_macro="--" ;; esac +AS_IF([test "x$ax_pthread_check_macro" = "x--"], + [ax_pthread_check_cond=0], + [ax_pthread_check_cond="!defined($ax_pthread_check_macro)"]) + +# Are we compiling with Clang? + +AC_CACHE_CHECK([whether $CC is Clang], + [ax_cv_PTHREAD_CLANG], + [ax_cv_PTHREAD_CLANG=no + # Note that Autoconf sets GCC=yes for Clang as well as GCC + if test "x$GCC" = "xyes"; then + AC_EGREP_CPP([AX_PTHREAD_CC_IS_CLANG], + [/* Note: Clang 2.7 lacks __clang_[a-z]+__ */ +# if defined(__clang__) && defined(__llvm__) + AX_PTHREAD_CC_IS_CLANG +# endif + ], + [ax_cv_PTHREAD_CLANG=yes]) + fi + ]) +ax_pthread_clang="$ax_cv_PTHREAD_CLANG" + +ax_pthread_clang_warning=no + +# Clang needs special handling, because older versions handle the -pthread +# option in a rather... 
idiosyncratic way + +if test "x$ax_pthread_clang" = "xyes"; then + + # Clang takes -pthread; it has never supported any other flag + + # (Note 1: This will need to be revisited if a system that Clang + # supports has POSIX threads in a separate library. This tends not + # to be the way of modern systems, but it's conceivable.) + + # (Note 2: On some systems, notably Darwin, -pthread is not needed + # to get POSIX threads support; the API is always present and + # active. We could reasonably leave PTHREAD_CFLAGS empty. But + # -pthread does define _REENTRANT, and while the Darwin headers + # ignore this macro, third-party headers might not.) + + PTHREAD_CFLAGS="-pthread" + PTHREAD_LIBS= + + ax_pthread_ok=yes + + # However, older versions of Clang make a point of warning the user + # that, in an invocation where only linking and no compilation is + # taking place, the -pthread option has no effect ("argument unused + # during compilation"). They expect -pthread to be passed in only + # when source code is being compiled. + # + # Problem is, this is at odds with the way Automake and most other + # C build frameworks function, which is that the same flags used in + # compilation (CFLAGS) are also used in linking. Many systems + # supported by AX_PTHREAD require exactly this for POSIX threads + # support, and in fact it is often not straightforward to specify a + # flag that is used only in the compilation phase and not in + # linking. Such a scenario is extremely rare in practice. + # + # Even though use of the -pthread flag in linking would only print + # a warning, this can be a nuisance for well-run software projects + # that build with -Werror. So if the active version of Clang has + # this misfeature, we search for an option to squash it. 
+ + AC_CACHE_CHECK([whether Clang needs flag to prevent "argument unused" warning when linking with -pthread], + [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG], + [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG=unknown + # Create an alternate version of $ac_link that compiles and + # links in two steps (.c -> .o, .o -> exe) instead of one + # (.c -> exe), because the warning occurs only in the second + # step + ax_pthread_save_ac_link="$ac_link" + ax_pthread_sed='s/conftest\.\$ac_ext/conftest.$ac_objext/g' + ax_pthread_link_step=`$as_echo "$ac_link" | sed "$ax_pthread_sed"` + ax_pthread_2step_ac_link="($ac_compile) && (echo ==== >&5) && ($ax_pthread_link_step)" + ax_pthread_save_CFLAGS="$CFLAGS" + for ax_pthread_try in '' -Qunused-arguments -Wno-unused-command-line-argument unknown; do + AS_IF([test "x$ax_pthread_try" = "xunknown"], [break]) + CFLAGS="-Werror -Wunknown-warning-option $ax_pthread_try -pthread $ax_pthread_save_CFLAGS" + ac_link="$ax_pthread_save_ac_link" + AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], + [ac_link="$ax_pthread_2step_ac_link" + AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], + [break]) + ]) + done + ac_link="$ax_pthread_save_ac_link" + CFLAGS="$ax_pthread_save_CFLAGS" + AS_IF([test "x$ax_pthread_try" = "x"], [ax_pthread_try=no]) + ax_cv_PTHREAD_CLANG_NO_WARN_FLAG="$ax_pthread_try" + ]) -# Clang doesn't consider unrecognized options an error unless we specify -# -Werror. We throw in some extra Clang-specific options to ensure that -# this doesn't happen for GCC, which also accepts -Werror. 
+ case "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" in + no | unknown) ;; + *) PTHREAD_CFLAGS="$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG $PTHREAD_CFLAGS" ;; + esac -AC_MSG_CHECKING([if compiler needs -Werror to reject unknown flags]) -save_CFLAGS="$CFLAGS" -ax_pthread_extra_flags="-Werror" -CFLAGS="$CFLAGS $ax_pthread_extra_flags -Wunknown-warning-option -Wsizeof-array-argument" -AC_COMPILE_IFELSE([AC_LANG_PROGRAM([int foo(void);],[foo()])], - [AC_MSG_RESULT([yes])], - [ax_pthread_extra_flags= - AC_MSG_RESULT([no])]) -CFLAGS="$save_CFLAGS" +fi # $ax_pthread_clang = yes -if test x"$ax_pthread_ok" = xno; then -for flag in $ax_pthread_flags; do +if test "x$ax_pthread_ok" = "xno"; then +for ax_pthread_try_flag in $ax_pthread_flags; do - case $flag in + case $ax_pthread_try_flag in none) AC_MSG_CHECKING([whether pthreads work without any flags]) ;; + -mt,pthread) + AC_MSG_CHECKING([whether pthreads work with -mt -lpthread]) + PTHREAD_CFLAGS="-mt" + PTHREAD_LIBS="-lpthread" + ;; + -*) - AC_MSG_CHECKING([whether pthreads work with $flag]) - PTHREAD_CFLAGS="$flag" + AC_MSG_CHECKING([whether pthreads work with $ax_pthread_try_flag]) + PTHREAD_CFLAGS="$ax_pthread_try_flag" ;; pthread-config) AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no]) - if test x"$ax_pthread_config" = xno; then continue; fi + AS_IF([test "x$ax_pthread_config" = "xno"], [continue]) PTHREAD_CFLAGS="`pthread-config --cflags`" PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" ;; *) - AC_MSG_CHECKING([for the pthreads library -l$flag]) - PTHREAD_LIBS="-l$flag" + AC_MSG_CHECKING([for the pthreads library -l$ax_pthread_try_flag]) + PTHREAD_LIBS="-l$ax_pthread_try_flag" ;; esac - save_LIBS="$LIBS" - save_CFLAGS="$CFLAGS" + ax_pthread_save_CFLAGS="$CFLAGS" + ax_pthread_save_LIBS="$LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" LIBS="$PTHREAD_LIBS $LIBS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS $ax_pthread_extra_flags" # Check for various functions. 
We must include pthread.h, # since some functions may be macros. (On the Sequent, we @@ -218,7 +366,11 @@ for flag in $ax_pthread_flags; do # pthread_cleanup_push because it is one of the few pthread # functions on Solaris that doesn't have a non-functional libc stub. # We try pthread_create on general principles. + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h> +# if $ax_pthread_check_cond +# error "$ax_pthread_check_macro must be defined" +# endif static void routine(void *a) { a = 0; } static void *start_routine(void *a) { return a; }], [pthread_t th; pthread_attr_t attr; @@ -227,16 +379,14 @@ for flag in $ax_pthread_flags; do pthread_attr_init(&attr); pthread_cleanup_push(routine, 0); pthread_cleanup_pop(0) /* ; */])], - [ax_pthread_ok=yes], - []) + [ax_pthread_ok=yes], + []) - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" + CFLAGS="$ax_pthread_save_CFLAGS" + LIBS="$ax_pthread_save_LIBS" AC_MSG_RESULT([$ax_pthread_ok]) - if test "x$ax_pthread_ok" = xyes; then - break; - fi + AS_IF([test "x$ax_pthread_ok" = "xyes"], [break]) PTHREAD_LIBS="" PTHREAD_CFLAGS="" @@ -244,71 +394,74 @@ done fi # Various other checks: -if test "x$ax_pthread_ok" = xyes; then - save_LIBS="$LIBS" - LIBS="$PTHREAD_LIBS $LIBS" - save_CFLAGS="$CFLAGS" +if test "x$ax_pthread_ok" = "xyes"; then + ax_pthread_save_CFLAGS="$CFLAGS" + ax_pthread_save_LIBS="$LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. 
- AC_MSG_CHECKING([for joinable pthread attribute]) - attr_name=unknown - for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do - AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>], - [int attr = $attr; return attr /* ; */])], - [attr_name=$attr; break], - []) - done - AC_MSG_RESULT([$attr_name]) - if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then - AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE], [$attr_name], - [Define to necessary symbol if this constant - uses a non-standard name on your system.]) - fi - - AC_MSG_CHECKING([if more special flags are required for pthreads]) - flag=no - case ${host_os} in - aix* | freebsd* | darwin*) flag="-D_THREAD_SAFE";; - osf* | hpux*) flag="-D_REENTRANT";; - solaris*) - if test "$GCC" = "yes"; then - flag="-D_REENTRANT" - else - # TODO: What about Clang on Solaris? - flag="-mt -D_REENTRANT" - fi - ;; - esac - AC_MSG_RESULT([$flag]) - if test "x$flag" != xno; then - PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" - fi + AC_CACHE_CHECK([for joinable pthread attribute], + [ax_cv_PTHREAD_JOINABLE_ATTR], + [ax_cv_PTHREAD_JOINABLE_ATTR=unknown + for ax_pthread_attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>], + [int attr = $ax_pthread_attr; return attr /* ; */])], + [ax_cv_PTHREAD_JOINABLE_ATTR=$ax_pthread_attr; break], + []) + done + ]) + AS_IF([test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xunknown" && \ + test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xPTHREAD_CREATE_JOINABLE" && \ + test "x$ax_pthread_joinable_attr_defined" != "xyes"], + [AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE], + [$ax_cv_PTHREAD_JOINABLE_ATTR], + [Define to necessary symbol if this constant + uses a non-standard name on your system.]) + ax_pthread_joinable_attr_defined=yes + ]) + + AC_CACHE_CHECK([whether more special flags are required for pthreads], + [ax_cv_PTHREAD_SPECIAL_FLAGS], + [ax_cv_PTHREAD_SPECIAL_FLAGS=no + case $host_os in + solaris*) + 
ax_cv_PTHREAD_SPECIAL_FLAGS="-D_POSIX_PTHREAD_SEMANTICS" + ;; + esac + ]) + AS_IF([test "x$ax_cv_PTHREAD_SPECIAL_FLAGS" != "xno" && \ + test "x$ax_pthread_special_flags_added" != "xyes"], + [PTHREAD_CFLAGS="$ax_cv_PTHREAD_SPECIAL_FLAGS $PTHREAD_CFLAGS" + ax_pthread_special_flags_added=yes]) AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT], - [ax_cv_PTHREAD_PRIO_INHERIT], [ - AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]], - [[int i = PTHREAD_PRIO_INHERIT;]])], - [ax_cv_PTHREAD_PRIO_INHERIT=yes], - [ax_cv_PTHREAD_PRIO_INHERIT=no]) + [ax_cv_PTHREAD_PRIO_INHERIT], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]], + [[int i = PTHREAD_PRIO_INHERIT;]])], + [ax_cv_PTHREAD_PRIO_INHERIT=yes], + [ax_cv_PTHREAD_PRIO_INHERIT=no]) ]) - AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes"], - [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.])]) + AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" && \ + test "x$ax_pthread_prio_inherit_defined" != "xyes"], + [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.]) + ax_pthread_prio_inherit_defined=yes + ]) - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" + CFLAGS="$ax_pthread_save_CFLAGS" + LIBS="$ax_pthread_save_LIBS" # More AIX lossage: compile with *_r variant - if test "x$GCC" != xyes; then + if test "x$GCC" != "xyes"; then case $host_os in aix*) AS_CASE(["x/$CC"], - [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6], - [#handle absolute path differently from PATH based program lookup - AS_CASE(["x$CC"], - [x/*], - [AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])], - [AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])])]) + [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6], + [#handle absolute path differently from PATH based program lookup + AS_CASE(["x$CC"], + [x/*], + [AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])], + [AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])])]) ;; 
esac fi
@@ -321,7 +474,7 @@ AC_SUBST([PTHREAD_CFLAGS])
 AC_SUBST([PTHREAD_CC])
 
 # Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
-if test x"$ax_pthread_ok" = xyes; then
+if test "x$ax_pthread_ok" = "xyes"; then
         ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1])
         :
 else
diff --git a/src/Makefile.am b/src/Makefile.am
index e269c19e5d5b34fdcc469ff037856db91dc6e7f3..e229d7f11e76aae4b6898a2f86145ec2bd025592 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -60,7 +60,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
     collectgroup.c hydro_space.c equation_of_state.c
 
 # Include files for distribution, not installation.
-nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
+nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
     kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h \
     runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \
     dimension.h equation_of_state.h part_type.h periodic.h \
diff --git a/src/barrier.h b/src/barrier.h
new file mode 100644
index 0000000000000000000000000000000000000000..dbe856b402580f859042023cd423bd955887f790
--- /dev/null
+++ b/src/barrier.h
@@ -0,0 +1,189 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2017 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_BARRIER_H
+#define SWIFT_BARRIER_H
+
+/**
+ * @file barrier.h
+ * @brief Define the thread barriers if the POSIX implementation on this system
+ * does not.
+ *
+ * The pthread barriers are only an option of the POSIX norm and they are not
+ * necessarily implemented. One example is OSX where all the rest of POSIX
+ * exists but not the barriers.
+ * We implement them here in a simple way to allow for SWIFT to run on such
+ * systems but this may lead to poorer performance.
+ *
+ * Note that we only define the three functions we need. This is a textbook
+ * implementation of a barrier that uses the common POSIX features (mutex,
+ * conditions and broadcasts).
+ *
+ * If the pthread barriers exist (Linux systems), we default to them.
+ */
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Standard headers */
+#include <pthread.h>
+
+/* Does this POSIX implementation provide barriers? */
+#ifdef HAVE_PTHREAD_BARRIERS
+
+#define swift_barrier_t pthread_barrier_t
+#define swift_barrier_wait pthread_barrier_wait
+#define swift_barrier_init pthread_barrier_init
+#define swift_barrier_destroy pthread_barrier_destroy
+
+#else
+
+/* Local headers */
+#include "error.h"
+#include "inline.h"
+
+/**
+ * @brief An ersatz of POSIX barriers to be used on systems that don't provide
+ * the good ones.
+ */
+typedef struct {
+
+  /*! Barrier mutex */
+  pthread_mutex_t mutex;
+
+  /*! Condition to open the barrier */
+  pthread_cond_t condition;
+
+  /*! Total number of threads */
+  int limit;
+
+  /*! Number of threads that reached the barrier */
+  int count;
+
+  /*! Number of times the barrier has been opened so far */
+  unsigned int generation;
+
+} swift_barrier_t;
+
+/**
+ * @brief Initialise a barrier object.
+ *
+ * @param barrier The #swift_barrier_t to initialise
+ * @param unused Unused parameter (NULL) as we don't support barrier attributes.
+ * @param count The number of threads that will wait at the barrier.
+ *
+ * @return 0. Initialisation failures are reported through error().
+ */
+static INLINE int swift_barrier_init(swift_barrier_t *barrier, void *unused,
+                                     unsigned int count) {
+  /* Initialise the mutex */
+  if (pthread_mutex_init(&barrier->mutex, 0) != 0)
+    error("Error initializing the barrier mutex");
+
+  /* Initialise the condition */
+  if (pthread_cond_init(&barrier->condition, 0) != 0)
+    error("Error initializing the barrier condition");
+
+  barrier->limit = count;
+  barrier->count = 0;
+  barrier->generation = 0;
+
+  /* All is good */
+  return 0;
+}
+
+/**
+ * @brief Make a set of threads wait at the barrier
+ *
+ * Note that once all threads have reached the barrier, we also
+ * reset the barrier to state where it is ready to be re-used
+ * without calling swift_barrier_init.
+ *
+ * Waiting threads re-check the barrier generation after every wake-up as
+ * pthread_cond_wait() is allowed to return spuriously.
+ *
+ * @param barrier The (initialised) #swift_barrier_t to wait at.
+ *
+ * @return 1 for the last thread to reach the barrier, 0 for all the others.
+ */
+static INLINE int swift_barrier_wait(swift_barrier_t *barrier) {
+
+  /* Start by locking the barrier */
+  pthread_mutex_lock(&barrier->mutex);
+
+  /* One more thread has gone home */
+  barrier->count++;
+
+  /* Are threads still running? */
+  if (barrier->count < barrier->limit) {
+
+    /* Remember which opening of the barrier we are waiting for */
+    const unsigned int generation = barrier->generation;
+
+    /* Make the thread wait until everyone is back. A wake-up that happens
+     * before the generation has changed is spurious: go back to sleep. */
+    while (generation == barrier->generation)
+      pthread_cond_wait(&barrier->condition, &barrier->mutex);
+
+    /* Release the mutex */
+    pthread_mutex_unlock(&barrier->mutex);
+
+    /* Say that this was not the last thread */
+    return 0;
+
+  } else { /* Everybody is home */
+
+    /* Re-initialize the barrier and start a new generation */
+    barrier->count = 0;
+    barrier->generation++;
+
+    /* Open the barrier (i.e. release the threads blocked in the while loop) */
+    pthread_cond_broadcast(&barrier->condition);
+
+    /* Release the mutex */
+    pthread_mutex_unlock(&barrier->mutex);
+
+    /* Say that we are all done */
+    return 1;
+  }
+}
+
+/**
+ * @brief Destroy a barrier object
+ *
+ * Note that if destroy is called before a barrier is open, we do not attempt
+ * to wait for the barrier to open before destroying it. Any error reported
+ * by the pthread library is passed back to the caller.
+ *
+ * @param barrier The #swift_barrier_t object to destroy.
+ *
+ * @return 0 on success, the pthread error code otherwise.
+ */
+static INLINE int swift_barrier_destroy(swift_barrier_t *barrier) {
+
+  /* Destroy the pthread things (both of them, even if the first one fails) */
+  const int cond_err = pthread_cond_destroy(&barrier->condition);
+  const int mutex_err = pthread_mutex_destroy(&barrier->mutex);
+
+  /* Report the first problem encountered, if any */
+  return (cond_err != 0) ? cond_err : mutex_err;
+}
+
+#endif /* HAVE_PTHREAD_BARRIERS */
+
+#endif /* SWIFT_BARRIER_H */
diff --git a/src/cell.c b/src/cell.c
index 8d6c6b35c68b4af27ed7c836b1618e9d7857d9dd..9c5d1bd3aae8a0982db79c8fd370354c5c840dac 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -50,6 +50,7 @@
 #include "active.h"
 #include "atomic.h"
 #include "drift.h"
+#include "engine.h"
 #include "error.h"
 #include "gravity.h"
 #include "hydro.h"
@@ -1862,6 +1863,7 @@ void cell_activate_subcell_external_grav_tasks(struct cell *ci,
 int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
 
   struct engine *e = s->space->e;
+  const int nodeID = e->nodeID;
   int rebuild = 0;
 
   /* Un-skip the density tasks involved with this cell. */
@@ -1873,13 +1875,13 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
     const int cj_active = (cj != NULL) ? cell_is_active(cj, e) : 0;
 
     /* Only activate tasks that involve a local active cell. 
*/ - if ((ci_active && ci->nodeID == engine_rank) || - (cj_active && cj->nodeID == engine_rank)) { + if ((ci_active && ci->nodeID == nodeID) || + (cj_active && cj->nodeID == nodeID)) { scheduler_activate(s, t); /* Activate hydro drift */ if (t->type == task_type_self) { - if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); + if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); } /* Set the correct sorting flags and activate hydro drifts */ @@ -1891,8 +1893,8 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { cj->dx_max_sort_old = cj->dx_max_sort; /* Activate the drift tasks. */ - if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); - if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); + if (ci->nodeID == nodeID) cell_activate_drift_part(ci, s); + if (cj->nodeID == nodeID) cell_activate_drift_part(cj, s); /* Check the sorts and activate them if needed. */ cell_activate_sorts(ci, t->flags, s); @@ -1913,7 +1915,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { #ifdef WITH_MPI /* Activate the send/recv tasks. */ - if (ci->nodeID != engine_rank) { + if (ci->nodeID != nodeID) { /* If the local cell is active, receive data from the foreign cell. */ if (cj_active) { @@ -1951,7 +1953,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { /* If the local cell is active, send its ti_end values. */ if (cj_active) scheduler_activate_send(s, cj->send_ti, ci->nodeID); - } else if (cj->nodeID != engine_rank) { + } else if (cj->nodeID != nodeID) { /* If the local cell is active, receive data from the foreign cell. */ if (ci_active) { @@ -2001,8 +2003,8 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { struct cell *cj = t->cj; /* Only activate tasks that involve a local active cell. 
*/ - if ((cell_is_active(ci, e) && ci->nodeID == engine_rank) || - (cj != NULL && cell_is_active(cj, e) && cj->nodeID == engine_rank)) { + if ((cell_is_active(ci, e) && ci->nodeID == nodeID) || + (cj != NULL && cell_is_active(cj, e) && cj->nodeID == nodeID)) { scheduler_activate(s, t); /* Set the drifting flags */ @@ -2018,7 +2020,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { } /* Unskip all the other task types. */ - if (c->nodeID == engine_rank && cell_is_active(c, e)) { + if (c->nodeID == nodeID && cell_is_active(c, e)) { for (struct link *l = c->gradient; l != NULL; l = l->next) scheduler_activate(s, l->t); diff --git a/src/common_io.c b/src/common_io.c index 1b0770b1132c8c2a62bbe0bcf3426f8f6eef4a17..917f2a63755f88c300ed7ff654bfe6e8cf3c464f 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -386,6 +386,7 @@ void io_write_code_description(hid_t h_file) { configuration_options()); io_write_attribute_s(h_grpcode, "CFLAGS", compilation_cflags()); io_write_attribute_s(h_grpcode, "HDF5 library version", hdf5_version()); + io_write_attribute_s(h_grpcode, "Thread barriers", thread_barrier_version()); #ifdef HAVE_FFTW io_write_attribute_s(h_grpcode, "FFTW library version", fftw3_version()); #endif diff --git a/src/dump.c b/src/dump.c index 2c0cf221ebd897bab0d047c196ce8a2aeddc6eae..de5c5afb18bd2c3cb80fe983b0f88ee60350fd5e 100644 --- a/src/dump.c +++ b/src/dump.c @@ -20,6 +20,15 @@ /* Config parameters. */ #include "../config.h" +#ifdef HAVE_POSIX_FALLOCATE + +/* This object's header. */ +#include "dump.h" + +/* Local headers. */ +#include "atomic.h" +#include "error.h" + /* Some standard headers. */ #include <errno.h> #include <fcntl.h> @@ -29,13 +38,6 @@ #include <sys/types.h> #include <unistd.h> -/* This object's header. */ -#include "dump.h" - -/* Local headers. */ -#include "atomic.h" -#include "error.h" - /** * @brief Obtain a chunk of memory from a dump. 
* @@ -44,7 +46,6 @@ * @param offset The offset of the returned memory address within the dump file. * @return A pointer to the memory-mapped chunk of data. */ - void *dump_get(struct dump *d, size_t count, size_t *offset) { size_t local_offset = atomic_add(&d->count, count); *offset = local_offset + d->file_offset; @@ -54,7 +55,6 @@ void *dump_get(struct dump *d, size_t count, size_t *offset) { /** * @brief Ensure that at least size bytes are available in the #dump. */ - void dump_ensure(struct dump *d, size_t size) { /* If we have enough space already, just bail. */ @@ -88,7 +88,6 @@ void dump_ensure(struct dump *d, size_t size) { /** * @brief Flush the #dump to disk. */ - void dump_sync(struct dump *d) { if (msync(d->data, d->count, MS_SYNC) != 0) error("Failed to sync memory-mapped data."); @@ -97,7 +96,6 @@ void dump_sync(struct dump *d) { /** * @brief Finalize the #dump. */ - void dump_close(struct dump *d) { /* Unmap the data in memory. */ if (munmap(d->data, d->count) != 0) { @@ -121,7 +119,6 @@ void dump_close(struct dump *d) { * note that it will be overwritten. * @param size The initial buffer size for this #dump. */ - void dump_init(struct dump *d, const char *filename, size_t size) { /* Create the output file. */ @@ -151,3 +148,5 @@ void dump_init(struct dump *d, const char *filename, size_t size) { d->file_offset = 0; d->page_mask = page_mask; } + +#endif diff --git a/src/dump.h b/src/dump.h index a7e934218c271d2f82b99d39f278e5af3047be6e..6857aa3a008a27e0e8ed23854d84f848ee0ca2be 100644 --- a/src/dump.h +++ b/src/dump.h @@ -19,8 +19,13 @@ #ifndef SWIFT_DUMP_H #define SWIFT_DUMP_H -/* Includes. */ -#include "lock.h" +/* Config parameters. */ +#include "../config.h" + +#ifdef HAVE_POSIX_FALLOCATE /* Are we on a sensible platform? */ + +/* Standard headers */ +#include <stdlib.h> /* Some constants. 
*/ #define dump_grow_ensure_factor 10 @@ -54,4 +59,6 @@ void dump_sync(struct dump *d); void dump_close(struct dump *d); void *dump_get(struct dump *d, size_t count, size_t *offset); +#endif /* HAVE_POSIX_FALLOCATE */ + #endif /* SWIFT_DUMP_H */ diff --git a/src/engine.c b/src/engine.c index 53ade0ed336fa7f74322fddb291a10be954bf9a2..869432417d848e68290a44ba4e63cc9b3dc75b9c 100644 --- a/src/engine.c +++ b/src/engine.c @@ -3162,10 +3162,10 @@ void engine_prepare(struct engine *e) { void engine_barrier(struct engine *e) { /* Wait at the wait barrier. */ - pthread_barrier_wait(&e->wait_barrier); + swift_barrier_wait(&e->wait_barrier); /* Wait at the run barrier. */ - pthread_barrier_wait(&e->run_barrier); + swift_barrier_wait(&e->run_barrier); } /** @@ -3481,7 +3481,7 @@ void engine_launch(struct engine *e) { atomic_inc(&e->sched.waiting); /* Cry havoc and let loose the dogs of war. */ - pthread_barrier_wait(&e->run_barrier); + swift_barrier_wait(&e->run_barrier); /* Load the tasks. */ scheduler_start(&e->sched); @@ -3493,7 +3493,7 @@ void engine_launch(struct engine *e) { pthread_mutex_unlock(&e->sched.sleep_mutex); /* Sit back and wait for the runners to come home. */ - pthread_barrier_wait(&e->wait_barrier); + swift_barrier_wait(&e->wait_barrier); if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), @@ -4687,8 +4687,8 @@ void engine_init(struct engine *e, struct space *s, threadpool_init(&e->threadpool, e->nr_threads); /* First of all, init the barrier and lock it. */ - if (pthread_barrier_init(&e->wait_barrier, NULL, e->nr_threads + 1) != 0 || - pthread_barrier_init(&e->run_barrier, NULL, e->nr_threads + 1) != 0) + if (swift_barrier_init(&e->wait_barrier, NULL, e->nr_threads + 1) != 0 || + swift_barrier_init(&e->run_barrier, NULL, e->nr_threads + 1) != 0) error("Failed to initialize barrier."); /* Expected average for tasks per cell. 
If set to zero we use a heuristic @@ -4774,7 +4774,7 @@ void engine_init(struct engine *e, struct space *s, #endif /* Wait for the runner threads to be in place. */ - pthread_barrier_wait(&e->wait_barrier); + swift_barrier_wait(&e->wait_barrier); } /** diff --git a/src/engine.h b/src/engine.h index b5ad53290d5a144712201a5d515b4c38bc22871b..3613afd9f14705be74c00e60ff7e83c739f9339b 100644 --- a/src/engine.h +++ b/src/engine.h @@ -32,11 +32,8 @@ #include <mpi.h> #endif -/* Some standard headers. */ -#include <pthread.h> -#include <stdio.h> - /* Includes. */ +#include "barrier.h" #include "clocks.h" #include "collectgroup.h" #include "cooling_struct.h" @@ -194,8 +191,8 @@ struct engine { int count_step; /* Data for the threads' barrier. */ - pthread_barrier_t wait_barrier; - pthread_barrier_t run_barrier; + swift_barrier_t wait_barrier; + swift_barrier_t run_barrier; /* ID of the node this engine lives on. */ int nr_nodes, nodeID; diff --git a/src/logger.c b/src/logger.c index b2acf47aa70cef55f53d296033f6f5c6162fd5bd..0e0bc930c0841f985da2c353357a69b69aba5d91 100644 --- a/src/logger.c +++ b/src/logger.c @@ -20,6 +20,8 @@ /* Config parameters. */ #include "../config.h" +#ifdef HAVE_POSIX_FALLOCATE /* Are we on a sensible platform? */ + /* Some standard headers. */ #include <stdint.h> #include <stdlib.h> @@ -41,7 +43,6 @@ * * @return The size of the logger message in bytes. */ - int logger_size(unsigned int mask) { /* Start with 8 bytes for the header. */ @@ -95,7 +96,6 @@ int logger_size(unsigned int mask) { * @param offset Pointer to the offset of the previous log of this particle. * @param dump The #dump in which to log the particle data. */ - void logger_log_part(struct part *p, unsigned int mask, size_t *offset, struct dump *dump) { @@ -176,7 +176,6 @@ void logger_log_part(struct part *p, unsigned int mask, size_t *offset, * @param offset Pointer to the offset of the previous log of this particle. * @param dump The #dump in which to log the particle data. 
*/ - void logger_log_gpart(struct gpart *p, unsigned int mask, size_t *offset, struct dump *dump) { @@ -270,7 +269,6 @@ void logger_log_timestamp(unsigned long long int timestamp, size_t *offset, * * @return The mask containing the values read. */ - int logger_read_part(struct part *p, size_t *offset, const char *buff) { /* Jump to the offset. */ @@ -349,7 +347,6 @@ int logger_read_part(struct part *p, size_t *offset, const char *buff) { * * @return The mask containing the values read. */ - int logger_read_gpart(struct gpart *p, size_t *offset, const char *buff) { /* Jump to the offset. */ @@ -416,7 +413,6 @@ int logger_read_gpart(struct gpart *p, size_t *offset, const char *buff) { * * @return The mask containing the values read. */ - int logger_read_timestamp(unsigned long long int *t, size_t *offset, const char *buff) { @@ -444,3 +440,5 @@ int logger_read_timestamp(unsigned long long int *t, size_t *offset, /* Finally, return the mask of the values we just read. */ return mask; } + +#endif /* HAVE_POSIX_FALLOCATE */ diff --git a/src/logger.h b/src/logger.h index 32fae752c2ae13a143809d9df3030dbc06b0942d..596c0903750404d0934e0d3843a5461523700e9e 100644 --- a/src/logger.h +++ b/src/logger.h @@ -20,9 +20,11 @@ #define SWIFT_LOGGER_H /* Includes. */ -#include "dump.h" #include "part.h" +/* Forward declaration */ +struct dump; + /** * Logger entries contain messages representing the particle data at a given * point in time during the simulation. 
diff --git a/src/swift.h b/src/swift.h index a5556730ae965109385257c0c38bfc34277223d4..33a0425154d45e030443bc7f2c405377ef6a39e2 100644 --- a/src/swift.h +++ b/src/swift.h @@ -32,6 +32,7 @@ #include "cooling.h" #include "cycle.h" #include "debug.h" +#include "dump.h" #include "engine.h" #include "error.h" #include "gravity.h" @@ -40,6 +41,7 @@ #include "hydro.h" #include "hydro_properties.h" #include "lock.h" +#include "logger.h" #include "map.h" #include "multipole.h" #include "parallel_io.h" diff --git a/src/threadpool.c b/src/threadpool.c index 465756f71d88df81921a880edf8cdb1ee17f6026..0ef69f9895d3d127a330b105ec385d97975fc82f 100644 --- a/src/threadpool.c +++ b/src/threadpool.c @@ -169,10 +169,10 @@ void *threadpool_runner(void *data) { while (1) { /* Let the controller know that this thread is waiting. */ - pthread_barrier_wait(&tp->wait_barrier); + swift_barrier_wait(&tp->wait_barrier); /* Wait for the controller. */ - pthread_barrier_wait(&tp->run_barrier); + swift_barrier_wait(&tp->run_barrier); /* If no map function is specified, just die. We use this as a mechanism to shut down threads without leaving the barriers in an invalid state. */ @@ -212,8 +212,8 @@ void threadpool_init(struct threadpool *tp, int num_threads) { if (num_threads == 1) return; /* Init the barriers. */ - if (pthread_barrier_init(&tp->wait_barrier, NULL, num_threads) != 0 || - pthread_barrier_init(&tp->run_barrier, NULL, num_threads) != 0) + if (swift_barrier_init(&tp->wait_barrier, NULL, num_threads) != 0 || + swift_barrier_init(&tp->run_barrier, NULL, num_threads) != 0) error("Failed to initialize barriers."); /* Set the task counter to zero. */ @@ -237,7 +237,7 @@ void threadpool_init(struct threadpool *tp, int num_threads) { } /* Wait for all the threads to be up and running. 
*/ - pthread_barrier_wait(&tp->wait_barrier); + swift_barrier_wait(&tp->wait_barrier); } /** @@ -288,13 +288,13 @@ void threadpool_map(struct threadpool *tp, threadpool_map_function map_function, tp->num_threads_running = 0; /* Wait for all the threads to be up and running. */ - pthread_barrier_wait(&tp->run_barrier); + swift_barrier_wait(&tp->run_barrier); /* Do some work while I'm at it. */ threadpool_chomp(tp, tp->num_threads - 1); /* Wait for all threads to be done. */ - pthread_barrier_wait(&tp->wait_barrier); + swift_barrier_wait(&tp->wait_barrier); #ifdef SWIFT_DEBUG_THREADPOOL /* Log the total call time to thread id -1. */ @@ -321,15 +321,15 @@ void threadpool_clean(struct threadpool *tp) { * and waiting for all the threads to terminate. This ensures that no * thread is still waiting at a barrier. */ tp->map_function = NULL; - pthread_barrier_wait(&tp->run_barrier); + swift_barrier_wait(&tp->run_barrier); for (int k = 0; k < tp->num_threads - 1; k++) { void *retval; pthread_join(tp->threads[k], &retval); } /* Release the barriers. */ - if (pthread_barrier_destroy(&tp->wait_barrier) != 0 || - pthread_barrier_destroy(&tp->run_barrier) != 0) + if (swift_barrier_destroy(&tp->wait_barrier) != 0 || + swift_barrier_destroy(&tp->run_barrier) != 0) error("Failed to destroy threadpool barriers."); /* Clean up memory. */ diff --git a/src/threadpool.h b/src/threadpool.h index 019403f658a22d36c4a6e1ec1ae1fdc47c62658d..9d19b56836330f59093933bbed5900727142f4a2 100644 --- a/src/threadpool.h +++ b/src/threadpool.h @@ -22,10 +22,11 @@ /* Config parameters. */ #include "../config.h" -/* Some standard headers. */ +/* Standard headers */ #include <pthread.h> /* Local includes. */ +#include "barrier.h" #include "cycle.h" /* Local defines. */ @@ -70,8 +71,8 @@ struct threadpool { pthread_t *threads; /* This is where threads go to rest. 
*/ - pthread_barrier_t wait_barrier; - pthread_barrier_t run_barrier; + swift_barrier_t wait_barrier; + swift_barrier_t run_barrier; /* Current map data and count. */ void *map_data, *map_extra_data; 
diff --git a/src/version.c b/src/version.c
index 46c31103c953ce2ff70b9e346f88470008dd8266..f4177e5c83c776ea063ad32fd00895199c94b182 100644
--- a/src/version.c
+++ b/src/version.c
@@ -332,6 +332,22 @@ const char *fftw3_version(void) {
   return version;
 }
 
+/**
+ * @brief Name the thread-barrier implementation this build of SWIFT uses.
+ *
+ * @result "pthread" if HAVE_PTHREAD_BARRIERS is defined, "homemade" otherwise.
+ */
+const char *thread_barrier_version(void) {
+  static char version[256] = {0};
+#ifdef HAVE_PTHREAD_BARRIERS
+  const char *flavour = "pthread";
+#else
+  const char *flavour = "homemade";
+#endif
+  sprintf(version, "%s", flavour);
+  return version;
+}
+
 /**
  * @brief Prints a greeting message to the standard output containing code
  * version and revision number
diff --git a/src/version.h b/src/version.h index 60998958098c1a37198cdbb3729982835f6e4f62..1af76b647b2d401bb4a7998b281864fb3e63a0c8 100644 --- a/src/version.h +++ b/src/version.h @@ -34,6 +34,7 @@ const char* mpi_version(void); const char* metis_version(void); const char* hdf5_version(void); const char* fftw3_version(void); +const char* thread_barrier_version(void); void greetings(void); #endif /* SWIFT_VERSION_H */ diff --git a/tests/test125cells.c b/tests/test125cells.c index 62ded5c1df47c9d40759dbc7aa239bb4a8a47dc4..bf1c219fab9927b81bb087684f6ad0d747e958bc 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -474,8 +474,10 @@ int main(int argc, char *argv[]) { unsigned long long cpufreq = 0; clocks_set_cpufreq(cpufreq); - /* Choke on FP-exceptions */ +/* Choke on FP-exceptions */ +#ifdef HAVE_FE_ENABLE_EXCEPT feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif /* Get some randomness going */ srand(0); diff --git a/tests/test27cells.c b/tests/test27cells.c index c153fc6f57dc976e1f00767bae275a6a3de24c00..f9e3ba1dd78e67a11414c88efe3c604b02b1aa16 100644 --- 
a/tests/test27cells.c +++ b/tests/test27cells.c @@ -333,8 +333,10 @@ int main(int argc, char *argv[]) { unsigned long long cpufreq = 0; clocks_set_cpufreq(cpufreq); - /* Choke on FP-exceptions */ +/* Choke on FP-exceptions */ +#ifdef HAVE_FE_ENABLE_EXCEPT feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif /* Get some randomness going */ srand(0); diff --git a/tests/testActivePair.c b/tests/testActivePair.c index c8e0233c2c317be321ed8b923a46a98e0905b36c..fd2244db971f6a5f7cfeb62b9661c33b7ee88145 100644 --- a/tests/testActivePair.c +++ b/tests/testActivePair.c @@ -433,8 +433,10 @@ int main(int argc, char *argv[]) { unsigned long long cpufreq = 0; clocks_set_cpufreq(cpufreq); - /* Choke on FP-exceptions */ +/* Choke on FP-exceptions */ +#ifdef HAVE_FE_ENABLE_EXCEPT feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif /* Generate a RNG seed from time. */ unsigned int seed = time(NULL); diff --git a/tests/testDump.c b/tests/testDump.c index 7343af49f654582de444681ec291311d41251dca..b1406ba5f5e6652c6e2727d6b161215f4e905d62 100644 --- a/tests/testDump.c +++ b/tests/testDump.c @@ -20,6 +20,8 @@ /* Config parameters. */ #include "../config.h" +#ifdef HAVE_POSIX_FALLOCATE /* Are we on a sensible platform? */ + /* Some standard headers. */ #include <errno.h> #include <fcntl.h> @@ -30,11 +32,8 @@ #include <sys/types.h> #include <unistd.h> -/* This object's header. */ -#include "../src/dump.h" - /* Local headers. */ -#include "../src/threadpool.h" +#include "swift.h" void dump_mapper(void *map_data, int num_elements, void *extra_data) { struct dump *d = (struct dump *)extra_data; @@ -49,10 +48,14 @@ int main(int argc, char *argv[]) { /* Some constants. */ const int num_threads = 4; - const char *filename = "/tmp/dump_test.out"; const int num_runs = 20; const int chunk_size = 1000; + /* Some constants. 
*/ + char filename[256]; + const int now = time(NULL); + sprintf(filename, "/tmp/SWIFT_dump_test_%d.out", now); + /* Prepare a threadpool to write to the dump. */ struct threadpool t; threadpool_init(&t, num_threads); @@ -82,6 +85,15 @@ int main(int argc, char *argv[]) { /* Clean the threads */ threadpool_clean(&t); + /* Be clean */ + remove(filename); + /* Return a happy number. */ return 0; } + +#else + +int main(int argc, char *argv[]) { return 0; } + +#endif /* HAVE_POSIX_FALLOCATE */ diff --git a/tests/testFFT.c b/tests/testFFT.c index 4ddd030ece95bf26cbfe41f2408be7c3e0c50535..970678c9a503517599737bfe41766913dde2a2d7 100644 --- a/tests/testFFT.c +++ b/tests/testFFT.c @@ -76,7 +76,9 @@ int main() { engine.max_active_bin = num_time_bins; engine.gravity_properties = &gravity_properties; engine.nr_threads = 1; - + engine.nodeID = 0; + engine_rank = 0; + struct runner runner; runner.e = &engine; diff --git a/tests/testLogger.c b/tests/testLogger.c index ec3b33b6a9e38741e41b4678681e7afe9b9a7950..9ec08607383fdb192b7ba994e4af506fde12fea9 100644 --- a/tests/testLogger.c +++ b/tests/testLogger.c @@ -20,18 +20,16 @@ /* Config parameters. */ #include "../config.h" +#ifdef HAVE_POSIX_FALLOCATE /* Are we on a sensible platform? */ + /* Some standard headers. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> -/* This object's header. */ -#include "../src/logger.h" - /* Local headers. */ -#include "../src/dump.h" -#include "../src/part.h" +#include "swift.h" void test_log_parts(struct dump *d) { @@ -223,7 +221,9 @@ void test_log_timestamps(struct dump *d) { int main(int argc, char *argv[]) { /* Some constants. */ - const char *filename = "/tmp/dump_test.out"; + char filename[256]; + const int now = time(NULL); + sprintf(filename, "/tmp/SWIFT_logger_test_%d.out", now); /* Prepare a dump. */ struct dump d; @@ -241,7 +241,15 @@ int main(int argc, char *argv[]) { /* Finalize the dump. 
*/ dump_close(&d); + /* Be clean */ + remove(filename); + /* Return a happy number. */ - printf("PASS\n"); return 0; } + +#else + +int main(int argc, char *argv[]) { return 0; } + +#endif /* HAVE_POSIX_FALLOCATE */ diff --git a/tests/testPeriodicBC.c b/tests/testPeriodicBC.c index 1afe829ce70c5d02f565e8fd002ba414abcb7388..ded767c5d657c0a05dc227d1ee8facbfa5f4de63 100644 --- a/tests/testPeriodicBC.c +++ b/tests/testPeriodicBC.c @@ -391,8 +391,10 @@ int main(int argc, char *argv[]) { unsigned long long cpufreq = 0; clocks_set_cpufreq(cpufreq); - /* Choke on FP-exceptions */ +/* Choke on FP-exceptions */ +#ifdef HAVE_FE_ENABLE_EXCEPT feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif /* Get some randomness going */ srand(0); diff --git a/tests/testSymmetry.c b/tests/testSymmetry.c index 73c5708a6add174b88f26cc716a716fa2ad81709..68a878b05c3fc298ef7d0b159df9997d2ef1f82d 100644 --- a/tests/testSymmetry.c +++ b/tests/testSymmetry.c @@ -28,8 +28,10 @@ int main(int argc, char *argv[]) { - /* Choke if need be */ +/* Choke on FPEs */ +#ifdef HAVE_FE_ENABLE_EXCEPT feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif #if defined(SHADOWFAX_SPH) /* Initialize the Voronoi simulation box */