diff --git a/configure.ac b/configure.ac
index 58cab4dc2ceb140cce5d1ae54ca8fbe45ea3f828..dc584f0ec28040ff7cea54284b6630265b3f414b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -333,6 +333,9 @@ elif test "$no_gravity_below_id" != "no"; then
    AC_DEFINE_UNQUOTED([SWIFT_NO_GRAVITY_BELOW_ID], [$enableval] ,[Particles with smaller ID than this will have zero gravity forces])
 fi
 
+# Check whether we have any of the ARM v8.1 tick timers
+AX_ASM_ARM_PMCCNTR
+AX_ASM_ARM_CNTVCT
 
 # Define HAVE_POSIX_MEMALIGN if it works.
 AX_FUNC_POSIX_MEMALIGN
@@ -700,6 +703,71 @@ if test "x$with_fftw" != "xno"; then
       fi
    fi
 fi
+
+AC_ARG_WITH([arm-fftw],
+    [AS_HELP_STRING([--with-arm-fftw=PATH],
+      [root directory where arm fft library is installed @<:@yes/no@:>@]
+    )],
+    [with_arm_fftw="$withval"],
+    [with_arm_fftw=no]
+)
+if test "x$with_arm_fftw" != "xno"; then
+
+   # Was FFTW's location specifically given?
+   if test "x$with_arm_fftw" != "xyes" -a "x$with_arm_fftw" != "xtest" -a "x$with_arm_fftw" != "x"; then
+      FFTW_LIBS="-L$with_arm_fftw/lib -larmpl_lp64"
+      FFTW_INCS="-I$with_arm_fftw/include"
+   else
+      FFTW_LIBS="-larmpl_lp64"
+      FFTW_INCS=""
+   fi
+
+   # FFTW is not specified, so just check if we have it (result in have_arm_fftw).
+   if test "x$with_arm_fftw" = "xtest"; then
+      AC_CHECK_LIB([armpl_lp64],[fftw_malloc],[have_arm_fftw="yes"],[have_arm_fftw="no"; have_fftw="no"],$FFTW_LIBS)
+      if test "x$have_arm_fftw" = "xyes"; then
+         AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.])
+         have_fftw="yes - ARM"
+      fi
+   # FFTW was specified, check that it was a valid location (abort otherwise).
+   else
+      AC_CHECK_LIB([armpl_lp64],[fftw_malloc],[have_arm_fftw="yes"],
+         [AC_MSG_ERROR([something is wrong with the FFTW library!])], $FFTW_LIBS)
+      AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.])
+      have_fftw="yes - ARM"
+   fi
+
+   # The ARM FFTW library was not found, so do not use it.
+   if test "$have_arm_fftw" = "no"; then
+      FFTW_LIBS=""
+      FFTW_INCS=""
+   fi
+
+   # Now, check whether we have the threaded version of FFTW
+   if test "x$have_arm_fftw" = "xyes"; then
+
+      # Was FFTW's location specifically given?
+      if test "x$with_arm_fftw" != "xyes" -a "x$with_arm_fftw" != "xtest" -a "x$with_arm_fftw" != "x"; then
+         FFTW_THREADED_LIBS="-L$with_arm_fftw/lib -larmpl_lp64_threads -larmpl_lp64"
+         FFTW_THREADED_INCS="-I$with_arm_fftw/include"
+      else
+         FFTW_THREADED_LIBS="-larmpl_lp64_threads -larmpl_lp64"
+         FFTW_THREADED_INCS=""
+      fi
+
+      # Verify that the library is threaded
+      AC_CHECK_LIB([armpl_lp64],[fftw_init_threads],[have_threaded_fftw="yes"],
+         [have_threaded_fftw="no"], $FFTW_THREADED_LIBS)
+
+      # If found, update things
+      if test "x$have_threaded_fftw" = "xyes"; then
+         AC_DEFINE([HAVE_THREADED_FFTW],1,[The threaded FFTW library appears to be present.])
+         FFTW_LIBS=$FFTW_THREADED_LIBS
+         FFTW_INCS=$FFTW_THREADED_INCS
+         have_fftw="yes - ARM - threaded"
+      fi
+   fi
+fi
 AC_SUBST([FFTW_LIBS])
 AC_SUBST([FFTW_INCS])
 AM_CONDITIONAL([HAVEFFTW],[test -n "$FFTW_LIBS"])
@@ -1063,8 +1131,16 @@ fi
 AC_SUBST([NUMA_LIBS])
 
 # Check for Intel and PowerPC intrinsics header optionally used by vector.h.
-AC_CHECK_HEADERS([immintrin.h])
-AC_CHECK_HEADERS([altivec.h])
+AC_CHECK_HEADERS([immintrin.h], [], [],
+[#ifdef HAVE_IMMINTRIN_H
+# include <immintrin.h>
+#endif
+])
+AC_CHECK_HEADERS([altivec.h], [], [],
+[#ifdef HAVE_ALTIVEC_H
+# include <altivec.h>
+#endif
+])
 
 # Check for timing functions needed by cycle.h.
 AC_HEADER_TIME
@@ -1085,17 +1161,7 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM(
 [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])],[rtc_ok=no])
 AC_MSG_RESULT($rtc_ok)
 
-# Special timers for the ARM v7 and ARM v8 platforms (taken from FFTW-3 to match their cycle.h)
-AC_ARG_ENABLE(armv8-pmccntr-el0, [AC_HELP_STRING([--enable-armv8-pmccntr-el0],[enable the cycle counter on ARMv8 via the PMCCNTR_EL0 register])], have_armv8pmccntrel0=$enableval)
-if test "$have_armv8pmccntrel0"x = "yes"x; then
-   AC_DEFINE(HAVE_ARMV8_PMCCNTR_EL0,1,[Define if you have enabled the PMCCNTR_EL0 cycle counter on ARMv8])
-fi
-
-AC_ARG_ENABLE(armv8-cntvct-el0, [AC_HELP_STRING([--enable-armv8-cntvct-el0],[enable the cycle counter on ARMv8 via the CNTVCT_EL0 register])], have_armv8cntvctel0=$enableval)
-if test "$have_armv8cntvctel0"x = "yes"x; then
-   AC_DEFINE(HAVE_ARMV8_CNTVCT_EL0,1,[Define if you have enabled the CNTVCT_EL0 cycle counter on ARMv8])
-fi
-
+# Special timers for the ARM v7 platforms (taken from FFTW-3 to match their cycle.h)
 AC_ARG_ENABLE(armv7a-cntvct, [AC_HELP_STRING([--enable-armv7a-cntvct],[enable the cycle counter on Armv7a via the CNTVCT register])], have_armv7acntvct=$enableval)
 if test "$have_armv7acntvct"x = "yes"x; then
    AC_DEFINE(HAVE_ARMV7A_CNTVCT,1,[Define if you have enabled the CNTVCT cycle counter on ARMv7a])
diff --git a/m4/ax_asm_arm_cntvct.m4 b/m4/ax_asm_arm_cntvct.m4
new file mode 100644
index 0000000000000000000000000000000000000000..9a9c7d799d46ab1654a804dc3ee09ce5b616df2c
--- /dev/null
+++ b/m4/ax_asm_arm_cntvct.m4
@@ -0,0 +1,45 @@
+#
+# SYNOPSIS
+#
+#   AX_ASM_ARM_CNTVCT
+#
+# DESCRIPTION
+#
+#   Check whether the CNTVCT_EL0 register can be read on this platform by
+#   compiling and running a program that executes "mrs %0, CNTVCT_EL0".
+#   Defines HAVE_ARMV8_CNTVCT_EL0 if that program runs successfully. The
+#   check reports "no" when cross-compiling, as the program cannot be run.
+#
+# LICENSE
+#
+#   Copyright (c) 2019 Matthieu Schaller <schaller@strw.leidenuniv.nl>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved. This file is offered as-is, without any
+#   warranty.
+
+#serial 1
+
+AC_DEFUN([AX_ASM_ARM_CNTVCT],
+[AC_CACHE_CHECK([for CNTVCT_EL0 asm instruction on ARM v8.1a],
+  [ax_cv_asm_arm_cntvct_works],
+  [AC_RUN_IFELSE([AC_LANG_SOURCE([[
+#include <stdint.h>
+
+int
+main(void)
+{
+  uint64_t cc = 0;
+  __asm__ __volatile__("mrs %0, CNTVCT_EL0" : "=r"(cc));
+  return 0;
+}
+    ]])],
+    [ax_cv_asm_arm_cntvct_works=yes],
+    [ax_cv_asm_arm_cntvct_works=no],
+    [ax_cv_asm_arm_cntvct_works=no])])
+if test "$ax_cv_asm_arm_cntvct_works" = "yes" ; then
+  AC_DEFINE([HAVE_ARMV8_CNTVCT_EL0], [1],
+    [Define to 1 if the ARM v8.1a instruction CNTVCT_EL0 exists.])
+fi
+])
diff --git a/m4/ax_asm_arm_pmccntr.m4 b/m4/ax_asm_arm_pmccntr.m4
new file mode 100644
index 0000000000000000000000000000000000000000..ded3bbbc04a5270acb8045d8375ccd2e5986ecd2
--- /dev/null
+++ b/m4/ax_asm_arm_pmccntr.m4
@@ -0,0 +1,45 @@
+#
+# SYNOPSIS
+#
+#   AX_ASM_ARM_PMCCNTR
+#
+# DESCRIPTION
+#
+#   Check whether the PMCCNTR_EL0 register can be read on this platform by
+#   compiling and running a program that executes "mrs %0, PMCCNTR_EL0".
+#   Defines HAVE_ARMV8_PMCCNTR_EL0 if that program runs successfully. The
+#   check reports "no" when cross-compiling, as the program cannot be run.
+#
+# LICENSE
+#
+#   Copyright (c) 2019 Matthieu Schaller <schaller@strw.leidenuniv.nl>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved. This file is offered as-is, without any
+#   warranty.
+
+#serial 1
+
+AC_DEFUN([AX_ASM_ARM_PMCCNTR],
+[AC_CACHE_CHECK([for PMCCNTR_EL0 asm instruction on ARM v8.1a],
+  [ax_cv_asm_arm_pmccntr_works],
+  [AC_RUN_IFELSE([AC_LANG_SOURCE([[
+#include <stdint.h>
+
+int
+main(void)
+{
+  uint64_t cc = 0;
+  __asm__ __volatile__("mrs %0, PMCCNTR_EL0" : "=r"(cc));
+  return 0;
+}
+    ]])],
+    [ax_cv_asm_arm_pmccntr_works=yes],
+    [ax_cv_asm_arm_pmccntr_works=no],
+    [ax_cv_asm_arm_pmccntr_works=no])])
+if test "$ax_cv_asm_arm_pmccntr_works" = "yes" ; then
+  AC_DEFINE([HAVE_ARMV8_PMCCNTR_EL0], [1],
+    [Define to 1 if the ARM v8.1a instruction PMCCNTR_EL0 exists.])
+fi
+])
diff --git a/m4/ax_ext.m4 b/m4/ax_ext.m4
index 8da8c61decc9aa35737fb977def82d51ade5ef0c..0db44374f35e4605d6ca3da14ba22de0663cb87a 100644
--- a/m4/ax_ext.m4
+++ b/m4/ax_ext.m4
@@ -1,5 +1,5 @@
 # ===========================================================================
-#          http://www.gnu.org/software/autoconf-archive/ax_ext.html
+#         https://www.gnu.org/software/autoconf-archive/ax_ext.html
 # ===========================================================================
 #
 # SYNOPSIS
@@ -31,12 +31,13 @@
 #   HAVE_SHA / HAVE_AES / HAVE_AVX / HAVE_FMA3 / HAVE_FMA4 / HAVE_XOP
 #   HAVE_AVX2 / HAVE_AVX512_F / HAVE_AVX512_CD / HAVE_AVX512_PF
 #   HAVE_AVX512_ER / HAVE_AVX512_VL / HAVE_AVX512_BW / HAVE_AVX512_DQ
-#   HAVE_AVX512_IFMA / HAVE_AVX512_VBMI
+#   HAVE_AVX512_IFMA / HAVE_AVX512_VBMI / HAVE_ALTIVEC / HAVE_VSX
 #
 # LICENSE
 #
 #   Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
 #   Copyright (c) 2013,2015 Michael Petch <mpetch@capp-sysware.com>
+#   Copyright (c) 2017 Rafael de Lucena Valle <rafaeldelucena@gmail.com>
 #
 #   Copying and distribution of this file, with or without modification, are
 #   permitted in any medium without royalty provided the copyright notice
@@ -47,7 +48,7 @@
 # the order of the flags when more than one is used. Given that we just
 # set SIMD_FLAGS to the most specific value, rather than all accepted ones.
 
-#serial 15
+#serial 18
 
 AC_DEFUN([AX_EXT],
 [
@@ -59,19 +60,43 @@ AC_DEFUN([AX_EXT],
 
   case $host_cpu in
     powerpc*)
-      AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
+      AC_CACHE_CHECK([whether altivec is supported for old distros], [ax_cv_have_altivec_old_ext],
           [
             if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
                 if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
-                  ax_cv_have_altivec_ext=yes
+                  ax_cv_have_altivec_old_ext=yes
                 fi
             fi
           ])
 
-          if test "$ax_cv_have_altivec_ext" = yes; then
+          if test "$ax_cv_have_altivec_old_ext" = yes; then
             AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
             AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
           fi
+
+      AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
+          [
+            if test `LD_SHOW_AUXV=1 /bin/true 2>/dev/null|grep -c altivec` != 0; then
+              ax_cv_have_altivec_ext=yes
+            fi
+          ])
+
+          if test "$ax_cv_have_altivec_ext" = yes; then
+            AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
+            AX_CHECK_COMPILE_FLAG(-maltivec, SIMD_FLAGS="$SIMD_FLAGS -maltivec", [])
+          fi
+
+      AC_CACHE_CHECK([whether vsx is supported], [ax_cv_have_vsx_ext],
+          [
+            if test `LD_SHOW_AUXV=1 /bin/true 2>/dev/null|grep -c vsx` != 0; then
+              ax_cv_have_vsx_ext=yes
+            fi
+          ])
+
+          if test "$ax_cv_have_vsx_ext" = yes; then
+            AC_DEFINE(HAVE_VSX,,[Support VSX instructions])
+            AX_CHECK_COMPILE_FLAG(-mvsx, SIMD_FLAGS="$SIMD_FLAGS -mvsx", [])
+          fi
     ;;
 
     i[[3456]]86*|x86_64*|amd64*)
@@ -139,7 +164,7 @@ AC_DEFUN([AX_EXT],
             ax_cv_have_sse_os_support_ext=no,
             if test "$((0x$edx_cpuid1>>25&0x01))" = 1; then
               AC_LANG_PUSH([C])
-              AC_TRY_RUN([
+              AC_RUN_IFELSE([AC_LANG_SOURCE([[
 #include <signal.h>
 #include <stdlib.h>
                 /* No way at ring1 to ring3 in protected mode to check the CR0 and CR4
@@ -151,10 +176,10 @@ AC_DEFUN([AX_EXT],
                   /* SSE instruction xorps  %xmm0,%xmm0 */
                   __asm__ __volatile__ (".byte 0x0f, 0x57, 0xc0");
                   return 0;
-               }],
-               ax_cv_have_sse_os_support_ext=yes,
-               ax_cv_have_sse_os_support_ext=no,
-               ax_cv_have_sse_os_support_ext=no)
+               }]])],
+               [ax_cv_have_sse_os_support_ext=yes],
+               [ax_cv_have_sse_os_support_ext=no],
+               [ax_cv_have_sse_os_support_ext=no])
               AC_LANG_POP([C])
             fi
         ])
diff --git a/m4/ax_func_posix_memalign.m4 b/m4/ax_func_posix_memalign.m4
index bd60adcbc81a5ce5c9e68f71081e6872e5139b0a..2442ceca74c3e40ceaffb2859336527964b22b52 100644
--- a/m4/ax_func_posix_memalign.m4
+++ b/m4/ax_func_posix_memalign.m4
@@ -1,5 +1,5 @@
 # ===========================================================================
-#  http://www.gnu.org/software/autoconf-archive/ax_func_posix_memalign.html
+# https://www.gnu.org/software/autoconf-archive/ax_func_posix_memalign.html
 # ===========================================================================
 #
 # SYNOPSIS
@@ -22,12 +22,12 @@
 #   and this notice are preserved. This file is offered as-is, without any
 #   warranty.
 
-#serial 7
+#serial 9
 
 AC_DEFUN([AX_FUNC_POSIX_MEMALIGN],
 [AC_CACHE_CHECK([for working posix_memalign],
   [ax_cv_func_posix_memalign_works],
-  [AC_TRY_RUN([
+  [AC_RUN_IFELSE([AC_LANG_SOURCE([[
 #include <stdlib.h>
 
 int
@@ -39,7 +39,7 @@ main ()
    * the size word. */
   exit (posix_memalign (&buffer, sizeof(void *), 123) != 0);
 }
-    ],
+    ]])],
     [ax_cv_func_posix_memalign_works=yes],
     [ax_cv_func_posix_memalign_works=no],
     [ax_cv_func_posix_memalign_works=no])])
diff --git a/m4/ax_gcc_archflag.m4 b/m4/ax_gcc_archflag.m4
index ef8e7c199da1622354a029ec142386b7f1f9e442..ec600016ad3bd7a6afea4ab36c434a91172de9af 100644
--- a/m4/ax_gcc_archflag.m4
+++ b/m4/ax_gcc_archflag.m4
@@ -216,15 +216,15 @@ case $host_cpu in
     case $cpuimpl in
       0x42) case $cpuarch in
                8) case $cpuvar in
-                    0x0) ax_gcc_arch="thunderx2t99 vulcan armv8.1-a armv8-a+lse armv8-a native" ;;
+                    0x0) ax_gcc_arch="native" ;;
                   esac
                   ;;
             esac
             ;;
       0x43) case $cpuarch in
                8) case $cpuvar in
-                    0x0) ax_gcc_arch="thunderx armv8-a native" ;;
-                    0x1) ax_gcc_arch="thunderx+lse armv8.1-a armv8-a+lse armv8-a native" ;;
+                    0x0) ax_gcc_arch="native" ;;
+                    0x1) ax_gcc_arch="native" ;;
                   esac
                   ;;
             esac
diff --git a/src/cycle.h b/src/cycle.h
index 65fe3bee17173dd1efdc98f5d90f8cc4fe51f007..0ba1277893fe7be7d8b62cdcb9a8873d5709eb60 100644
--- a/src/cycle.h
+++ b/src/cycle.h
@@ -543,7 +543,8 @@ INLINE_ELAPSED(inline)
 #define HAVE_TICK_COUNTER
 #endif
 
-#if defined(__aarch64__) && defined(HAVE_ARMV8_PMCCNTR_EL0)
+#if defined(__aarch64__) && defined(HAVE_ARMV8_PMCCNTR_EL0) && \
+    !defined(HAVE_TICK_COUNTER)
 typedef uint64_t ticks;
 static inline ticks getticks(void) {
   uint64_t cc = 0;
diff --git a/src/dimension.h b/src/dimension.h
index 7084d70f5794853557539862091809071af2e790..2ae3a6d8ee9cac0b8bd3241d5e7b45f8f62dc92a 100644
--- a/src/dimension.h
+++ b/src/dimension.h
@@ -316,7 +316,7 @@ __attribute__((always_inline)) INLINE static vector pow_dimension_vec(
 #else
 
   error("The dimension is not defined !");
-  return vec_set(0.f);
+  return vec_set1(0.f);
 
 #endif
 }
@@ -346,7 +346,7 @@ __attribute__((always_inline)) INLINE static vector pow_dimension_plus_one_vec(
 #else
 
   error("The dimension is not defined !");
-  return vec_set(0.f);
+  return vec_set1(0.f);
 
 #endif
 }
diff --git a/src/hydro/AnarchyPU/hydro_part.h b/src/hydro/AnarchyPU/hydro_part.h
index ae42e00e03a3bca621fc4b7a61ae2aa8b2f3c922..2c5022c262587c50f577970e1d1891a42b70491b 100644
--- a/src/hydro/AnarchyPU/hydro_part.h
+++ b/src/hydro/AnarchyPU/hydro_part.h
@@ -201,7 +201,7 @@ struct part {
   timebin_t time_bin;
 
   /* Need waking up ? */
-  char wakeup;
+  timebin_t wakeup;
 
 #ifdef SWIFT_DEBUG_CHECKS
 
diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h
index 7230826dc3c7c2a3486001ca9060dd07d55d0931..21c0269f78c85b7d11ab5e838d45614161aee013 100644
--- a/src/hydro/Default/hydro_part.h
+++ b/src/hydro/Default/hydro_part.h
@@ -137,7 +137,7 @@ struct part {
   timebin_t time_bin;
 
   /* Need waking-up ? */
-  char wakeup;
+  timebin_t wakeup;
 
 #ifdef SWIFT_DEBUG_CHECKS
 
diff --git a/src/hydro/Gadget2/hydro_part.h b/src/hydro/Gadget2/hydro_part.h
index 6b028b0497584ca65bd236c4a507b55356e7c260..3001700395b8584981c0087c8ff402a953461213 100644
--- a/src/hydro/Gadget2/hydro_part.h
+++ b/src/hydro/Gadget2/hydro_part.h
@@ -155,7 +155,7 @@ struct part {
   timebin_t time_bin;
 
   /* Need waking-up ? */
-  char wakeup;
+  timebin_t wakeup;
 
 #ifdef SWIFT_DEBUG_CHECKS
 
diff --git a/src/hydro/GizmoMFM/hydro_part.h b/src/hydro/GizmoMFM/hydro_part.h
index c4884ebc40a18de765aed193a028c92465ad63ad..8097b1b2560f24f78636bbb855700054524fe0bb 100644
--- a/src/hydro/GizmoMFM/hydro_part.h
+++ b/src/hydro/GizmoMFM/hydro_part.h
@@ -196,7 +196,7 @@ struct part {
   timebin_t time_bin;
 
   /* Need waking-up ? */
-  char wakeup;
+  timebin_t wakeup;
 
 #ifdef SWIFT_DEBUG_CHECKS
 
diff --git a/src/hydro/GizmoMFV/hydro_part.h b/src/hydro/GizmoMFV/hydro_part.h
index c6bbaeb1c8e256081f7300c041dfa3cd996fbf18..af83dbd92590a30aa401e1cc5626554633058b1f 100644
--- a/src/hydro/GizmoMFV/hydro_part.h
+++ b/src/hydro/GizmoMFV/hydro_part.h
@@ -207,7 +207,7 @@ struct part {
   timebin_t time_bin;
 
   /* Need waking-up ? */
-  char wakeup;
+  timebin_t wakeup;
 
 #ifdef SWIFT_DEBUG_CHECKS
 
diff --git a/src/hydro/Minimal/hydro_part.h b/src/hydro/Minimal/hydro_part.h
index 2068c08b01fcbe57f6a2439770ca85e6e0e06b3a..7697f36ca723875a8b77705eefcf2e2af4605583 100644
--- a/src/hydro/Minimal/hydro_part.h
+++ b/src/hydro/Minimal/hydro_part.h
@@ -177,7 +177,7 @@ struct part {
   timebin_t time_bin;
 
   /* Need waking-up ? */
-  char wakeup;
+  timebin_t wakeup;
 
 #ifdef SWIFT_DEBUG_CHECKS
 
diff --git a/src/hydro/Planetary/hydro_part.h b/src/hydro/Planetary/hydro_part.h
index 383d803fd9baebbaba702d788239cc8d684254bc..b2725ca1fceddb196a6b2be42b768eb3f88f1101 100644
--- a/src/hydro/Planetary/hydro_part.h
+++ b/src/hydro/Planetary/hydro_part.h
@@ -182,7 +182,7 @@ struct part {
   timebin_t time_bin;
 
   /* Need waking-up ? */
-  char wakeup;
+  timebin_t wakeup;
 
 #ifdef SWIFT_DEBUG_CHECKS
 
diff --git a/src/hydro/PressureEnergy/hydro_part.h b/src/hydro/PressureEnergy/hydro_part.h
index 6a2d890049bef5fe7040e7908a48df304beddb64..20c326da443e4acd1c3bdc0ebd01cce81bb6bad7 100644
--- a/src/hydro/PressureEnergy/hydro_part.h
+++ b/src/hydro/PressureEnergy/hydro_part.h
@@ -177,7 +177,7 @@ struct part {
   timebin_t time_bin;
 
   /* Need waking-up ? */
-  char wakeup;
+  timebin_t wakeup;
 
 #ifdef SWIFT_DEBUG_CHECKS
 
diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h
index b75313b58e01260fa267d4b5bab29815a7329ce7..ecd20938456b04004ed2299fbe1de0c1b8bb50d6 100644
--- a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h
+++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h
@@ -181,7 +181,7 @@ struct part {
   timebin_t time_bin;
 
   /* Need waking-up ? */
-  char wakeup;
+  timebin_t wakeup;
 
 #ifdef SWIFT_DEBUG_CHECKS
 
diff --git a/src/hydro/PressureEntropy/hydro_part.h b/src/hydro/PressureEntropy/hydro_part.h
index 992ed4654e63fe42e1f45c04ed8bfe876cb89e45..6cf5a88a167c529ad81737f1e206ba475f6bbc0e 100644
--- a/src/hydro/PressureEntropy/hydro_part.h
+++ b/src/hydro/PressureEntropy/hydro_part.h
@@ -157,7 +157,7 @@ struct part {
   timebin_t time_bin;
 
   /* Need waking-up ? */
-  char wakeup;
+  timebin_t wakeup;
 
 #ifdef SWIFT_DEBUG_CHECKS
 
diff --git a/src/hydro/Shadowswift/hydro_part.h b/src/hydro/Shadowswift/hydro_part.h
index 91ffaa85e5e6e80e7db577ce09363265f73e7f4c..d229b5c9d63ecfa855397f74f8a52a4117cefc03 100644
--- a/src/hydro/Shadowswift/hydro_part.h
+++ b/src/hydro/Shadowswift/hydro_part.h
@@ -184,7 +184,7 @@ struct part {
   timebin_t time_bin;
 
   /* Need waking-up ? */
-  char wakeup;
+  timebin_t wakeup;
 
 #ifdef SWIFT_DEBUG_CHECKS
 
diff --git a/src/memswap.h b/src/memswap.h
index 91d83d231692a2b8f540c3e6b9334bc89e1ee87b..330173100f41b80fcc65c9fce01838b5de8e778f 100644
--- a/src/memswap.h
+++ b/src/memswap.h
@@ -81,6 +81,13 @@ __attribute__((always_inline)) inline void memswap(void *restrict void_a,
   swap_loop(int_least32_t, a, b, bytes);
   swap_loop(int_least16_t, a, b, bytes);
   swap_loop(int_least8_t, a, b, bytes);
+
+  /* Work around a known bug in the current version of clang on ARM
+   * (aarch64): add a full memory barrier after the swap as a temporary fix.
+   * See https://bugs.llvm.org/show_bug.cgi?id=40051 */
+#if defined(__clang__) && defined(__aarch64__)
+  __sync_synchronize();
+#endif
 }
 
 /**
@@ -139,6 +146,13 @@ __attribute__((always_inline)) inline void memswap_unaligned(
   swap_loop(int_least32_t, a, b, bytes);
   swap_loop(int_least16_t, a, b, bytes);
   swap_loop(int_least8_t, a, b, bytes);
+
+  /* Work around a known bug in the current version of clang on ARM
+   * (aarch64): add a full memory barrier after the swap as a temporary fix.
+   * See https://bugs.llvm.org/show_bug.cgi?id=40051 */
+#if defined(__clang__) && defined(__aarch64__)
+  __sync_synchronize();
+#endif
 }
 
 #endif /* SWIFT_MEMSWAP_H */
diff --git a/src/task.c b/src/task.c
index ab4184e002294d5aee39f138936f710236c771a1..69837548be104f33891f5bef97f8f5c499b62812 100644
--- a/src/task.c
+++ b/src/task.c
@@ -755,7 +755,8 @@ void task_dump_all(struct engine *e, int step) {
       /* Add some information to help with the plots and conversion of ticks to
        * seconds. */
       fprintf(file_thread, " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 %lld\n",
-              engine_rank, e->tic_step, e->toc_step, e->updates, e->g_updates,
+              engine_rank, (long long int)e->tic_step,
+              (long long int)e->toc_step, e->updates, e->g_updates,
               e->s_updates, cpufreq);
       int count = 0;
       for (int l = 0; l < e->sched.nr_tasks; l++) {
@@ -764,7 +765,8 @@ void task_dump_all(struct engine *e, int step) {
               file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n",
               engine_rank, e->sched.tasks[l].rid, e->sched.tasks[l].type,
               e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL),
-              e->sched.tasks[l].tic, e->sched.tasks[l].toc,
+              (long long int)e->sched.tasks[l].tic,
+              (long long int)e->sched.tasks[l].toc,
               (e->sched.tasks[l].ci != NULL) ? e->sched.tasks[l].ci->hydro.count
                                              : 0,
               (e->sched.tasks[l].cj != NULL) ? e->sched.tasks[l].cj->hydro.count
@@ -803,7 +805,8 @@ void task_dump_all(struct engine *e, int step) {
           file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n",
           e->sched.tasks[l].rid, e->sched.tasks[l].type,
           e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL),
-          e->sched.tasks[l].tic, e->sched.tasks[l].toc,
+          (long long int)e->sched.tasks[l].tic,
+          (long long int)e->sched.tasks[l].toc,
           (e->sched.tasks[l].ci == NULL) ? 0
                                          : e->sched.tasks[l].ci->hydro.count,
           (e->sched.tasks[l].cj == NULL) ? 0
diff --git a/src/timeline.h b/src/timeline.h
index 09161b3ec68e9f7c19bf812103384257473f5719..a2bb8da6e8c5c92288541c206b453af141bf094e 100644
--- a/src/timeline.h
+++ b/src/timeline.h
@@ -27,9 +27,10 @@
 #include "intrinsics.h"
 
 #include <math.h>
+#include <stdint.h>
 
 typedef long long integertime_t;
-typedef char timebin_t;
+typedef int8_t timebin_t;
 
 /*! The number of time bins */
 #define num_time_bins 56