From dfe28004e8e9934d57d153819e051fcc49f2b2fd Mon Sep 17 00:00:00 2001 From: Josh Borrow <joshua.borrow@durham.ac.uk> Date: Thu, 7 Feb 2019 18:33:48 +0000 Subject: [PATCH] ARM Fixes --- configure.ac | 92 ++++++++++++++++--- m4/ax_asm_arm_cntvct.m4 | 43 +++++++++ m4/ax_asm_arm_pmccntr.m4 | 43 +++++++++ m4/ax_ext.m4 | 47 +++++++--- m4/ax_func_posix_memalign.m4 | 8 +- m4/ax_gcc_archflag.m4 | 6 +- src/cycle.h | 3 +- src/dimension.h | 4 +- src/hydro/AnarchyPU/hydro_part.h | 2 +- src/hydro/Default/hydro_part.h | 2 +- src/hydro/Gadget2/hydro_part.h | 2 +- src/hydro/GizmoMFM/hydro_part.h | 2 +- src/hydro/GizmoMFV/hydro_part.h | 2 +- src/hydro/Minimal/hydro_part.h | 2 +- src/hydro/Planetary/hydro_part.h | 2 +- src/hydro/PressureEnergy/hydro_part.h | 2 +- .../hydro_part.h | 2 +- src/hydro/PressureEntropy/hydro_part.h | 2 +- src/hydro/Shadowswift/hydro_part.h | 2 +- src/memswap.h | 14 +++ src/task.c | 9 +- src/timeline.h | 3 +- 22 files changed, 245 insertions(+), 49 deletions(-) create mode 100644 m4/ax_asm_arm_cntvct.m4 create mode 100644 m4/ax_asm_arm_pmccntr.m4 diff --git a/configure.ac b/configure.ac index 58cab4dc2c..dc584f0ec2 100644 --- a/configure.ac +++ b/configure.ac @@ -333,6 +333,9 @@ elif test "$no_gravity_below_id" != "no"; then AC_DEFINE_UNQUOTED([SWIFT_NO_GRAVITY_BELOW_ID], [$enableval] ,[Particles with smaller ID than this will have zero gravity forces]) fi +# Check whether we have any of the ARM v8.1 tick timers +AX_ASM_ARM_PMCCNTR +AX_ASM_ARM_CNTVCT # Define HAVE_POSIX_MEMALIGN if it works. AX_FUNC_POSIX_MEMALIGN @@ -700,6 +703,71 @@ if test "x$with_fftw" != "xno"; then fi fi fi + +AC_ARG_WITH([arm-fftw], + [AS_HELP_STRING([--with-arm-fftw=PATH], + [root directory where arm fft library is installed @<:@yes/no@:>@] + )], + [with_arm_fftw="$withval"], + [with_arm_fftw=no] +) +if test "x$with_arm_fftw" != "xno"; then + + # Was FFTW's location specifically given? + if test "x$with_arm_fftw" != "xyes" -a "x$with_arm_fftw" != "xtest" -a "x$with_arm_fftw" != "x"; then + FFTW_LIBS="-L$with_arm_fftw/lib -larmpl_lp64" + FFTW_INCS="-I$with_arm_fftw/include" + else + FFTW_LIBS="-larmpl_lp64" + FFTW_INCS="" + fi + + # FFTW is not specified, so just check if we have it. + if test "x$with_arm_fftw" = "xtest"; then + AC_CHECK_LIB([armpl_lp64],[fftw_malloc],[have_fftw="yes"],[have_fftw="no"],$FFTW_LIBS) + if test "x$have_arm_fftw" != "xno"; then + AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.]) + have_fftw="yes - ARM" + fi + # FFTW was specified, check that it was a valid location. + else + AC_CHECK_LIB([armpl_lp64],[fftw_malloc], + AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.]), + AC_MSG_ERROR(something is wrong with the FFTW library!), $FFTW_LIBS) + have_fftw="yes - ARM" + fi + + # FFTW was requested not to be used. + if test "$have_arm_fftw" = "no"; then + FFTW_LIBS="" + FFTW_INCS="" + fi + + # Now, check whether we have the threaded version of FFTW + if test "x$have_arm_fftw" = "xyes"; then + + # Was FFTW's location specifically given? + if test "x$with_arm_fftw" != "xyes" -a "x$with_arm_fftw" != "xtest" -a "x$with_arm_fftw" != "x"; then + FFTW_THREADED_LIBS="-L$with_arm_fftw/lib -larmpl_lp64_threads -larmpl_lp64" + FFTW_THREADED_INCS="-I$with_arm_fftw/include" + else + FFTW_THREADED_LIBS="-larmpl_lp64_threads -larmpl_lp64" + FFTW_THREADED_INCS="" + fi + + # Verify that the library is threaded + AC_CHECK_LIB([armpl_lp64],[fftw_init_threads],[have_threaded_fftw="yes"], + [have_threaded_fftw="no"], $FFTW_THREADED_LIBS) + + # If found, update things + if test "x$have_threaded_fftw" = "xyes"; then + AC_DEFINE([HAVE_THREADED_FFTW],1,[The threaded FFTW library appears to be present.]) + FFTW_LIBS=$FFTW_THREADED_LIBS + FFTW_INCS=$FFTW_THREADED_INCS + have_fftw="yes - ARM - threaded" + fi + fi +fi AC_SUBST([FFTW_LIBS]) AC_SUBST([FFTW_INCS]) AM_CONDITIONAL([HAVEFFTW],[test -n "$FFTW_LIBS"]) @@ -1063,8 +1131,16 @@ fi AC_SUBST([NUMA_LIBS]) # Check for Intel and PowerPC intrinsics header optionally used by vector.h. -AC_CHECK_HEADERS([immintrin.h]) -AC_CHECK_HEADERS([altivec.h]) +AC_CHECK_HEADERS([immintrin.h], [], [], +[#ifdef HAVE_IMMINTRIN_H +# include <immintrin.h> +#endif +]) +AC_CHECK_HEADERS([altivec.h], [], [], +[#ifdef HAVE_ALTIVEC_H +# include <altivec.h> +#endif +]) # Check for timing functions needed by cycle.h. AC_HEADER_TIME @@ -1085,17 +1161,7 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM( [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])],[rtc_ok=no]) AC_MSG_RESULT($rtc_ok) -# Special timers for the ARM v7 and ARM v8 platforms (taken from FFTW-3 to match their cycle.h) -AC_ARG_ENABLE(armv8-pmccntr-el0, [AC_HELP_STRING([--enable-armv8-pmccntr-el0],[enable the cycle counter on ARMv8 via the PMCCNTR_EL0 register])], have_armv8pmccntrel0=$enableval) -if test "$have_armv8pmccntrel0"x = "yes"x; then - AC_DEFINE(HAVE_ARMV8_PMCCNTR_EL0,1,[Define if you have enabled the PMCCNTR_EL0 cycle counter on ARMv8]) -fi - -AC_ARG_ENABLE(armv8-cntvct-el0, [AC_HELP_STRING([--enable-armv8-cntvct-el0],[enable the cycle counter on ARMv8 via the CNTVCT_EL0 register])], have_armv8cntvctel0=$enableval) -if test "$have_armv8cntvctel0"x = "yes"x; then - AC_DEFINE(HAVE_ARMV8_CNTVCT_EL0,1,[Define if you have enabled the CNTVCT_EL0 cycle counter on ARMv8]) -fi - +# Special timers for the ARM v7 platforms (taken from FFTW-3 to match their cycle.h) AC_ARG_ENABLE(armv7a-cntvct, [AC_HELP_STRING([--enable-armv7a-cntvct],[enable the cycle counter on Armv7a via the CNTVCT register])], have_armv7acntvct=$enableval) if test "$have_armv7acntvct"x = "yes"x; then AC_DEFINE(HAVE_ARMV7A_CNTVCT,1,[Define if you have enabled the CNTVCT cycle counter on ARMv7a]) diff --git a/m4/ax_asm_arm_cntvct.m4 b/m4/ax_asm_arm_cntvct.m4 new file mode 100644 index 0000000000..9a9c7d799d --- /dev/null +++ b/m4/ax_asm_arm_cntvct.m4 @@ -0,0 +1,43 @@ +# +# SYNOPSIS +# +# AX_ASM_ARM_CNTVCT +# +# DESCRIPTION +# +# Check whether the CNTVCT_EL0 exists on this platform. Defines +# HAVE_ARMV8_CNTVCT_EL0 if true. +# +# LICENSE +# +# Copyright (c) 2019 Matthieu Schaller <schaller@strw.leidenuniv.nl> +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 1 + +AC_DEFUN([AX_ASM_ARM_CNTVCT], +[AC_CACHE_CHECK([for CNTVCT_EL0 asm instruction on ARM v8.1a], + [ax_cv_asm_arm_cntvct_works], + [AC_RUN_IFELSE([AC_LANG_SOURCE([[ +#include <stdint.h> + +int +main() +{ + uint64_t cc = 0; + __asm__ __volatile__("mrs %0, CNTVCT_EL0" : "=r"(cc)); + return 0; +} + ]])], + [ax_cv_asm_arm_cntvct_works=yes], + [ax_cv_asm_arm_cntvct_works=no], + [ax_cv_asm_arm_cntvct_works=no])]) +if test "$ax_cv_asm_arm_cntvct_works" = "yes" ; then + AC_DEFINE([HAVE_ARMV8_CNTVCT_EL0], [1], + [Define to 1 if the ARM v8.1a instruction CNTVCT_EL0 exists.]) +fi +]) diff --git a/m4/ax_asm_arm_pmccntr.m4 b/m4/ax_asm_arm_pmccntr.m4 new file mode 100644 index 0000000000..ded3bbbc04 --- /dev/null +++ b/m4/ax_asm_arm_pmccntr.m4 @@ -0,0 +1,43 @@ +# +# SYNOPSIS +# +# AX_ASM_ARM_PMCCNTR +# +# DESCRIPTION +# +# Check whether the PMCCNTR_EL0 exists on this platform. Defines +# HAVE_ARMV8_PMCCNTR_EL0 if true. +# +# LICENSE +# +# Copyright (c) 2019 Matthieu Schaller <schaller@strw.leidenuniv.nl> +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 1 + +AC_DEFUN([AX_ASM_ARM_PMCCNTR], +[AC_CACHE_CHECK([for PMCCNTR_EL0 asm instruction on ARM v8.1a], + [ax_cv_asm_arm_pmccntr_works], + [AC_RUN_IFELSE([AC_LANG_SOURCE([[ +#include <stdint.h> + +int +main() +{ + uint64_t cc = 0; + __asm__ __volatile__("mrs %0, PMCCNTR_EL0" : "=r"(cc)); + return 0; +} + ]])], + [ax_cv_asm_arm_pmccntr_works=yes], + [ax_cv_asm_arm_pmccntr_works=no], + [ax_cv_asm_arm_pmccntr_works=no])]) +if test "$ax_cv_asm_arm_pmccntr_works" = "yes" ; then + AC_DEFINE([HAVE_ARMV8_PMCCNTR_EL0], [1], + [Define to 1 if the ARM v8.1a instruction PMCCNTR_EL0 exists.]) +fi +]) diff --git a/m4/ax_ext.m4 b/m4/ax_ext.m4 index 8da8c61dec..0db44374f3 100644 --- a/m4/ax_ext.m4 +++ b/m4/ax_ext.m4 @@ -1,5 +1,5 @@ # =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_ext.html +# https://www.gnu.org/software/autoconf-archive/ax_ext.html # =========================================================================== # # SYNOPSIS @@ -31,12 +31,13 @@ # HAVE_SHA / HAVE_AES / HAVE_AVX / HAVE_FMA3 / HAVE_FMA4 / HAVE_XOP # HAVE_AVX2 / HAVE_AVX512_F / HAVE_AVX512_CD / HAVE_AVX512_PF # HAVE_AVX512_ER / HAVE_AVX512_VL / HAVE_AVX512_BW / HAVE_AVX512_DQ -# HAVE_AVX512_IFMA / HAVE_AVX512_VBMI +# HAVE_AVX512_IFMA / HAVE_AVX512_VBMI / HAVE_ALTIVEC / HAVE_VSX # # LICENSE # # Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net> # Copyright (c) 2013,2015 Michael Petch <mpetch@capp-sysware.com> +# Copyright (c) 2017 Rafael de Lucena Valle <rafaeldelucena@gmail.com> # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice @@ -47,7 +48,7 @@ # the order of the flags when more than one is used. Given that we just # set SIMD_FLAGS to the most specific value, rather than all accepted ones. -#serial 15 +#serial 18 AC_DEFUN([AX_EXT], [ @@ -59,19 +60,43 @@ AC_DEFUN([AX_EXT], case $host_cpu in powerpc*) - AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext], + AC_CACHE_CHECK([whether altivec is supported for old distros], [ax_cv_have_altivec_old_ext], [ if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then - ax_cv_have_altivec_ext=yes + ax_cv_have_altivec_old_ext=yes fi fi ]) - if test "$ax_cv_have_altivec_ext" = yes; then + if test "$ax_cv_have_altivec_old_ext" = yes; then AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions]) AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", []) fi + + AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext], + [ + if test `LD_SHOW_AUXV=1 /bin/true 2>/dev/null|grep -c altivec` != 0; then + ax_cv_have_altivec_ext=yes + fi + ]) + + if test "$ax_cv_have_altivec_ext" = yes; then + AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions]) + AX_CHECK_COMPILE_FLAG(-maltivec, SIMD_FLAGS="$SIMD_FLAGS -maltivec", []) + fi + + AC_CACHE_CHECK([whether vsx is supported], [ax_cv_have_vsx_ext], + [ + if test `LD_SHOW_AUXV=1 /bin/true 2>/dev/null|grep -c vsx` != 0; then + ax_cv_have_vsx_ext=yes + fi + ]) + + if test "$ax_cv_have_vsx_ext" = yes; then + AC_DEFINE(HAVE_VSX,,[Support VSX instructions]) + AX_CHECK_COMPILE_FLAG(-mvsx, SIMD_FLAGS="$SIMD_FLAGS -mvsx", []) + fi ;; i[[3456]]86*|x86_64*|amd64*) @@ -139,7 +164,7 @@ AC_DEFUN([AX_EXT], ax_cv_have_sse_os_support_ext=no, if test "$((0x$edx_cpuid1>>25&0x01))" = 1; then AC_LANG_PUSH([C]) - AC_TRY_RUN([ + AC_RUN_IFELSE([AC_LANG_SOURCE([[ #include <signal.h> #include <stdlib.h> /* No way at ring1 to ring3 in protected mode to check the CR0 and CR4 @@ -151,10 +176,10 @@ AC_DEFUN([AX_EXT], /* SSE instruction xorps %xmm0,%xmm0 */ __asm__ __volatile__ (".byte 0x0f, 0x57, 0xc0"); return 0; - }], - ax_cv_have_sse_os_support_ext=yes, - ax_cv_have_sse_os_support_ext=no, - ax_cv_have_sse_os_support_ext=no) + }]])], + [ax_cv_have_sse_os_support_ext=yes], + [ax_cv_have_sse_os_support_ext=no], + [ax_cv_have_sse_os_support_ext=no]) AC_LANG_POP([C]) fi ]) diff --git a/m4/ax_func_posix_memalign.m4 b/m4/ax_func_posix_memalign.m4 index bd60adcbc8..2442ceca74 100644 --- a/m4/ax_func_posix_memalign.m4 +++ b/m4/ax_func_posix_memalign.m4 @@ -1,5 +1,5 @@ # =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_func_posix_memalign.html +# https://www.gnu.org/software/autoconf-archive/ax_func_posix_memalign.html # =========================================================================== # # SYNOPSIS @@ -22,12 +22,12 @@ # and this notice are preserved. This file is offered as-is, without any # warranty. -#serial 7 +#serial 9 AC_DEFUN([AX_FUNC_POSIX_MEMALIGN], [AC_CACHE_CHECK([for working posix_memalign], [ax_cv_func_posix_memalign_works], - [AC_TRY_RUN([ + [AC_RUN_IFELSE([AC_LANG_SOURCE([[ #include <stdlib.h> int @@ -39,7 +39,7 @@ main () * the size word. */ exit (posix_memalign (&buffer, sizeof(void *), 123) != 0); } - ], + ]])], [ax_cv_func_posix_memalign_works=yes], [ax_cv_func_posix_memalign_works=no], [ax_cv_func_posix_memalign_works=no])]) diff --git a/m4/ax_gcc_archflag.m4 b/m4/ax_gcc_archflag.m4 index ef8e7c199d..ec600016ad 100644 --- a/m4/ax_gcc_archflag.m4 +++ b/m4/ax_gcc_archflag.m4 @@ -216,15 +216,15 @@ case $host_cpu in case $cpuimpl in 0x42) case $cpuarch in 8) case $cpuvar in - 0x0) ax_gcc_arch="thunderx2t99 vulcan armv8.1-a armv8-a+lse armv8-a native" ;; + 0x0) ax_gcc_arch="native" ;; esac ;; esac ;; 0x43) case $cpuarch in 8) case $cpuvar in - 0x0) ax_gcc_arch="thunderx armv8-a native" ;; - 0x1) ax_gcc_arch="thunderx+lse armv8.1-a armv8-a+lse armv8-a native" ;; + 0x0) ax_gcc_arch="native" ;; + 0x1) ax_gcc_arch="native" ;; esac ;; esac diff --git a/src/cycle.h b/src/cycle.h index 65fe3bee17..0ba1277893 100644 --- a/src/cycle.h +++ b/src/cycle.h @@ -543,7 +543,8 @@ INLINE_ELAPSED(inline) #define HAVE_TICK_COUNTER #endif -#if defined(__aarch64__) && defined(HAVE_ARMV8_PMCCNTR_EL0) +#if defined(__aarch64__) && defined(HAVE_ARMV8_PMCCNTR_EL0) && \ + !defined(HAVE_TICK_COUNTER) typedef uint64_t ticks; static inline ticks getticks(void) { uint64_t cc = 0; diff --git a/src/dimension.h b/src/dimension.h index 7084d70f57..2ae3a6d8ee 100644 --- a/src/dimension.h +++ b/src/dimension.h @@ -316,7 +316,7 @@ __attribute__((always_inline)) INLINE static vector pow_dimension_vec( #else error("The dimension is not defined !"); - return vec_set(0.f); + return vec_set1(0.f); #endif } @@ -346,7 +346,7 @@ __attribute__((always_inline)) INLINE static vector pow_dimension_plus_one_vec( #else error("The dimension is not defined !"); - return vec_set(0.f); + return vec_set1(0.f); #endif } diff --git a/src/hydro/AnarchyPU/hydro_part.h b/src/hydro/AnarchyPU/hydro_part.h index ae42e00e03..2c5022c262 100644 --- a/src/hydro/AnarchyPU/hydro_part.h +++ b/src/hydro/AnarchyPU/hydro_part.h @@ -201,7 +201,7 @@ struct part { timebin_t time_bin; /* Need waking up ? */ - char wakeup; + timebin_t wakeup; #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h index 7230826dc3..21c0269f78 100644 --- a/src/hydro/Default/hydro_part.h +++ b/src/hydro/Default/hydro_part.h @@ -137,7 +137,7 @@ struct part { timebin_t time_bin; /* Need waking-up ? */ - char wakeup; + timebin_t wakeup; #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/hydro/Gadget2/hydro_part.h b/src/hydro/Gadget2/hydro_part.h index 6b028b0497..3001700395 100644 --- a/src/hydro/Gadget2/hydro_part.h +++ b/src/hydro/Gadget2/hydro_part.h @@ -155,7 +155,7 @@ struct part { timebin_t time_bin; /* Need waking-up ? */ - char wakeup; + timebin_t wakeup; #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/hydro/GizmoMFM/hydro_part.h b/src/hydro/GizmoMFM/hydro_part.h index c4884ebc40..8097b1b256 100644 --- a/src/hydro/GizmoMFM/hydro_part.h +++ b/src/hydro/GizmoMFM/hydro_part.h @@ -196,7 +196,7 @@ struct part { timebin_t time_bin; /* Need waking-up ? */ - char wakeup; + timebin_t wakeup; #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/hydro/GizmoMFV/hydro_part.h b/src/hydro/GizmoMFV/hydro_part.h index c6bbaeb1c8..af83dbd925 100644 --- a/src/hydro/GizmoMFV/hydro_part.h +++ b/src/hydro/GizmoMFV/hydro_part.h @@ -207,7 +207,7 @@ struct part { timebin_t time_bin; /* Need waking-up ? */ - char wakeup; + timebin_t wakeup; #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/hydro/Minimal/hydro_part.h b/src/hydro/Minimal/hydro_part.h index 2068c08b01..7697f36ca7 100644 --- a/src/hydro/Minimal/hydro_part.h +++ b/src/hydro/Minimal/hydro_part.h @@ -177,7 +177,7 @@ struct part { timebin_t time_bin; /* Need waking-up ? */ - char wakeup; + timebin_t wakeup; #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/hydro/Planetary/hydro_part.h b/src/hydro/Planetary/hydro_part.h index 383d803fd9..b2725ca1fc 100644 --- a/src/hydro/Planetary/hydro_part.h +++ b/src/hydro/Planetary/hydro_part.h @@ -182,7 +182,7 @@ struct part { timebin_t time_bin; /* Need waking-up ? */ - char wakeup; + timebin_t wakeup; #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/hydro/PressureEnergy/hydro_part.h b/src/hydro/PressureEnergy/hydro_part.h index 6a2d890049..20c326da44 100644 --- a/src/hydro/PressureEnergy/hydro_part.h +++ b/src/hydro/PressureEnergy/hydro_part.h @@ -177,7 +177,7 @@ struct part { timebin_t time_bin; /* Need waking-up ? */ - char wakeup; + timebin_t wakeup; #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h index b75313b58e..ecd2093845 100644 --- a/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h +++ b/src/hydro/PressureEnergyMorrisMonaghanAV/hydro_part.h @@ -181,7 +181,7 @@ struct part { timebin_t time_bin; /* Need waking-up ? */ - char wakeup; + timebin_t wakeup; #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/hydro/PressureEntropy/hydro_part.h b/src/hydro/PressureEntropy/hydro_part.h index 992ed4654e..6cf5a88a16 100644 --- a/src/hydro/PressureEntropy/hydro_part.h +++ b/src/hydro/PressureEntropy/hydro_part.h @@ -157,7 +157,7 @@ struct part { timebin_t time_bin; /* Need waking-up ? */ - char wakeup; + timebin_t wakeup; #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/hydro/Shadowswift/hydro_part.h b/src/hydro/Shadowswift/hydro_part.h index 91ffaa85e5..d229b5c9d6 100644 --- a/src/hydro/Shadowswift/hydro_part.h +++ b/src/hydro/Shadowswift/hydro_part.h @@ -184,7 +184,7 @@ struct part { timebin_t time_bin; /* Need waking-up ? */ - char wakeup; + timebin_t wakeup; #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/memswap.h b/src/memswap.h index 91d83d2316..330173100f 100644 --- a/src/memswap.h +++ b/src/memswap.h @@ -81,6 +81,13 @@ __attribute__((always_inline)) inline void memswap(void *restrict void_a, swap_loop(int_least32_t, a, b, bytes); swap_loop(int_least16_t, a, b, bytes); swap_loop(int_least8_t, a, b, bytes); + + /* This is a known bug for the current version of clang on ARM. + * We add this synchronization as a temporary bug fix. + * See https://bugs.llvm.org/show_bug.cgi?id=40051 */ +#if defined(__clang__) && defined(__aarch64__) + __sync_synchronize(); +#endif } /** @@ -139,6 +146,13 @@ __attribute__((always_inline)) inline void memswap_unaligned( swap_loop(int_least32_t, a, b, bytes); swap_loop(int_least16_t, a, b, bytes); swap_loop(int_least8_t, a, b, bytes); + + /* This is a known bug for the current version of clang on ARM. + * We add this synchronization as a temporary bug fix. + * See https://bugs.llvm.org/show_bug.cgi?id=40051 */ +#if defined(__clang__) && defined(__aarch64__) + __sync_synchronize(); +#endif } #endif /* SWIFT_MEMSWAP_H */ diff --git a/src/task.c b/src/task.c index ab4184e002..69837548be 100644 --- a/src/task.c +++ b/src/task.c @@ -755,7 +755,8 @@ void task_dump_all(struct engine *e, int step) { /* Add some information to help with the plots and conversion of ticks to * seconds. */ fprintf(file_thread, " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 %lld\n", - engine_rank, e->tic_step, e->toc_step, e->updates, e->g_updates, + engine_rank, (long long int)e->tic_step, + (long long int)e->toc_step, e->updates, e->g_updates, e->s_updates, cpufreq); int count = 0; for (int l = 0; l < e->sched.nr_tasks; l++) { @@ -764,7 +765,8 @@ void task_dump_all(struct engine *e, int step) { file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n", engine_rank, e->sched.tasks[l].rid, e->sched.tasks[l].type, e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL), - e->sched.tasks[l].tic, e->sched.tasks[l].toc, + (long long int)e->sched.tasks[l].tic, + (long long int)e->sched.tasks[l].toc, (e->sched.tasks[l].ci != NULL) ? e->sched.tasks[l].ci->hydro.count : 0, (e->sched.tasks[l].cj != NULL) ? e->sched.tasks[l].cj->hydro.count @@ -803,7 +805,8 @@ void task_dump_all(struct engine *e, int step) { file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n", e->sched.tasks[l].rid, e->sched.tasks[l].type, e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL), - e->sched.tasks[l].tic, e->sched.tasks[l].toc, + (unsigned long long)e->sched.tasks[l].tic, + (unsigned long long)e->sched.tasks[l].toc, (e->sched.tasks[l].ci == NULL) ? 0 : e->sched.tasks[l].ci->hydro.count, (e->sched.tasks[l].cj == NULL) ? 0 diff --git a/src/timeline.h b/src/timeline.h index 09161b3ec6..a2bb8da6e8 100644 --- a/src/timeline.h +++ b/src/timeline.h @@ -27,9 +27,10 @@ #include "intrinsics.h" #include <math.h> +#include <stdint.h> typedef long long integertime_t; -typedef char timebin_t; +typedef int8_t timebin_t; /*! The number of time bins */ #define num_time_bins 56 -- GitLab