Commit de224973 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'arm_fixes' into 'master'

ARM Fixes

See merge request !734

Few changes for ARM ThunderX2 and ARM-clang:


Do not assume char is signed. Use int8_t for the time-bin .
Improve the detection of the ARMv8.1a cycle counter assembly instruction.
Fix cycle.h for ARMv8.1a. Will push upstream to FFTW.
Add a synchronization point in the memswap() for clang on ARM.
Do not let the autotools choose the flags for clang and GCC on ARM 8.1a. Use the native instructions.
Upgrade to some of the autotools macros.
parents ccb491b7 dfe28004
......@@ -333,6 +333,9 @@ elif test "$no_gravity_below_id" != "no"; then
AC_DEFINE_UNQUOTED([SWIFT_NO_GRAVITY_BELOW_ID], [$enableval] ,[Particles with smaller ID than this will have zero gravity forces])
fi
# Check whether we have any of the ARM v8.1 tick timers
AX_ASM_ARM_PMCCNTR
AX_ASM_ARM_CNTVCT
# Define HAVE_POSIX_MEMALIGN if it works.
AX_FUNC_POSIX_MEMALIGN
......@@ -700,6 +703,71 @@ if test "x$with_fftw" != "xno"; then
fi
fi
fi
AC_ARG_WITH([arm-fftw],
[AS_HELP_STRING([--with-arm-fftw=PATH],
[root directory where arm fft library is installed @<:@yes/no@:>@]
)],
[with_arm_fftw="$withval"],
[with_arm_fftw=no]
)
if test "x$with_arm_fftw" != "xno"; then
# Was FFTW's location specifically given?
if test "x$with_arm_fftw" != "xyes" -a "x$with_arm_fftw" != "xtest" -a "x$with_arm_fftw" != "x"; then
FFTW_LIBS="-L$with_arm_fftw/lib -larmpl_lp64"
FFTW_INCS="-I$with_arm_fftw/include"
else
FFTW_LIBS="-larmpl_lp64"
FFTW_INCS=""
fi
# FFTW is not specified, so just check if we have it.
if test "x$with_arm_fftw" = "xtest"; then
AC_CHECK_LIB([armpl_lp64],[fftw_malloc],[have_fftw="yes"],[have_fftw="no"],$FFTW_LIBS)
if test "x$have_arm_fftw" != "xno"; then
AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.])
have_fftw="yes - ARM"
fi
# FFTW was specified, check that it was a valid location.
else
AC_CHECK_LIB([armpl_lp64],[fftw_malloc],
AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.]),
AC_MSG_ERROR(something is wrong with the FFTW library!), $FFTW_LIBS)
have_fftw="yes - ARM"
fi
# FFTW was requested not to be used.
if test "$have_arm_fftw" = "no"; then
FFTW_LIBS=""
FFTW_INCS=""
fi
# Now, check whether we have the threaded version of FFTW
if test "x$have_arm_fftw" = "xyes"; then
# Was FFTW's location specifically given?
if test "x$with_arm_fftw" != "xyes" -a "x$with_arm_fftw" != "xtest" -a "x$with_arm_fftw" != "x"; then
FFTW_THREADED_LIBS="-L$with_arm_fftw/lib -larmpl_lp64_threads -larmpl_lp64"
FFTW_THREADED_INCS="-I$with_arm_fftw/include"
else
FFTW_THREADED_LIBS="-larmpl_lp64_threads -larmpl_lp64"
FFTW_THREADED_INCS=""
fi
# Verify that the library is threaded
AC_CHECK_LIB([armpl_lp64],[fftw_init_threads],[have_threaded_fftw="yes"],
[have_threaded_fftw="no"], $FFTW_THREADED_LIBS)
# If found, update things
if test "x$have_threaded_fftw" = "xyes"; then
AC_DEFINE([HAVE_THREADED_FFTW],1,[The threaded FFTW library appears to be present.])
FFTW_LIBS=$FFTW_THREADED_LIBS
FFTW_INCS=$FFTW_THREADED_INCS
have_fftw="yes - ARM - threaded"
fi
fi
fi
AC_SUBST([FFTW_LIBS])
AC_SUBST([FFTW_INCS])
AM_CONDITIONAL([HAVEFFTW],[test -n "$FFTW_LIBS"])
......@@ -1063,8 +1131,16 @@ fi
AC_SUBST([NUMA_LIBS])
# Check for Intel and PowerPC intrinsics header optionally used by vector.h.
AC_CHECK_HEADERS([immintrin.h])
AC_CHECK_HEADERS([altivec.h])
AC_CHECK_HEADERS([immintrin.h], [], [],
[#ifdef HAVE_IMMINTRIN_H
# include <immintrin.h>
#endif
])
AC_CHECK_HEADERS([altivec.h], [], [],
[#ifdef HAVE_ALTIVEC_H
# include <altivec.h>
#endif
])
# Check for timing functions needed by cycle.h.
AC_HEADER_TIME
......@@ -1085,17 +1161,7 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM(
[AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])],[rtc_ok=no])
AC_MSG_RESULT($rtc_ok)
# Special timers for the ARM v7 and ARM v8 platforms (taken from FFTW-3 to match their cycle.h)
AC_ARG_ENABLE(armv8-pmccntr-el0, [AC_HELP_STRING([--enable-armv8-pmccntr-el0],[enable the cycle counter on ARMv8 via the PMCCNTR_EL0 register])], have_armv8pmccntrel0=$enableval)
if test "$have_armv8pmccntrel0"x = "yes"x; then
AC_DEFINE(HAVE_ARMV8_PMCCNTR_EL0,1,[Define if you have enabled the PMCCNTR_EL0 cycle counter on ARMv8])
fi
AC_ARG_ENABLE(armv8-cntvct-el0, [AC_HELP_STRING([--enable-armv8-cntvct-el0],[enable the cycle counter on ARMv8 via the CNTVCT_EL0 register])], have_armv8cntvctel0=$enableval)
if test "$have_armv8cntvctel0"x = "yes"x; then
AC_DEFINE(HAVE_ARMV8_CNTVCT_EL0,1,[Define if you have enabled the CNTVCT_EL0 cycle counter on ARMv8])
fi
# Special timers for the ARM v7 platforms (taken from FFTW-3 to match their cycle.h)
AC_ARG_ENABLE(armv7a-cntvct, [AC_HELP_STRING([--enable-armv7a-cntvct],[enable the cycle counter on Armv7a via the CNTVCT register])], have_armv7acntvct=$enableval)
if test "$have_armv7acntvct"x = "yes"x; then
AC_DEFINE(HAVE_ARMV7A_CNTVCT,1,[Define if you have enabled the CNTVCT cycle counter on ARMv7a])
......
#
# SYNOPSIS
#
# AX_ASM_ARM_CNTVCT
#
# DESCRIPTION
#
# Check whether the CNTVCT_EL0 exists on this platform. Defines
# HAVE_ARMV8_CNTVCT_EL0 if true.
#
# LICENSE
#
# Copyright (c) 2019 Matthieu Schaller <schaller@strw.leidenuniv.nl>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 1
AC_DEFUN([AX_ASM_ARM_CNTVCT],
[AC_CACHE_CHECK([for CNTVCT_EL0 asm instruction on ARM v8.1a],
[ax_cv_asm_arm_cntvct_works],
[AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <stdint.h>
int
main()
{
uint64_t cc = 0;
__asm__ __volatile__("mrs %0, CNTVCT_EL0" : "=r"(cc));
return 0;
}
]])],
[ax_cv_asm_arm_cntvct_works=yes],
[ax_cv_asm_arm_cntvct_works=no],
[ax_cv_asm_arm_cntvct_works=no])])
if test "$ax_cv_asm_arm_cntvct_works" = "yes" ; then
AC_DEFINE([HAVE_ARMV8_CNTVCT_EL0], [1],
[Define to 1 if the ARM v8.1a instruction CNTVCT_EL0 exists.])
fi
])
#
# SYNOPSIS
#
# AX_ASM_ARM_PMCCNTR
#
# DESCRIPTION
#
# Check whether the PMCCNTR_EL0 exists on this platform. Defines
# HAVE_ARMV8_PMCCNTR_EL0 if true.
#
# LICENSE
#
# Copyright (c) 2019 Matthieu Schaller <schaller@strw.leidenuniv.nl>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 1
AC_DEFUN([AX_ASM_ARM_PMCCNTR],
[AC_CACHE_CHECK([for PMCCNTR_EL0 asm instruction on ARM v8.1a],
[ax_cv_asm_arm_pmccntr_works],
[AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <stdint.h>
int
main()
{
uint64_t cc = 0;
__asm__ __volatile__("mrs %0, PMCCNTR_EL0" : "=r"(cc));
return 0;
}
]])],
[ax_cv_asm_arm_pmccntr_works=yes],
[ax_cv_asm_arm_pmccntr_works=no],
[ax_cv_asm_arm_pmccntr_works=no])])
if test "$ax_cv_asm_arm_pmccntr_works" = "yes" ; then
AC_DEFINE([HAVE_ARMV8_PMCCNTR_EL0], [1],
[Define to 1 if the ARM v8.1a instruction PMCCNTR_EL0 exists.])
fi
])
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_ext.html
# https://www.gnu.org/software/autoconf-archive/ax_ext.html
# ===========================================================================
#
# SYNOPSIS
......@@ -31,12 +31,13 @@
# HAVE_SHA / HAVE_AES / HAVE_AVX / HAVE_FMA3 / HAVE_FMA4 / HAVE_XOP
# HAVE_AVX2 / HAVE_AVX512_F / HAVE_AVX512_CD / HAVE_AVX512_PF
# HAVE_AVX512_ER / HAVE_AVX512_VL / HAVE_AVX512_BW / HAVE_AVX512_DQ
# HAVE_AVX512_IFMA / HAVE_AVX512_VBMI
# HAVE_AVX512_IFMA / HAVE_AVX512_VBMI / HAVE_ALTIVEC / HAVE_VSX
#
# LICENSE
#
# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
# Copyright (c) 2013,2015 Michael Petch <mpetch@capp-sysware.com>
# Copyright (c) 2017 Rafael de Lucena Valle <rafaeldelucena@gmail.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
......@@ -47,7 +48,7 @@
# the order of the flags when more than one is used. Given that we just
# set SIMD_FLAGS to the most specific value, rather than all accepted ones.
#serial 15
#serial 18
AC_DEFUN([AX_EXT],
[
......@@ -59,19 +60,43 @@ AC_DEFUN([AX_EXT],
case $host_cpu in
powerpc*)
AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
AC_CACHE_CHECK([whether altivec is supported for old distros], [ax_cv_have_altivec_old_ext],
[
if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
ax_cv_have_altivec_ext=yes
ax_cv_have_altivec_old_ext=yes
fi
fi
])
if test "$ax_cv_have_altivec_ext" = yes; then
if test "$ax_cv_have_altivec_old_ext" = yes; then
AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
fi
AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
[
if test `LD_SHOW_AUXV=1 /bin/true 2>/dev/null|grep -c altivec` != 0; then
ax_cv_have_altivec_ext=yes
fi
])
if test "$ax_cv_have_altivec_ext" = yes; then
AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
AX_CHECK_COMPILE_FLAG(-maltivec, SIMD_FLAGS="$SIMD_FLAGS -maltivec", [])
fi
AC_CACHE_CHECK([whether vsx is supported], [ax_cv_have_vsx_ext],
[
if test `LD_SHOW_AUXV=1 /bin/true 2>/dev/null|grep -c vsx` != 0; then
ax_cv_have_vsx_ext=yes
fi
])
if test "$ax_cv_have_vsx_ext" = yes; then
AC_DEFINE(HAVE_VSX,,[Support VSX instructions])
AX_CHECK_COMPILE_FLAG(-mvsx, SIMD_FLAGS="$SIMD_FLAGS -mvsx", [])
fi
;;
i[[3456]]86*|x86_64*|amd64*)
......@@ -139,7 +164,7 @@ AC_DEFUN([AX_EXT],
ax_cv_have_sse_os_support_ext=no,
if test "$((0x$edx_cpuid1>>25&0x01))" = 1; then
AC_LANG_PUSH([C])
AC_TRY_RUN([
AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <signal.h>
#include <stdlib.h>
/* No way at ring1 to ring3 in protected mode to check the CR0 and CR4
......@@ -151,10 +176,10 @@ AC_DEFUN([AX_EXT],
/* SSE instruction xorps %xmm0,%xmm0 */
__asm__ __volatile__ (".byte 0x0f, 0x57, 0xc0");
return 0;
}],
ax_cv_have_sse_os_support_ext=yes,
ax_cv_have_sse_os_support_ext=no,
ax_cv_have_sse_os_support_ext=no)
}]])],
[ax_cv_have_sse_os_support_ext=yes],
[ax_cv_have_sse_os_support_ext=no],
[ax_cv_have_sse_os_support_ext=no])
AC_LANG_POP([C])
fi
])
......
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_func_posix_memalign.html
# https://www.gnu.org/software/autoconf-archive/ax_func_posix_memalign.html
# ===========================================================================
#
# SYNOPSIS
......@@ -22,12 +22,12 @@
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 7
#serial 9
AC_DEFUN([AX_FUNC_POSIX_MEMALIGN],
[AC_CACHE_CHECK([for working posix_memalign],
[ax_cv_func_posix_memalign_works],
[AC_TRY_RUN([
[AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <stdlib.h>
int
......@@ -39,7 +39,7 @@ main ()
* the size word. */
exit (posix_memalign (&buffer, sizeof(void *), 123) != 0);
}
],
]])],
[ax_cv_func_posix_memalign_works=yes],
[ax_cv_func_posix_memalign_works=no],
[ax_cv_func_posix_memalign_works=no])])
......
......@@ -216,15 +216,15 @@ case $host_cpu in
case $cpuimpl in
0x42) case $cpuarch in
8) case $cpuvar in
0x0) ax_gcc_arch="thunderx2t99 vulcan armv8.1-a armv8-a+lse armv8-a native" ;;
0x0) ax_gcc_arch="native" ;;
esac
;;
esac
;;
0x43) case $cpuarch in
8) case $cpuvar in
0x0) ax_gcc_arch="thunderx armv8-a native" ;;
0x1) ax_gcc_arch="thunderx+lse armv8.1-a armv8-a+lse armv8-a native" ;;
0x0) ax_gcc_arch="native" ;;
0x1) ax_gcc_arch="native" ;;
esac
;;
esac
......
......@@ -543,7 +543,8 @@ INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif
#if defined(__aarch64__) && defined(HAVE_ARMV8_PMCCNTR_EL0)
#if defined(__aarch64__) && defined(HAVE_ARMV8_PMCCNTR_EL0) && \
!defined(HAVE_TICK_COUNTER)
typedef uint64_t ticks;
static inline ticks getticks(void) {
uint64_t cc = 0;
......
......@@ -316,7 +316,7 @@ __attribute__((always_inline)) INLINE static vector pow_dimension_vec(
#else
error("The dimension is not defined !");
return vec_set(0.f);
return vec_set1(0.f);
#endif
}
......@@ -346,7 +346,7 @@ __attribute__((always_inline)) INLINE static vector pow_dimension_plus_one_vec(
#else
error("The dimension is not defined !");
return vec_set(0.f);
return vec_set1(0.f);
#endif
}
......
......@@ -201,7 +201,7 @@ struct part {
timebin_t time_bin;
/* Need waking up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -137,7 +137,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -155,7 +155,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -196,7 +196,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -207,7 +207,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -177,7 +177,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -182,7 +182,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -177,7 +177,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -181,7 +181,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -157,7 +157,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -184,7 +184,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -81,6 +81,13 @@ __attribute__((always_inline)) inline void memswap(void *restrict void_a,
swap_loop(int_least32_t, a, b, bytes);
swap_loop(int_least16_t, a, b, bytes);
swap_loop(int_least8_t, a, b, bytes);
/* This is a known bug for the current version of clang on ARM.
* We add this synchronization as a temporary bug fix.
* See https://bugs.llvm.org/show_bug.cgi?id=40051 */
#if defined(__clang__) && defined(__aarch64__)
__sync_synchronize();
#endif
}
/**
......@@ -139,6 +146,13 @@ __attribute__((always_inline)) inline void memswap_unaligned(
swap_loop(int_least32_t, a, b, bytes);
swap_loop(int_least16_t, a, b, bytes);
swap_loop(int_least8_t, a, b, bytes);
/* This is a known bug for the current version of clang on ARM.
* We add this synchronization as a temporary bug fix.
* See https://bugs.llvm.org/show_bug.cgi?id=40051 */
#if defined(__clang__) && defined(__aarch64__)
__sync_synchronize();
#endif
}
#endif /* SWIFT_MEMSWAP_H */
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment