Commit dfe28004 authored by Josh Borrow's avatar Josh Borrow Committed by Peter W. Draper
Browse files

ARM Fixes

parent ccb491b7
......@@ -333,6 +333,9 @@ elif test "$no_gravity_below_id" != "no"; then
AC_DEFINE_UNQUOTED([SWIFT_NO_GRAVITY_BELOW_ID], [$enableval] ,[Particles with smaller ID than this will have zero gravity forces])
fi
# Check whether we have any of the ARM v8.1 tick timers
AX_ASM_ARM_PMCCNTR
AX_ASM_ARM_CNTVCT
# Define HAVE_POSIX_MEMALIGN if it works.
AX_FUNC_POSIX_MEMALIGN
......@@ -700,6 +703,71 @@ if test "x$with_fftw" != "xno"; then
fi
fi
fi
AC_ARG_WITH([arm-fftw],
[AS_HELP_STRING([--with-arm-fftw=PATH],
[root directory where arm fft library is installed @<:@yes/no@:>@]
)],
[with_arm_fftw="$withval"],
[with_arm_fftw=no]
)
if test "x$with_arm_fftw" != "xno"; then
# Was FFTW's location specifically given?
if test "x$with_arm_fftw" != "xyes" -a "x$with_arm_fftw" != "xtest" -a "x$with_arm_fftw" != "x"; then
FFTW_LIBS="-L$with_arm_fftw/lib -larmpl_lp64"
FFTW_INCS="-I$with_arm_fftw/include"
else
FFTW_LIBS="-larmpl_lp64"
FFTW_INCS=""
fi
# FFTW is not specified, so just check if we have it.
if test "x$with_arm_fftw" = "xtest"; then
AC_CHECK_LIB([armpl_lp64],[fftw_malloc],[have_fftw="yes"],[have_fftw="no"],$FFTW_LIBS)
if test "x$have_arm_fftw" != "xno"; then
AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.])
have_fftw="yes - ARM"
fi
# FFTW was specified, check that it was a valid location.
else
AC_CHECK_LIB([armpl_lp64],[fftw_malloc],
AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.]),
AC_MSG_ERROR(something is wrong with the FFTW library!), $FFTW_LIBS)
have_fftw="yes - ARM"
fi
# FFTW was requested not to be used.
if test "$have_arm_fftw" = "no"; then
FFTW_LIBS=""
FFTW_INCS=""
fi
# Now, check whether we have the threaded version of FFTW
if test "x$have_arm_fftw" = "xyes"; then
# Was FFTW's location specifically given?
if test "x$with_arm_fftw" != "xyes" -a "x$with_arm_fftw" != "xtest" -a "x$with_arm_fftw" != "x"; then
FFTW_THREADED_LIBS="-L$with_arm_fftw/lib -larmpl_lp64_threads -larmpl_lp64"
FFTW_THREADED_INCS="-I$with_arm_fftw/include"
else
FFTW_THREADED_LIBS="-larmpl_lp64_threads -larmpl_lp64"
FFTW_THREADED_INCS=""
fi
# Verify that the library is threaded
AC_CHECK_LIB([armpl_lp64],[fftw_init_threads],[have_threaded_fftw="yes"],
[have_threaded_fftw="no"], $FFTW_THREADED_LIBS)
# If found, update things
if test "x$have_threaded_fftw" = "xyes"; then
AC_DEFINE([HAVE_THREADED_FFTW],1,[The threaded FFTW library appears to be present.])
FFTW_LIBS=$FFTW_THREADED_LIBS
FFTW_INCS=$FFTW_THREADED_INCS
have_fftw="yes - ARM - threaded"
fi
fi
fi
AC_SUBST([FFTW_LIBS])
AC_SUBST([FFTW_INCS])
AM_CONDITIONAL([HAVEFFTW],[test -n "$FFTW_LIBS"])
......@@ -1063,8 +1131,16 @@ fi
AC_SUBST([NUMA_LIBS])
# Check for Intel and PowerPC intrinsics header optionally used by vector.h.
AC_CHECK_HEADERS([immintrin.h])
AC_CHECK_HEADERS([altivec.h])
AC_CHECK_HEADERS([immintrin.h], [], [],
[#ifdef HAVE_IMMINTRIN_H
# include <immintrin.h>
#endif
])
AC_CHECK_HEADERS([altivec.h], [], [],
[#ifdef HAVE_ALTIVEC_H
# include <altivec.h>
#endif
])
# Check for timing functions needed by cycle.h.
AC_HEADER_TIME
......@@ -1085,17 +1161,7 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM(
[AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])],[rtc_ok=no])
AC_MSG_RESULT($rtc_ok)
# Special timers for the ARM v7 and ARM v8 platforms (taken from FFTW-3 to match their cycle.h)
AC_ARG_ENABLE(armv8-pmccntr-el0, [AC_HELP_STRING([--enable-armv8-pmccntr-el0],[enable the cycle counter on ARMv8 via the PMCCNTR_EL0 register])], have_armv8pmccntrel0=$enableval)
if test "$have_armv8pmccntrel0"x = "yes"x; then
AC_DEFINE(HAVE_ARMV8_PMCCNTR_EL0,1,[Define if you have enabled the PMCCNTR_EL0 cycle counter on ARMv8])
fi
AC_ARG_ENABLE(armv8-cntvct-el0, [AC_HELP_STRING([--enable-armv8-cntvct-el0],[enable the cycle counter on ARMv8 via the CNTVCT_EL0 register])], have_armv8cntvctel0=$enableval)
if test "$have_armv8cntvctel0"x = "yes"x; then
AC_DEFINE(HAVE_ARMV8_CNTVCT_EL0,1,[Define if you have enabled the CNTVCT_EL0 cycle counter on ARMv8])
fi
# Special timers for the ARM v7 platforms (taken from FFTW-3 to match their cycle.h)
AC_ARG_ENABLE(armv7a-cntvct, [AC_HELP_STRING([--enable-armv7a-cntvct],[enable the cycle counter on Armv7a via the CNTVCT register])], have_armv7acntvct=$enableval)
if test "$have_armv7acntvct"x = "yes"x; then
AC_DEFINE(HAVE_ARMV7A_CNTVCT,1,[Define if you have enabled the CNTVCT cycle counter on ARMv7a])
......
#
# SYNOPSIS
#
# AX_ASM_ARM_CNTVCT
#
# DESCRIPTION
#
# Check whether the CNTVCT_EL0 exists on this platform. Defines
# HAVE_ARMV8_CNTVCT_EL0 if true.
#
# LICENSE
#
# Copyright (c) 2019 Matthieu Schaller <schaller@strw.leidenuniv.nl>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 1
AC_DEFUN([AX_ASM_ARM_CNTVCT],
[AC_CACHE_CHECK([for CNTVCT_EL0 asm instruction on ARM v8.1a],
[ax_cv_asm_arm_cntvct_works],
[AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <stdint.h>
int
main()
{
uint64_t cc = 0;
__asm__ __volatile__("mrs %0, CNTVCT_EL0" : "=r"(cc));
return 0;
}
]])],
[ax_cv_asm_arm_cntvct_works=yes],
[ax_cv_asm_arm_cntvct_works=no],
[ax_cv_asm_arm_cntvct_works=no])])
if test "$ax_cv_asm_arm_cntvct_works" = "yes" ; then
AC_DEFINE([HAVE_ARMV8_CNTVCT_EL0], [1],
[Define to 1 if the ARM v8.1a instruction CNTVCT_EL0 exists.])
fi
])
#
# SYNOPSIS
#
# AX_ASM_ARM_PMCCNTR
#
# DESCRIPTION
#
# Check whether the PMCCNTR_EL0 exists on this platform. Defines
# HAVE_ARMV8_PMCCNTR_EL0 if true.
#
# LICENSE
#
# Copyright (c) 2019 Matthieu Schaller <schaller@strw.leidenuniv.nl>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 1
AC_DEFUN([AX_ASM_ARM_PMCCNTR],
[AC_CACHE_CHECK([for PMCCNTR_EL0 asm instruction on ARM v8.1a],
[ax_cv_asm_arm_pmccntr_works],
[AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <stdint.h>
int
main()
{
uint64_t cc = 0;
__asm__ __volatile__("mrs %0, PMCCNTR_EL0" : "=r"(cc));
return 0;
}
]])],
[ax_cv_asm_arm_pmccntr_works=yes],
[ax_cv_asm_arm_pmccntr_works=no],
[ax_cv_asm_arm_pmccntr_works=no])])
if test "$ax_cv_asm_arm_pmccntr_works" = "yes" ; then
AC_DEFINE([HAVE_ARMV8_PMCCNTR_EL0], [1],
[Define to 1 if the ARM v8.1a instruction PMCCNTR_EL0 exists.])
fi
])
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_ext.html
# https://www.gnu.org/software/autoconf-archive/ax_ext.html
# ===========================================================================
#
# SYNOPSIS
......@@ -31,12 +31,13 @@
# HAVE_SHA / HAVE_AES / HAVE_AVX / HAVE_FMA3 / HAVE_FMA4 / HAVE_XOP
# HAVE_AVX2 / HAVE_AVX512_F / HAVE_AVX512_CD / HAVE_AVX512_PF
# HAVE_AVX512_ER / HAVE_AVX512_VL / HAVE_AVX512_BW / HAVE_AVX512_DQ
# HAVE_AVX512_IFMA / HAVE_AVX512_VBMI
# HAVE_AVX512_IFMA / HAVE_AVX512_VBMI / HAVE_ALTIVEC / HAVE_VSX
#
# LICENSE
#
# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
# Copyright (c) 2013,2015 Michael Petch <mpetch@capp-sysware.com>
# Copyright (c) 2017 Rafael de Lucena Valle <rafaeldelucena@gmail.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
......@@ -47,7 +48,7 @@
# the order of the flags when more than one is used. Given that we just
# set SIMD_FLAGS to the most specific value, rather than all accepted ones.
#serial 15
#serial 18
AC_DEFUN([AX_EXT],
[
......@@ -59,19 +60,43 @@ AC_DEFUN([AX_EXT],
case $host_cpu in
powerpc*)
AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
AC_CACHE_CHECK([whether altivec is supported for old distros], [ax_cv_have_altivec_old_ext],
[
if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
ax_cv_have_altivec_ext=yes
ax_cv_have_altivec_old_ext=yes
fi
fi
])
if test "$ax_cv_have_altivec_ext" = yes; then
if test "$ax_cv_have_altivec_old_ext" = yes; then
AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
fi
AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
[
if test `LD_SHOW_AUXV=1 /bin/true 2>/dev/null|grep -c altivec` != 0; then
ax_cv_have_altivec_ext=yes
fi
])
if test "$ax_cv_have_altivec_ext" = yes; then
AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
AX_CHECK_COMPILE_FLAG(-maltivec, SIMD_FLAGS="$SIMD_FLAGS -maltivec", [])
fi
AC_CACHE_CHECK([whether vsx is supported], [ax_cv_have_vsx_ext],
[
if test `LD_SHOW_AUXV=1 /bin/true 2>/dev/null|grep -c vsx` != 0; then
ax_cv_have_vsx_ext=yes
fi
])
if test "$ax_cv_have_vsx_ext" = yes; then
AC_DEFINE(HAVE_VSX,,[Support VSX instructions])
AX_CHECK_COMPILE_FLAG(-mvsx, SIMD_FLAGS="$SIMD_FLAGS -mvsx", [])
fi
;;
i[[3456]]86*|x86_64*|amd64*)
......@@ -139,7 +164,7 @@ AC_DEFUN([AX_EXT],
ax_cv_have_sse_os_support_ext=no,
if test "$((0x$edx_cpuid1>>25&0x01))" = 1; then
AC_LANG_PUSH([C])
AC_TRY_RUN([
AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <signal.h>
#include <stdlib.h>
/* No way at ring1 to ring3 in protected mode to check the CR0 and CR4
......@@ -151,10 +176,10 @@ AC_DEFUN([AX_EXT],
/* SSE instruction xorps %xmm0,%xmm0 */
__asm__ __volatile__ (".byte 0x0f, 0x57, 0xc0");
return 0;
}],
ax_cv_have_sse_os_support_ext=yes,
ax_cv_have_sse_os_support_ext=no,
ax_cv_have_sse_os_support_ext=no)
}]])],
[ax_cv_have_sse_os_support_ext=yes],
[ax_cv_have_sse_os_support_ext=no],
[ax_cv_have_sse_os_support_ext=no])
AC_LANG_POP([C])
fi
])
......
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_func_posix_memalign.html
# https://www.gnu.org/software/autoconf-archive/ax_func_posix_memalign.html
# ===========================================================================
#
# SYNOPSIS
......@@ -22,12 +22,12 @@
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 7
#serial 9
AC_DEFUN([AX_FUNC_POSIX_MEMALIGN],
[AC_CACHE_CHECK([for working posix_memalign],
[ax_cv_func_posix_memalign_works],
[AC_TRY_RUN([
[AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <stdlib.h>
int
......@@ -39,7 +39,7 @@ main ()
* the size word. */
exit (posix_memalign (&buffer, sizeof(void *), 123) != 0);
}
],
]])],
[ax_cv_func_posix_memalign_works=yes],
[ax_cv_func_posix_memalign_works=no],
[ax_cv_func_posix_memalign_works=no])])
......
......@@ -216,15 +216,15 @@ case $host_cpu in
case $cpuimpl in
0x42) case $cpuarch in
8) case $cpuvar in
0x0) ax_gcc_arch="thunderx2t99 vulcan armv8.1-a armv8-a+lse armv8-a native" ;;
0x0) ax_gcc_arch="native" ;;
esac
;;
esac
;;
0x43) case $cpuarch in
8) case $cpuvar in
0x0) ax_gcc_arch="thunderx armv8-a native" ;;
0x1) ax_gcc_arch="thunderx+lse armv8.1-a armv8-a+lse armv8-a native" ;;
0x0) ax_gcc_arch="native" ;;
0x1) ax_gcc_arch="native" ;;
esac
;;
esac
......
......@@ -543,7 +543,8 @@ INLINE_ELAPSED(inline)
#define HAVE_TICK_COUNTER
#endif
#if defined(__aarch64__) && defined(HAVE_ARMV8_PMCCNTR_EL0)
#if defined(__aarch64__) && defined(HAVE_ARMV8_PMCCNTR_EL0) && \
!defined(HAVE_TICK_COUNTER)
typedef uint64_t ticks;
static inline ticks getticks(void) {
uint64_t cc = 0;
......
......@@ -316,7 +316,7 @@ __attribute__((always_inline)) INLINE static vector pow_dimension_vec(
#else
error("The dimension is not defined !");
return vec_set(0.f);
return vec_set1(0.f);
#endif
}
......@@ -346,7 +346,7 @@ __attribute__((always_inline)) INLINE static vector pow_dimension_plus_one_vec(
#else
error("The dimension is not defined !");
return vec_set(0.f);
return vec_set1(0.f);
#endif
}
......
......@@ -201,7 +201,7 @@ struct part {
timebin_t time_bin;
/* Need waking up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -137,7 +137,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -155,7 +155,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -196,7 +196,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -207,7 +207,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -177,7 +177,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -182,7 +182,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -177,7 +177,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -181,7 +181,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -157,7 +157,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -184,7 +184,7 @@ struct part {
timebin_t time_bin;
/* Need waking-up ? */
char wakeup;
timebin_t wakeup;
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -81,6 +81,13 @@ __attribute__((always_inline)) inline void memswap(void *restrict void_a,
swap_loop(int_least32_t, a, b, bytes);
swap_loop(int_least16_t, a, b, bytes);
swap_loop(int_least8_t, a, b, bytes);
/* This is a known bug for the current version of clang on ARM.
* We add this synchronization as a temporary bug fix.
* See https://bugs.llvm.org/show_bug.cgi?id=40051 */
#if defined(__clang__) && defined(__aarch64__)
__sync_synchronize();
#endif
}
/**
......@@ -139,6 +146,13 @@ __attribute__((always_inline)) inline void memswap_unaligned(
swap_loop(int_least32_t, a, b, bytes);
swap_loop(int_least16_t, a, b, bytes);
swap_loop(int_least8_t, a, b, bytes);
/* This is a known bug for the current version of clang on ARM.
* We add this synchronization as a temporary bug fix.
* See https://bugs.llvm.org/show_bug.cgi?id=40051 */
#if defined(__clang__) && defined(__aarch64__)
__sync_synchronize();
#endif
}
#endif /* SWIFT_MEMSWAP_H */
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment