Skip to content
Snippets Groups Projects
Commit e6517e6d authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'memswap_fix' into 'master'

Modern C approach to memswap after we found issues on ARM architectures with…

See merge request !695
parents 726d3264 afd1f91d
No related branches found
No related tags found
1 merge request: !695 "Modern C approach to memswap after we found issues on ARM architectures with…"
/******************************************************************************* /*******************************************************************************
* This file is part of SWIFT. * This file is part of SWIFT.
* Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk) * Copyright (c) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
* * 2018 STFC (author email aidan.chalk@stfc.ac.uk)
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published * it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or * by the Free Software Foundation, either version 3 of the License, or
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
/* Config parameters. */ /* Config parameters. */
#include "../config.h" #include "../config.h"
#include <stdint.h>
#ifdef HAVE_IMMINTRIN_H #ifdef HAVE_IMMINTRIN_H
/* Include the header file with the intrinsics for Intel architecture. */ /* Include the header file with the intrinsics for Intel architecture. */
...@@ -33,7 +34,7 @@ ...@@ -33,7 +34,7 @@
#endif #endif
/* Macro for in-place swap of two values a and b of type t. a and b are /* Macro for in-place swap of two values a and b of type t. a and b are
assumed to be of type char* so that the pointer arithmetic works. */ assumed to be of type int8_t* so that the pointer arithmetic works. */
#define swap_loop(type, a, b, count) \ #define swap_loop(type, a, b, count) \
while (count >= sizeof(type)) { \ while (count >= sizeof(type)) { \
register type temp = *(type *)a; \ register type temp = *(type *)a; \
...@@ -60,9 +61,9 @@ ...@@ -60,9 +61,9 @@
* @param void_b Pointer to the second element. * @param void_b Pointer to the second element.
* @param bytes Size, in bytes, of the data pointed to by @c a and @c b. * @param bytes Size, in bytes, of the data pointed to by @c a and @c b.
*/ */
__attribute__((always_inline)) inline void memswap(void *void_a, void *void_b, __attribute__((always_inline)) inline void memswap(void *restrict void_a, void *restrict void_b,
size_t bytes) { size_t bytes) {
char *a = (char *)void_a, *b = (char *)void_b; int8_t *restrict a = (int8_t *)void_a, *restrict b = (int8_t *)void_b;
#if defined(__AVX512F__) && defined(__INTEL_COMPILER) #if defined(__AVX512F__) && defined(__INTEL_COMPILER)
swap_loop(__m512i, a, b, bytes); swap_loop(__m512i, a, b, bytes);
#endif #endif
...@@ -75,10 +76,10 @@ __attribute__((always_inline)) inline void memswap(void *void_a, void *void_b, ...@@ -75,10 +76,10 @@ __attribute__((always_inline)) inline void memswap(void *void_a, void *void_b,
#ifdef __ALTIVEC__ #ifdef __ALTIVEC__
swap_loop(vector int, a, b, bytes); swap_loop(vector int, a, b, bytes);
#endif #endif
swap_loop(size_t, a, b, bytes); swap_loop(int_least64_t, a, b, bytes);
swap_loop(int, a, b, bytes); swap_loop(int_least32_t, a, b, bytes);
swap_loop(short, a, b, bytes); swap_loop(int_least16_t, a, b, bytes);
swap_loop(char, a, b, bytes); swap_loop(int_least8_t, a, b, bytes);
} }
/** /**
...@@ -93,10 +94,10 @@ __attribute__((always_inline)) inline void memswap(void *void_a, void *void_b, ...@@ -93,10 +94,10 @@ __attribute__((always_inline)) inline void memswap(void *void_a, void *void_b,
* @param void_b Pointer to the second element. * @param void_b Pointer to the second element.
* @param bytes Size, in bytes, of the data pointed to by @c a and @c b. * @param bytes Size, in bytes, of the data pointed to by @c a and @c b.
*/ */
__attribute__((always_inline)) inline void memswap_unaligned(void *void_a, __attribute__((always_inline)) inline void memswap_unaligned(void *restrict void_a,
void *void_b, void *restrict void_b,
size_t bytes) { size_t bytes) {
char *a = (char *)void_a, *b = (char *)void_b; int8_t *restrict a = (int8_t *)void_a, *restrict b = (int8_t *)void_b;
#ifdef __AVX512F__ #ifdef __AVX512F__
while (bytes >= sizeof(__m512i)) { while (bytes >= sizeof(__m512i)) {
register __m512i temp; register __m512i temp;
...@@ -134,10 +135,10 @@ __attribute__((always_inline)) inline void memswap_unaligned(void *void_a, ...@@ -134,10 +135,10 @@ __attribute__((always_inline)) inline void memswap_unaligned(void *void_a,
// Power8 supports unaligned load/stores, but not sure what it will do here. // Power8 supports unaligned load/stores, but not sure what it will do here.
swap_loop(vector int, a, b, bytes); swap_loop(vector int, a, b, bytes);
#endif #endif
swap_loop(size_t, a, b, bytes); swap_loop(int_least64_t, a, b, bytes);
swap_loop(int, a, b, bytes); swap_loop(int_least32_t, a, b, bytes);
swap_loop(short, a, b, bytes); swap_loop(int_least16_t, a, b, bytes);
swap_loop(char, a, b, bytes); swap_loop(int_least8_t, a, b, bytes);
} }
#endif /* SWIFT_MEMSWAP_H */ #endif /* SWIFT_MEMSWAP_H */
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment