Skip to content
Snippets Groups Projects
Commit 51a40a61 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

add memswap_unaligned for unaligned parameters, have to use explicit unaligned...

Add memswap_unaligned for unaligned parameters. It has to use explicit unaligned load/store intrinsics; it is not clear whether this works for AltiVec.
parent 35a0ac82
No related branches found
No related tags found
1 merge request: !294 "Buffered cell_split"
......@@ -79,4 +79,63 @@ __attribute__((always_inline)) inline void memswap(void *void_a, void *void_b,
swap_loop(char, a, b, bytes);
}
/**
 * @brief Alignment-agnostic chunked swap used by #memswap_unaligned.
 *
 * Swaps @c count bytes in chunks of @c sizeof(type). Each chunk is moved
 * through local temporaries with fixed-size copies instead of dereferencing
 * the pointers as @c type, since converting a byte pointer to a wider
 * pointer type and dereferencing it is undefined when the address is not
 * suitably aligned for that type. The compiler builtin is used so that no
 * additional header is needed; compilers lower these fixed-size copies to
 * plain (unaligned) loads and stores where the target supports them.
 *
 * @param type Type whose size determines the chunk width.
 * @param a Byte pointer to the first buffer, advanced past swapped data.
 * @param b Byte pointer to the second buffer, advanced past swapped data.
 * @param count Remaining number of bytes, decremented as data is swapped.
 */
#define swap_loop_unaligned(type, a, b, count)  \
  while (count >= sizeof(type)) {               \
    type temp_a, temp_b;                        \
    __builtin_memcpy(&temp_a, a, sizeof(type)); \
    __builtin_memcpy(&temp_b, b, sizeof(type)); \
    __builtin_memcpy(a, &temp_b, sizeof(type)); \
    __builtin_memcpy(b, &temp_a, sizeof(type)); \
    a += sizeof(type);                          \
    b += sizeof(type);                          \
    count -= sizeof(type);                      \
  }

/**
 * @brief Swap the contents of two elements in-place.
 *
 * As opposed to #memswap, this function does not require the parameters
 * to be aligned in any specific way: the x86 paths use the explicit
 * unaligned load/store intrinsics, and all other paths copy fixed-size
 * chunks through local temporaries rather than dereferencing wider
 * pointer types.
 * Furthermore, register re-labeling only seems to work when the code is
 * compiled with @c -funroll-loops.
 *
 * The widest available chunk size is tried first; each following loop only
 * handles what the previous ones left over, down to single bytes.
 *
 * @param void_a Pointer to the first element.
 * @param void_b Pointer to the second element.
 * @param bytes Size, in bytes, of the data pointed to by @c a and @c b.
 */
__attribute__((always_inline)) inline void memswap_unaligned(void *void_a,
                                                             void *void_b,
                                                             size_t bytes) {
  char *a = (char *)void_a, *b = (char *)void_b;
#ifdef __AVX512F__
  /* 64-byte chunks using unaligned AVX-512 loads/stores. */
  while (bytes >= sizeof(__m512i)) {
    register __m512i temp;
    temp = _mm512_loadu_si512((__m512i *)a);
    _mm512_storeu_si512((__m512i *)a, _mm512_loadu_si512((__m512i *)b));
    _mm512_storeu_si512((__m512i *)b, temp);
    a += sizeof(__m512i);
    b += sizeof(__m512i);
    bytes -= sizeof(__m512i);
  }
#endif
#ifdef __AVX__
  /* 32-byte chunks using unaligned AVX loads/stores. */
  while (bytes >= sizeof(__m256i)) {
    register __m256i temp;
    temp = _mm256_loadu_si256((__m256i *)a);
    _mm256_storeu_si256((__m256i *)a, _mm256_loadu_si256((__m256i *)b));
    _mm256_storeu_si256((__m256i *)b, temp);
    a += sizeof(__m256i);
    b += sizeof(__m256i);
    bytes -= sizeof(__m256i);
  }
#endif
#ifdef __SSE2__
  /* 16-byte chunks using unaligned SSE2 loads/stores. */
  while (bytes >= sizeof(__m128i)) {
    register __m128i temp;
    temp = _mm_loadu_si128((__m128i *)a);
    _mm_storeu_si128((__m128i *)a, _mm_loadu_si128((__m128i *)b));
    _mm_storeu_si128((__m128i *)b, temp);
    a += sizeof(__m128i);
    b += sizeof(__m128i);
    bytes -= sizeof(__m128i);
  }
#endif
#ifdef __ALTIVEC__
  /* 16-byte chunks; copied through temporaries because a plain dereference
     of a vector pointer assumes 16-byte alignment. */
  swap_loop_unaligned(vector int, a, b, bytes);
#endif
  /* Scalar remainder, from word-sized chunks down to single bytes. */
  swap_loop_unaligned(size_t, a, b, bytes);
  swap_loop_unaligned(int, a, b, bytes);
  swap_loop_unaligned(short, a, b, bytes);
  swap_loop_unaligned(char, a, b, bytes);
}

#undef swap_loop_unaligned
#endif /* SWIFT_MEMSWAP_H */
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment