diff --git a/src/Random123/array.h b/src/Random123/array.h deleted file mode 100644 index ab85392d8d6868ff631a8ce74737db7d37aee84f..0000000000000000000000000000000000000000 --- a/src/Random123/array.h +++ /dev/null @@ -1,326 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ -#ifndef _r123array_dot_h__ -#define _r123array_dot_h__ -#include "features/compilerfeatures.h" -#include "features/sse.h" - -#ifndef __cplusplus -#define CXXMETHODS(_N, W, T) -#define CXXOVERLOADS(_N, W, T) -#else - -#include <stddef.h> -#include <algorithm> -#include <stdexcept> -#include <iterator> -#include <limits> -#include <iostream> - -/** @defgroup arrayNxW The r123arrayNxW classes - - Each of the r123arrayNxW is a fixed size array of N W-bit unsigned integers. - It is functionally equivalent to the C++0x std::array<N, uintW_t>, - but does not require C++0x features or libraries. - - In addition to meeting most of the requirements of a Container, - it also has a member function, incr(), which increments the zero-th - element and carrys overflows into higher indexed elements. Thus, - by using incr(), sequences of up to 2^(N*W) distinct values - can be produced. - - If SSE is supported by the compiler, then the class - r123array1xm128i is also defined, in which the data member is an - array of one r123128i object. - - @cond HIDDEN_FROM_DOXYGEN -*/ - -template <typename value_type> -inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){ - value_type v=0; - for(size_t i=0; i<(3+sizeof(value_type))/4; ++i) - v |= ((value_type)(*p32++)) << (32*i); - return v; -} - -// Work-alike methods and typedefs modeled on std::array: -#define CXXMETHODS(_N, W, T) \ - typedef T value_type; \ - typedef T* iterator; \ - typedef const T* const_iterator; \ - typedef value_type& reference; \ - typedef const value_type& const_reference; \ - typedef size_t size_type; \ - typedef ptrdiff_t difference_type; \ - typedef T* pointer; \ - typedef const T* const_pointer; \ - typedef std::reverse_iterator<iterator> reverse_iterator; \ - typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \ - /* Boost.array has static_size. 
C++11 specializes tuple_size */ \ - enum {static_size = _N}; \ - R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} \ - R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \ - R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \ - R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \ - R123_CUDA_DEVICE size_type size() const { return _N; } \ - R123_CUDA_DEVICE size_type max_size() const { return _N; } \ - R123_CUDA_DEVICE bool empty() const { return _N==0; }; \ - R123_CUDA_DEVICE iterator begin() { return &v[0]; } \ - R123_CUDA_DEVICE iterator end() { return &v[_N]; } \ - R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \ - R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \ - R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } \ - R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \ - R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } \ - R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \ - R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } \ - R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \ - R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \ - R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); } \ - R123_CUDA_DEVICE pointer data(){ return &v[0]; } \ - R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \ - R123_CUDA_DEVICE reference front(){ return v[0]; } \ - R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \ - R123_CUDA_DEVICE reference back(){ return v[_N-1]; } \ - R123_CUDA_DEVICE const_reference back() const{ return 
v[_N-1]; } \ - R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \ - /* CUDA3 does not have std::equal */ \ - for (size_t i = 0; i < _N; ++i) \ - if (v[i] != rhs.v[i]) return false; \ - return true; \ - } \ - R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \ - /* CUDA3 does not have std::fill_n */ \ - R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \ - R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \ - /* CUDA3 does not have std::swap_ranges */ \ - for (size_t i = 0; i < _N; ++i) { \ - T tmp = v[i]; \ - v[i] = rhs.v[i]; \ - rhs.v[i] = tmp; \ - } \ - } \ - R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){ \ - /* This test is tricky because we're trying to avoid spurious \ - complaints about illegal shifts, yet still be compile-time \ - evaulated. */ \ - if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \ - return incr_carefully(n); \ - if(n==1){ \ - ++v[0]; \ - if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this; \ - }else{ \ - v[0] += n; \ - if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this; \ - } \ - /* We expect that the N==?? tests will be \ - constant-folded/optimized away by the compiler, so only the \ - overflow tests (!!v[i]) remain to be done at runtime. For \ - small values of N, it would be better to do this as an \ - uncondtional sequence of adc. An experiment/optimization \ - for another day... \ - N.B. 
The weird subscripting: v[_N>3?3:0] is to silence \ - a spurious error from icpc \ - */ \ - ++v[_N>1?1:0]; \ - if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \ - ++v[_N>2?2:0]; \ - if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; \ - ++v[_N>3?3:0]; \ - for(size_t i=4; i<_N; ++i){ \ - if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; \ - ++v[i]; \ - } \ - return *this; \ - } \ - /* seed(SeedSeq) would be a constructor if having a constructor */ \ - /* didn't cause headaches with defaults */ \ - template <typename SeedSeq> \ - R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){ \ - r123array##_N##x##W ret; \ - const size_t Ngen = _N*((3+sizeof(value_type))/4); \ - uint32_t u32[Ngen]; \ - uint32_t *p32 = &u32[0]; \ - ss.generate(&u32[0], &u32[Ngen]); \ - for(size_t i=0; i<_N; ++i){ \ - ret.v[i] = assemble_from_u32<value_type>(p32); \ - p32 += (3+sizeof(value_type))/4; \ - } \ - return ret; \ - } \ -protected: \ - R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ \ - /* n may be greater than the maximum value of a single value_type */ \ - value_type vtn; \ - vtn = n; \ - v[0] += n; \ - const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \ - for(size_t i=1; i<_N; ++i){ \ - if(rshift){ \ - n >>= rshift; \ - }else{ \ - n=0; \ - } \ - if( v[i-1] < vtn ) \ - ++n; \ - if( n==0 ) break; \ - vtn = n; \ - v[i] += n; \ - } \ - return *this; \ - } \ - - -// There are several tricky considerations for the insertion and extraction -// operators: -// - we would like to be able to print r123array16x8 as a sequence of 16 integers, -// not as 16 bytes. -// - we would like to be able to print r123array1xm128i. -// - we do not want an int conversion operator in r123m128i because it causes -// lots of ambiguity problems with automatic promotions. 
-// Solution: r123arrayinsertable and r123arrayextractable - -template<typename T> -struct r123arrayinsertable{ - const T& v; - r123arrayinsertable(const T& t_) : v(t_) {} - friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<T>& t){ - return os << t.v; - } -}; - -template<> -struct r123arrayinsertable<uint8_t>{ - const uint8_t& v; - r123arrayinsertable(const uint8_t& t_) : v(t_) {} - friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<uint8_t>& t){ - return os << (int)t.v; - } -}; - -template<typename T> -struct r123arrayextractable{ - T& v; - r123arrayextractable(T& t_) : v(t_) {} - friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){ - return is >> t.v; - } -}; - -template<> -struct r123arrayextractable<uint8_t>{ - uint8_t& v; - r123arrayextractable(uint8_t& t_) : v(t_) {} - friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){ - int i; - is >> i; - t.v = i; - return is; - } -}; - -#define CXXOVERLOADS(_N, W, T) \ - \ -inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \ - os << r123arrayinsertable<T>(a.v[0]); \ - for(size_t i=1; i<_N; ++i) \ - os << " " << r123arrayinsertable<T>(a.v[i]); \ - return os; \ -} \ - \ -inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \ - for(size_t i=0; i<_N; ++i){ \ - r123arrayextractable<T> x(a.v[i]); \ - is >> x; \ - } \ - return is; \ -} \ - \ -namespace r123{ \ - typedef r123array##_N##x##W Array##_N##x##W; \ -} - -#endif /* __cplusplus */ - -/* _r123array_tpl expands to a declaration of struct r123arrayNxW. - - In C, it's nothing more than a struct containing an array of N - objects of type T. - - In C++ it's the same, but endowed with an assortment of member - functions, typedefs and friends. 
In C++, r123arrayNxW looks a lot - like std::array<T,N>, has most of the capabilities of a container, - and satisfies the requirements outlined in compat/Engine.hpp for - counter and key types. ArrayNxW, in the r123 namespace is - a typedef equivalent to r123arrayNxW. -*/ - -#define _r123array_tpl(_N, W, T) \ - /** @ingroup arrayNxW */ \ - /** @see arrayNxW */ \ -struct r123array##_N##x##W{ \ - T v[_N]; \ - CXXMETHODS(_N, W, T) \ -}; \ - \ -CXXOVERLOADS(_N, W, T) - -/** @endcond */ - -_r123array_tpl(1, 32, uint32_t) /* r123array1x32 */ -_r123array_tpl(2, 32, uint32_t) /* r123array2x32 */ -_r123array_tpl(4, 32, uint32_t) /* r123array4x32 */ -_r123array_tpl(8, 32, uint32_t) /* r123array8x32 */ - -_r123array_tpl(1, 64, uint64_t) /* r123array1x64 */ -_r123array_tpl(2, 64, uint64_t) /* r123array2x64 */ -_r123array_tpl(4, 64, uint64_t) /* r123array4x64 */ - -_r123array_tpl(16, 8, uint8_t) /* r123array16x8 for ARSsw, AESsw */ - -#if R123_USE_SSE -_r123array_tpl(1, m128i, r123m128i) /* r123array1x128i for ARSni, AESni */ -#endif - -/* In C++, it's natural to use sizeof(a::value_type), but in C it's - pretty convoluted to figure out the width of the value_type of an - r123arrayNxW: -*/ -#define R123_W(a) (8*sizeof(((a *)0)->v[0])) - -/** @namespace r123 - Most of the Random123 C++ API is contained in the r123 namespace. -*/ - -#endif - diff --git a/src/Random123/features/clangfeatures.h b/src/Random123/features/clangfeatures.h deleted file mode 100644 index 7138eb0dd4c7e348be5a46e0039b986990245f98..0000000000000000000000000000000000000000 --- a/src/Random123/features/clangfeatures.h +++ /dev/null @@ -1,89 +0,0 @@ -/* -Copyright 2010-2016, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. 
- -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __clangfeatures_dot_hpp -#define __clangfeatures_dot_hpp - -#ifndef R123_USE_X86INTRIN_H -#define R123_USE_X86INTRIN_H ((defined(__x86_64__)||defined(__i386__))) -#endif - -#ifndef R123_USE_CXX11_UNRESTRICTED_UNIONS -#define R123_USE_CXX11_UNRESTRICTED_UNIONS __has_feature(cxx_unrestricted_unions) -#endif - -#ifndef R123_USE_CXX11_STATIC_ASSERT -#define R123_USE_CXX11_STATIC_ASSERT __has_feature(cxx_static_assert) -#endif - -// With clang-3.6, -Wall warns about unused-local-typedefs. -// The "obvious" thing to do is to ignore -Wunused-local-typedefs, -// but that doesn't work because earlier versions of clang blow -// up on an 'unknown warning group'. So we briefly ignore -Wall... -// It's tempting to just give up on static assertions in pre-c++11 code. 
-#if !R123_USE_CXX11_STATIC_ASSERT && !defined(R123_STATIC_ASSERT) -#define R123_STATIC_ASSERT(expr, msg) \ -_Pragma("clang diagnostic push") \ -_Pragma("clang diagnostic ignored \"-Wall\"") \ -typedef char static_assertion[(!!(expr))*2-1] \ -_Pragma("clang diagnostic pop") -#endif - -#ifndef R123_USE_CXX11_CONSTEXPR -#define R123_USE_CXX11_CONSTEXPR __has_feature(cxx_constexpr) -#endif - -#ifndef R123_USE_CXX11_EXPLICIT_CONVERSIONS -#define R123_USE_CXX11_EXPLICIT_CONVERSIONS __has_feature(cxx_explicit_conversions) -#endif - -// With clang-3.0, the apparently simpler: -// #define R123_USE_CXX11_RANDOM __has_include(<random>) -// dumps core. -#ifndef R123_USE_CXX11_RANDOM -#if __cplusplus>=201103L && __has_include(<random>) -#define R123_USE_CXX11_RANDOM 1 -#else -#define R123_USE_CXX11_RANDOM 0 -#endif -#endif - -#ifndef R123_USE_CXX11_TYPE_TRAITS -#if __cplusplus>=201103L && __has_include(<type_traits>) -#define R123_USE_CXX11_TYPE_TRAITS 1 -#else -#define R123_USE_CXX11_TYPE_TRAITS 0 -#endif -#endif - -#include "gccfeatures.h" - -#endif diff --git a/src/Random123/features/compilerfeatures.h b/src/Random123/features/compilerfeatures.h deleted file mode 100644 index 6ad4f6088563e7f007d0fb59b60d88bd7b2fb028..0000000000000000000000000000000000000000 --- a/src/Random123/features/compilerfeatures.h +++ /dev/null @@ -1,322 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. 
Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -/** - -@page porting Preprocessor symbols for porting Random123 to different platforms. - -The Random123 library is portable across C, C++, CUDA, OpenCL environments, -and multiple operating systems (Linux, Windows 7, Mac OS X, FreeBSD, Solaris). -This level of portability requires the abstraction of some features -and idioms that are either not standardized (e.g., asm statements), or for which -different vendors have their own standards (e.g., SSE intrinsics) or for -which vendors simply refuse to conform to well-established standards (e.g., <inttypes.h>). - -Random123/features/compilerfeatures.h -conditionally includes a compiler-or-OS-specific Random123/features/XXXfeatures.h file which -defines appropriate values for the preprocessor symbols which can be used with -a specific compiler or OS. Those symbols will then -be used by other header files and source files in the Random123 -library (and may be used by applications) to control what actually -gets presented to the compiler. - -Most of the symbols are boolean valued. 
In general, they will -\b always be defined with value either 1 or 0, so do -\b NOT use \#ifdef. Use \#if R123_USE_SOMETHING instead. - -Library users can override any value by defining the pp-symbol with a compiler option, -e.g., - - cc -DR123_USE_MULHILO64_C99 - -will use a strictly c99 version of the full-width 64x64->128-bit multiplication -function, even if it would be disabled by default. - -All boolean-valued pre-processor symbols in Random123/features/compilerfeatures.h start with the prefix R123_USE_ -@verbatim - AES_NI - AES_OPENSSL - SSE4_2 - SSE4_1 - SSE - - STD_RANDOM - - GNU_UINT128 - ASM_GNU - ASM_MSASM - - CPUID_MSVC - - CXX11_RANDOM - CXX11_TYPE_TRAITS - CXX11_STATIC_ASSERT - CXX11_CONSTEXPR - CXX11_UNRESTRICTED_UNIONS - CXX11_EXPLICIT_CONVERSIONS - CXX11_LONG_LONG - CXX11 - - X86INTRIN_H - IA32INTRIN_H - XMMINTRIN_H - EMMINTRIN_H - SMMINTRIN_H - WMMINTRIN_H - INTRIN_H - - MULHILO32_ASM - MULHILO64_ASM - MULHILO64_MSVC_INTRIN - MULHILO64_CUDA_INTRIN - MULHILO64_OPENCL_INTRIN - MULHILO64_C99 - - U01_DOUBLE - -@endverbatim -Most have obvious meanings. Some non-obvious ones: - -AES_NI and AES_OPENSSL are not mutually exclusive. You can have one, -both or neither. - -GNU_UINT128 says that it's safe to use __uint128_t, but it -does not require its use. In particular, it should be -used in mulhilo<uint64_t> only if MULHILO64_ASM is unset. - -If the XXXINTRIN_H macros are true, then one should -@code -#include <xxxintrin.h> -@endcode -to gain access to compiler intrinsics. - -The CXX11_SOME_FEATURE macros allow the code to use specific -features of the C++11 language and library. -In the absence of a specific CXX11_SOME_FEATURE, the feature -is controlled by the catch-all R123_USE_CXX11 macro. - -U01_DOUBLE defaults on, and can be turned off (set to 0) -if one does not want the utility functions that convert to double -(i.e. u01_*_53()), e.g. on OpenCL without the cl_khr_fp64 extension. 
- -There are a number of invariants that are always true. Application code may -choose to rely on these: - -<ul> -<li>ASM_GNU and ASM_MSASM are mutually exclusive -<li>The "higher" SSE values imply the lower ones. -</ul> - -There are also non-boolean valued symbols: - -<ul> -<li>R123_STATIC_INLINE - - According to both C99 and GNU99, the 'static inline' declaration allows - the compiler to not emit code if the function is not used. - Note that the semantics of 'inline', 'static' and 'extern' in - gcc have changed over time and are subject to modification by - command line options, e.g., -std=gnu89, -fgnu-inline. - Nevertheless, it appears that the meaning of 'static inline' - has not changed over time and (with a little luck) the use of 'static inline' - here will be portable between versions of gcc and to other C99 - compilers. - See: http://gcc.gnu.org/onlinedocs/gcc/Inline.html - http://www.greenend.org.uk/rjk/2003/03/inline.html - -<li>R123_FORCE_INLINE(decl) - - which expands to 'decl', adorned with the compiler-specific - embellishments to strongly encourage that the declared function be - inlined. If there is no such compiler-specific magic, it should - expand to decl, unadorned. - -<li>R123_CUDA_DEVICE - which expands to __device__ (or something else with - sufficiently similar semantics) when CUDA is in use, and expands - to nothing in other cases. - -<li>R123_ASSERT(x) - which expands to assert(x), or maybe to nothing at - all if we're in an environment so feature-poor that you can't even - call assert (I'm looking at you, CUDA and OpenCL), or even include - assert.h safely (OpenCL). - -<li>R123_STATIC_ASSERT(expr,msg) - which expands to - static_assert(expr,msg), or to an expression that - will raise a compile-time exception if expr is not true. - -<li>R123_ULONG_LONG - which expands to a declaration of the longest available - unsigned integer. 
- -<li>R123_64BIT(x) - expands to something equivalent to - UINT64_C(x) from <stdint.h>, even in environments where <stdint.h> - is not available, e.g., MSVC and OpenCL. - -<li>R123_BUILTIN_EXPECT(expr,likely_value) - expands to something with - the semantics of gcc's __builtin_expect(expr,likely_value). If - the environment has nothing like __builtin_expect, it should expand - to just expr. -</ul> - - -\cond HIDDEN_FROM_DOXYGEN -*/ - -/* -N.B. When something is added to the list of features, it should be -added to each of the *features.h files, AND to examples/ut_features.cpp. -*/ - -/* N.B. most other compilers (icc, nvcc, open64, llvm) will also define __GNUC__, so order matters. */ -#if defined(__OPENCL_VERSION__) && __OPENCL_VERSION__ > 0 -#include "openclfeatures.h" -#elif defined(__CUDACC__) -#include "nvccfeatures.h" -#elif defined(__ICC) -#include "iccfeatures.h" -#elif defined(__xlC__) -#include "xlcfeatures.h" -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) -#include "sunprofeatures.h" -#elif defined(__OPEN64__) -#include "open64features.h" -#elif defined(__clang__) -#include "clangfeatures.h" -#elif defined(__GNUC__) -#include "gccfeatures.h" -#elif defined(__PGI) -#include "pgccfeatures.h" -#elif defined(_MSC_FULL_VER) -#include "msvcfeatures.h" -#else -#error "Can't identify compiler. 
You'll need to add a new xxfeatures.hpp" -{ /* maybe an unbalanced brace will terminate the compilation */ -#endif - -#ifndef R123_USE_CXX11 -#define R123_USE_CXX11 0 -#endif - -#ifndef R123_USE_CXX11_UNRESTRICTED_UNIONS -#define R123_USE_CXX11_UNRESTRICTED_UNIONS R123_USE_CXX11 -#endif - -//#ifndef R123_USE_CXX11_STATIC_ASSERT -//#define R123_USE_CXX11_STATIC_ASSERT R123_USE_CXX11 -//#endif - -//#ifndef R123_USE_CXX11_CONSTEXPR -//#define R123_USE_CXX11_CONSTEXPR R123_USE_CXX11 -//#endif - -#ifndef R123_USE_CXX11_EXPLICIT_CONVERSIONS -#define R123_USE_CXX11_EXPLICIT_CONVERSIONS R123_USE_CXX11 -#endif - -#ifndef R123_USE_CXX11_RANDOM -#define R123_USE_CXX11_RANDOM R123_USE_CXX11 -#endif - -#ifndef R123_USE_CXX11_TYPE_TRAITS -#define R123_USE_CXX11_TYPE_TRAITS R123_USE_CXX11 -#endif - -#ifndef R123_USE_CXX11_LONG_LONG -#define R123_USE_CXX11_LONG_LONG R123_USE_CXX11 -#endif - -#ifndef R123_USE_MULHILO64_C99 -#define R123_USE_MULHILO64_C99 0 -#endif - -#ifndef R123_USE_MULHILO64_MULHI_INTRIN -#define R123_USE_MULHILO64_MULHI_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO32_MULHI_INTRIN -#define R123_USE_MULHILO32_MULHI_INTRIN 0 -#endif - -//#ifndef R123_STATIC_ASSERT -//#if R123_USE_CXX11_STATIC_ASSERT -//#define R123_STATIC_ASSERT(expr, msg) static_assert(expr, msg) -//#else -// /* if msg always_looked_like_this, we could paste it into the name. Worth it? 
*/ -//#define R123_STATIC_ASSERT(expr, msg) typedef char static_assertion[(!!(expr))*2-1] -//#endif -//#endif - -#define R123_STATIC_ASSERT(expr, msg) typedef char static_assertion[(!!(expr))*2-1] - -//#ifndef R123_CONSTEXPR -//#if R123_USE_CXX11_CONSTEXPR -//#define R123_CONSTEXPR constexpr -//#else -#define R123_CONSTEXPR -//#endif -//#endif - -#ifndef R123_USE_PHILOX_64BIT -#define R123_USE_PHILOX_64BIT (R123_USE_MULHILO64_ASM || R123_USE_MULHILO64_MSVC_INTRIN || R123_USE_MULHILO64_CUDA_INTRIN || R123_USE_GNU_UINT128 || R123_USE_MULHILO64_C99 || R123_USE_MULHILO64_OPENCL_INTRIN || R123_USE_MULHILO64_MULHI_INTRIN) -#endif - -#ifndef R123_ULONG_LONG -#if defined(__cplusplus) && !R123_USE_CXX11_LONG_LONG -/* C++98 doesn't have long long. It doesn't have uint64_t either, but - we will have typedef'ed uint64_t to something in the xxxfeatures.h. - With luck, it won't elicit complaints from -pedantic. Cross your - fingers... */ -#define R123_ULONG_LONG uint64_t -#else -#define R123_ULONG_LONG unsigned long long -#endif -#endif - -/* UINT64_C should have been #defined by XXXfeatures.h, either by - #include <stdint.h> or through compiler-dependent hacks */ -#ifndef R123_64BIT -#define R123_64BIT(x) UINT64_C(x) -#endif - -#ifndef R123_THROW -#define R123_THROW(x) throw (x) -#endif - -/* - * Windows.h (and perhaps other "well-meaning" code define min and - * max, so there's a high chance that our definition of min, max - * methods or use of std::numeric_limits min and max will cause - * complaints in any program that happened to include Windows.h or - * suchlike first. We use the null macro below in our own header - * files definition or use of min, max to defensively preclude - * this problem. It may not be enough; one might need to #define - * NOMINMAX before including Windows.h or compile with -DNOMINMAX. 
- */ -#define R123_NO_MACRO_SUBST - -/** \endcond */ diff --git a/src/Random123/features/gccfeatures.h b/src/Random123/features/gccfeatures.h deleted file mode 100644 index d6bb06088d14d477bfa70620c3a9af49c8a3a9b2..0000000000000000000000000000000000000000 --- a/src/Random123/features/gccfeatures.h +++ /dev/null @@ -1,247 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ -#ifndef __gccfeatures_dot_hpp -#define __gccfeatures_dot_hpp - -#define R123_GNUC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__) - -#if !defined(__x86_64__) && !defined(__i386__) && !defined(__powerpc__) -# error "This code has only been tested on x86 and powerpc platforms." -#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task> -{ /* maybe an unbalanced brace will terminate the compilation */ - /* Feel free to try the Random123 library on other architectures by changing - the conditions that reach this error, but you should consider it a - porting exercise and expect to encounter bugs and deficiencies. - Please let the authors know of any successes (or failures). */ -#endif - -#ifdef __powerpc__ -#include <ppu_intrinsics.h> -#endif - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE static __inline__ -#endif - -#ifndef R123_FORCE_INLINE -#if R123_GNUC_VERSION >= 40000 -#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline)) -#else -#define R123_FORCE_INLINE(decl) decl -#endif -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#include <assert.h> -#define R123_ASSERT(x) assert(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely) -#endif - -/* According to the C++0x standard, we should be able to test the numeric - value of __cplusplus == 199701L for C++98, __cplusplus == 201103L for C++0x - But gcc has had an open bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=1773 - since early 2001, which was finally fixed in 4.7 (early 2012). For - earlier versions, the only way to detect whether --std=c++0x was requested - on the command line is to look at the __GCC_EXPERIMENTAL_CXX0X__ pp-symbol. 
-*/ -#define GNU_CXX11 (__cplusplus>=201103L || (R123_GNUC_VERSION<40700 && defined(__GCC_EXPERIMENTAL_CXX0X__) )) - -#ifndef R123_USE_CXX11_UNRESTRICTED_UNIONS -#define R123_USE_CXX11_UNRESTRICTED_UNIONS ((R123_GNUC_VERSION >= 40600) && GNU_CXX11) -#endif - -#ifndef R123_USE_CXX11_STATIC_ASSERT -#define R123_USE_CXX11_STATIC_ASSERT ((R123_GNUC_VERSION >= 40300) && GNU_CXX11) -#endif - -#ifndef R123_USE_CXX11_CONSTEXPR -#define R123_USE_CXX11_CONSTEXPR ((R123_GNUC_VERSION >= 40600) && GNU_CXX11) -#endif - -#ifndef R123_USE_CXX11_EXPLICIT_CONVERSIONS -#define R123_USE_CXX11_EXPLICIT_CONVERSIONS ((R123_GNUC_VERSION >= 40500) && GNU_CXX11) -#endif - -#ifndef R123_USE_CXX11_RANDOM -#define R123_USE_CXX11_RANDOM ((R123_GNUC_VERSION>=40500) && GNU_CXX11) -#endif - -#ifndef R123_USE_CXX11_TYPE_TRAITS -#define R123_USE_CXX11_TYPE_TRAITS ((R123_GNUC_VERSION>=40400) && GNU_CXX11) -#endif - -#ifndef R123_USE_AES_NI -#ifdef __AES__ -#define R123_USE_AES_NI 1 -#else -#define R123_USE_AES_NI 0 -#endif -#endif - -#ifndef R123_USE_SSE4_2 -#ifdef __SSE4_2__ -#define R123_USE_SSE4_2 1 -#else -#define R123_USE_SSE4_2 0 -#endif -#endif - -#ifndef R123_USE_SSE4_1 -#ifdef __SSE4_1__ -#define R123_USE_SSE4_1 1 -#else -#define R123_USE_SSE4_1 0 -#endif -#endif - -#ifndef R123_USE_SSE -/* There's no point in trying to compile SSE code in Random123 - unless SSE2 is available. */ -#ifdef __SSE2__ -#define R123_USE_SSE 1 -#else -#define R123_USE_SSE 0 -#endif -#endif - -#ifndef R123_USE_AES_OPENSSL -/* There isn't really a good way to tell at compile time whether - openssl is available. Without a pre-compilation configure-like - tool, it's less error-prone to guess that it isn't available. 
Add - -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to - play with openssl */ -#define R123_USE_AES_OPENSSL 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#ifdef __x86_64__ -#define R123_USE_GNU_UINT128 1 -#else -#define R123_USE_GNU_UINT128 0 -#endif -#endif - -#ifndef R123_USE_ASM_GNU -#define R123_USE_ASM_GNU (defined(__x86_64__)||defined(__i386__)) -#endif - -#ifndef R123_USE_CPUID_MSVC -#define R123_USE_CPUID_MSVC 0 -#endif - -#ifndef R123_USE_X86INTRIN_H -#define R123_USE_X86INTRIN_H ((defined(__x86_64__)||defined(__i386__)) && R123_GNUC_VERSION >= 40402) -#endif - -#ifndef R123_USE_IA32INTRIN_H -#define R123_USE_IA32INTRIN_H 0 -#endif - -#ifndef R123_USE_XMMINTRIN_H -#define R123_USE_XMMINTRIN_H 0 -#endif - -#ifndef R123_USE_EMMINTRIN_H -/* gcc -m64 on Solaris 10 defines __SSE2__ but doesn't have - emmintrin.h in the include search path. This is - so broken that I refuse to try to work around it. If this - affects you, figure out where your emmintrin.h lives and - add an appropriate -I to your CPPFLAGS. Or add -DR123_USE_SSE=0. 
*/ -#define R123_USE_EMMINTRIN_H (R123_USE_SSE && (R123_GNUC_VERSION < 40402)) -#endif - -#ifndef R123_USE_SMMINTRIN_H -#define R123_USE_SMMINTRIN_H ((R123_USE_SSE4_1 || R123_USE_SSE4_2) && (R123_GNUC_VERSION < 40402)) -#endif - -#ifndef R123_USE_WMMINTRIN_H -#define R123_USE_WMMINTRIN_H 0 -#endif - -#ifndef R123_USE_INTRIN_H -#define R123_USE_INTRIN_H 0 -#endif - -#ifndef R123_USE_MULHILO32_ASM -#define R123_USE_MULHILO32_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_MULHI_INTRIN -#define R123_USE_MULHILO64_MULHI_INTRIN (defined(__powerpc64__)) -#endif - -#ifndef R123_MULHILO64_MULHI_INTRIN -#define R123_MULHILO64_MULHI_INTRIN __mulhdu -#endif - -#ifndef R123_USE_MULHILO32_MULHI_INTRIN -#define R123_USE_MULHILO32_MULHI_INTRIN 0 -#endif - -#ifndef R123_MULHILO32_MULHI_INTRIN -#define R123_MULHILO32_MULHI_INTRIN __mulhwu -#endif - -#ifndef __STDC_CONSTANT_MACROS -#define __STDC_CONSTANT_MACROS -#endif -#include <stdint.h> -#ifndef UINT64_C -#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> -#endif - -/* If you add something, it must go in all the other XXfeatures.hpp - and in ../ut_features.cpp */ -#endif diff --git a/src/Random123/features/iccfeatures.h b/src/Random123/features/iccfeatures.h deleted file mode 100644 index b64e5c2299f9ab367141dfba69dccf91dc36e496..0000000000000000000000000000000000000000 --- a/src/Random123/features/iccfeatures.h +++ /dev/null @@ -1,208 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __icpcfeatures_dot_hpp -#define __icpcfeatures_dot_hpp - -// icc relies on gcc libraries and other toolchain components. -#define R123_GNUC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__) - -#if !defined(__x86_64__) && !defined(__i386__) -# error "This code has only been tested on x86 platforms." 
-{ // maybe an unbalanced brace will terminate the compilation -// You are invited to try Easy123 on other architectures, by changing -// the conditions that reach this error, but you should consider it a -// porting exercise and expect to encounter bugs and deficiencies. -// Please let the authors know of any successes (or failures). -#endif - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE static inline -#endif - -#ifndef R123_FORCE_INLINE -#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline)) -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#include <assert.h> -#define R123_ASSERT(x) assert(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely) -#endif - -// The basic idiom is: -// #ifndef R123_SOMETHING -// #if some condition -// #define R123_SOMETHING 1 -// #else -// #define R123_SOMETHING 0 -// #endif -// #endif -// This idiom allows an external user to override any decision -// in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHINE=0 - -// An alternative idiom is: -// #ifndef R123_SOMETHING -// #define R123_SOMETHING (some boolean expression) -// #endif -// where the boolean expression might contain previously-defined R123_SOMETHING_ELSE -// pp-symbols. - -#ifndef R123_USE_SSE4_2 -#ifdef __SSE4_2__ -#define R123_USE_SSE4_2 1 -#else -#define R123_USE_SSE4_2 0 -#endif -#endif - -#ifndef R123_USE_SSE4_1 -#ifdef __SSE4_1__ -#define R123_USE_SSE4_1 1 -#else -#define R123_USE_SSE4_1 0 -#endif -#endif - -#ifndef R123_USE_SSE -#ifdef __SSE2__ -#define R123_USE_SSE 1 -#else -#define R123_USE_SSE 0 -#endif -#endif - -#ifndef R123_USE_AES_NI -// Unlike gcc, icc (version 12) does not pre-define an __AES__ -// pp-symbol when -maes or -xHost is on the command line. 
This feels -// like a defect in icc (it defines __SSE4_2__ in analogous -// circumstances), but until Intel fixes it, we're better off erring -// on the side of caution and not generating instructions that are -// going to raise SIGILL when executed. To get the AES-NI -// instructions with icc, the caller must puts something like -// -DR123_USE_AES_NI=1 or -D__AES__ on the command line. FWIW, the -// AES-NI Whitepaper by Gueron says that icc has supported AES-NI from -// 11.1 onwards. -// -#define R123_USE_AES_NI ((__ICC>=1101) && defined(__AES__)) -#endif - -#ifndef R123_USE_AES_OPENSSL -/* There isn't really a good way to tell at compile time whether - openssl is available. Without a pre-compilation configure-like - tool, it's less error-prone to guess that it isn't available. Add - -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to - play with openssl */ -#define R123_USE_AES_OPENSSL 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_ASM_GNU -#define R123_USE_ASM_GNU 1 -#endif - -#ifndef R123_USE_CPUID_MSVC -#define R123_USE_CPUID_MSVC 0 -#endif - -#ifndef R123_USE_X86INTRIN_H -#define R123_USE_X86INTRIN_H 0 -#endif - -#ifndef R123_USE_IA32INTRIN_H -#define R123_USE_IA32INTRIN_H 1 -#endif - -#ifndef R123_USE_XMMINTRIN_H -#define R123_USE_XMMINTRIN_H 0 -#endif - -#ifndef R123_USE_EMMINTRIN_H -#define R123_USE_EMMINTRIN_H 1 -#endif - -#ifndef R123_USE_SMMINTRIN_H -#define R123_USE_SMMINTRIN_H 1 -#endif - -#ifndef R123_USE_WMMINTRIN_H -#define R123_USE_WMMINTRIN_H 1 -#endif - -#ifndef R123_USE_INTRIN_H -#define R123_USE_INTRIN_H 0 -#endif - -#ifndef R123_USE_MULHILO16_ASM -#define R123_USE_MULHILO16_ASM 0 -#endif - -#ifndef R123_USE_MULHILO32_ASM -#define R123_USE_MULHILO32_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 1 -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define 
R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 0 -#endif - -#ifndef __STDC_CONSTANT_MACROS -#define __STDC_CONSTANT_MACROS -#endif -#include <stdint.h> -#ifndef UINT64_C -#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> -#endif - -// If you add something, it must go in all the other XXfeatures.hpp -// and in ../ut_features.cpp -#endif diff --git a/src/Random123/features/msvcfeatures.h b/src/Random123/features/msvcfeatures.h deleted file mode 100644 index 9eb9520912daf66869a6cf9fd027c37f06a8a3d4..0000000000000000000000000000000000000000 --- a/src/Random123/features/msvcfeatures.h +++ /dev/null @@ -1,200 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __msvcfeatures_dot_hpp -#define __msvcfeatures_dot_hpp - -//#if _MSVC_FULL_VER <= 15 -//#error "We've only tested MSVC_FULL_VER==15." -//#endif - -#if !defined(_M_IX86) && !defined(_M_X64) -# error "This code has only been tested on x86 platforms." -{ // maybe an unbalanced brace will terminate the compilation -// You are invited to try Random123 on other architectures, by changing -// the conditions that reach this error, but you should consider it a -// porting exercise and expect to encounter bugs and deficiencies. -// Please let the authors know of any successes (or failures). 
-#endif - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE static __inline -#endif - -#ifndef R123_FORCE_INLINE -#define R123_FORCE_INLINE(decl) _forceinline decl -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#include <assert.h> -#define R123_ASSERT(x) assert(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) expr -#endif - -// The basic idiom is: -// #ifndef R123_SOMETHING -// #if some condition -// #define R123_SOMETHING 1 -// #else -// #define R123_SOMETHING 0 -// #endif -// #endif -// This idiom allows an external user to override any decision -// in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHINE=0 - -// An alternative idiom is: -// #ifndef R123_SOMETHING -// #define R123_SOMETHING (some boolean expression) -// #endif -// where the boolean expression might contain previously-defined R123_SOMETHING_ELSE -// pp-symbols. - -#ifndef R123_USE_AES_NI -#if defined(_M_X64) -#define R123_USE_AES_NI 1 -#else -#define R123_USE_AES_NI 0 -#endif -#endif - -#ifndef R123_USE_SSE4_2 -#if defined(_M_X64) -#define R123_USE_SSE4_2 1 -#else -#define R123_USE_SSE4_2 0 -#endif -#endif - -#ifndef R123_USE_SSE4_1 -#if defined(_M_X64) -#define R123_USE_SSE4_1 1 -#else -#define R123_USE_SSE4_1 0 -#endif -#endif - -#ifndef R123_USE_SSE -#define R123_USE_SSE 1 -#endif - -#ifndef R123_USE_AES_OPENSSL -#define R123_USE_AES_OPENSSL 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_ASM_GNU -#define R123_USE_ASM_GNU 0 -#endif - -#ifndef R123_USE_CPUID_MSVC -#define R123_USE_CPUID_MSVC 1 -#endif - -#ifndef R123_USE_X86INTRIN_H -#define R123_USE_X86INTRIN_H 0 -#endif - -#ifndef R123_USE_IA32INTRIN_H -#define R123_USE_IA32INTRIN_H 0 -#endif - -#ifndef R123_USE_XMMINTRIN_H -#define R123_USE_XMMINTRIN_H 0 -#endif - -#ifndef R123_USE_EMMINTRIN_H -#define R123_USE_EMMINTRIN_H 1 -#endif - -#ifndef R123_USE_SMMINTRIN_H -#define 
R123_USE_SMMINTRIN_H 1 -#endif - -#ifndef R123_USE_WMMINTRIN_H -#define R123_USE_WMMINTRIN_H 1 -#endif - -#ifndef R123_USE_INTRIN_H -#define R123_USE_INTRIN_H 1 -#endif - -#ifndef R123_USE_MULHILO16_ASM -#define R123_USE_MULHILO16_ASM 0 -#endif - -#ifndef R123_USE_MULHILO32_ASM -#define R123_USE_MULHILO32_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#if defined(_M_X64) -#define R123_USE_MULHILO64_MSVC_INTRIN 1 -#else -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 0 -#endif - -#ifndef __STDC_CONSTANT_MACROS -#define __STDC_CONSTANT_MACROS -#endif -#include <stdint.h> -#ifndef UINT64_C -#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> -#endif - -#pragma warning(disable:4244) -#pragma warning(disable:4996) - -// If you add something, it must go in all the other XXfeatures.hpp -// and in ../ut_features.cpp -#endif diff --git a/src/Random123/features/nvccfeatures.h b/src/Random123/features/nvccfeatures.h deleted file mode 100644 index d1ff8bf521a05d45232e922c9a875439bc84f837..0000000000000000000000000000000000000000 --- a/src/Random123/features/nvccfeatures.h +++ /dev/null @@ -1,125 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __r123_nvcc_features_dot_h__ -#define __r123_nvcc_features_dot_h__ - -#if !defined(CUDART_VERSION) -#error "why are we in nvccfeatures.h if CUDART_VERSION is not defined" -#endif - -#if CUDART_VERSION < 4010 -#error "CUDA versions earlier than 4.1 produce incorrect results for some templated functions in namespaces. Random123 isunsupported. See comments in nvccfeatures.h" -// This test was added in Random123-1.08 (August, 2013) because we -// discovered that Ftype(maxTvalue<T>()) with Ftype=double and -// T=uint64_t in examples/uniform.hpp produces -1 for CUDA4.0 and -// earlier. We can't be sure this bug doesn't also affect invocations -// of other templated functions, e.g., essentially all of Random123. -// Thus, we no longer trust CUDA versions earlier than 4.1 even though -// we had previously tested and timed Random123 with CUDA 3.x and 4.0. -// If you feel lucky or desperate, you can change #error to #warning, but -// please take extra care to be sure that you are getting correct -// results. 
-#endif - -// nvcc falls through to gcc or msvc. So first define -// a couple of things and then include either gccfeatures.h -// or msvcfeatures.h - -//#ifdef __CUDA_ARCH__ allows Philox32 and Philox64 to be compiled -//for both device and host functions in CUDA by setting compiler flags -//for the device function -#ifdef __CUDA_ARCH__ -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE __device__ -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 1 -#endif - -#ifndef R123_THROW -// No exceptions in CUDA, at least upto 4.0 -#define R123_THROW(x) R123_ASSERT(0) -#endif - -#ifndef R123_ASSERT -#define R123_ASSERT(x) if((x)) ; else asm("trap;") -#endif - -#else // ! __CUDA_ARCH__ -// If we're using nvcc not compiling for the CUDA architecture, -// then we must be compiling for the host. In that case, -// tell the philox code to use the mulhilo64 asm because -// nvcc doesn't grok uint128_t. -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 1 -#endif - -#endif // __CUDA_ARCH__ - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) expr -#endif - -#ifndef R123_USE_AES_NI -#define R123_USE_AES_NI 0 -#endif - -#ifndef R123_USE_SSE4_2 -#define R123_USE_SSE4_2 0 -#endif - -#ifndef R123_USE_SSE4_1 -#define R123_USE_SSE4_1 0 -#endif - -#ifndef R123_USE_SSE -#define R123_USE_SSE 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_ULONG_LONG -// uint64_t, which is what we'd get without this, is -// not the same as unsigned long long -#define R123_ULONG_LONG unsigned long long -#endif - -#if defined(__GNUC__) -#include "gccfeatures.h" -#elif defined(_MSC_FULL_VER) -#include "msvcfeatures.h" -#endif - -#endif diff --git a/src/Random123/features/open64features.h b/src/Random123/features/open64features.h deleted file mode 100644 index 8da9f5f51efab021c644b632b4499f12fa0220d9..0000000000000000000000000000000000000000 --- a/src/Random123/features/open64features.h +++ 
/dev/null @@ -1,50 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __open64features_dot_hpp -#define __open64features_dot_hpp - -/* The gcc features are mostly right. We just override a few and then include gccfeatures.h */ - -/* Open64 4.2.3 and 4.2.4 accept the __uint128_t code without complaint - but produce incorrect code for 64-bit philox. 
The MULHILO64_ASM - seems to work fine */ -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 1 -#endif - -#include "gccfeatures.h" - -#endif diff --git a/src/Random123/features/openclfeatures.h b/src/Random123/features/openclfeatures.h deleted file mode 100644 index af03d3092318c6c27f1a65ce8104c1609b1e66e1..0000000000000000000000000000000000000000 --- a/src/Random123/features/openclfeatures.h +++ /dev/null @@ -1,89 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __openclfeatures_dot_hpp -#define __openclfeatures_dot_hpp - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE inline -#endif - -#ifndef R123_FORCE_INLINE -#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline)) -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#define R123_ASSERT(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) expr -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 1 -#endif - -#ifndef R123_USE_AES_NI -#define R123_USE_AES_NI 0 -#endif - -// XXX ATI APP SDK 2.4 clBuildProgram SEGVs if one uses uint64_t instead of -// ulong to mul_hi. And gets lots of complaints from stdint.h -// on some machines. -// But these typedefs mean we cannot include stdint.h with -// these headers? Do we need R123_64T, R123_32T, R123_8T? 
-typedef ulong uint64_t; -typedef uint uint32_t; -typedef uchar uint8_t; -#define UINT64_C(x) ((ulong)(x##UL)) - -#endif diff --git a/src/Random123/features/pgccfeatures.h b/src/Random123/features/pgccfeatures.h deleted file mode 100644 index 18ace1353b4e0e6201c823e17b5325c2a9b05afe..0000000000000000000000000000000000000000 --- a/src/Random123/features/pgccfeatures.h +++ /dev/null @@ -1,194 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -Copyright (c) 2013, Los Alamos National Security, LLC -All rights reserved. - -Copyright 2013. Los Alamos National Security, LLC. This software was produced -under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National -Laboratory (LANL), which is operated by Los Alamos National Security, LLC for -the U.S. Department of Energy. The U.S. Government has rights to use, -reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS -ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR -ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified -to produce derivative works, such modified software should be clearly marked, -so as not to confuse it with the version available from LANL. -*/ -#ifndef __pgccfeatures_dot_hpp -#define __pgccfeatures_dot_hpp - -#if !defined(__x86_64__) && !defined(__i386__) -# error "This code has only been tested on x86 platforms." -#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task> -{ /* maybe an unbalanced brace will terminate the compilation */ - /* Feel free to try the Random123 library on other architectures by changing - the conditions that reach this error, but you should consider it a - porting exercise and expect to encounter bugs and deficiencies. - Please let the authors know of any successes (or failures). */ -#endif - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE static inline -#endif - -/* Found this example in PGI's emmintrin.h. */ -#ifndef R123_FORCE_INLINE -#define R123_FORCE_INLINE(decl) decl __attribute__((__always_inline__)) -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#include <assert.h> -#define R123_ASSERT(x) assert(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) (expr) -#endif - -/* PGI through 13.2 doesn't appear to support AES-NI. 
*/ -#ifndef R123_USE_AES_NI -#define R123_USE_AES_NI 0 -#endif - -/* PGI through 13.2 appears to support MMX, SSE, SSE3, SSE3, SSSE3, SSE4a, and - ABM, but not SSE4.1 or SSE4.2. */ -#ifndef R123_USE_SSE4_2 -#define R123_USE_SSE4_2 0 -#endif - -#ifndef R123_USE_SSE4_1 -#define R123_USE_SSE4_1 0 -#endif - -#ifndef R123_USE_SSE -/* There's no point in trying to compile SSE code in Random123 - unless SSE2 is available. */ -#ifdef __SSE2__ -#define R123_USE_SSE 1 -#else -#define R123_USE_SSE 0 -#endif -#endif - -#ifndef R123_USE_AES_OPENSSL -/* There isn't really a good way to tell at compile time whether - openssl is available. Without a pre-compilation configure-like - tool, it's less error-prone to guess that it isn't available. Add - -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to - play with openssl */ -#define R123_USE_AES_OPENSSL 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_ASM_GNU -#define R123_USE_ASM_GNU 1 -#endif - -#ifndef R123_USE_CPUID_MSVC -#define R123_USE_CPUID_MSVC 0 -#endif - -#ifndef R123_USE_X86INTRIN_H -#define R123_USE_X86INTRIN_H 0 -#endif - -#ifndef R123_USE_IA32INTRIN_H -#define R123_USE_IA32INTRIN_H 0 -#endif - -/* emmintrin.h from PGI #includes xmmintrin.h but then complains at link time - about undefined references to _mm_castsi128_ps(__m128i). Why? 
*/ -#ifndef R123_USE_XMMINTRIN_H -#define R123_USE_XMMINTRIN_H 1 -#endif - -#ifndef R123_USE_EMMINTRIN_H -#define R123_USE_EMMINTRIN_H 1 -#endif - -#ifndef R123_USE_SMMINTRIN_H -#define R123_USE_SMMINTRIN_H 0 -#endif - -#ifndef R123_USE_WMMINTRIN_H -#define R123_USE_WMMINTRIN_H 0 -#endif - -#ifndef R123_USE_INTRIN_H -#ifdef __ABM__ -#define R123_USE_INTRIN_H 1 -#else -#define R123_USE_INTRIN_H 0 -#endif -#endif - -#ifndef R123_USE_MULHILO32_ASM -#define R123_USE_MULHILO32_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_MULHI_INTRIN -#define R123_USE_MULHILO64_MULHI_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 1 -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 0 -#endif - -#ifndef __STDC_CONSTANT_MACROS -#define __STDC_CONSTANT_MACROS -#endif -#include <stdint.h> -#ifndef UINT64_C -#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> -#endif - -/* If you add something, it must go in all the other XXfeatures.hpp - and in ../ut_features.cpp */ -#endif diff --git a/src/Random123/features/sse.h b/src/Random123/features/sse.h deleted file mode 100644 index cf46257a3ba3ed4763176cb579ffbb5d269105cf..0000000000000000000000000000000000000000 --- a/src/Random123/features/sse.h +++ /dev/null @@ -1,269 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. 
- -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ -#ifndef _Random123_sse_dot_h__ -#define _Random123_sse_dot_h__ - -//#ifndef R123_USE_ASM_GNU -//#define R123_USE_ASM_GNU (defined(__x86_64__)||defined(__i386__)) -//#endif - -//#ifndef R123_USE_X86INTRIN_H -//#define R123_USE_X86INTRIN_H ((defined(__x86_64__)||defined(__i386__)) && R123_GNUC_VERSION >= 40402) -//#endif - -#if R123_USE_SSE - -//#if R123_USE_X86INTRIN_H -//#include <x86intrin.h> -//#endif -#if R123_USE_IA32INTRIN_H -#include <ia32intrin.h> -#endif -#if R123_USE_XMMINTRIN_H -#include <xmmintrin.h> -#endif -#if R123_USE_EMMINTRIN_H -#include <emmintrin.h> -#endif -#if R123_USE_SMMINTRIN_H -#include <smmintrin.h> -#endif -#if R123_USE_WMMINTRIN_H -#include <wmmintrin.h> -#endif -#if R123_USE_INTRIN_H -#include <intrin.h> -#endif -#ifdef __cplusplus -#include <iostream> -#include <limits> -#include <stdexcept> -#endif - - - - -// There is a lot of annoying and inexplicable variation in the -// SSE intrinsics available in different compilation environments. -// The details seem to depend on the compiler, the version and -// the target architecture. Rather than insisting on -// R123_USE_feature tests for each of these in each of the -// compilerfeatures.h files we just keep the complexity localized -// to here... -#if (defined(__ICC) && __ICC<1210) || (defined(_MSC_VER) && !defined(_WIN64)) -/* Is there an intrinsic to assemble an __m128i from two 64-bit words? - If not, use the 4x32-bit intrisic instead. N.B. It looks like Intel - added _mm_set_epi64x to icc version 12.1 in Jan 2012. -*/ -R123_STATIC_INLINE __m128i _mm_set_epi64x(uint64_t v1, uint64_t v0){ - union{ - uint64_t u64; - uint32_t u32[2]; - } u1, u0; - u1.u64 = v1; - u0.u64 = v0; - return _mm_set_epi32(u1.u32[1], u1.u32[0], u0.u32[1], u0.u32[0]); -} -#endif -/* _mm_extract_lo64 abstracts the task of extracting the low 64-bit - word from an __m128i. The _mm_cvtsi128_si64 intrinsic does the job - on 64-bit platforms. 
Unfortunately, both MSVC and Open64 fail - assertions in ut_M128.cpp and ut_carray.cpp when we use the - _mm_cvtsi128_si64 intrinsic. (See - https://bugs.open64.net/show_bug.cgi?id=873 for the Open64 bug). - On 32-bit platforms, there's no MOVQ, so there's no intrinsic. - Finally, even if the intrinsic exists, it may be spelled with or - without the 'x'. -*/ -#if !defined(__x86_64__) || defined(_MSC_VER) || defined(__OPEN64__) -R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){ - union{ - uint64_t u64[2]; - __m128i m; - }u; - _mm_store_si128(&u.m, si); - return u.u64[0]; -} -#elif defined(__llvm__) || defined(__ICC) -R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){ - return (uint64_t)_mm_cvtsi128_si64(si); -} -#else /* GNUC, others */ -/* FWIW, gcc's emmintrin.h has had the 'x' spelling - since at least gcc-3.4.4. The no-'x' spelling showed up - around 4.2. */ -R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){ - return (uint64_t)_mm_cvtsi128_si64x(si); -} -#endif -#if defined(__GNUC__) && __GNUC__ < 4 -/* the cast builtins showed up in gcc4. */ -R123_STATIC_INLINE __m128 _mm_castsi128_ps(__m128i si){ - return (__m128)si; -} -#endif - -#ifdef __cplusplus - -struct r123m128i{ - __m128i m; -#if R123_USE_CXX11_UNRESTRICTED_UNIONS - // C++98 forbids a union member from having *any* constructors. - // C++11 relaxes this, and allows union members to have constructors - // as long as there is a "trivial" default construtor. So in C++11 - // we can provide a r123m128i constructor with an __m128i argument, and still - // have the default (and hence trivial) default constructor. - r123m128i() = default; - r123m128i(__m128i _m): m(_m){} -#endif - r123m128i& operator=(const __m128i& rhs){ m=rhs; return *this;} - r123m128i& operator=(R123_ULONG_LONG n){ m = _mm_set_epi64x(0, n); return *this;} -#if R123_USE_CXX11_EXPLICIT_CONVERSIONS - // With C++0x we can attach explicit to the bool conversion operator - // to disambiguate undesired promotions. 
For g++, this works - // only in 4.5 and above. - explicit operator bool() const {return _bool();} -#else - // Pre-C++0x, we have to do something else. Google for the "safe bool" - // idiom for other ideas... - operator const void*() const{return _bool()?this:0;} -#endif - operator __m128i() const {return m;} - -private: -#if R123_USE_SSE4_1 - bool _bool() const{ return !_mm_testz_si128(m,m); } -#else - bool _bool() const{ return 0xf != _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(m, _mm_setzero_si128()))); } -#endif -}; - -R123_STATIC_INLINE r123m128i& operator++(r123m128i& v){ - __m128i& c = v.m; - __m128i zeroone = _mm_set_epi64x(R123_64BIT(0), R123_64BIT(1)); - c = _mm_add_epi64(c, zeroone); - //return c; -#if R123_USE_SSE4_1 - __m128i zerofff = _mm_set_epi64x(0, ~(R123_64BIT(0))); - if( R123_BUILTIN_EXPECT(_mm_testz_si128(c,zerofff), 0) ){ - __m128i onezero = _mm_set_epi64x(R123_64BIT(1), R123_64BIT(0)); - c = _mm_add_epi64(c, onezero); - } -#else - unsigned mask = _mm_movemask_ps( _mm_castsi128_ps(_mm_cmpeq_epi32(c, _mm_setzero_si128()))); - // The low two bits of mask are 11 iff the low 64 bits of - // c are zero. - if( R123_BUILTIN_EXPECT((mask&0x3) == 0x3, 0) ){ - __m128i onezero = _mm_set_epi64x(1,0); - c = _mm_add_epi64(c, onezero); - } -#endif - return v; -} - -R123_STATIC_INLINE r123m128i& operator+=(r123m128i& lhs, R123_ULONG_LONG n){ - __m128i c = lhs.m; - __m128i incr128 = _mm_set_epi64x(0, n); - c = _mm_add_epi64(c, incr128); - // return c; // NO CARRY! 
- - int64_t lo64 = _mm_extract_lo64(c); - if((uint64_t)lo64 < n) - c = _mm_add_epi64(c, _mm_set_epi64x(1,0)); - lhs.m = c; - return lhs; -} - -// We need this one because it's present, but never used in r123array1xm128i::incr -R123_STATIC_INLINE bool operator<=(R123_ULONG_LONG, const r123m128i &){ - throw std::runtime_error("operator<=(unsigned long long, r123m128i) is unimplemented.");} - -// The comparisons aren't implemented, but if we leave them out, and -// somebody writes, e.g., M1 < M2, the compiler will do an implicit -// conversion through void*. Sigh... -R123_STATIC_INLINE bool operator<(const r123m128i&, const r123m128i&){ - throw std::runtime_error("operator<(r123m128i, r123m128i) is unimplemented.");} -R123_STATIC_INLINE bool operator<=(const r123m128i&, const r123m128i&){ - throw std::runtime_error("operator<=(r123m128i, r123m128i) is unimplemented.");} -R123_STATIC_INLINE bool operator>(const r123m128i&, const r123m128i&){ - throw std::runtime_error("operator>(r123m128i, r123m128i) is unimplemented.");} -R123_STATIC_INLINE bool operator>=(const r123m128i&, const r123m128i&){ - throw std::runtime_error("operator>=(r123m128i, r123m128i) is unimplemented.");} - -R123_STATIC_INLINE bool operator==(const r123m128i &lhs, const r123m128i &rhs){ - return 0xf==_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(lhs, rhs))); } -R123_STATIC_INLINE bool operator!=(const r123m128i &lhs, const r123m128i &rhs){ - return !(lhs==rhs);} -R123_STATIC_INLINE bool operator==(R123_ULONG_LONG lhs, const r123m128i &rhs){ - r123m128i LHS; LHS.m=_mm_set_epi64x(0, lhs); return LHS == rhs; } -R123_STATIC_INLINE bool operator!=(R123_ULONG_LONG lhs, const r123m128i &rhs){ - return !(lhs==rhs);} -R123_STATIC_INLINE std::ostream& operator<<(std::ostream& os, const r123m128i& m){ - union{ - uint64_t u64[2]; - __m128i m; - }u; - _mm_storeu_si128(&u.m, m.m); - return os << u.u64[0] << " " << u.u64[1]; -} - -R123_STATIC_INLINE std::istream& operator>>(std::istream& is, r123m128i& m){ - 
uint64_t u64[2]; - is >> u64[0] >> u64[1]; - m.m = _mm_set_epi64x(u64[1], u64[0]); - return is; -} - -template<typename T> inline T assemble_from_u32(uint32_t *p32); // forward declaration - -template <> -inline r123m128i assemble_from_u32<r123m128i>(uint32_t *p32){ - r123m128i ret; - ret.m = _mm_set_epi32(p32[3], p32[2], p32[1], p32[0]); - return ret; -} - -#else - -typedef struct { - __m128i m; -} r123m128i; - -#endif /* __cplusplus */ - -#else /* !R123_USE_SSE */ -R123_STATIC_INLINE int haveAESNI(){ - return 0; -} -#endif /* R123_USE_SSE */ - -#endif /* _Random123_sse_dot_h__ */ diff --git a/src/Random123/features/sunprofeatures.h b/src/Random123/features/sunprofeatures.h deleted file mode 100644 index c9cdc00f5e8f970898ae577b14fa910ceb135a91..0000000000000000000000000000000000000000 --- a/src/Random123/features/sunprofeatures.h +++ /dev/null @@ -1,172 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -#ifndef __sunprofeatures_dot_hpp -#define __sunprofeatures_dot_hpp - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE static inline -#endif - -#ifndef R123_FORCE_INLINE -#define R123_FORCE_INLINE(decl) decl -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#include <assert.h> -#define R123_ASSERT(x) assert(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) expr -#endif - -// The basic idiom is: -// #ifndef R123_SOMETHING -// #if some condition -// #define R123_SOMETHING 1 -// #else -// #define R123_SOMETHING 0 -// #endif -// #endif -// This idiom allows an external user to override any decision -// in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHINE=0 - -// An alternative idiom is: -// #ifndef R123_SOMETHING -// #define R123_SOMETHING (some boolean expression) -// #endif -// where the boolean expression might contain previously-defined R123_SOMETHING_ELSE -// pp-symbols. 
- -#ifndef R123_USE_AES_NI -#define R123_USE_AES_NI 0 -#endif - -#ifndef R123_USE_SSE4_2 -#define R123_USE_SSE4_2 0 -#endif - -#ifndef R123_USE_SSE4_1 -#define R123_USE_SSE4_1 0 -#endif - -#ifndef R123_USE_SSE -#define R123_USE_SSE 0 -#endif - -#ifndef R123_USE_AES_OPENSSL -#define R123_USE_AES_OPENSSL 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_ASM_GNU -#define R123_USE_ASM_GNU 0 -#endif - -#ifndef R123_USE_CPUID_MSVC -#define R123_USE_CPUID_MSVC 0 -#endif - -#ifndef R123_USE_X86INTRIN_H -#define R123_USE_X86INTRIN_H 0 -#endif - -#ifndef R123_USE_IA32INTRIN_H -#define R123_USE_IA32INTRIN_H 0 -#endif - -#ifndef R123_USE_XMMINTRIN_H -#define R123_USE_XMMINTRIN_H 0 -#endif - -#ifndef R123_USE_EMMINTRIN_H -#define R123_USE_EMMINTRIN_H 0 -#endif - -#ifndef R123_USE_SMMINTRIN_H -#define R123_USE_SMMINTRIN_H 0 -#endif - -#ifndef R123_USE_WMMINTRIN_H -#define R123_USE_WMMINTRIN_H 0 -#endif - -#ifndef R123_USE_INTRIN_H -#define R123_USE_INTRIN_H 0 -#endif - -#ifndef R123_USE_MULHILO16_ASM -#define R123_USE_MULHILO16_ASM 0 -#endif - -#ifndef R123_USE_MULHILO32_ASM -#define R123_USE_MULHILO32_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 0 -#endif - -#ifndef R123_USE_PHILOX_64BIT -#define R123_USE_PHILOX_64BIT 0 -#endif - -#ifndef __STDC_CONSTANT_MACROS -#define __STDC_CONSTANT_MACROS -#endif -#include <stdint.h> -#ifndef UINT64_C -#error UINT64_C not defined. 
You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> -#endif - -// If you add something, it must go in all the other XXfeatures.hpp -// and in ../ut_features.cpp -#endif diff --git a/src/Random123/features/xlcfeatures.h b/src/Random123/features/xlcfeatures.h deleted file mode 100644 index a5c8412a447dabff245c8d56f13c402992fb21fa..0000000000000000000000000000000000000000 --- a/src/Random123/features/xlcfeatures.h +++ /dev/null @@ -1,202 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -Copyright (c) 2013, Los Alamos National Security, LLC -All rights reserved. - -Copyright 2013. Los Alamos National Security, LLC. This software was produced -under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National -Laboratory (LANL), which is operated by Los Alamos National Security, LLC for -the U.S. Department of Energy. The U.S. Government has rights to use, -reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS -ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR -ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified -to produce derivative works, such modified software should be clearly marked, -so as not to confuse it with the version available from LANL. -*/ -#ifndef __xlcfeatures_dot_hpp -#define __xlcfeatures_dot_hpp - -#if !defined(__x86_64__) && !defined(__i386__) && !defined(__powerpc__) -# error "This code has only been tested on x86 and PowerPC platforms." -#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task> -{ /* maybe an unbalanced brace will terminate the compilation */ - /* Feel free to try the Random123 library on other architectures by changing - the conditions that reach this error, but you should consider it a - porting exercise and expect to encounter bugs and deficiencies. - Please let the authors know of any successes (or failures). */ -#endif - -#ifdef __cplusplus -/* builtins are automatically available to xlc. To use them with xlc++, - one must include builtins.h. 
c.f - http://publib.boulder.ibm.com/infocenter/cellcomp/v101v121/index.jsp?topic=/com.ibm.xlcpp101.cell.doc/compiler_ref/compiler_builtins.html -*/ -#include <builtins.h> -#endif - -#ifndef R123_STATIC_INLINE -#define R123_STATIC_INLINE static inline -#endif - -#ifndef R123_FORCE_INLINE -#define R123_FORCE_INLINE(decl) decl __attribute__((__always_inline__)) -#endif - -#ifndef R123_CUDA_DEVICE -#define R123_CUDA_DEVICE -#endif - -#ifndef R123_ASSERT -#include <assert.h> -#define R123_ASSERT(x) assert(x) -#endif - -#ifndef R123_BUILTIN_EXPECT -#define R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely) -#endif - -#ifndef R123_USE_AES_NI -#define R123_USE_AES_NI 0 -#endif - -#ifndef R123_USE_SSE4_2 -#define R123_USE_SSE4_2 0 -#endif - -#ifndef R123_USE_SSE4_1 -#define R123_USE_SSE4_1 0 -#endif - -#ifndef R123_USE_SSE -#define R123_USE_SSE 0 -#endif - -#ifndef R123_USE_AES_OPENSSL -/* There isn't really a good way to tell at compile time whether - openssl is available. Without a pre-compilation configure-like - tool, it's less error-prone to guess that it isn't available. 
Add - -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to - play with openssl */ -#define R123_USE_AES_OPENSSL 0 -#endif - -#ifndef R123_USE_GNU_UINT128 -#define R123_USE_GNU_UINT128 0 -#endif - -#ifndef R123_USE_ASM_GNU -#define R123_USE_ASM_GNU 1 -#endif - -#ifndef R123_USE_CPUID_MSVC -#define R123_USE_CPUID_MSVC 0 -#endif - -#ifndef R123_USE_X86INTRIN_H -#define R123_USE_X86INTRIN_H 0 -#endif - -#ifndef R123_USE_IA32INTRIN_H -#define R123_USE_IA32INTRIN_H 0 -#endif - -#ifndef R123_USE_XMMINTRIN_H -#define R123_USE_XMMINTRIN_H 0 -#endif - -#ifndef R123_USE_EMMINTRIN_H -#define R123_USE_EMMINTRIN_H 0 -#endif - -#ifndef R123_USE_SMMINTRIN_H -#define R123_USE_SMMINTRIN_H 0 -#endif - -#ifndef R123_USE_WMMINTRIN_H -#define R123_USE_WMMINTRIN_H 0 -#endif - -#ifndef R123_USE_INTRIN_H -#ifdef __ABM__ -#define R123_USE_INTRIN_H 1 -#else -#define R123_USE_INTRIN_H 0 -#endif -#endif - -#ifndef R123_USE_MULHILO32_ASM -#define R123_USE_MULHILO32_ASM 0 -#endif - -#ifndef R123_USE_MULHILO64_MULHI_INTRIN -#define R123_USE_MULHILO64_MULHI_INTRIN (defined(__powerpc64__)) -#endif - -#ifndef R123_MULHILO64_MULHI_INTRIN -#define R123_MULHILO64_MULHI_INTRIN __mulhdu -#endif - -#ifndef R123_USE_MULHILO32_MULHI_INTRIN -#define R123_USE_MULHILO32_MULHI_INTRIN 0 -#endif - -#ifndef R123_MULHILO32_MULHI_INTRIN -#define R123_MULHILO32_MULHI_INTRIN __mulhwu -#endif - -#ifndef R123_USE_MULHILO64_ASM -#define R123_USE_MULHILO64_ASM (defined(__powerpc64__) && !(R123_USE_MULHILO64_MULHI_INTRIN)) -#endif - -#ifndef R123_USE_MULHILO64_MSVC_INTRIN -#define R123_USE_MULHILO64_MSVC_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_CUDA_INTRIN -#define R123_USE_MULHILO64_CUDA_INTRIN 0 -#endif - -#ifndef R123_USE_MULHILO64_OPENCL_INTRIN -#define R123_USE_MULHILO64_OPENCL_INTRIN 0 -#endif - -#ifndef __STDC_CONSTANT_MACROS -#define __STDC_CONSTANT_MACROS -#endif -#include <stdint.h> -#ifndef UINT64_C -#error UINT64_C not defined. 
You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> -#endif - -/* If you add something, it must go in all the other XXfeatures.hpp - and in ../ut_features.cpp */ -#endif diff --git a/src/Random123/threefry.h b/src/Random123/threefry.h deleted file mode 100644 index c01fc7f05ed2b649849ef71b8363210f318d4cac..0000000000000000000000000000000000000000 --- a/src/Random123/threefry.h +++ /dev/null @@ -1,866 +0,0 @@ -/* -Copyright 2010-2011, D. E. Shaw Research. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name of D. E. Shaw Research nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ -#ifndef _threefry_dot_h_ -#define _threefry_dot_h_ -#include "features/compilerfeatures.h" -#include "array.h" - -/** \cond HIDDEN_FROM_DOXYGEN */ -/* Significant parts of this file were copied from - from: - Skein_FinalRnd/ReferenceImplementation/skein.h - Skein_FinalRnd/ReferenceImplementation/skein_block.c - - in http://csrc.nist.gov/groups/ST/hash/sha-3/Round3/documents/Skein_FinalRnd.zip - - This file has been modified so that it may no longer perform its originally - intended function. If you're looking for a Skein or Threefish source code, - please consult the original file. - - The original file had the following header: -************************************************************************** -** -** Interface declarations and internal definitions for Skein hashing. -** -** Source code author: Doug Whiting, 2008. -** -** This algorithm and source code is released to the public domain. -** -*************************************************************************** - -*/ - -/* See comment at the top of philox.h for the macro pre-process - strategy. */ - -/* Rotation constants: */ -enum r123_enum_threefry64x4 { - /* These are the R_256 constants from the Threefish reference sources - with names changed to R_64x4... */ - R_64x4_0_0=14, R_64x4_0_1=16, - R_64x4_1_0=52, R_64x4_1_1=57, - R_64x4_2_0=23, R_64x4_2_1=40, - R_64x4_3_0= 5, R_64x4_3_1=37, - R_64x4_4_0=25, R_64x4_4_1=33, - R_64x4_5_0=46, R_64x4_5_1=12, - R_64x4_6_0=58, R_64x4_6_1=22, - R_64x4_7_0=32, R_64x4_7_1=32 -}; - -enum r123_enum_threefry64x2 { - /* - // Output from skein_rot_search: (srs64_B64-X1000) - // Random seed = 1. BlockSize = 128 bits. sampleCnt = 1024. rounds = 8, minHW_or=57 - // Start: Tue Mar 1 10:07:48 2011 - // rMin = 0.136. #0325[*15] [CRC=455A682F. hw_OR=64. cnt=16384. 
blkSize= 128].format - */ - R_64x2_0_0=16, - R_64x2_1_0=42, - R_64x2_2_0=12, - R_64x2_3_0=31, - R_64x2_4_0=16, - R_64x2_5_0=32, - R_64x2_6_0=24, - R_64x2_7_0=21 - /* 4 rounds: minHW = 4 [ 4 4 4 4 ] - // 5 rounds: minHW = 8 [ 8 8 8 8 ] - // 6 rounds: minHW = 16 [ 16 16 16 16 ] - // 7 rounds: minHW = 32 [ 32 32 32 32 ] - // 8 rounds: minHW = 64 [ 64 64 64 64 ] - // 9 rounds: minHW = 64 [ 64 64 64 64 ] - //10 rounds: minHW = 64 [ 64 64 64 64 ] - //11 rounds: minHW = 64 [ 64 64 64 64 ] */ -}; - -enum r123_enum_threefry32x4 { - /* Output from skein_rot_search: (srs-B128-X5000.out) - // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28 - // Start: Mon Aug 24 22:41:36 2009 - // ... - // rMin = 0.472. #0A4B[*33] [CRC=DD1ECE0F. hw_OR=31. cnt=16384. blkSize= 128].format */ - R_32x4_0_0=10, R_32x4_0_1=26, - R_32x4_1_0=11, R_32x4_1_1=21, - R_32x4_2_0=13, R_32x4_2_1=27, - R_32x4_3_0=23, R_32x4_3_1= 5, - R_32x4_4_0= 6, R_32x4_4_1=20, - R_32x4_5_0=17, R_32x4_5_1=11, - R_32x4_6_0=25, R_32x4_6_1=10, - R_32x4_7_0=18, R_32x4_7_1=20 - - /* 4 rounds: minHW = 3 [ 3 3 3 3 ] - // 5 rounds: minHW = 7 [ 7 7 7 7 ] - // 6 rounds: minHW = 12 [ 13 12 13 12 ] - // 7 rounds: minHW = 22 [ 22 23 22 23 ] - // 8 rounds: minHW = 31 [ 31 31 31 31 ] - // 9 rounds: minHW = 32 [ 32 32 32 32 ] - //10 rounds: minHW = 32 [ 32 32 32 32 ] - //11 rounds: minHW = 32 [ 32 32 32 32 ] */ - -}; - -enum r123_enum_threefry32x2 { - /* Output from skein_rot_search (srs32x2-X5000.out) - // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28 - // Start: Tue Jul 12 11:11:33 2011 - // rMin = 0.334. #0206[*07] [CRC=1D9765C0. hw_OR=32. cnt=16384. 
blkSize= 64].format */ - R_32x2_0_0=13, - R_32x2_1_0=15, - R_32x2_2_0=26, - R_32x2_3_0= 6, - R_32x2_4_0=17, - R_32x2_5_0=29, - R_32x2_6_0=16, - R_32x2_7_0=24 - - /* 4 rounds: minHW = 4 [ 4 4 4 4 ] - // 5 rounds: minHW = 6 [ 6 8 6 8 ] - // 6 rounds: minHW = 9 [ 9 12 9 12 ] - // 7 rounds: minHW = 16 [ 16 24 16 24 ] - // 8 rounds: minHW = 32 [ 32 32 32 32 ] - // 9 rounds: minHW = 32 [ 32 32 32 32 ] - //10 rounds: minHW = 32 [ 32 32 32 32 ] - //11 rounds: minHW = 32 [ 32 32 32 32 ] */ - }; - -enum r123_enum_threefry_wcnt { - WCNT2=2, - WCNT4=4 -}; -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint64_t RotL_64(uint64_t x, unsigned int N)); -R123_CUDA_DEVICE R123_STATIC_INLINE uint64_t RotL_64(uint64_t x, unsigned int N) -{ - return (x << (N & 63)) | (x >> ((64-N) & 63)); -} - -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint32_t RotL_32(uint32_t x, unsigned int N)); -R123_CUDA_DEVICE R123_STATIC_INLINE uint32_t RotL_32(uint32_t x, unsigned int N) -{ - return (x << (N & 31)) | (x >> ((32-N) & 31)); -} - -#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((uint64_t) (hi32)) << 32)) -#define SKEIN_KS_PARITY64 SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22) -#define SKEIN_KS_PARITY32 0x1BD11BDA - -#ifndef THREEFRY2x32_DEFAULT_ROUNDS -#define THREEFRY2x32_DEFAULT_ROUNDS 20 -#endif - -#ifndef THREEFRY2x64_DEFAULT_ROUNDS -#define THREEFRY2x64_DEFAULT_ROUNDS 20 -#endif - -#ifndef THREEFRY4x32_DEFAULT_ROUNDS -#define THREEFRY4x32_DEFAULT_ROUNDS 20 -#endif - -#ifndef THREEFRY4x64_DEFAULT_ROUNDS -#define THREEFRY4x64_DEFAULT_ROUNDS 20 -#endif - -#define _threefry2x_tpl(W) \ -typedef struct r123array2x##W threefry2x##W##_ctr_t; \ -typedef struct r123array2x##W threefry2x##W##_key_t; \ -typedef struct r123array2x##W threefry2x##W##_ukey_t; \ -R123_CUDA_DEVICE R123_STATIC_INLINE threefry2x##W##_key_t threefry2x##W##keyinit(threefry2x##W##_ukey_t uk) { return uk; } \ -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, 
threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \ -R123_CUDA_DEVICE R123_STATIC_INLINE \ -threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \ - threefry2x##W##_ctr_t X; \ - uint##W##_t ks[2+1]; \ - int i; /* avoid size_t to avoid need for stddef.h */ \ - R123_ASSERT(Nrounds<=32); \ - ks[2] = SKEIN_KS_PARITY##W; \ - for (i=0;i < 2; i++) \ - { \ - ks[i] = k.v[i]; \ - X.v[i] = in.v[i]; \ - ks[2] ^= k.v[i]; \ - } \ - \ - /* Insert initial key before round 0 */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; \ - \ - if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>3){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; \ - X.v[1] += 1; /* X.v[2-1] += r */ \ - } \ - if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>7){ \ - /* InjectKey(r=2) */ \ - X.v[0] += ks[2]; X.v[1] += ks[0]; \ - X.v[1] += 2; \ - } \ - if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>11){ 
\ - /* InjectKey(r=3) */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; \ - X.v[1] += 3; \ - } \ - if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>15){ \ - /* InjectKey(r=4) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; \ - X.v[1] += 4; \ - } \ - if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>19){ \ - /* InjectKey(r=5) */ \ - X.v[0] += ks[2]; X.v[1] += ks[0]; \ - X.v[1] += 5; \ - } \ - if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>23){ \ - /* InjectKey(r=6) */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; \ - X.v[1] += 6; \ - } \ - if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \ - 
if(Nrounds>27){ \ - /* InjectKey(r=7) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; \ - X.v[1] += 7; \ - } \ - if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \ - if(Nrounds>31){ \ - /* InjectKey(r=8) */ \ - X.v[0] += ks[2]; X.v[1] += ks[0]; \ - X.v[1] += 8; \ - } \ - return X; \ -} \ - /** @ingroup ThreefryNxW */ \ -enum r123_enum_threefry2x##W { threefry2x##W##_rounds = THREEFRY2x##W##_DEFAULT_ROUNDS }; \ -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \ -R123_CUDA_DEVICE R123_STATIC_INLINE \ -threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \ - return threefry2x##W##_R(threefry2x##W##_rounds, in, k); \ -} - - -#define _threefry4x_tpl(W) \ -typedef struct r123array4x##W threefry4x##W##_ctr_t; \ -typedef struct r123array4x##W threefry4x##W##_key_t; \ -typedef struct r123array4x##W threefry4x##W##_ukey_t; \ -R123_CUDA_DEVICE R123_STATIC_INLINE threefry4x##W##_key_t threefry4x##W##keyinit(threefry4x##W##_ukey_t uk) { return uk; } \ -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \ -R123_CUDA_DEVICE R123_STATIC_INLINE \ -threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \ - threefry4x##W##_ctr_t X; \ - uint##W##_t ks[4+1]; \ - int i; /* avoid size_t to avoid need for stddef.h */ \ - R123_ASSERT(Nrounds<=72); \ - ks[4] = SKEIN_KS_PARITY##W; \ - for (i=0;i < 4; i++) \ - { \ - ks[i] = k.v[i]; \ - X.v[i] = in.v[i]; \ - 
ks[4] ^= k.v[i]; \ - } \ - \ - /* Insert initial key before round 0 */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \ - \ - if(Nrounds>0){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>1){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>2){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>3){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>3){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \ - X.v[4-1] += 1; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>4){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>5){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>6){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>7){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>7){ \ - /* InjectKey(r=2) */ \ - X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \ - X.v[4-1] += 2; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>8){ 
\ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>9){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>10){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>11){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>11){ \ - /* InjectKey(r=3) */ \ - X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \ - X.v[4-1] += 3; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>12){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>13){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>14){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>15){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>15){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \ - X.v[4-1] += 4; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>16){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= 
X.v[2]; \ - } \ - if(Nrounds>17){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>18){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>19){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>19){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \ - X.v[4-1] += 5; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>20){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>21){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>22){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>23){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>23){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \ - X.v[4-1] += 6; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>24){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>25){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = 
RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>26){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>27){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>27){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \ - X.v[4-1] += 7; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>28){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>29){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>30){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>31){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>31){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \ - X.v[4-1] += 8; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>32){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>33){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>34){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= 
X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>35){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>35){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \ - X.v[4-1] += 9; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>36){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>37){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>38){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>39){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>39){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \ - X.v[4-1] += 10; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>40){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>41){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>42){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>43){ \ - X.v[0] += X.v[3]; X.v[3] = 
RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>43){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \ - X.v[4-1] += 11; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>44){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>45){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>46){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>47){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>47){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \ - X.v[4-1] += 12; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>48){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>49){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>50){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>51){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - 
if(Nrounds>51){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \ - X.v[4-1] += 13; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>52){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>53){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>54){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>55){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>55){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \ - X.v[4-1] += 14; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>56){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>57){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>58){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>59){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>59){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \ - X.v[4-1] += 15; /* X.v[WCNT4-1] += r 
*/ \ - } \ - \ - if(Nrounds>60){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>61){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>62){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>63){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>63){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \ - X.v[4-1] += 16; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>64){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>65){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>66){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>67){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>67){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \ - X.v[4-1] += 17; /* X.v[WCNT4-1] += r */ \ - } \ - \ - if(Nrounds>68){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = 
RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>69){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>70){ \ - X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \ - X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \ - } \ - if(Nrounds>71){ \ - X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \ - X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \ - } \ - if(Nrounds>71){ \ - /* InjectKey(r=1) */ \ - X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \ - X.v[4-1] += 18; /* X.v[WCNT4-1] += r */ \ - } \ - \ - return X; \ -} \ - /** @ingroup ThreefryNxW */ \ -enum r123_enum_threefry4x##W { threefry4x##W##_rounds = THREEFRY4x##W##_DEFAULT_ROUNDS }; \ -R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \ -R123_CUDA_DEVICE R123_STATIC_INLINE \ -threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \ - return threefry4x##W##_R(threefry4x##W##_rounds, in, k); \ -} -/** \endcond */ - -_threefry2x_tpl(64) -_threefry2x_tpl(32) -_threefry4x_tpl(64) -_threefry4x_tpl(32) - -/* gcc4.5 and 4.6 seem to optimize a macro-ized threefryNxW better - than a static inline function. Why? 
*/ -#define threefry2x32(c,k) threefry2x32_R(threefry2x32_rounds, c, k) -#define threefry4x32(c,k) threefry4x32_R(threefry4x32_rounds, c, k) -#define threefry2x64(c,k) threefry2x64_R(threefry2x64_rounds, c, k) -#define threefry4x64(c,k) threefry4x64_R(threefry4x64_rounds, c, k) - -#ifdef SWIFT_DEBUG_CHECKS -#ifdef __cplusplus -/** \cond HIDDEN_FROM_DOXYGEN */ -#define _threefryNxWclass_tpl(NxW) \ -namespace r123{ \ -template<unsigned int R> \ - struct Threefry##NxW##_R{ \ - typedef threefry##NxW##_ctr_t ctr_type; \ - typedef threefry##NxW##_key_t key_type; \ - typedef threefry##NxW##_key_t ukey_type; \ - static const unsigned int rounds=R; \ - inline R123_CUDA_DEVICE R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key)){ \ - R123_STATIC_ASSERT(R<=72, "threefry is only unrolled up to 72 rounds\n"); \ - return threefry##NxW##_R(R, ctr, key); \ - } \ -}; \ - typedef Threefry##NxW##_R<threefry##NxW##_rounds> Threefry##NxW; \ -} // namespace r123 - -/** \endcond */ - -_threefryNxWclass_tpl(2x32) -_threefryNxWclass_tpl(4x32) -_threefryNxWclass_tpl(2x64) -_threefryNxWclass_tpl(4x64) - -/* The _tpl macros don't quite work to do string-pasting inside comments. - so we just write out the boilerplate documentation four times... */ - -/** -@defgroup ThreefryNxW Threefry Classes and Typedefs - -The ThreefryNxW classes export the member functions, typedefs and -operator overloads required by a @ref CBRNG "CBRNG" class. - -As described in -<a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers: As Easy as 1, 2, 3</i> </a>, -the Threefry family is closely related to the Threefish block cipher from -<a href="http://www.skein-hash.info/"> Skein Hash Function</a>. -Threefry is \b not suitable for cryptographic use. - -Threefry uses integer addition, bitwise rotation, xor and permutation of words to randomize its output. 
- -@class r123::Threefry2x32_R -@ingroup ThreefryNxW - -exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. - -The template argument, ROUNDS, is the number of times the Threefry round -function will be applied. - -As of September 2011, the authors know of no statistical flaws with -ROUNDS=13 or more for Threefry2x32. - -@typedef r123::Threefry2x32 -@ingroup ThreefryNxW - Threefry2x32 is equivalent to Threefry2x32_R<20>. With 20 rounds, - Threefry2x32 has a considerable safety margin over the minimum number - of rounds with no known statistical flaws, but still has excellent - performance. - -@class r123::Threefry2x64_R -@ingroup ThreefryNxW - -exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. - -The template argument, ROUNDS, is the number of times the Threefry round -function will be applied. - -In November 2011, the authors discovered that 13 rounds of -Threefry2x64 sequenced by strided, interleaved key and counter -increments failed a very long (longer than the default BigCrush -length) WeightDistrub test. At the same time, it was confirmed that -14 rounds passes much longer tests (up to 5x10^12 samples) of a -similar nature. The authors know of no statistical flaws with -ROUNDS=14 or more for Threefry2x64. - -@typedef r123::Threefry2x64 -@ingroup ThreefryNxW - Threefry2x64 is equivalent to Threefry2x64_R<20>. With 20 rounds, - Threefry2x64 has a considerable safety margin over the minimum number - of rounds with no known statistical flaws, but still has excellent - performance. - - - -@class r123::Threefry4x32_R -@ingroup ThreefryNxW - -exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. - -The template argument, ROUNDS, is the number of times the Threefry round -function will be applied. - -As of September 2011, the authors know of no statistical flaws with -ROUNDS=12 or more for Threefry4x32. 
- -@typedef r123::Threefry4x32 -@ingroup ThreefryNxW - Threefry4x32 is equivalent to Threefry4x32_R<20>. With 20 rounds, - Threefry4x32 has a considerable safety margin over the minimum number - of rounds with no known statistical flaws, but still has excellent - performance. - - - -@class r123::Threefry4x64_R -@ingroup ThreefryNxW - -exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class. - -The template argument, ROUNDS, is the number of times the Threefry round -function will be applied. - -As of September 2011, the authors know of no statistical flaws with -ROUNDS=12 or more for Threefry4x64. - -@typedef r123::Threefry4x64 -@ingroup ThreefryNxW - Threefry4x64 is equivalent to Threefry4x64_R<20>. With 20 rounds, - Threefry4x64 has a considerable safety margin over the minimum number - of rounds with no known statistical flaws, but still has excellent - performance. -*/ - -#endif -#endif - -#endif