Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
7db83d29
Commit
7db83d29
authored
Apr 16, 2018
by
Matthieu Schaller
Browse files
Merge branch 'sse-support' into 'master'
SSE support See merge request
!519
parents
047ce843
a6ff82c2
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/vector.h
View file @
7db83d29
...
...
@@ -327,7 +327,9 @@
/* Lane construction: the arguments are forwarded to the intrinsic in reverse
 * order. */
#define vec_set(a, b, c, d) _mm_set_ps(d, c, b, a)
#define vec_dbl_set(a, b) _mm_set_pd(b, a)
/* Element-wise single-precision arithmetic. */
#define vec_add(a, b) _mm_add_ps(a, b)
/* Adds to a the bitwise AND of b with the mask's .v member. The mask argument
 * is parenthesised so that expressions such as *ptr select the member of the
 * mask itself. */
#define vec_mask_add(a, b, mask) vec_add(a, vec_and(b, (mask).v))
#define vec_sub(a, b) _mm_sub_ps(a, b)
/* Subtracts from a the bitwise AND of b with the mask's .v member. */
#define vec_mask_sub(a, b, mask) vec_sub(a, vec_and(b, (mask).v))
#define vec_mul(a, b) _mm_mul_ps(a, b)
#define vec_div(a, b) _mm_div_ps(a, b)
#define vec_sqrt(a) _mm_sqrt_ps(a)
...
...
@@ -340,9 +342,26 @@
/* NOTE(review): _mm_floor_ps is documented as an SSE4.1 intrinsic, yet it is
 * not guarded by HAVE_SSE4_1 here -- confirm it is only reached when SSE4.1
 * is available. */
#define vec_floor(a) _mm_floor_ps(a)
/* Element-wise comparisons, each forwarding to the matching SSE intrinsic. */
#define vec_cmp_gt(a, b) _mm_cmpgt_ps(a, b)
#define vec_cmp_lt(a, b) _mm_cmplt_ps(a, b)
/* Defined once, with the SSE compare; the _mm_cmp_ps(..., _CMP_LE_OQ) form is
 * an AVX intrinsic and does not belong in the SSE-only path. */
#define vec_cmp_lte(a, b) _mm_cmple_ps(a, b)
#define vec_cmp_gte(a, b) _mm_cmpge_ps(a, b)
/* Pack the sign bit of every lane of a into an integer. */
#define vec_cmp_result(a) _mm_movemask_ps(a)
/* Same as vec_cmp_result, applied to the .v member of a mask. */
#define vec_is_mask_true(a) _mm_movemask_ps((a).v)
/* Bitwise AND of two vectors. */
#define vec_and(a, b) _mm_and_ps(a, b)
/* Bitwise AND of the .v members of two masks. */
#define vec_mask_and(a, b) _mm_and_ps((a).v, (b).v)
/* Bitwise AND of a vector with the .v member of a mask. */
#define vec_and_mask(a, mask) _mm_and_ps(a, (mask).v)
/* Set every bit of the mask's .m member. */
#define vec_init_mask_true(mask) (mask).m = vec_setint1(0xFFFFFFFF)
/* Store a comparison result in the mask's .v member. */
#define vec_create_mask(mask, cond) (mask).v = (cond)
/* AND mask2 into mask1 in place (GNU statement expression). */
#define vec_combine_masks(mask1, mask2) \
  ({ (mask1).v = vec_mask_and(mask1, mask2); })
/* Clear the mask's .v member. */
#define vec_zero_mask(mask) (mask).v = vec_setzero()
/* Zero the last `pad` integer lanes of the mask. The loop counter has a
 * macro-private name so that a caller's own variable (e.g. `i`) appearing in
 * `pad` is not captured by the loop declaration. */
#define vec_pad_mask(mask, pad)                                        \
  for (int vec_pad_lane_ = VEC_SIZE - (pad); vec_pad_lane_ < VEC_SIZE; \
       vec_pad_lane_++)                                                \
  (mask).i[vec_pad_lane_] = 0
/* If SSE4.1 doesn't exist on architecture use alternative blend strategy.
 * The fallback computes (mask & b) | (~mask & a), i.e. it picks the bits of b
 * where the mask is set and the bits of a elsewhere.
 * NOTE(review): _mm_blendv_ps is documented to look only at the sign bit of
 * each mask lane, whereas the fallback uses every bit; the two agree when
 * each lane is all ones or all zeros -- confirm callers only pass such
 * masks. */
#ifdef HAVE_SSE4_1
#define vec_blend(mask, a, b) _mm_blendv_ps(a, b, (mask).v)
#else
#define vec_blend(mask, a, b) \
  _mm_or_ps(_mm_and_ps((mask).v, b), _mm_andnot_ps((mask).v, a))
#endif
/* Convert the two low single-precision lanes of a to double precision. */
#define vec_todbl_lo(a) _mm_cvtps_pd(a)
/* Convert the two high single-precision lanes of a to double precision:
 * _mm_movehl_ps(a, a) first brings the high pair down into the low pair. */
#define vec_todbl_hi(a) _mm_cvtps_pd(_mm_movehl_ps(a, a))
/* Narrow two double-precision vectors to single precision and pack them into
 * one vector, a's values in the low pair and b's in the high pair. */
#define vec_dbl_tofloat(a, b) _mm_movelh_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b))
...
...
@@ -364,10 +383,23 @@
a.v = _mm_hadd_ps(a.v, a.v); \
b += a.f[0] + a.f[1];
/* Performs a horizontal maximum on the vector and takes the maximum of the
 * result with a float, b. b is updated in place and is evaluated several
 * times, so it must be a side-effect-free lvalue. The loop counter has a
 * macro-private name so that a caller's own variable (e.g. `k`) used inside
 * a or b is not captured by the loop declaration. */
#define VEC_HMAX(a, b)                                      \
  {                                                         \
    for (int vec_hmax_lane_ = 0; vec_hmax_lane_ < VEC_SIZE; \
         vec_hmax_lane_++)                                  \
      (b) = max((b), (a).f[vec_hmax_lane_]);                \
  }
/* Create an FMA using vec_add and vec_mul if AVX2 is not present.
 * Only defined when vec_fma has not been defined already; expands to a
 * separate multiply and add computing a * b + c. */
#ifndef vec_fma
#define vec_fma(a, b, c) vec_add(vec_mul(a, b), c)
#endif
/* Create a negated FMA using vec_sub and vec_mul if AVX2 is not present.
 * Only defined when vec_fnma has not been defined already; expands to a
 * separate multiply and subtract computing c - a * b. */
#ifndef vec_fnma
#define vec_fnma(a, b, c) vec_sub(c, vec_mul(a, b))
#endif
#else
#define VEC_SIZE 4
#endif
/* HAVE_SSE2 */
...
...
tests/tolerance_125_perturbed.dat
View file @
7db83d29
# ID pos_x pos_y pos_z v_x v_y v_z h rho div_v S u P c a_x a_y a_z h_dt v_sig dS/dt du/dt
0 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4
0 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 2e-3 2e-3 2e-3 1e-4 1e-4 1e-4 1e-4
0 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 1e-4 2.3e-3 2e-3 2e-3 1e-4 1e-4 1e-4 1e-4
0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-4 2e-4 2e-4 1e-6 1e-6 1e-6 1e-6
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment