Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Merge requests
!519
SSE support
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
SSE support
sse-support
into
master
Overview
0
Commits
3
Changes
2
Merged
James Willis
requested to merge
sse-support
into
master
7 years ago
Overview
0
Commits
3
Changes
2
Expand
Update to support SSE instructions
0
0
Merge request reports
Compare
master
version 1
ebe7d396
7 years ago
master (base)
and
latest version
latest version
a6ff82c2
3 commits,
7 years ago
version 1
ebe7d396
2 commits,
7 years ago
2 files
+
34
−
2
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
Files
2
Search (e.g. *.vue) (Ctrl+P)
src/vector.h
+
33
−
1
Options
@@ -327,7 +327,9 @@
/* Build a float vector from four scalars. The arguments are handed to
 * _mm_set_ps in reverse order, so that a lands in the lowest element. */
#define vec_set(a, b, c, d) _mm_set_ps(d, c, b, a)
/* Build a double vector from two scalars, again reversed so that a is the
 * lowest element. */
#define vec_dbl_set(a, b) _mm_set_pd(b, a)
/* Element-wise a + b. */
#define vec_add(a, b) _mm_add_ps(a, b)
/* Element-wise a + (b & mask.v): lanes whose mask bits are all zero add 0,
 * leaving a unchanged there. mask must expose a .v vector member. */
#define vec_mask_add(a, b, mask) vec_add(a, vec_and(b, mask.v))
/* Element-wise a - b. */
#define vec_sub(a, b) _mm_sub_ps(a, b)
/* Element-wise a - (b & mask.v): lanes whose mask bits are all zero subtract
 * 0, leaving a unchanged there. */
#define vec_mask_sub(a, b, mask) vec_sub(a, vec_and(b, mask.v))
/* Element-wise a * b. */
#define vec_mul(a, b) _mm_mul_ps(a, b)
/* Element-wise a / b. */
#define vec_div(a, b) _mm_div_ps(a, b)
/* Element-wise square root of a. */
#define vec_sqrt(a) _mm_sqrt_ps(a)
@@ -340,9 +342,26 @@
/* Element-wise floor of a.
 * NOTE(review): _mm_floor_ps is an SSE4.1 intrinsic; confirm it is available
 * on targets that only provide SSE2. */
#define vec_floor(a) _mm_floor_ps(a)
/* Element-wise comparisons. Each returns a vector whose lanes are all-ones
 * where the predicate holds and all-zeros where it does not. */
#define vec_cmp_gt(a, b) _mm_cmpgt_ps(a, b)
#define vec_cmp_lt(a, b) _mm_cmplt_ps(a, b)
/* Use the SSE intrinsic here: _mm_cmp_ps with _CMP_LE_OQ is only provided by
 * AVX and must not be used (or defined a second time) in the SSE branch. */
#define vec_cmp_lte(a, b) _mm_cmple_ps(a, b)
#define vec_cmp_gte(a, b) _mm_cmpge_ps(a, b)
/* Collapse a comparison result vector into an int via _mm_movemask_ps, which
 * packs the sign bit of each lane into the low bits of the result. */
#define vec_cmp_result(a) _mm_movemask_ps(a)
/* Same as vec_cmp_result but takes a mask object and reads its .v member;
 * the result is non-zero if any lane of the mask has its sign bit set. */
#define vec_is_mask_true(a) _mm_movemask_ps(a.v)
/* Bitwise AND of two float vectors. */
#define vec_and(a, b) _mm_and_ps(a, b)
/* Bitwise AND of the .v members of two mask objects. */
#define vec_mask_and(a, b) _mm_and_ps(a.v, b.v)
/* Bitwise AND of a float vector with the .v member of a mask object. */
#define vec_and_mask(a, mask) _mm_and_ps(a, mask.v)
/* Assign vec_setint1(0xFFFFFFFF) to the .m member of mask.
 * presumably this sets every bit of every lane; vec_setint1 is defined
 * elsewhere -- TODO confirm. */
#define vec_init_mask_true(mask) mask.m = vec_setint1(0xFFFFFFFF)
/* Store a comparison result (cond) in the .v member of mask. */
#define vec_create_mask(mask, cond) mask.v = cond
/* AND mask2 into mask1 in place. Uses a GNU statement expression, so it
 * requires a compiler supporting that extension. */
#define vec_combine_masks(mask1, mask2) \
({ mask1.v = vec_mask_and(mask1, mask2); })
/* Set the .v member of mask to vec_setzero() (defined elsewhere). */
#define vec_zero_mask(mask) mask.v = vec_setzero()
/* Zero the last pad integer lanes (mask.i[VEC_SIZE - pad .. VEC_SIZE - 1]).
 * Expands to a bare for statement that declares its own loop variable i in
 * the caller's scope; the caller supplies the terminating semicolon. */
#define vec_pad_mask(mask, pad) \
for (int i = VEC_SIZE - (pad); i < VEC_SIZE; i++) mask.i[i] = 0
/* Select between a and b under control of mask: lanes where the mask is set
 * take their value from b, the remaining lanes from a.
 * _mm_blendv_ps needs SSE4.1; without it the same selection is emulated with
 * bitwise and/andnot/or. The emulation works bit by bit, so it matches the
 * intrinsic when every mask lane is either all-ones or all-zeros. */
#ifdef HAVE_SSE4_1
#define vec_blend(mask, a, b) _mm_blendv_ps(a, b, mask.v)
#else
#define vec_blend(mask, a, b) \
  _mm_or_ps(_mm_andnot_ps(mask.v, a), _mm_and_ps(mask.v, b))
#endif
/* Convert the two lowest floats of a to a vector of two doubles. */
#define vec_todbl_lo(a) _mm_cvtps_pd(a)
/* Convert the two highest floats of a to doubles: _mm_movehl_ps first moves
 * the upper pair into the lower half, which _mm_cvtps_pd then converts. */
#define vec_todbl_hi(a) _mm_cvtps_pd(_mm_movehl_ps(a, a))
/* Convert two double vectors back to one float vector: each is narrowed with
 * _mm_cvtpd_ps and the two lower halves are joined by _mm_movelh_ps, with a
 * supplying the low pair and b the high pair. */
#define vec_dbl_tofloat(a, b) _mm_movelh_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b))
@@ -364,10 +383,23 @@
a.v = _mm_hadd_ps(a.v, a.v); \
b += a.f[0] + a.f[1];
/* Performs a horizontal maximum on the vector and takes the maximum of the
 * result with a float, b.
 *
 * a must expose its lanes through an .f[] array member and b must be an
 * lvalue: it is updated in place by a scalar loop over all VEC_SIZE lanes.
 * The expansion is a brace-enclosed block that declares its own loop
 * variable k and relies on a max() defined elsewhere. */
#define VEC_HMAX(a, b) \
{ \
for (int k = 0; k < VEC_SIZE; k++) b = max(b, a.f[k]); \
}
/* Create an FMA using vec_add and vec_mul if AVX2 is not present.
 * Only defined when no vec_fma exists yet; computes a * b + c as a separate
 * multiply followed by an add. */
#ifndef vec_fma
#define vec_fma(a, b, c) vec_add(vec_mul(a, b), c)
#endif
/* Create a negated FMA using vec_sub and vec_mul if AVX2 is not present.
 * Only defined when no vec_fnma exists yet; computes c - a * b as a separate
 * multiply followed by a subtract. */
#ifndef vec_fnma
#define vec_fnma(a, b, c) vec_sub(c, vec_mul(a, b))
#endif
#else
#define VEC_SIZE 4
#endif
/* HAVE_SSE2 */
Loading