Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
8cf40a42
Commit
8cf40a42
authored
7 years ago
by
James Willis
Browse files
Options
Downloads
Patches
Plain Diff
Don't require a mask for the first region of the Cubic Spline kernel.
parent
737a8605
Branches
Branches containing commit
Tags
Tags containing commit
1 merge request
!396
Avx512 fixes
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/kernel_hydro.h
+10
-22
10 additions, 22 deletions
src/kernel_hydro.h
with
10 additions
and
22 deletions
src/kernel_hydro.h
+
10
−
22
View file @
8cf40a42
...
@@ -467,10 +467,9 @@ __attribute__((always_inline)) INLINE static void kernel_deval_1_vec(
...
@@ -467,10 +467,9 @@ __attribute__((always_inline)) INLINE static void kernel_deval_1_vec(
w
->
v
=
vec_fma
(
x
.
v
,
w
->
v
,
wendland_const_c5
.
v
);
w
->
v
=
vec_fma
(
x
.
v
,
w
->
v
,
wendland_const_c5
.
v
);
#elif defined(CUBIC_SPLINE_KERNEL)
#elif defined(CUBIC_SPLINE_KERNEL)
vector
w2
,
dw_dx2
;
vector
w2
,
dw_dx2
;
mask_t
mask_reg1
,
mask_reg2
;
mask_t
mask_reg2
;
/* Form a mask for each part of the kernel. */
/* Form a mask for each part of the kernel. */
vec_create_mask
(
mask_reg1
,
vec_cmp_lt
(
x
.
v
,
cond
.
v
));
/* 0 < x < 0.5 */
vec_create_mask
(
mask_reg2
,
vec_cmp_gte
(
x
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
vec_create_mask
(
mask_reg2
,
vec_cmp_gte
(
x
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
/* Work out w for both regions of the kernel and combine the results together
/* Work out w for both regions of the kernel and combine the results together
...
@@ -573,11 +572,9 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec(
...
@@ -573,11 +572,9 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec(
#elif defined(CUBIC_SPLINE_KERNEL)
#elif defined(CUBIC_SPLINE_KERNEL)
vector
w_2
,
dw_dx_2
;
vector
w_2
,
dw_dx_2
;
vector
w2_2
,
dw_dx2_2
;
vector
w2_2
,
dw_dx2_2
;
mask_t
mask_reg1
,
mask_reg2
,
mask_reg1_v2
,
mask_reg2_v2
;
mask_t
mask_reg2
,
mask_reg2_v2
;
/* Form a mask for each part of the kernel. */
/* Form a mask for each part of the kernel. */
vec_create_mask
(
mask_reg1
,
vec_cmp_lt
(
x
.
v
,
cond
.
v
));
/* 0 < x < 0.5 */
vec_create_mask
(
mask_reg1_v2
,
vec_cmp_lt
(
x2
.
v
,
cond
.
v
));
/* 0 < x < 0.5 */
vec_create_mask
(
mask_reg2
,
vec_cmp_gte
(
x
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
vec_create_mask
(
mask_reg2
,
vec_cmp_gte
(
x
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
vec_create_mask
(
mask_reg2_v2
,
vec_cmp_gte
(
x2
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
vec_create_mask
(
mask_reg2_v2
,
vec_cmp_gte
(
x2
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
...
@@ -657,10 +654,9 @@ __attribute__((always_inline)) INLINE static void kernel_eval_W_vec(vector *u,
...
@@ -657,10 +654,9 @@ __attribute__((always_inline)) INLINE static void kernel_eval_W_vec(vector *u,
w
->
v
=
vec_fma
(
x
.
v
,
w
->
v
,
wendland_const_c5
.
v
);
w
->
v
=
vec_fma
(
x
.
v
,
w
->
v
,
wendland_const_c5
.
v
);
#elif defined(CUBIC_SPLINE_KERNEL)
#elif defined(CUBIC_SPLINE_KERNEL)
vector
w2
;
vector
w2
;
mask_t
mask_reg1
,
mask_reg2
;
mask_t
mask_reg2
;
/* Form a mask for each part of the kernel. */
/* Form a mask for each part of the kernel. */
vec_create_mask
(
mask_reg1
,
vec_cmp_lt
(
x
.
v
,
cond
.
v
));
/* 0 < x < 0.5 */
vec_create_mask
(
mask_reg2
,
vec_cmp_gte
(
x
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
vec_create_mask
(
mask_reg2
,
vec_cmp_gte
(
x
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
/* Work out w for both regions of the kernel and combine the results together
/* Work out w for both regions of the kernel and combine the results together
...
@@ -718,10 +714,9 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_vec(
...
@@ -718,10 +714,9 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_vec(
#elif defined(CUBIC_SPLINE_KERNEL)
#elif defined(CUBIC_SPLINE_KERNEL)
vector
dw_dx2
;
vector
dw_dx2
;
mask_t
mask_reg1
,
mask_reg2
;
mask_t
mask_reg2
;
/* Form a mask for each part of the kernel. */
/* Form a mask for each part of the kernel. */
vec_create_mask
(
mask_reg1
,
vec_cmp_lt
(
x
.
v
,
cond
.
v
));
/* 0 < x < 0.5 */
vec_create_mask
(
mask_reg2
,
vec_cmp_gte
(
x
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
vec_create_mask
(
mask_reg2
,
vec_cmp_gte
(
x
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
/* Work out w for both regions of the kernel and combine the results together
/* Work out w for both regions of the kernel and combine the results together
...
@@ -788,14 +783,12 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_force_2_vec(
...
@@ -788,14 +783,12 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_force_2_vec(
#elif defined(CUBIC_SPLINE_KERNEL)
#elif defined(CUBIC_SPLINE_KERNEL)
vector
dw_dx2
,
dw_dx2_2
;
vector
dw_dx2
,
dw_dx2_2
;
mask_t
mask_reg1
,
mask_reg2
;
mask_t
mask_reg2
;
mask_t
mask_reg1_2
,
mask_reg2_2
;
mask_t
mask_reg2_v2
;
/* Form a mask for each part of the kernel. */
/* Form a mask for each part of the kernel. */
vec_create_mask
(
mask_reg1
,
vec_cmp_lt
(
x
.
v
,
cond
.
v
));
/* 0 < x < 0.5 */
vec_create_mask
(
mask_reg1_2
,
vec_cmp_lt
(
x_2
.
v
,
cond
.
v
));
/* 0 < x < 0.5 */
vec_create_mask
(
mask_reg2
,
vec_cmp_gte
(
x
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
vec_create_mask
(
mask_reg2
,
vec_cmp_gte
(
x
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
vec_create_mask
(
mask_reg2_2
,
vec_cmp_gte
(
x_2
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
vec_create_mask
(
mask_reg2_v2
,
vec_cmp_gte
(
x_2
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
/* Work out w for both regions of the kernel and combine the results together
/* Work out w for both regions of the kernel and combine the results together
* using masks. */
* using masks. */
...
@@ -813,14 +806,9 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_force_2_vec(
...
@@ -813,14 +806,9 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_force_2_vec(
dw_dx2_2
.
v
=
vec_fma
(
dw_dx2_2
.
v
,
x_2
.
v
,
cubic_2_dwdx_const_c2
.
v
);
dw_dx2_2
.
v
=
vec_fma
(
dw_dx2_2
.
v
,
x_2
.
v
,
cubic_2_dwdx_const_c2
.
v
);
/* Mask out unneeded values. */
/* Mask out unneeded values. */
dw_dx
->
v
=
vec_and_mask
(
dw_dx
->
v
,
mask_reg1
);
dw_dx
->
v
=
vec_blend
(
mask_reg2
,
dw_dx
->
v
,
dw_dx2
.
v
);
dw_dx_2
->
v
=
vec_and_mask
(
dw_dx_2
->
v
,
mask_reg1_2
);
dw_dx_2
->
v
=
vec_blend
(
mask_reg2_v2
,
dw_dx_2
->
v
,
dw_dx2_2
.
v
);
dw_dx2
.
v
=
vec_and_mask
(
dw_dx2
.
v
,
mask_reg2
);
dw_dx2_2
.
v
=
vec_and_mask
(
dw_dx2_2
.
v
,
mask_reg2_2
);
/* Add both dwdx and dwdx2 together to form the complete result. */
dw_dx
->
v
=
vec_add
(
dw_dx
->
v
,
dw_dx2
.
v
);
dw_dx_2
->
v
=
vec_add
(
dw_dx_2
->
v
,
dw_dx2_2
.
v
);
#else
#else
#error "Vectorisation not supported for this kernel!!!"
#error "Vectorisation not supported for this kernel!!!"
#endif
#endif
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment