Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
48291b4d
Commit
48291b4d
authored
May 16, 2017
by
James Willis
Browse files
Added a kernel_eval that finds dW_dx with 2 vectors.
parent
b84a9701
Changes
1
Hide whitespace changes
Inline
Side-by-side
src/kernel_hydro.h
View file @
48291b4d
...
...
@@ -473,7 +473,6 @@ __attribute__((always_inline)) INLINE static void kernel_deval_1_vec(
/* Form a mask for each part of the kernel. */
mask_reg1
.
v
=
vec_cmp_lt
(
x
.
v
,
cond
.
v
);
/* 0 < x < 0.5 */
mask_reg2
.
v
=
vec_cmp_gte
(
x
.
v
,
cond
.
v
);
/* 0.5 < x < 1 */
;
/* Work out w for both regions of the kernel and combine the results together
* using masks. */
...
...
@@ -586,9 +585,7 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec(
mask_reg1
.
v
=
vec_cmp_lt
(
x
.
v
,
cond
.
v
);
/* 0 < x < 0.5 */
mask_reg1_v2
.
v
=
vec_cmp_lt
(
x2
.
v
,
cond
.
v
);
/* 0 < x < 0.5 */
mask_reg2
.
v
=
vec_cmp_gte
(
x
.
v
,
cond
.
v
);
/* 0.5 < x < 1 */
;
mask_reg2_v2
.
v
=
vec_cmp_gte
(
x2
.
v
,
cond
.
v
);
/* 0.5 < x < 1 */
;
/* Work out w for both regions of the kernel and combine the results together
* using masks. */
...
...
@@ -676,12 +673,11 @@ __attribute__((always_inline)) INLINE static void kernel_eval_W_vec(vector *u,
w
->
v
=
vec_fma
(
x
.
v
,
w
->
v
,
wendland_const_c5
.
v
);
#elif defined(CUBIC_SPLINE_KERNEL)
vector
w2
;
vector
mask1
,
mask2
;
vector
mask
_reg
1
,
mask
_reg
2
;
/* Form a mask for each part of the kernel. */
mask1
.
v
=
vec_cmp_lt
(
x
.
v
,
cond
.
v
);
/* 0 < x < 0.5 */
mask2
.
v
=
vec_cmp_gte
(
x
.
v
,
cond
.
v
);
/* 0.5 < x < 1 */
;
mask_reg1
.
v
=
vec_cmp_lt
(
x
.
v
,
cond
.
v
);
/* 0 < x < 0.5 */
mask_reg2
.
v
=
vec_cmp_gte
(
x
.
v
,
cond
.
v
);
/* 0.5 < x < 1 */
/* Work out w for both regions of the kernel and combine the results together
* using masks. */
...
...
@@ -698,8 +694,8 @@ __attribute__((always_inline)) INLINE static void kernel_eval_W_vec(vector *u,
w2
.
v
=
vec_fma
(
x
.
v
,
w2
.
v
,
cubic_2_const_c3
.
v
);
/* Mask out unneeded values. */
w
->
v
=
vec_and
(
w
->
v
,
mask1
.
v
);
w2
.
v
=
vec_and
(
w2
.
v
,
mask2
.
v
);
w
->
v
=
vec_and
(
w
->
v
,
mask
_reg
1
.
v
);
w2
.
v
=
vec_and
(
w2
.
v
,
mask
_reg
2
.
v
);
/* Added both w and w2 together to form complete result. */
w
->
v
=
vec_add
(
w
->
v
,
w2
.
v
);
...
...
@@ -741,12 +737,11 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_vec(
#elif defined(CUBIC_SPLINE_KERNEL)
vector
dw_dx2
;
vector
mask1
,
mask2
;
vector
mask
_reg
1
,
mask
_reg
2
;
/* Form a mask for each part of the kernel. */
mask1
.
v
=
vec_cmp_lt
(
x
.
v
,
cond
.
v
);
/* 0 < x < 0.5 */
mask2
.
v
=
vec_cmp_gte
(
x
.
v
,
cond
.
v
);
/* 0.5 < x < 1 */
;
mask_reg1
.
v
=
vec_cmp_lt
(
x
.
v
,
cond
.
v
);
/* 0 < x < 0.5 */
mask_reg2
.
v
=
vec_cmp_gte
(
x
.
v
,
cond
.
v
);
/* 0.5 < x < 1 */
/* Work out w for both regions of the kernel and combine the results together
* using masks. */
...
...
@@ -760,8 +755,8 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_vec(
dw_dx2
.
v
=
vec_fma
(
dw_dx2
.
v
,
x
.
v
,
cubic_2_dwdx_const_c2
.
v
);
/* Mask out unneeded values. */
dw_dx
->
v
=
vec_and
(
dw_dx
->
v
,
mask1
.
v
);
dw_dx2
.
v
=
vec_and
(
dw_dx2
.
v
,
mask2
.
v
);
dw_dx
->
v
=
vec_and
(
dw_dx
->
v
,
mask
_reg
1
.
v
);
dw_dx2
.
v
=
vec_and
(
dw_dx2
.
v
,
mask
_reg
2
.
v
);
/* Added both dwdx and dwdx2 together to form complete result. */
dw_dx
->
v
=
vec_add
(
dw_dx
->
v
,
dw_dx2
.
v
);
...
...
@@ -774,6 +769,156 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_vec(
kernel_gamma_inv_dim_plus_one_vec
.
v
));
}
/**
* @brief Computes the kernel function derivative for two particles
* using vectors.
*
* Return 0 if $u > \\gamma = H/h$
*
* @param u The ratio of the distance to the smoothing length $u = x/h$.
* @param dw_dx (return) The norm of the gradient of $|\\nabla W(x,h)|$.
*/
__attribute__
((
always_inline
))
INLINE
static
void
kernel_eval_dWdx_force_vec
(
vector
*
u
,
vector
*
dw_dx
)
{
/* Go to the range [0,1[ from [0,H[ */
vector
x
;
x
.
v
=
vec_mul
(
u
->
v
,
kernel_gamma_inv_vec
.
v
);
#ifdef WENDLAND_C2_KERNEL
/* Init the iteration for Horner's scheme. */
dw_dx
->
v
=
vec_fma
(
wendland_dwdx_const_c0
.
v
,
x
.
v
,
wendland_dwdx_const_c1
.
v
);
/* Calculate the polynomial interleaving vector operations */
dw_dx
->
v
=
vec_fma
(
dw_dx
->
v
,
x
.
v
,
wendland_dwdx_const_c2
.
v
);
dw_dx
->
v
=
vec_fma
(
dw_dx
->
v
,
x
.
v
,
wendland_dwdx_const_c3
.
v
);
dw_dx
->
v
=
vec_mul
(
dw_dx
->
v
,
x
.
v
);
#elif defined(CUBIC_SPLINE_KERNEL)
vector
dw_dx2
;
vector
mask_reg1
,
mask_reg2
;
/* Form a mask for each part of the kernel. */
mask_reg1
.
v
=
vec_cmp_lt
(
x
.
v
,
cond
.
v
);
/* 0 < x < 0.5 */
mask_reg2
.
v
=
vec_cmp_gte
(
x
.
v
,
cond
.
v
);
/* 0.5 < x < 1 */
/* Work out w for both regions of the kernel and combine the results together
* using masks. */
/* Init the iteration for Horner's scheme. */
dw_dx
->
v
=
vec_fma
(
cubic_1_dwdx_const_c0
.
v
,
x
.
v
,
cubic_1_dwdx_const_c1
.
v
);
dw_dx2
.
v
=
vec_fma
(
cubic_2_dwdx_const_c0
.
v
,
x
.
v
,
cubic_2_dwdx_const_c1
.
v
);
/* Calculate the polynomial interleaving vector operations. */
dw_dx
->
v
=
vec_mul
(
dw_dx
->
v
,
x
.
v
);
/* cubic_1_dwdx_const_c2 is zero. */
dw_dx2
.
v
=
vec_fma
(
dw_dx2
.
v
,
x
.
v
,
cubic_2_dwdx_const_c2
.
v
);
/* Mask out unneeded values. */
dw_dx
->
v
=
vec_and
(
dw_dx
->
v
,
mask_reg1
.
v
);
dw_dx2
.
v
=
vec_and
(
dw_dx2
.
v
,
mask_reg2
.
v
);
/* Added both dwdx and dwdx2 together to form complete result. */
dw_dx
->
v
=
vec_add
(
dw_dx
->
v
,
dw_dx2
.
v
);
#else
#error
#endif
vector
mask
;
mask
.
v
=
vec_cmp_lt
(
x
.
v
,
vec_set1
(
1
.
f
));
dw_dx
->
v
=
vec_and
(
dw_dx
->
v
,
mask
.
v
);
/* Return everything */
dw_dx
->
v
=
vec_mul
(
dw_dx
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_plus_one_vec
.
v
));
}
/**
* @brief Computes the kernel function derivative for two particles
* using vectors.
*
* Return 0 if $u > \\gamma = H/h$
*
* @param u The ratio of the distance to the smoothing length $u = x/h$.
* @param dw_dx (return) The norm of the gradient of $|\\nabla W(x,h)|$.
*/
__attribute__
((
always_inline
))
INLINE
static
void
kernel_eval_dWdx_force_2_vec
(
vector
*
u
,
vector
*
dw_dx
,
vector
*
u_2
,
vector
*
dw_dx_2
)
{
/* Go to the range [0,1[ from [0,H[ */
vector
x
,
x_2
;
x
.
v
=
vec_mul
(
u
->
v
,
kernel_gamma_inv_vec
.
v
);
x_2
.
v
=
vec_mul
(
u_2
->
v
,
kernel_gamma_inv_vec
.
v
);
#ifdef WENDLAND_C2_KERNEL
/* Init the iteration for Horner's scheme. */
dw_dx
->
v
=
vec_fma
(
wendland_dwdx_const_c0
.
v
,
x
.
v
,
wendland_dwdx_const_c1
.
v
);
dw_dx_2
->
v
=
vec_fma
(
wendland_dwdx_const_c0
.
v
,
x_2
.
v
,
wendland_dwdx_const_c1
.
v
);
/* Calculate the polynomial interleaving vector operations */
dw_dx
->
v
=
vec_fma
(
dw_dx
->
v
,
x
.
v
,
wendland_dwdx_const_c2
.
v
);
dw_dx_2
->
v
=
vec_fma
(
dw_dx_2
->
v
,
x_2
.
v
,
wendland_dwdx_const_c2
.
v
);
dw_dx
->
v
=
vec_fma
(
dw_dx
->
v
,
x
.
v
,
wendland_dwdx_const_c3
.
v
);
dw_dx_2
->
v
=
vec_fma
(
dw_dx_2
->
v
,
x_2
.
v
,
wendland_dwdx_const_c3
.
v
);
dw_dx
->
v
=
vec_mul
(
dw_dx
->
v
,
x
.
v
);
dw_dx_2
->
v
=
vec_mul
(
dw_dx_2
->
v
,
x_2
.
v
);
#elif defined(CUBIC_SPLINE_KERNEL)
vector
dw_dx2
,
dw_dx2_2
;
vector
mask_reg1
,
mask_reg2
;
vector
mask_reg1_2
,
mask_reg2_2
;
/* Form a mask for each part of the kernel. */
mask_reg1
.
v
=
vec_cmp_lt
(
x
.
v
,
cond
.
v
);
/* 0 < x < 0.5 */
mask_reg1_2
.
v
=
vec_cmp_lt
(
x_2
.
v
,
cond
.
v
);
/* 0 < x < 0.5 */
mask_reg2
.
v
=
vec_cmp_gte
(
x
.
v
,
cond
.
v
);
/* 0.5 < x < 1 */
mask_reg2_2
.
v
=
vec_cmp_gte
(
x_2
.
v
,
cond
.
v
);
/* 0.5 < x < 1 */
/* Work out w for both regions of the kernel and combine the results together
* using masks. */
/* Init the iteration for Horner's scheme. */
dw_dx
->
v
=
vec_fma
(
cubic_1_dwdx_const_c0
.
v
,
x
.
v
,
cubic_1_dwdx_const_c1
.
v
);
dw_dx_2
->
v
=
vec_fma
(
cubic_1_dwdx_const_c0
.
v
,
x_2
.
v
,
cubic_1_dwdx_const_c1
.
v
);
dw_dx2
.
v
=
vec_fma
(
cubic_2_dwdx_const_c0
.
v
,
x
.
v
,
cubic_2_dwdx_const_c1
.
v
);
dw_dx2_2
.
v
=
vec_fma
(
cubic_2_dwdx_const_c0
.
v
,
x_2
.
v
,
cubic_2_dwdx_const_c1
.
v
);
/* Calculate the polynomial interleaving vector operations. */
dw_dx
->
v
=
vec_mul
(
dw_dx
->
v
,
x
.
v
);
/* cubic_1_dwdx_const_c2 is zero. */
dw_dx_2
->
v
=
vec_mul
(
dw_dx_2
->
v
,
x_2
.
v
);
/* cubic_1_dwdx_const_c2 is zero. */
dw_dx2
.
v
=
vec_fma
(
dw_dx2
.
v
,
x
.
v
,
cubic_2_dwdx_const_c2
.
v
);
dw_dx2_2
.
v
=
vec_fma
(
dw_dx2_2
.
v
,
x_2
.
v
,
cubic_2_dwdx_const_c2
.
v
);
/* Mask out unneeded values. */
dw_dx
->
v
=
vec_and
(
dw_dx
->
v
,
mask_reg1
.
v
);
dw_dx_2
->
v
=
vec_and
(
dw_dx_2
->
v
,
mask_reg1_2
.
v
);
dw_dx2
.
v
=
vec_and
(
dw_dx2
.
v
,
mask_reg2
.
v
);
dw_dx2_2
.
v
=
vec_and
(
dw_dx2_2
.
v
,
mask_reg2_2
.
v
);
/* Added both dwdx and dwdx2 together to form complete result. */
dw_dx
->
v
=
vec_add
(
dw_dx
->
v
,
dw_dx2
.
v
);
dw_dx_2
->
v
=
vec_add
(
dw_dx_2
->
v
,
dw_dx2_2
.
v
);
#else
#error
#endif
vector
mask
,
mask_2
;
mask
.
v
=
vec_cmp_lt
(
x
.
v
,
vec_set1
(
1
.
f
));
mask_2
.
v
=
vec_cmp_lt
(
x_2
.
v
,
vec_set1
(
1
.
f
));
dw_dx
->
v
=
vec_and
(
dw_dx
->
v
,
mask
.
v
);
dw_dx_2
->
v
=
vec_and
(
dw_dx_2
->
v
,
mask_2
.
v
);
/* Return everything */
dw_dx
->
v
=
vec_mul
(
dw_dx
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_plus_one_vec
.
v
));
dw_dx_2
->
v
=
vec_mul
(
dw_dx_2
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_plus_one_vec
.
v
));
}
#endif
/* WITH_VECTORIZATION */
/* Some cross-check functions */
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment