Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
25dc403c
Commit
25dc403c
authored
Jul 30, 2017
by
James Willis
Browse files
Formatting.
parent
050c2040
Changes
5
Hide whitespace changes
Inline
Side-by-side
src/hydro/Gadget2/hydro_iact.h
View file @
25dc403c
...
...
@@ -433,13 +433,12 @@ runner_iact_nonsym_1_vec_density(vector *r2, vector *dx, vector *dy, vector *dz,
curlvrz
.
v
=
vec_mul
(
curlvrz
.
v
,
ri
.
v
);
vector
scaleFactor
;
scaleFactor
.
v
=
vec_fma
(
vec_set1
(
hydro_dimension
),
wi
.
v
,
vec_mul
(
ui
.
v
,
wi_dx
.
v
));
scaleFactor
.
v
=
vec_fma
(
vec_set1
(
hydro_dimension
),
wi
.
v
,
vec_mul
(
ui
.
v
,
wi_dx
.
v
));
/* Mask updates to intermediate vector sums for particle pi. */
/* Mask updates to intermediate vector sums for particle pi. */
rhoSum
->
v
=
vec_mask_add
(
rhoSum
->
v
,
vec_mul
(
mj
.
v
,
wi
.
v
),
mask
);
rho_dhSum
->
v
=
vec_mask_sub
(
rho_dhSum
->
v
,
vec_mul
(
mj
.
v
,
scaleFactor
.
v
),
mask
);
rho_dhSum
->
v
=
vec_mask_sub
(
rho_dhSum
->
v
,
vec_mul
(
mj
.
v
,
scaleFactor
.
v
),
mask
);
wcountSum
->
v
=
vec_mask_add
(
wcountSum
->
v
,
wi
.
v
,
mask
);
wcount_dhSum
->
v
=
vec_mask_sub
(
wcount_dhSum
->
v
,
scaleFactor
.
v
,
mask
);
div_vSum
->
v
=
...
...
@@ -457,11 +456,13 @@ runner_iact_nonsym_1_vec_density(vector *r2, vector *dx, vector *dy, vector *dz,
* (non-symmetric vectorized version).
*/
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_2_vec_density
(
float
*
R2
,
float
*
Dx
,
float
*
Dy
,
float
*
Dz
,
vector
hi_inv
,
vector
vix
,
vector
viy
,
vector
viz
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Mj
,
vector
*
rhoSum
,
vector
*
rho_dhSum
,
vector
*
wcountSum
,
vector
*
wcount_dhSum
,
vector
*
div_vSum
,
vector
*
curlvxSum
,
vector
*
curlvySum
,
vector
*
curlvzSum
,
runner_iact_nonsym_2_vec_density
(
float
*
R2
,
float
*
Dx
,
float
*
Dy
,
float
*
Dz
,
vector
hi_inv
,
vector
vix
,
vector
viy
,
vector
viz
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Mj
,
vector
*
rhoSum
,
vector
*
rho_dhSum
,
vector
*
wcountSum
,
vector
*
wcount_dhSum
,
vector
*
div_vSum
,
vector
*
curlvxSum
,
vector
*
curlvySum
,
vector
*
curlvzSum
,
mask_t
mask
,
mask_t
mask2
,
short
mask_cond
)
{
vector
r
,
ri
,
r2
,
ui
,
wi
,
wi_dx
;
...
...
@@ -543,20 +544,20 @@ runner_iact_nonsym_2_vec_density(
curlvrz2
.
v
=
vec_mul
(
curlvrz2
.
v
,
ri2
.
v
);
vector
scaleFactor
,
scaleFactor2
;
scaleFactor
.
v
=
vec_fma
(
vec_set1
(
hydro_dimension
),
wi
.
v
,
vec_mul
(
ui
.
v
,
wi_dx
.
v
));
scaleFactor2
.
v
=
vec_fma
(
vec_set1
(
hydro_dimension
),
wi2
.
v
,
vec_mul
(
ui2
.
v
,
wi_dx2
.
v
));
scaleFactor
.
v
=
vec_fma
(
vec_set1
(
hydro_dimension
),
wi
.
v
,
vec_mul
(
ui
.
v
,
wi_dx
.
v
));
scaleFactor2
.
v
=
vec_fma
(
vec_set1
(
hydro_dimension
),
wi2
.
v
,
vec_mul
(
ui2
.
v
,
wi_dx2
.
v
));
/* Mask updates to intermediate vector sums for particle pi. */
/* Mask only when needed. */
if
(
mask_cond
)
{
rhoSum
->
v
=
vec_mask_add
(
rhoSum
->
v
,
vec_mul
(
mj
.
v
,
wi
.
v
),
mask
);
rhoSum
->
v
=
vec_mask_add
(
rhoSum
->
v
,
vec_mul
(
mj2
.
v
,
wi2
.
v
),
mask2
);
rho_dhSum
->
v
=
vec_mask_sub
(
rho_dhSum
->
v
,
vec_mul
(
mj
.
v
,
scaleFactor
.
v
),
mask
);
rho_dhSum
->
v
=
vec_mask_sub
(
rho_dhSum
->
v
,
vec_mul
(
mj2
.
v
,
scaleFactor2
.
v
),
mask2
);
rho_dhSum
->
v
=
vec_mask_sub
(
rho_dhSum
->
v
,
vec_mul
(
mj
.
v
,
scaleFactor
.
v
),
mask
);
rho_dhSum
->
v
=
vec_mask_sub
(
rho_dhSum
->
v
,
vec_mul
(
mj2
.
v
,
scaleFactor2
.
v
),
mask2
);
wcountSum
->
v
=
vec_mask_add
(
wcountSum
->
v
,
wi
.
v
,
mask
);
wcountSum
->
v
=
vec_mask_add
(
wcountSum
->
v
,
wi2
.
v
,
mask2
);
wcount_dhSum
->
v
=
vec_mask_sub
(
wcount_dhSum
->
v
,
scaleFactor
.
v
,
mask
);
...
...
@@ -587,13 +588,20 @@ runner_iact_nonsym_2_vec_density(
wcount_dhSum
->
v
=
vec_sub
(
wcount_dhSum
->
v
,
scaleFactor
.
v
);
wcount_dhSum
->
v
=
vec_sub
(
wcount_dhSum
->
v
,
scaleFactor2
.
v
);
div_vSum
->
v
=
vec_sub
(
div_vSum
->
v
,
vec_mul
(
mj
.
v
,
vec_mul
(
dvdr
.
v
,
wi_dx
.
v
)));
div_vSum
->
v
=
vec_sub
(
div_vSum
->
v
,
vec_mul
(
mj2
.
v
,
vec_mul
(
dvdr2
.
v
,
wi_dx2
.
v
)));
curlvxSum
->
v
=
vec_add
(
curlvxSum
->
v
,
vec_mul
(
mj
.
v
,
vec_mul
(
curlvrx
.
v
,
wi_dx
.
v
)));
curlvxSum
->
v
=
vec_add
(
curlvxSum
->
v
,
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvrx2
.
v
,
wi_dx2
.
v
)));
curlvySum
->
v
=
vec_add
(
curlvySum
->
v
,
vec_mul
(
mj
.
v
,
vec_mul
(
curlvry
.
v
,
wi_dx
.
v
)));
curlvySum
->
v
=
vec_add
(
curlvySum
->
v
,
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvry2
.
v
,
wi_dx2
.
v
)));
curlvzSum
->
v
=
vec_add
(
curlvzSum
->
v
,
vec_mul
(
mj
.
v
,
vec_mul
(
curlvrz
.
v
,
wi_dx
.
v
)));
curlvzSum
->
v
=
vec_add
(
curlvzSum
->
v
,
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvrz2
.
v
,
wi_dx2
.
v
)));
div_vSum
->
v
=
vec_sub
(
div_vSum
->
v
,
vec_mul
(
mj2
.
v
,
vec_mul
(
dvdr2
.
v
,
wi_dx2
.
v
)));
curlvxSum
->
v
=
vec_add
(
curlvxSum
->
v
,
vec_mul
(
mj
.
v
,
vec_mul
(
curlvrx
.
v
,
wi_dx
.
v
)));
curlvxSum
->
v
=
vec_add
(
curlvxSum
->
v
,
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvrx2
.
v
,
wi_dx2
.
v
)));
curlvySum
->
v
=
vec_add
(
curlvySum
->
v
,
vec_mul
(
mj
.
v
,
vec_mul
(
curlvry
.
v
,
wi_dx
.
v
)));
curlvySum
->
v
=
vec_add
(
curlvySum
->
v
,
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvry2
.
v
,
wi_dx2
.
v
)));
curlvzSum
->
v
=
vec_add
(
curlvzSum
->
v
,
vec_mul
(
mj
.
v
,
vec_mul
(
curlvrz
.
v
,
wi_dx
.
v
)));
curlvzSum
->
v
=
vec_add
(
curlvzSum
->
v
,
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvrz2
.
v
,
wi_dx2
.
v
)));
}
}
#endif
...
...
src/kernel_hydro.h
View file @
25dc403c
...
...
@@ -373,7 +373,8 @@ __attribute__((always_inline)) INLINE static void kernel_deval_vec(
/* Load x and get the interval id. */
vector
ind
;
ind
.
m
=
vec_ftoi
(
vec_fmin
(
vec_mul
(
x
.
v
,
kernel_ivals_vec
.
v
),
kernel_ivals_vec
.
v
));
ind
.
m
=
vec_ftoi
(
vec_fmin
(
vec_mul
(
x
.
v
,
kernel_ivals_vec
.
v
),
kernel_ivals_vec
.
v
));
/* load the coefficients. */
vector
c
[
kernel_degree
+
1
];
...
...
@@ -392,8 +393,10 @@ __attribute__((always_inline)) INLINE static void kernel_deval_vec(
}
/* Return everything */
w
->
v
=
vec_mul
(
w
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_vec
.
v
));
dw_dx
->
v
=
vec_mul
(
dw_dx
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_plus_one_vec
.
v
));
w
->
v
=
vec_mul
(
w
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_vec
.
v
));
dw_dx
->
v
=
vec_mul
(
dw_dx
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_plus_one_vec
.
v
));
}
/* Define constant vectors for the Wendland C2 and Cubic Spline kernel
...
...
@@ -615,14 +618,16 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec(
w2
->
v
=
vec_blend
(
mask_reg2_v2
,
w2
->
v
,
w2_2
.
v
);
dw_dx
->
v
=
vec_blend
(
mask_reg2
,
dw_dx
->
v
,
dw_dx_2
.
v
);
dw_dx2
->
v
=
vec_blend
(
mask_reg2_v2
,
dw_dx2
->
v
,
dw_dx2_2
.
v
);
/* Return everything */
w
->
v
=
vec_mul
(
w
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_vec
.
v
));
w2
->
v
=
vec_mul
(
w2
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_vec
.
v
));
dw_dx
->
v
=
vec_mul
(
dw_dx
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_plus_one_vec
.
v
));
dw_dx2
->
v
=
vec_mul
(
dw_dx2
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_plus_one_vec
.
v
));
w
->
v
=
vec_mul
(
w
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_vec
.
v
));
w2
->
v
=
vec_mul
(
w2
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_vec
.
v
));
dw_dx
->
v
=
vec_mul
(
dw_dx
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_plus_one_vec
.
v
));
dw_dx2
->
v
=
vec_mul
(
dw_dx2
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_plus_one_vec
.
v
));
#endif
}
...
...
@@ -658,7 +663,7 @@ __attribute__((always_inline)) INLINE static void kernel_eval_W_vec(vector *u,
/* Form a mask for each part of the kernel. */
vec_create_mask
(
mask_reg2
,
vec_cmp_gte
(
x
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
/* Work out w for both regions of the kernel and combine the results together
* using masks. */
...
...
@@ -739,7 +744,7 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_vec(
/* Mask out result for particles that lie outside of the kernel function. */
mask_t
mask
;
vec_create_mask
(
mask
,
vec_cmp_lt
(
x
.
v
,
vec_set1
(
1
.
f
)));
/* x < 1 */
vec_create_mask
(
mask
,
vec_cmp_lt
(
x
.
v
,
vec_set1
(
1
.
f
)));
/* x < 1 */
dw_dx
->
v
=
vec_and_mask
(
dw_dx
->
v
,
mask
);
...
...
@@ -787,9 +792,9 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_force_2_vec(
mask_t
mask_reg2_v2
;
/* Form a mask for each part of the kernel. */
vec_create_mask
(
mask_reg2
,
vec_cmp_gte
(
x
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
vec_create_mask
(
mask_reg2
,
vec_cmp_gte
(
x
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
vec_create_mask
(
mask_reg2_v2
,
vec_cmp_gte
(
x_2
.
v
,
cond
.
v
));
/* 0.5 < x < 1 */
/* Work out w for both regions of the kernel and combine the results together
* using masks. */
...
...
@@ -815,8 +820,8 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_force_2_vec(
/* Mask out result for particles that lie outside of the kernel function. */
mask_t
mask
,
mask_2
;
vec_create_mask
(
mask
,
vec_cmp_lt
(
x
.
v
,
vec_set1
(
1
.
f
)));
/* x < 1 */
vec_create_mask
(
mask_2
,
vec_cmp_lt
(
x_2
.
v
,
vec_set1
(
1
.
f
)));
/* x < 1 */
vec_create_mask
(
mask
,
vec_cmp_lt
(
x
.
v
,
vec_set1
(
1
.
f
)));
/* x < 1 */
vec_create_mask
(
mask_2
,
vec_cmp_lt
(
x_2
.
v
,
vec_set1
(
1
.
f
)));
/* x < 1 */
dw_dx
->
v
=
vec_and_mask
(
dw_dx
->
v
,
mask
);
dw_dx_2
->
v
=
vec_and_mask
(
dw_dx_2
->
v
,
mask_2
);
...
...
src/vector.h
View file @
25dc403c
...
...
@@ -83,7 +83,8 @@
#define vec_form_int_mask(a) a
#define vec_and(a, b) _mm512_and_ps(a, b)
#define vec_mask_and(a, b) _mm512_kand(a, b)
#define vec_and_mask(a, mask) _mm512_maskz_expand_ps(mask, a)
/* TODO: Alternative needs to be found. */
#define vec_and_mask(a, mask) \
_mm512_maskz_expand_ps(mask, a)
/* TODO: Alternative needs to be found. */
#define vec_init_mask(mask) mask = 0xFFFF
#define vec_zero_mask(mask) mask = 0
#define vec_create_mask(mask, cond) mask = cond
...
...
src/xmf.c
View file @
25dc403c
...
...
@@ -42,13 +42,12 @@
* @param hdfFileName
* @return the basename part of hdfFileName.
*/
static
const
char
*
xmf_basename
(
const
char
*
hdfFileName
)
{
static
const
char
*
xmf_basename
(
const
char
*
hdfFileName
)
{
static
char
buffer
[
FILENAME_BUFFER_SIZE
];
strcpy
(
buffer
,
hdfFileName
);
return
basename
(
buffer
);
}
/**
* @brief Prepare the XMF file corresponding to a snapshot.
*
...
...
@@ -277,7 +276,7 @@ void xmf_write_line(FILE* xmfFile, const char* fileName,
fprintf
(
xmfFile
,
"<DataItem Dimensions=
\"
%zu %d
\"
NumberType=
\"
%s
\"
"
"Precision=
\"
%d
\"
Format=
\"
HDF
\"
>%s:%s/%s</DataItem>
\n
"
,
N
,
dim
,
xmf_type
(
type
),
xmf_precision
(
type
),
xmf_basename
(
fileName
),
partTypeGroupName
,
name
);
N
,
dim
,
xmf_type
(
type
),
xmf_precision
(
type
),
xmf_basename
(
fileName
),
partTypeGroupName
,
name
);
fprintf
(
xmfFile
,
"</Attribute>
\n
"
);
}
tests/testKernel.c
View file @
25dc403c
...
...
@@ -68,7 +68,7 @@ int main() {
vx
.
f
[
j
]
=
(
i
+
j
)
*
2
.
25
f
/
numPoints
;
}
vx_h
.
v
=
vec_mul
(
vx
.
v
,
vec_set1
(
1
.
f
/
h
));
vx_h
.
v
=
vec_mul
(
vx
.
v
,
vec_set1
(
1
.
f
/
h
));
kernel_deval_1_vec
(
&
vx_h
,
&
W_vec
,
&
dW_vec
);
...
...
@@ -106,8 +106,8 @@ int main() {
vx_2
.
f
[
j
]
=
(
i
+
j
)
*
2
.
25
f
/
numPoints
;
}
vx_h
.
v
=
vec_mul
(
vx
.
v
,
vec_set1
(
1
.
f
/
h
));
vx_h_2
.
v
=
vec_mul
(
vx_2
.
v
,
vec_set1
(
1
.
f
/
h
));
vx_h
.
v
=
vec_mul
(
vx
.
v
,
vec_set1
(
1
.
f
/
h
));
vx_h_2
.
v
=
vec_mul
(
vx_2
.
v
,
vec_set1
(
1
.
f
/
h
));
kernel_deval_2_vec
(
&
vx_h
,
&
W_vec
,
&
dW_vec
,
&
vx_h_2
,
&
W_vec_2
,
&
dW_vec_2
);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment