Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
8758adf1
Commit
8758adf1
authored
Dec 16, 2016
by
James Willis
Browse files
Vectorise the first inner loop with intrinsics.
parent
6f600db9
Changes
1
Hide whitespace changes
Inline
Side-by-side
src/runner_doiact_vec.c
View file @
8758adf1
...
...
@@ -956,25 +956,19 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
const
double
di
=
sort_i
[
pid
].
d
+
hi
*
kernel_gamma
+
dx_max
-
rshift
;
if
(
di
<
dj_min
)
continue
;
float
pix
=
ci_cache
->
x
[
ci_cache_idx
];
float
piy
=
ci_cache
->
y
[
ci_cache_idx
];
float
piz
=
ci_cache
->
z
[
ci_cache_idx
];
const
float
hig2
=
hi
*
hi
*
kernel_gamma2
;
//vector pix, piy, piz;
//const float hi = cell_cache->h[pid];
vector
pix
,
piy
,
piz
;
/* Fill particle pi vectors. */
//
pix.v = vec_set1(
(float)(pi->x[0] - ci->loc[0] - shift[0])
);
//
piy.v = vec_set1(
(float)(pi->x[1] - ci->loc[1] - shift[1])
);
//
piz.v = vec_set1(
(float)(pi->x[2] - ci->loc[2] - shift[2])
);
pix
.
v
=
vec_set1
(
ci_cache
->
x
[
ci_cache_idx
]
);
piy
.
v
=
vec_set1
(
ci_cache
->
y
[
ci_cache_idx
]
);
piz
.
v
=
vec_set1
(
ci_cache
->
z
[
ci_cache_idx
]
);
v_hi
.
v
=
vec_set1
(
hi
);
v_vix
.
v
=
vec_set1
(
pi
->
v
[
0
]);
v_viy
.
v
=
vec_set1
(
pi
->
v
[
1
]);
v_viz
.
v
=
vec_set1
(
pi
->
v
[
2
]);
v_vix
.
v
=
vec_set1
(
ci_cache
->
vx
[
ci_cache_idx
]);
v_viy
.
v
=
vec_set1
(
ci_cache
->
vy
[
ci_cache_idx
]);
v_viz
.
v
=
vec_set1
(
ci_cache
->
vz
[
ci_cache_idx
]);
//const float hig2 = hi * hi * kernel_gamma2;
v_hig2
.
v
=
vec_set1
(
hig2
);
/* Reset cumulative sums of update vectors. */
...
...
@@ -1010,42 +1004,61 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
/* Set positions to the same as particle pi so when the r2 > 0 mask is
* applied these extra contributions are masked out.*/
for
(
int
i
=
exit_iteration
;
i
<
exit_iteration_align
;
i
++
)
{
cj_cache
.
x
[
i
]
=
pix
;
cj_cache
.
y
[
i
]
=
piy
;
cj_cache
.
z
[
i
]
=
piz
;
cj_cache
.
x
[
i
]
=
pix
.
f
[
0
]
;
cj_cache
.
y
[
i
]
=
piy
.
f
[
0
]
;
cj_cache
.
z
[
i
]
=
piz
.
f
[
0
]
;
}
}
vector
pjx
,
pjy
,
pjz
;
vector
pjvx
,
pjvy
,
pjvz
,
mj
;
/* Loop over the parts in cj. */
//for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
for
(
int
pjd
=
0
;
pjd
<
exit_iteration
;
pjd
++
)
{
for
(
int
pjd
=
0
;
pjd
<
exit_iteration_align
;
pjd
+=
VEC_SIZE
)
{
/* Get the cache index to the jth particle. */
//int cj_cache_idx = sort_j[pjd].i;
int
cj_cache_idx
=
pjd
;
vector
v_dx
,
v_dy
,
v_dz
,
v_r2
;
/* Load 2 sets of vectors from the particle cache. */
pjx
.
v
=
vec_load
(
&
cj_cache
.
x
[
cj_cache_idx
]);
pjy
.
v
=
vec_load
(
&
cj_cache
.
y
[
cj_cache_idx
]);
pjz
.
v
=
vec_load
(
&
cj_cache
.
z
[
cj_cache_idx
]);
pjvx
.
v
=
vec_load
(
&
cj_cache
.
vx
[
cj_cache_idx
]);
pjvy
.
v
=
vec_load
(
&
cj_cache
.
vy
[
cj_cache_idx
]);
pjvz
.
v
=
vec_load
(
&
cj_cache
.
vz
[
cj_cache_idx
]);
mj
.
v
=
vec_load
(
&
cj_cache
.
m
[
cj_cache_idx
]);
/* Compute the pairwise distance. */
float
dx
=
pix
-
cj_cache
.
x
[
cj_cache_idx
];
float
dy
=
piy
-
cj_cache
.
y
[
cj_cache_idx
];
float
dz
=
piz
-
cj_cache
.
z
[
cj_cache_idx
];
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
v_dx
.
v
=
vec_sub
(
pix
.
v
,
pjx
.
v
);
v_dy
.
v
=
vec_sub
(
piy
.
v
,
pjy
.
v
);
v_dz
.
v
=
vec_sub
(
piz
.
v
,
pjz
.
v
);
/* Hit or miss? */
if
(
r2
<
hig2
)
{
v_r2
.
v
=
vec_mul
(
v_dx
.
v
,
v_dx
.
v
);
v_r2
.
v
=
vec_fma
(
v_dy
.
v
,
v_dy
.
v
,
v_r2
.
v
);
v_r2
.
v
=
vec_fma
(
v_dz
.
v
,
v_dz
.
v
,
v_r2
.
v
);
/* Add this interaction to the queue. */
int_cache
.
r2q
[
icount
]
=
r2
;
int_cache
.
dxq
[
icount
]
=
dx
;
int_cache
.
dyq
[
icount
]
=
dy
;
int_cache
.
dzq
[
icount
]
=
dz
;
int_cache
.
mq
[
icount
]
=
cj_cache
.
m
[
cj_cache_idx
];
int_cache
.
vxq
[
icount
]
=
cj_cache
.
vx
[
cj_cache_idx
];
int_cache
.
vyq
[
icount
]
=
cj_cache
.
vy
[
cj_cache_idx
];
int_cache
.
vzq
[
icount
]
=
cj_cache
.
vz
[
cj_cache_idx
];
icount
++
;
}
vector
v_doi_mask
,
v_doi_mask_check
;
int
doi_mask
;
/* Form r2 > 0 mask and r2 < hig2 mask. */
v_doi_mask_check
.
v
=
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
());
v_doi_mask
.
v
=
vec_cmp_lt
(
v_r2
.
v
,
v_hig2
.
v
);
/* Combine two masks and form integer mask. */
doi_mask
=
vec_cmp_result
(
vec_and
(
v_doi_mask
.
v
,
v_doi_mask_check
.
v
));
/* If there are any interactions left pack interaction values into c2
* cache. */
if
(
doi_mask
)
storeInteractions
(
doi_mask
,
cj_cache_idx
,
&
v_r2
,
&
v_dx
,
&
v_dy
,
&
v_dz
,
&
mj
,
&
pjvx
,
&
pjvy
,
&
pjvz
,
&
cj_cache
,
&
int_cache
,
&
icount
,
&
rhoSum
,
&
rho_dhSum
,
&
wcountSum
,
&
wcount_dhSum
,
&
div_vSum
,
&
curlvxSum
,
&
curlvySum
,
&
curlvzSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
);
}
/* loop over the parts in cj. */
/* Perform padded vector remainder interactions if any are present. */
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment