Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
5b0080ee
Commit
5b0080ee
authored
Oct 07, 2017
by
James Willis
Browse files
Refactoring.
parent
a0fe87bb
Changes
3
Hide whitespace changes
Inline
Side-by-side
src/cache.h
View file @
5b0080ee
...
...
@@ -198,11 +198,8 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
swift_declare_aligned_ptr
(
float
,
vz
,
ci_cache
->
vz
,
SWIFT_CACHE_ALIGNMENT
);
const
struct
part
*
restrict
parts
=
ci
->
parts
;
double
loc
[
3
];
loc
[
0
]
=
ci
->
loc
[
0
];
loc
[
1
]
=
ci
->
loc
[
1
];
loc
[
2
]
=
ci
->
loc
[
2
];
const
double
loc
[
3
]
=
{
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
]};
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
for
(
int
i
=
0
;
i
<
ci
->
count
;
i
++
)
{
...
...
@@ -210,7 +207,6 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
y
[
i
]
=
(
float
)(
parts
[
i
].
x
[
1
]
-
loc
[
1
]);
z
[
i
]
=
(
float
)(
parts
[
i
].
x
[
2
]
-
loc
[
2
]);
h
[
i
]
=
parts
[
i
].
h
;
m
[
i
]
=
parts
[
i
].
mass
;
vx
[
i
]
=
parts
[
i
].
v
[
0
];
vy
[
i
]
=
parts
[
i
].
v
[
1
];
...
...
@@ -254,10 +250,7 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
SWIFT_CACHE_ALIGNMENT
);
const
struct
part
*
restrict
parts
=
ci
->
parts
;
double
loc
[
3
];
loc
[
0
]
=
ci
->
loc
[
0
];
loc
[
1
]
=
ci
->
loc
[
1
];
loc
[
2
]
=
ci
->
loc
[
2
];
const
double
loc
[
3
]
=
{
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
]};
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
...
...
@@ -266,12 +259,10 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
y
[
i
]
=
(
float
)(
parts
[
i
].
x
[
1
]
-
loc
[
1
]);
z
[
i
]
=
(
float
)(
parts
[
i
].
x
[
2
]
-
loc
[
2
]);
h
[
i
]
=
parts
[
i
].
h
;
m
[
i
]
=
parts
[
i
].
mass
;
vx
[
i
]
=
parts
[
i
].
v
[
0
];
vy
[
i
]
=
parts
[
i
].
v
[
1
];
vz
[
i
]
=
parts
[
i
].
v
[
2
];
rho
[
i
]
=
parts
[
i
].
rho
;
grad_h
[
i
]
=
parts
[
i
].
force
.
f
;
pOrho2
[
i
]
=
parts
[
i
].
force
.
P_over_rho2
;
...
...
@@ -325,17 +316,13 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
const
int
last_pj_align
=
*
last_pj
;
const
struct
part
*
restrict
parts_i
=
ci
->
parts
;
const
struct
part
*
restrict
parts_j
=
cj
->
parts
;
double
loc
[
3
];
loc
[
0
]
=
cj
->
loc
[
0
];
loc
[
1
]
=
cj
->
loc
[
1
];
loc
[
2
]
=
cj
->
loc
[
2
];
/* Shift ci particles for boundary conditions and location of cell.*/
double
total_ci_shift
[
3
];
total_ci_shift
[
0
]
=
loc
[
0
]
+
shift
[
0
];
total_ci_shift
[
1
]
=
loc
[
1
]
+
shift
[
1
];
total_ci_shift
[
2
]
=
loc
[
2
]
+
shift
[
2
];
/* Shift particles to the local frame and account for boundary conditions.*/
const
double
total_ci_shift
[
3
]
=
{
cj
->
loc
[
0
]
+
shift
[
0
],
cj
->
loc
[
1
]
+
shift
[
1
],
cj
->
loc
[
2
]
+
shift
[
2
]};
const
double
total_cj_shift
[
3
]
=
{
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
]};
/* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */
swift_declare_aligned_ptr
(
float
,
x
,
ci_cache
->
x
,
SWIFT_CACHE_ALIGNMENT
);
...
...
@@ -348,6 +335,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
swift_declare_aligned_ptr
(
float
,
vz
,
ci_cache
->
vz
,
SWIFT_CACHE_ALIGNMENT
);
int
ci_cache_count
=
ci
->
count
-
first_pi_align
;
/* Shift the particles positions to a local frame (ci frame) so single
* precision
* can be
...
...
@@ -355,12 +343,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
* positions
* due to BCs but leave cell cj. */
for
(
int
i
=
0
;
i
<
ci_cache_count
;
i
++
)
{
/* Make sure ci_cache is filled from the first element. */
idx
=
sort_i
[
i
+
first_pi_align
].
i
;
x
[
i
]
=
(
float
)(
parts_i
[
idx
].
x
[
0
]
-
total_ci_shift
[
0
]);
y
[
i
]
=
(
float
)(
parts_i
[
idx
].
x
[
1
]
-
total_ci_shift
[
1
]);
z
[
i
]
=
(
float
)(
parts_i
[
idx
].
x
[
2
]
-
total_ci_shift
[
2
]);
h
[
i
]
=
parts_i
[
idx
].
h
;
m
[
i
]
=
parts_i
[
idx
].
mass
;
vx
[
i
]
=
parts_i
[
idx
].
v
[
0
];
vy
[
i
]
=
parts_i
[
idx
].
v
[
1
];
...
...
@@ -437,11 +425,10 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
for
(
int
i
=
0
;
i
<=
last_pj_align
;
i
++
)
{
idx
=
sort_j
[
i
].
i
;
xj
[
i
]
=
(
float
)(
parts_j
[
idx
].
x
[
0
]
-
loc
[
0
]);
yj
[
i
]
=
(
float
)(
parts_j
[
idx
].
x
[
1
]
-
loc
[
1
]);
zj
[
i
]
=
(
float
)(
parts_j
[
idx
].
x
[
2
]
-
loc
[
2
]);
xj
[
i
]
=
(
float
)(
parts_j
[
idx
].
x
[
0
]
-
total_cj_shift
[
0
]);
yj
[
i
]
=
(
float
)(
parts_j
[
idx
].
x
[
1
]
-
total_cj_shift
[
1
]);
zj
[
i
]
=
(
float
)(
parts_j
[
idx
].
x
[
2
]
-
total_cj_shift
[
2
]);
hj
[
i
]
=
parts_j
[
idx
].
h
;
mj
[
i
]
=
parts_j
[
idx
].
mass
;
vxj
[
i
]
=
parts_j
[
idx
].
v
[
0
];
vyj
[
i
]
=
parts_j
[
idx
].
v
[
1
];
...
...
@@ -488,7 +475,6 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
yj
[
i
]
=
pos_padded_j
[
1
];
zj
[
i
]
=
pos_padded_j
[
2
];
hj
[
i
]
=
1
.
f
;
mj
[
i
]
=
1
.
f
;
vxj
[
i
]
=
1
.
f
;
vyj
[
i
]
=
1
.
f
;
...
...
@@ -539,10 +525,12 @@ cache_read_two_partial_cells_sorted_force(
const
int
last_pj_align
=
*
last_pj
;
const
struct
part
*
restrict
parts_i
=
ci
->
parts
;
const
struct
part
*
restrict
parts_j
=
cj
->
parts
;
double
loc
[
3
];
loc
[
0
]
=
ci
->
loc
[
0
];
loc
[
1
]
=
ci
->
loc
[
1
];
loc
[
2
]
=
ci
->
loc
[
2
];
/* Shift particles to the local frame and account for boundary conditions.*/
const
double
total_ci_shift
[
3
]
=
{
cj
->
loc
[
0
]
+
shift
[
0
],
cj
->
loc
[
1
]
+
shift
[
1
],
cj
->
loc
[
2
]
+
shift
[
2
]};
const
double
total_cj_shift
[
3
]
=
{
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
]};
/* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */
...
...
@@ -573,18 +561,15 @@ cache_read_two_partial_cells_sorted_force(
* due to BCs but leave cell cj. */
for
(
int
i
=
0
;
i
<
ci_cache_count
;
i
++
)
{
/* Make sure ci_cache is filled from the first element. */
idx
=
sort_i
[
i
+
first_pi_align
].
i
;
x
[
i
]
=
(
float
)(
parts_i
[
idx
].
x
[
0
]
-
loc
[
0
]
-
shift
[
0
]);
y
[
i
]
=
(
float
)(
parts_i
[
idx
].
x
[
1
]
-
loc
[
1
]
-
shift
[
1
]);
z
[
i
]
=
(
float
)(
parts_i
[
idx
].
x
[
2
]
-
loc
[
2
]
-
shift
[
2
]);
x
[
i
]
=
(
float
)(
parts_i
[
idx
].
x
[
0
]
-
total_ci_
shift
[
0
]);
y
[
i
]
=
(
float
)(
parts_i
[
idx
].
x
[
1
]
-
total_ci_
shift
[
1
]);
z
[
i
]
=
(
float
)(
parts_i
[
idx
].
x
[
2
]
-
total_ci_
shift
[
2
]);
h
[
i
]
=
parts_i
[
idx
].
h
;
m
[
i
]
=
parts_i
[
idx
].
mass
;
vx
[
i
]
=
parts_i
[
idx
].
v
[
0
];
vy
[
i
]
=
parts_i
[
idx
].
v
[
1
];
vz
[
i
]
=
parts_i
[
idx
].
v
[
2
];
rho
[
i
]
=
parts_i
[
idx
].
rho
;
grad_h
[
i
]
=
parts_i
[
idx
].
force
.
f
;
pOrho2
[
i
]
=
parts_i
[
idx
].
force
.
P_over_rho2
;
...
...
@@ -606,12 +591,10 @@ cache_read_two_partial_cells_sorted_force(
y
[
i
]
=
pos_padded
[
1
];
z
[
i
]
=
pos_padded
[
2
];
h
[
i
]
=
h_padded
;
m
[
i
]
=
1
.
f
;
vx
[
i
]
=
1
.
f
;
vy
[
i
]
=
1
.
f
;
vz
[
i
]
=
1
.
f
;
rho
[
i
]
=
1
.
f
;
grad_h
[
i
]
=
1
.
f
;
pOrho2
[
i
]
=
1
.
f
;
...
...
@@ -641,16 +624,14 @@ cache_read_two_partial_cells_sorted_force(
for
(
int
i
=
0
;
i
<=
last_pj_align
;
i
++
)
{
idx
=
sort_j
[
i
].
i
;
xj
[
i
]
=
(
float
)(
parts_j
[
idx
].
x
[
0
]
-
loc
[
0
]);
yj
[
i
]
=
(
float
)(
parts_j
[
idx
].
x
[
1
]
-
loc
[
1
]);
zj
[
i
]
=
(
float
)(
parts_j
[
idx
].
x
[
2
]
-
loc
[
2
]);
xj
[
i
]
=
(
float
)(
parts_j
[
idx
].
x
[
0
]
-
total_cj_shift
[
0
]);
yj
[
i
]
=
(
float
)(
parts_j
[
idx
].
x
[
1
]
-
total_cj_shift
[
1
]);
zj
[
i
]
=
(
float
)(
parts_j
[
idx
].
x
[
2
]
-
total_cj_shift
[
2
]);
hj
[
i
]
=
parts_j
[
idx
].
h
;
mj
[
i
]
=
parts_j
[
idx
].
mass
;
vxj
[
i
]
=
parts_j
[
idx
].
v
[
0
];
vyj
[
i
]
=
parts_j
[
idx
].
v
[
1
];
vzj
[
i
]
=
parts_j
[
idx
].
v
[
2
];
rhoj
[
i
]
=
parts_j
[
idx
].
rho
;
grad_hj
[
i
]
=
parts_j
[
idx
].
force
.
f
;
pOrho2j
[
i
]
=
parts_j
[
idx
].
force
.
P_over_rho2
;
...
...
@@ -670,12 +651,10 @@ cache_read_two_partial_cells_sorted_force(
yj
[
i
]
=
pos_padded_j
[
1
];
zj
[
i
]
=
pos_padded_j
[
2
];
hj
[
i
]
=
h_padded_j
;
mj
[
i
]
=
1
.
f
;
vxj
[
i
]
=
1
.
f
;
vyj
[
i
]
=
1
.
f
;
vzj
[
i
]
=
1
.
f
;
rhoj
[
i
]
=
1
.
f
;
grad_hj
[
i
]
=
1
.
f
;
pOrho2j
[
i
]
=
1
.
f
;
...
...
@@ -699,6 +678,11 @@ static INLINE void cache_clean(struct cache *c) {
free
(
c
->
vz
);
free
(
c
->
h
);
free
(
c
->
max_index
);
free
(
c
->
rho
);
free
(
c
->
grad_h
);
free
(
c
->
pOrho2
);
free
(
c
->
balsara
);
free
(
c
->
soundspeed
);
}
}
...
...
src/runner_doiact.h
View file @
5b0080ee
...
...
@@ -1072,37 +1072,6 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
struct
entry
*
restrict
sort_j
=
cj
->
sort
[
sid
];
#ifdef SWIFT_DEBUG_CHECKS
/* Check that the dx_max_sort values in the cell are indeed an upper
bound on particle movement. */
for
(
int
pid
=
0
;
pid
<
ci
->
count
;
pid
++
)
{
const
struct
part
*
p
=
&
ci
->
parts
[
sort_i
[
pid
].
i
];
const
float
d
=
p
->
x
[
0
]
*
runner_shift
[
sid
][
0
]
+
p
->
x
[
1
]
*
runner_shift
[
sid
][
1
]
+
p
->
x
[
2
]
*
runner_shift
[
sid
][
2
];
if
(
fabsf
(
d
-
sort_i
[
pid
].
d
)
-
ci
->
dx_max_sort
>
1.0e-4
*
max
(
fabsf
(
d
),
ci
->
dx_max_sort_old
))
error
(
"particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d "
"cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e "
"ci->dx_max_sort_old=%e"
,
ci
->
nodeID
,
cj
->
nodeID
,
d
,
sort_i
[
pid
].
d
,
ci
->
dx_max_sort
,
ci
->
dx_max_sort_old
);
}
for
(
int
pjd
=
0
;
pjd
<
cj
->
count
;
pjd
++
)
{
const
struct
part
*
p
=
&
cj
->
parts
[
sort_j
[
pjd
].
i
];
const
float
d
=
p
->
x
[
0
]
*
runner_shift
[
sid
][
0
]
+
p
->
x
[
1
]
*
runner_shift
[
sid
][
1
]
+
p
->
x
[
2
]
*
runner_shift
[
sid
][
2
];
if
(
fabsf
(
d
-
sort_j
[
pjd
].
d
)
-
cj
->
dx_max_sort
>
1.0e-4
*
max
(
fabsf
(
d
),
cj
->
dx_max_sort_old
))
error
(
"particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d "
"ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e "
"cj->dx_max_sort_old=%e"
,
cj
->
nodeID
,
ci
->
nodeID
,
d
,
sort_j
[
pjd
].
d
,
cj
->
dx_max_sort
,
cj
->
dx_max_sort_old
);
}
/* Some constants used to checks that the parts are in the right frame */
const
float
shift_threshold_x
=
2
.
*
ci
->
width
[
0
]
+
2
.
*
max
(
ci
->
dx_max_part
,
cj
->
dx_max_part
);
...
...
@@ -1110,7 +1079,6 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
2
.
*
ci
->
width
[
1
]
+
2
.
*
max
(
ci
->
dx_max_part
,
cj
->
dx_max_part
);
const
float
shift_threshold_z
=
2
.
*
ci
->
width
[
2
]
+
2
.
*
max
(
ci
->
dx_max_part
,
cj
->
dx_max_part
);
#endif
/* SWIFT_DEBUG_CHECKS */
/* Get some other useful values. */
...
...
@@ -1514,7 +1482,6 @@ void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
error
(
"Interacting unsorted cells."
);
#ifdef SWIFT_DEBUG_CHECKS
/* Pick-out the sorted lists. */
const
struct
entry
*
restrict
sort_i
=
ci
->
sort
[
sid
];
const
struct
entry
*
restrict
sort_j
=
cj
->
sort
[
sid
];
...
...
src/runner_doiact_vec.c
View file @
5b0080ee
...
...
@@ -35,21 +35,21 @@ static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
* @param int_cache (return) secondary #cache of interactions between two
* particles.
* @param icount Interaction count.
* @param rhoSum (return) #vector holding the cumulative sum of the density
* @param
v_
rhoSum (return) #vector holding the cumulative sum of the density
* update on pi.
* @param rho_dhSum (return) #vector holding the cumulative sum of the density
* @param
v_
rho_dhSum (return) #vector holding the cumulative sum of the density
* gradient update on pi.
* @param wcountSum (return) #vector holding the cumulative sum of the wcount
* @param
v_
wcountSum (return) #vector holding the cumulative sum of the wcount
* update on pi.
* @param wcount_dhSum (return) #vector holding the cumulative sum of the wcount
* @param
v_
wcount_dhSum (return) #vector holding the cumulative sum of the wcount
* gradient update on pi.
* @param div_vSum (return) #vector holding the cumulative sum of the divergence
* @param
v_
div_vSum (return) #vector holding the cumulative sum of the divergence
* update on pi.
* @param curlvxSum (return) #vector holding the cumulative sum of the curl of
* @param
v_
curlvxSum (return) #vector holding the cumulative sum of the curl of
* vx update on pi.
* @param curlvySum (return) #vector holding the cumulative sum of the curl of
* @param
v_
curlvySum (return) #vector holding the cumulative sum of the curl of
* vy update on pi.
* @param curlvzSum (return) #vector holding the cumulative sum of the curl of
* @param
v_
curlvzSum (return) #vector holding the cumulative sum of the curl of
* vz update on pi.
* @param v_hi_inv #vector of 1/h for pi.
* @param v_vix #vector of x velocity of pi.
...
...
@@ -59,9 +59,9 @@ static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
* interactions have been performed, should be a multiple of the vector length.
*/
__attribute__
((
always_inline
))
INLINE
static
void
calcRemInteractions
(
struct
c2_cache
*
const
int_cache
,
const
int
icount
,
vector
*
rhoSum
,
vector
*
rho_dhSum
,
vector
*
wcountSum
,
vector
*
wcount_dhSum
,
vector
*
div_vSum
,
vector
*
curlvxSum
,
vector
*
curlvySum
,
vector
*
curlvzSum
,
struct
c2_cache
*
const
int_cache
,
const
int
icount
,
vector
*
v_
rhoSum
,
vector
*
v_
rho_dhSum
,
vector
*
v_
wcountSum
,
vector
*
v_
wcount_dhSum
,
vector
*
v_
div_vSum
,
vector
*
v_
curlvxSum
,
vector
*
v_
curlvySum
,
vector
*
v_
curlvzSum
,
vector
v_hi_inv
,
vector
v_vix
,
vector
v_viy
,
vector
v_viz
,
int
*
icount_align
)
{
...
...
@@ -107,8 +107,8 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
&
int_cache
->
dyq
[
*
icount_align
],
&
int_cache
->
dzq
[
*
icount_align
],
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
&
int_cache
->
vxq
[
*
icount_align
],
&
int_cache
->
vyq
[
*
icount_align
],
&
int_cache
->
vzq
[
*
icount_align
],
&
int_cache
->
mq
[
*
icount_align
],
rhoSum
,
rho_dhSum
,
wcountSum
,
wcount_dhSum
,
div_vSum
,
curlvxSum
,
curlvySum
,
curlvzSum
,
int_mask
,
&
int_cache
->
mq
[
*
icount_align
],
v_
rhoSum
,
v_
rho_dhSum
,
v_
wcountSum
,
v_
wcount_dhSum
,
v_
div_vSum
,
v_
curlvxSum
,
v_
curlvySum
,
v_
curlvzSum
,
int_mask
,
int_mask2
,
1
);
}
}
...
...
@@ -127,20 +127,20 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
* @param int_cache (return) secondary #cache of interactions between two
* particles.
* @param icount Interaction count.
* @param rhoSum #vector holding the cumulative sum of the density update on pi.
* @param rho_dhSum #vector holding the cumulative sum of the density gradient
* @param
v_
rhoSum #vector holding the cumulative sum of the density update on pi.
* @param
v_
rho_dhSum #vector holding the cumulative sum of the density gradient
* update on pi.
* @param wcountSum #vector holding the cumulative sum of the wcount update on
* @param
v_
wcountSum #vector holding the cumulative sum of the wcount update on
* pi.
* @param wcount_dhSum #vector holding the cumulative sum of the wcount gradient
* @param
v_
wcount_dhSum #vector holding the cumulative sum of the wcount gradient
* update on pi.
* @param div_vSum #vector holding the cumulative sum of the divergence update
* @param
v_
div_vSum #vector holding the cumulative sum of the divergence update
* on pi.
* @param curlvxSum #vector holding the cumulative sum of the curl of vx update
* @param
v_
curlvxSum #vector holding the cumulative sum of the curl of vx update
* on pi.
* @param curlvySum #vector holding the cumulative sum of the curl of vy update
* @param
v_
curlvySum #vector holding the cumulative sum of the curl of vy update
* on pi.
* @param curlvzSum #vector holding the cumulative sum of the curl of vz update
* @param
v_
curlvzSum #vector holding the cumulative sum of the curl of vz update
* on pi.
* @param v_hi_inv #vector of 1/h for pi.
* @param v_vix #vector of x velocity of pi.
...
...
@@ -150,9 +150,9 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
__attribute__
((
always_inline
))
INLINE
static
void
storeInteractions
(
const
int
mask
,
const
int
pjd
,
vector
*
v_r2
,
vector
*
v_dx
,
vector
*
v_dy
,
vector
*
v_dz
,
const
struct
cache
*
const
cell_cache
,
struct
c2_cache
*
const
int_cache
,
int
*
icount
,
vector
*
rhoSum
,
vector
*
rho_dhSum
,
vector
*
wcountSum
,
vector
*
wcount_dhSum
,
vector
*
div_vSum
,
vector
*
curlvxSum
,
vector
*
curlvySum
,
vector
*
curlvzSum
,
struct
c2_cache
*
const
int_cache
,
int
*
icount
,
vector
*
v_
rhoSum
,
vector
*
v_
rho_dhSum
,
vector
*
v_
wcountSum
,
vector
*
v_
wcount_dhSum
,
vector
*
v_
div_vSum
,
vector
*
v_
curlvxSum
,
vector
*
v_
curlvySum
,
vector
*
v_
curlvzSum
,
vector
v_hi_inv
,
vector
v_vix
,
vector
v_viy
,
vector
v_viz
)
{
/* Left-pack values needed into the secondary cache using the interaction mask.
...
...
@@ -202,8 +202,8 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
int
icount_align
=
*
icount
;
/* Peform remainder interactions. */
calcRemInteractions
(
int_cache
,
*
icount
,
rhoSum
,
rho_dhSum
,
wcountSum
,
wcount_dhSum
,
div_vSum
,
curlvxSum
,
curlvySum
,
curlvzSum
,
calcRemInteractions
(
int_cache
,
*
icount
,
v_
rhoSum
,
v_
rho_dhSum
,
v_
wcountSum
,
v_
wcount_dhSum
,
v_
div_vSum
,
v_
curlvxSum
,
v_
curlvySum
,
v_
curlvzSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
&
icount_align
);
mask_t
int_mask
,
int_mask2
;
...
...
@@ -215,9 +215,9 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
runner_iact_nonsym_2_vec_density
(
&
int_cache
->
r2q
[
j
],
&
int_cache
->
dxq
[
j
],
&
int_cache
->
dyq
[
j
],
&
int_cache
->
dzq
[
j
],
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
&
int_cache
->
vxq
[
j
],
&
int_cache
->
vyq
[
j
],
&
int_cache
->
vzq
[
j
],
&
int_cache
->
mq
[
j
],
rhoSum
,
rho_dhSum
,
wcountSum
,
wcount_dhSum
,
div_vSum
,
curlvxSum
,
curlvySum
,
curlvzSum
,
int_mask
,
int_mask2
,
0
);
&
int_cache
->
vyq
[
j
],
&
int_cache
->
vzq
[
j
],
&
int_cache
->
mq
[
j
],
v_
rhoSum
,
v_
rho_dhSum
,
v_
wcountSum
,
v_
wcount_dhSum
,
v_
div_vSum
,
v_
curlvxSum
,
v_
curlvySum
,
v_
curlvzSum
,
int_mask
,
int_mask2
,
0
);
}
/* Reset interaction count. */
...
...
@@ -358,6 +358,33 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache(
*
init_pj
=
last_pj
;
}
/**
* @brief Populates the arrays max_index_i and max_index_j with the maximum
* indices of
* particles into their neighbouring cells. Also finds the first pi that
* interacts with any particle in cj and the last pj that interacts with any
* particle in ci.
*
* @param ci #cell pointer to ci
* @param cj #cell pointer to cj
* @param sort_i #entry array for particle distance in ci
* @param sort_j #entry array for particle distance in cj
* @param dx_max maximum particle movement allowed in cell
* @param rshift cutoff shift
* @param hi_max_raw Maximal smoothing length in cell ci
* @param hj_max_raw Maximal smoothing length in cell cj
* @param hi_max Maximal smoothing length in cell ci scaled by kernel_gamma
* @param hj_max Maximal smoothing length in cell cj scaled by kernel_gamma
* @param di_max Maximal position on the axis that can interact in cell ci
* @param dj_min Minimal position on the axis that can interact in cell ci
* @param max_index_i array to hold the maximum distances of pi particles into
* #cell cj
* @param max_index_j array to hold the maximum distances of pj particles into
* #cell cj
* @param init_pi first pi to interact with a pj particle
* @param init_pj last pj to interact with a pi particle
* @param max_active_bin The largest time-bin active during this step.
*/
__attribute__
((
always_inline
))
INLINE
static
void
populate_max_index_no_cache_force
(
const
struct
cell
*
ci
,
const
struct
cell
*
cj
,
const
struct
entry
*
restrict
sort_i
,
...
...
@@ -530,14 +557,14 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
/* Is the ith particle active? */
if
(
!
part_is_active_no_debug
(
pi
,
max_active_bin
))
continue
;
vector
pix
,
piy
,
piz
;
vector
v_
pix
,
v_
piy
,
v_
piz
;
const
float
hi
=
cell_cache
->
h
[
pid
];
/* Fill particle pi vectors. */
pix
.
v
=
vec_set1
(
cell_cache
->
x
[
pid
]);
piy
.
v
=
vec_set1
(
cell_cache
->
y
[
pid
]);
piz
.
v
=
vec_set1
(
cell_cache
->
z
[
pid
]);
v_
pix
.
v
=
vec_set1
(
cell_cache
->
x
[
pid
]);
v_
piy
.
v
=
vec_set1
(
cell_cache
->
y
[
pid
]);
v_
piz
.
v
=
vec_set1
(
cell_cache
->
z
[
pid
]);
v_hi
.
v
=
vec_set1
(
hi
);
v_vix
.
v
=
vec_set1
(
cell_cache
->
vx
[
pid
]);
v_viy
.
v
=
vec_set1
(
cell_cache
->
vy
[
pid
]);
...
...
@@ -547,22 +574,22 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
v_hig2
.
v
=
vec_set1
(
hig2
);
/* Reset cumulative sums of update vectors. */
vector
rhoSum
,
rho_dhSum
,
wcountSum
,
wcount_dhSum
,
div_vSum
,
curlvxSum
,
curlvySum
,
curlvzSum
;
vector
v_
rhoSum
,
v_
rho_dhSum
,
v_
wcountSum
,
v_
wcount_dhSum
,
v_
div_vSum
,
v_
curlvxSum
,
v_
curlvySum
,
v_
curlvzSum
;
/* Get the inverse of hi. */
vector
v_hi_inv
;
v_hi_inv
=
vec_reciprocal
(
v_hi
);
rhoSum
.
v
=
vec_setzero
();
rho_dhSum
.
v
=
vec_setzero
();
wcountSum
.
v
=
vec_setzero
();
wcount_dhSum
.
v
=
vec_setzero
();
div_vSum
.
v
=
vec_setzero
();
curlvxSum
.
v
=
vec_setzero
();
curlvySum
.
v
=
vec_setzero
();
curlvzSum
.
v
=
vec_setzero
();
v_
rhoSum
.
v
=
vec_setzero
();
v_
rho_dhSum
.
v
=
vec_setzero
();
v_
wcountSum
.
v
=
vec_setzero
();
v_
wcount_dhSum
.
v
=
vec_setzero
();
v_
div_vSum
.
v
=
vec_setzero
();
v_
curlvxSum
.
v
=
vec_setzero
();
v_
curlvySum
.
v
=
vec_setzero
();
v_
curlvzSum
.
v
=
vec_setzero
();
/* Pad cache if there is a serial remainder. */
count_align
=
count
;
...
...
@@ -575,38 +602,38 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
/* Set positions to the same as particle pi so when the r2 > 0 mask is
* applied these extra contributions are masked out.*/
for
(
int
i
=
count
;
i
<
count_align
;
i
++
)
{
cell_cache
->
x
[
i
]
=
pix
.
f
[
0
];
cell_cache
->
y
[
i
]
=
piy
.
f
[
0
];
cell_cache
->
z
[
i
]
=
piz
.
f
[
0
];
cell_cache
->
x
[
i
]
=
v_
pix
.
f
[
0
];
cell_cache
->
y
[
i
]
=
v_
piy
.
f
[
0
];
cell_cache
->
z
[
i
]
=
v_
piz
.
f
[
0
];
}
}
vector
pjx
,
pjy
,
pjz
;
vector
pjx2
,
pjy2
,
pjz2
;
vector
v_
pjx
,
v_
pjy
,
v_
pjz
;
vector
v_
pjx2
,
v_
pjy2
,
v_
pjz2
;
/* Find all of particle pi's interacions and store needed values in the
* secondary cache.*/
for
(
int
pjd
=
0
;
pjd
<
count_align
;
pjd
+=
(
num_vec_proc
*
VEC_SIZE
))
{
/* Load 2 sets of vectors from the particle cache. */
pjx
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
]);
pjy
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
]);
pjz
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
]);
v_
pjx
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
]);
v_
pjy
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
]);
v_
pjz
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
]);
pjx2
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
+
VEC_SIZE
]);
pjy2
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
+
VEC_SIZE
]);
pjz2
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
+
VEC_SIZE
]);
v_
pjx2
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
+
VEC_SIZE
]);
v_
pjy2
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
+
VEC_SIZE
]);
v_
pjz2
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
+
VEC_SIZE
]);
/* Compute the pairwise distance. */
vector
v_dx
,
v_dy
,
v_dz
;
vector
v_dx_2
,
v_dy_2
,
v_dz_2
,
v_r2_2
;
v_dx
.
v
=
vec_sub
(
pix
.
v
,
pjx
.
v
);
v_dx_2
.
v
=
vec_sub
(
pix
.
v
,
pjx2
.
v
);
v_dy
.
v
=
vec_sub
(
piy
.
v
,
pjy
.
v
);
v_dy_2
.
v
=
vec_sub
(
piy
.
v
,
pjy2
.
v
);
v_dz
.
v
=
vec_sub
(
piz
.
v
,
pjz
.
v
);
v_dz_2
.
v
=
vec_sub
(
piz
.
v
,
pjz2
.
v
);
v_dx
.
v
=
vec_sub
(
v_
pix
.
v
,
v_
pjx
.
v
);
v_dx_2
.
v
=
vec_sub
(
v_
pix
.
v
,
v_
pjx2
.
v
);
v_dy
.
v
=
vec_sub
(
v_
piy
.
v
,
v_
pjy
.
v
);
v_dy_2
.
v
=
vec_sub
(
v_
piy
.
v
,
v_
pjy2
.
v
);
v_dz
.
v
=
vec_sub
(
v_
piz
.
v
,
v_
pjz
.
v
);
v_dz_2
.
v
=
vec_sub
(
v_
piz
.
v
,
v_
pjz2
.
v
);
v_r2
.
v
=
vec_mul
(
v_dx
.
v
,
v_dx
.
v
);
v_r2_2
.
v
=
vec_mul
(
v_dx_2
.
v
,
v_dx_2
.
v
);
...
...
@@ -644,23 +671,23 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
* cache. */
if
(
doi_mask
)
{
storeInteractions
(
doi_mask
,
pjd
,
&
v_r2
,
&
v_dx
,
&
v_dy
,
&
v_dz
,
cell_cache
,
&
int_cache
,
&
icount
,
&
rhoSum
,
&
rho_dhSum
,
&
wcountSum
,
&
wcount_dhSum
,
&
div_vSum
,
&
curlvxSum
,
&
curlvySum
,
&
curlvzSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
);
&
int_cache
,
&
icount
,
&
v_
rhoSum
,
&
v_
rho_dhSum
,
&
v_
wcountSum
,
&
v_
wcount_dhSum
,
&
v_
div_vSum
,
&
v_
curlvxSum
,
&
v_
curlvySum
,
&
v_
curlvzSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
);
}
if
(
doi_mask2
)
{
storeInteractions
(
doi_mask2
,
pjd
+
VEC_SIZE
,
&
v_r2_2
,
&
v_dx_2
,
&
v_dy_2
,
&
v_dz_2
,
cell_cache
,
&
int_cache
,
&
icount
,
&
rhoSum
,
&
rho_dhSum
,
&
wcountSum
,
&
wcount_dhSum
,
&
div_vSum
,
&
curlvxSum
,
&
curlvySum
,
&
curlvzSum
,
v_hi_inv
,
v_vix
,
&
v_dz_2
,
cell_cache
,
&
int_cache
,
&
icount
,
&
v_
rhoSum
,
&
v_
rho_dhSum
,
&
v_
wcountSum
,
&
v_
wcount_dhSum
,
&
v_
div_vSum
,
&
v_
curlvxSum
,
&
v_
curlvySum
,
&
v_
curlvzSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
);
}
}
/* Perform padded vector remainder interactions if any are present. */
calcRemInteractions
(
&
int_cache
,
icount
,
&
rhoSum
,
&
rho_dhSum
,
&
wcountSum
,
&
wcount_dhSum
,
&
div_vSum
,
&
curlvxSum
,
&
curlvySum
,
&
curlvzSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
calcRemInteractions
(
&
int_cache
,
icount
,
&
v_
rhoSum
,
&
v_
rho_dhSum
,
&
v_
wcountSum
,
&
v_
wcount_dhSum
,
&
v_
div_vSum
,
&
v_
curlvxSum
,
&
v_
curlvySum
,
&
v_
curlvzSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
&
icount_align
);
/* Initialise masks to true in case remainder interactions have been
...
...
@@ -675,21 +702,21 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
&
int_cache
.
r2q
[
pjd
],
&
int_cache
.
dxq
[
pjd
],
&
int_cache
.
dyq
[
pjd
],
&
int_cache
.
dzq
[
pjd
],
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
&
int_cache
.
vxq
[
pjd
],
&
int_cache
.
vyq
[
pjd
],
&
int_cache
.
vzq
[
pjd
],
&
int_cache
.
mq
[
pjd
],
&
rhoSum
,
&
rho_dhSum
,
&
wcountSum
,
&
wcount_dhSum
,
&
div_vSum
,
&
curlvxSum
,
&
curlvySum
,
&
curlvzSum
,
int_mask
,
int_mask2
,
&
int_cache
.
mq
[
pjd
],
&
v_
rhoSum
,
&
v_
rho_dhSum
,
&
v_
wcountSum
,
&
v_
wcount_dhSum
,
&
v_
div_vSum
,
&
v_
curlvxSum
,
&
v_
curlvySum
,
&
v_
curlvzSum
,
int_mask
,
int_mask2
,
0
);
}
/* Perform horizontal adds on vector sums and store result in particle pi.
*/
VEC_HADD
(
rhoSum
,
pi
->
rho
);
VEC_HADD
(
rho_dhSum
,
pi
->
density
.
rho_dh
);
VEC_HADD
(
wcountSum
,
pi
->
density
.
wcount
);
VEC_HADD
(
wcount_dhSum
,
pi
->
density
.
wcount_dh
);
VEC_HADD
(
div_vSum
,
pi
->
density
.
div_v
);
VEC_HADD
(
curlvxSum
,
pi
->
density
.
rot_v
[
0
]);
VEC_HADD
(
curlvySum
,
pi
->
density
.
rot_v
[
1
]);
VEC_HADD
(
curlvzSum
,
pi
->
density
.
rot_v
[
2
]);
VEC_HADD
(
v_
rhoSum
,
pi
->
rho
);
VEC_HADD
(
v_
rho_dhSum
,
pi
->
density
.
rho_dh
);
VEC_HADD
(
v_
wcountSum
,
pi
->
density
.
wcount
);
VEC_HADD
(
v_
wcount_dhSum
,
pi
->
density
.
wcount_dh
);
VEC_HADD
(
v_
div_vSum
,
pi
->
density
.
div_v
);
VEC_HADD
(
v_
curlvxSum
,
pi
->
density
.
rot_v
[
0
]);
VEC_HADD
(
v_
curlvySum
,
pi
->
density
.
rot_v
[
1
]);
VEC_HADD
(
v_
curlvzSum
,
pi
->
density
.
rot_v
[
2
]);
/* Reset interaction count. */
icount
=
0
;
...
...
@@ -758,14 +785,14 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
/* Is the ith particle active? */
if
(
!
part_is_active_no_debug
(
pi
,
max_active_bin
))
continue
;
vector
pix
,
piy
,
piz
;
vector
v_
pix
,
v_
piy
,
v_
piz
;
const
float
hi
=
cell_cache
->
h
[
pid
];
/* Fill particle pi vectors. */
pix
.
v
=
vec_set1
(
cell_cache
->
x
[
pid
]);
piy
.
v
=
vec_set1
(
cell_cache
->
y
[
pid
]);
piz
.
v
=
vec_set1
(
cell_cache
->
z
[
pid
]);
v_
pix
.
v
=
vec_set1
(
cell_cache
->
x
[
pid
]);
v_
piy
.
v
=
vec_set1
(
cell_cache
->
y
[
pid
]);