Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
20396294
Commit
20396294
authored
May 03, 2017
by
James Willis
Browse files
Only load pj vector quantities when they are needed.
parent
1d0da278
Changes
1
Hide whitespace changes
Inline
Side-by-side
src/runner_doiact_vec.c
View file @
20396294
...
...
@@ -438,9 +438,7 @@ __attribute__((always_inline)) INLINE static void calcRemForceInteractions(
*/
__attribute__
((
always_inline
))
INLINE
static
void
storeForceInteractions
(
const
int
mask
,
const
int
pjd
,
vector
*
v_r2
,
vector
*
v_dx
,
vector
*
v_dy
,
vector
*
v_dz
,
vector
*
v_mj
,
vector
*
v_vjx
,
vector
*
v_vjy
,
vector
*
v_vjz
,
vector
*
v_rhoj
,
vector
*
v_grad_hj
,
vector
*
v_pOrhoj2
,
vector
*
v_balsara_j
,
vector
*
v_cj
,
vector
*
v_hj_inv
,
const
struct
cache
*
const
cell_cache
,
struct
c2_cache
*
const
int_cache
,
vector
*
v_dz
,
const
struct
cache
*
const
cell_cache
,
struct
c2_cache
*
const
int_cache
,
int
*
icount
,
vector
*
a_hydro_xSum
,
vector
*
a_hydro_ySum
,
vector
*
a_hydro_zSum
,
vector
*
h_dtSum
,
vector
*
v_sigSum
,
vector
*
entropy_dtSum
,
vector
v_hi_inv
,
vector
v_vix
,
vector
v_viy
,
vector
v_viz
,
vector
*
v_rhoi
,
vector
*
v_grad_hi
,
vector
*
v_pOrhoi2
,
vector
*
v_balsara_i
,
vector
*
v_ci
)
{
...
...
@@ -509,7 +507,7 @@ __attribute__((always_inline)) INLINE static void storeForceInteractions(
int_cache
->
pOrho2q
[
*
icount
]
=
cell_cache
->
pOrho2
[
pjd
+
bit_index
];
int_cache
->
balsaraq
[
*
icount
]
=
cell_cache
->
balsara
[
pjd
+
bit_index
];
int_cache
->
soundspeedq
[
*
icount
]
=
cell_cache
->
soundspeed
[
pjd
+
bit_index
];
int_cache
->
h_invq
[
*
icount
]
=
v_hj_inv
->
f
[
bit_index
];
int_cache
->
h_invq
[
*
icount
]
=
1
.
f
/
cell_cache
->
h
[
pjd
+
bit_index
];
(
*
icount
)
++
;
}
...
...
@@ -770,6 +768,8 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
pjvz
.
v
=
vec_load
(
&
cell_cache
->
vz
[
pjd
]);
mj
.
v
=
vec_load
(
&
cell_cache
->
m
[
pjd
]);
/* TODO: Don't load unneeded quantities until you have to in storeInteractions()!!! */
pjx2
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
+
VEC_SIZE
]);
pjy2
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
+
VEC_SIZE
]);
pjz2
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
+
VEC_SIZE
]);
...
...
@@ -1597,12 +1597,12 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec_2(
cell_cache
->
x
[
i
]
=
pix
.
f
[
0
];
cell_cache
->
y
[
i
]
=
piy
.
f
[
0
];
cell_cache
->
z
[
i
]
=
piz
.
f
[
0
];
cell_cache
->
h
[
i
]
=
1
.
f
;
}
}
vector
pjx
,
pjy
,
pjz
;
vector
pjvx
,
pjvy
,
pjvz
,
mj
,
hj
,
hjg2
;
vector
v_rhoj
,
v_grad_hj
,
v_pOrhoj2
,
v_balsara_j
,
v_cj
;
vector
hj
,
hjg2
;
//vector pjx2, pjy2, pjz2;
//vector pjvx2, pjvy2, pjvz2, mj2, hj_2, hjg2_2;
...
...
@@ -1614,19 +1614,16 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec_2(
pjx
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
]);
pjy
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
]);
pjz
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
]);
pjvx
.
v
=
vec_load
(
&
cell_cache
->
vx
[
pjd
]);
pjvy
.
v
=
vec_load
(
&
cell_cache
->
vy
[
pjd
]);
pjvz
.
v
=
vec_load
(
&
cell_cache
->
vz
[
pjd
]);
mj
.
v
=
vec_load
(
&
cell_cache
->
m
[
pjd
]);
hj
.
v
=
vec_load
(
&
cell_cache
->
h
[
pjd
]);
hjg2
.
v
=
vec_mul
(
vec_mul
(
hj
.
v
,
hj
.
v
),
kernel_gamma2_vec
.
v
);
//v_hj_inv = vec_reciprocal(hj);
v_rhoj
.
v
=
vec_load
(
&
cell_cache
->
rho
[
pjd
]);
v_grad_hj
.
v
=
vec_load
(
&
cell_cache
->
grad_h
[
pjd
]);
v_pOrhoj2
.
v
=
vec_load
(
&
cell_cache
->
pOrho2
[
pjd
]);
v_balsara_j
.
v
=
vec_load
(
&
cell_cache
->
balsara
[
pjd
]);
v_cj
.
v
=
vec_load
(
&
cell_cache
->
soundspeed
[
pjd
]);
//
v_rhoj.v = vec_load(&cell_cache->rho[pjd]);
//
v_grad_hj.v = vec_load(&cell_cache->grad_h[pjd]);
//
v_pOrhoj2.v = vec_load(&cell_cache->pOrho2[pjd]);
//
v_balsara_j.v = vec_load(&cell_cache->balsara[pjd]);
//
v_cj.v = vec_load(&cell_cache->soundspeed[pjd]);
//pjx2.v = vec_load(&cell_cache->x[pjd + VEC_SIZE]);
//pjy2.v = vec_load(&cell_cache->y[pjd + VEC_SIZE]);
...
...
@@ -1638,11 +1635,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec_2(
//hj_2.v = vec_load(&cell_cache->h[pjd + VEC_SIZE]);
//hjg2_2.v = vec_mul(vec_mul(hj_2.v,hj_2.v), kernel_gamma2_vec.v);
vector
v_hj_inv
;
v_hj_inv
=
vec_reciprocal
(
hj
);
/* Compute the pairwise distance. */
vector
v_dx_tmp
,
v_dy_tmp
,
v_dz_tmp
;
//vector v_dx_tmp2, v_dy_tmp2, v_dz_tmp2, v_r2_2;
...
...
@@ -1707,6 +1700,21 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec_2(
//intCount += __builtin_popcount(doi_mask);
vector
pjvx
,
pjvy
,
pjvz
,
mj
,
v_hj_inv
;
vector
v_rhoj
,
v_grad_hj
,
v_pOrhoj2
,
v_balsara_j
,
v_cj
;
v_hj_inv
=
vec_reciprocal
(
hj
);
mj
.
v
=
vec_load
(
&
cell_cache
->
m
[
pjd
]);
pjvx
.
v
=
vec_load
(
&
cell_cache
->
vx
[
pjd
]);
pjvy
.
v
=
vec_load
(
&
cell_cache
->
vy
[
pjd
]);
pjvz
.
v
=
vec_load
(
&
cell_cache
->
vz
[
pjd
]);
v_rhoj
.
v
=
vec_load
(
&
cell_cache
->
rho
[
pjd
]);
v_grad_hj
.
v
=
vec_load
(
&
cell_cache
->
grad_h
[
pjd
]);
v_pOrhoj2
.
v
=
vec_load
(
&
cell_cache
->
pOrho2
[
pjd
]);
v_balsara_j
.
v
=
vec_load
(
&
cell_cache
->
balsara
[
pjd
]);
v_cj
.
v
=
vec_load
(
&
cell_cache
->
soundspeed
[
pjd
]);
runner_iact_nonsym_1_vec_force_2
(
&
v_r2
,
&
v_dx_tmp
,
&
v_dy_tmp
,
&
v_dz_tmp
,
&
v_vix
,
&
v_viy
,
&
v_viz
,
&
v_rhoi
,
&
v_grad_hi
,
&
v_pOrhoi2
,
&
v_balsara_i
,
&
v_ci
,
&
pjvx
,
&
pjvy
,
&
pjvz
,
&
v_rhoj
,
&
v_grad_hj
,
&
v_pOrhoj2
,
&
v_balsara_j
,
&
v_cj
,
&
mj
,
...
...
@@ -1716,7 +1724,6 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec_2(
}
}
VEC_HADD
(
a_hydro_xSum
,
pi
->
a_hydro
[
0
]);
VEC_HADD
(
a_hydro_ySum
,
pi
->
a_hydro
[
1
]);
...
...
@@ -1817,6 +1824,8 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec_3(
v_hi_inv
=
vec_reciprocal
(
v_hi
);
/*TODO: Define hid_inv pow_dimension_plus_one_vec */
a_hydro_xSum
.
v
=
vec_setzero
();
a_hydro_ySum
.
v
=
vec_setzero
();
a_hydro_zSum
.
v
=
vec_setzero
();
...
...
@@ -1842,13 +1851,8 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec_3(
}
}
vector
pjx
,
pjy
,
pjz
;
vector
pjvx
,
pjvy
,
pjvz
,
mj
,
hj
,
hjg2
;
vector
v_rhoj
,
v_grad_hj
,
v_pOrhoj2
,
v_balsara_j
,
v_cj
;
vector
pjx2
,
pjy2
,
pjz2
;
vector
pjvx2
,
pjvy2
,
pjvz2
,
mj2
,
hj_2
,
hjg2_2
;
vector
v_rhoj_2
,
v_grad_hj_2
,
v_pOrhoj2_2
,
v_balsara_j_2
,
v_cj_2
;
vector
pjx
,
pjy
,
pjz
,
hj
,
hjg2
;
vector
pjx2
,
pjy2
,
pjz2
,
hj_2
,
hjg2_2
;
/* Find all of particle pi's interacions and store needed values in the
* secondary cache.*/
...
...
@@ -1858,44 +1862,17 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec_3(
pjx
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
]);
pjy
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
]);
pjz
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
]);
pjvx
.
v
=
vec_load
(
&
cell_cache
->
vx
[
pjd
]);
pjvy
.
v
=
vec_load
(
&
cell_cache
->
vy
[
pjd
]);
pjvz
.
v
=
vec_load
(
&
cell_cache
->
vz
[
pjd
]);
mj
.
v
=
vec_load
(
&
cell_cache
->
m
[
pjd
]);
hj
.
v
=
vec_load
(
&
cell_cache
->
h
[
pjd
]);
hjg2
.
v
=
vec_mul
(
vec_mul
(
hj
.
v
,
hj
.
v
),
kernel_gamma2_vec
.
v
);
/* TODO: Don't load unneeded quantities until you have to in storeInteractions()!!! */
v_rhoj
.
v
=
vec_load
(
&
cell_cache
->
rho
[
pjd
]);
v_grad_hj
.
v
=
vec_load
(
&
cell_cache
->
grad_h
[
pjd
]);
v_pOrhoj2
.
v
=
vec_load
(
&
cell_cache
->
pOrho2
[
pjd
]);
v_balsara_j
.
v
=
vec_load
(
&
cell_cache
->
balsara
[
pjd
]);
v_cj
.
v
=
vec_load
(
&
cell_cache
->
soundspeed
[
pjd
]);
pjx2
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
+
VEC_SIZE
]);
pjy2
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
+
VEC_SIZE
]);
pjz2
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
+
VEC_SIZE
]);
pjvx2
.
v
=
vec_load
(
&
cell_cache
->
vx
[
pjd
+
VEC_SIZE
]);
pjvy2
.
v
=
vec_load
(
&
cell_cache
->
vy
[
pjd
+
VEC_SIZE
]);
pjvz2
.
v
=
vec_load
(
&
cell_cache
->
vz
[
pjd
+
VEC_SIZE
]);
mj2
.
v
=
vec_load
(
&
cell_cache
->
m
[
pjd
+
VEC_SIZE
]);
hj_2
.
v
=
vec_load
(
&
cell_cache
->
h
[
pjd
+
VEC_SIZE
]);
hjg2_2
.
v
=
vec_mul
(
vec_mul
(
hj_2
.
v
,
hj_2
.
v
),
kernel_gamma2_vec
.
v
);
v_rhoj_2
.
v
=
vec_load
(
&
cell_cache
->
rho
[
pjd
+
VEC_SIZE
]);
v_grad_hj_2
.
v
=
vec_load
(
&
cell_cache
->
grad_h
[
pjd
+
VEC_SIZE
]);
v_pOrhoj2_2
.
v
=
vec_load
(
&
cell_cache
->
pOrho2
[
pjd
+
VEC_SIZE
]);
v_balsara_j_2
.
v
=
vec_load
(
&
cell_cache
->
balsara
[
pjd
+
VEC_SIZE
]);
v_cj_2
.
v
=
vec_load
(
&
cell_cache
->
soundspeed
[
pjd
+
VEC_SIZE
]);
vector
v_hj_inv
,
v_hj_inv_2
;
v_hj_inv
=
vec_reciprocal
(
hj
);
v_hj_inv_2
=
vec_reciprocal
(
hj_2
);
/* Compute the pairwise distance. */
vector
v_dx_tmp
,
v_dy_tmp
,
v_dz_tmp
;
vector
v_dx_tmp2
,
v_dy_tmp2
,
v_dz_tmp2
,
v_r2_2
;
...
...
@@ -1957,7 +1934,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec_3(
if
(
doi_mask
)
{
storeForceInteractions
(
doi_mask
,
pjd
,
&
v_r2
,
&
v_dx_tmp
,
&
v_dy_tmp
,
&
v_dz_tmp
,
&
mj
,
&
pjvx
,
&
pjvy
,
&
pjvz
,
&
v_rhoj
,
&
v_grad_hj
,
&
v_pOrhoj2
,
&
v_balsara_j
,
&
v_cj
,
&
v_hj_inv
,
cell_cache
,
&
int_cache
,
cell_cache
,
&
int_cache
,
&
icount
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
&
v_rhoi
,
&
v_grad_hi
,
&
v_pOrhoi2
,
&
v_balsara_i
,
&
v_ci
);
...
...
@@ -1965,7 +1942,7 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec_3(
if
(
doi_mask2
)
{
storeForceInteractions
(
doi_mask2
,
pjd
+
VEC_SIZE
,
&
v_r2_2
,
&
v_dx_tmp2
,
&
v_dy_tmp2
,
&
v_dz_tmp2
,
&
mj2
,
&
pjvx2
,
&
pjvy2
,
&
pjvz2
,
&
v_rhoj_2
,
&
v_grad_hj_2
,
&
v_pOrhoj2_2
,
&
v_balsara_j_2
,
&
v_cj_2
,
&
v_hj_inv_2
,
cell_cache
,
&
int_cache
,
cell_cache
,
&
int_cache
,
&
icount
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
&
v_rhoi
,
&
v_grad_hi
,
&
v_pOrhoi2
,
&
v_balsara_i
,
&
v_ci
);
...
...
@@ -1973,13 +1950,11 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec_3(
}
/* Loop over all other particles. */
//printFloatVector(a_hydro_xSum,"a_hydro_xSum before rem",8);
/* Perform padded vector remainder interactions if any are present. */
calcRemForceInteractions
(
&
int_cache
,
icount
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
v_hi_inv
,
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
icount_align
,
2
);
//printFloatVector(a_hydro_xSum,"a_hydro_xSum after rem",8);
/* Initialise masks to true in case remainder interactions have been
* performed. */
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment