Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
f0198314
Commit
f0198314
authored
Sep 11, 2017
by
Matthieu Schaller
Browse files
Removed all the old vectorized interaction routines.
parent
62555575
Changes
6
Hide whitespace changes
Inline
Side-by-side
src/hydro/Default/hydro_iact.h
View file @
f0198314
...
...
@@ -96,120 +96,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
for
(
k
=
0
;
k
<
3
;
k
++
)
pj
->
density
.
rot_v
[
k
]
+=
mi
*
curlvr
[
k
]
*
wj_dx
;
}
/**
* @brief Density loop (Vectorized version)
*/
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_vec_density
(
float
*
R2
,
float
*
Dx
,
float
*
Hi
,
float
*
Hj
,
struct
part
**
pi
,
struct
part
**
pj
)
{
#ifdef WITH_VECTORIZATION
vector
r
,
ri
,
r2
,
xi
,
xj
,
hi
,
hj
,
hi_inv
,
hj_inv
,
wi
,
wj
,
wi_dx
,
wj_dx
;
vector
rhoi
,
rhoj
,
rhoi_dh
,
rhoj_dh
,
wcounti
,
wcountj
,
wcounti_dh
,
wcountj_dh
;
vector
mi
,
mj
;
vector
dx
[
3
],
dv
[
3
];
vector
vi
[
3
],
vj
[
3
];
vector
dvdr
,
div_vi
,
div_vj
;
vector
curlvr
[
3
],
rot_vi
[
3
],
rot_vj
[
3
];
int
k
,
j
;
#if VEC_SIZE == 8
mi
.
v
=
vec_set
(
pi
[
0
]
->
mass
,
pi
[
1
]
->
mass
,
pi
[
2
]
->
mass
,
pi
[
3
]
->
mass
,
pi
[
4
]
->
mass
,
pi
[
5
]
->
mass
,
pi
[
6
]
->
mass
,
pi
[
7
]
->
mass
);
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
,
pj
[
4
]
->
mass
,
pj
[
5
]
->
mass
,
pj
[
6
]
->
mass
,
pj
[
7
]
->
mass
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
vec_set
(
pi
[
0
]
->
v
[
k
],
pi
[
1
]
->
v
[
k
],
pi
[
2
]
->
v
[
k
],
pi
[
3
]
->
v
[
k
],
pi
[
4
]
->
v
[
k
],
pi
[
5
]
->
v
[
k
],
pi
[
6
]
->
v
[
k
],
pi
[
7
]
->
v
[
k
]);
vj
[
k
].
v
=
vec_set
(
pj
[
0
]
->
v
[
k
],
pj
[
1
]
->
v
[
k
],
pj
[
2
]
->
v
[
k
],
pj
[
3
]
->
v
[
k
],
pj
[
4
]
->
v
[
k
],
pj
[
5
]
->
v
[
k
],
pj
[
6
]
->
v
[
k
],
pj
[
7
]
->
v
[
k
]);
}
for
(
k
=
0
;
k
<
3
;
k
++
)
dx
[
k
].
v
=
vec_set
(
Dx
[
0
+
k
],
Dx
[
3
+
k
],
Dx
[
6
+
k
],
Dx
[
9
+
k
],
Dx
[
12
+
k
],
Dx
[
15
+
k
],
Dx
[
18
+
k
],
Dx
[
21
+
k
]);
#elif VEC_SIZE == 4
mi
.
v
=
vec_set
(
pi
[
0
]
->
mass
,
pi
[
1
]
->
mass
,
pi
[
2
]
->
mass
,
pi
[
3
]
->
mass
);
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
vec_set
(
pi
[
0
]
->
v
[
k
],
pi
[
1
]
->
v
[
k
],
pi
[
2
]
->
v
[
k
],
pi
[
3
]
->
v
[
k
]);
vj
[
k
].
v
=
vec_set
(
pj
[
0
]
->
v
[
k
],
pj
[
1
]
->
v
[
k
],
pj
[
2
]
->
v
[
k
],
pj
[
3
]
->
v
[
k
]);
}
for
(
k
=
0
;
k
<
3
;
k
++
)
dx
[
k
].
v
=
vec_set
(
Dx
[
0
+
k
],
Dx
[
3
+
k
],
Dx
[
6
+
k
],
Dx
[
9
+
k
]);
#endif
/* Get the radius and inverse radius. */
r2
.
v
=
vec_load
(
R2
);
ri
.
v
=
vec_rsqrt
(
r2
.
v
);
ri
.
v
=
ri
.
v
-
vec_set1
(
0
.
5
f
)
*
ri
.
v
*
(
r2
.
v
*
ri
.
v
*
ri
.
v
-
vec_set1
(
1
.
0
f
));
r
.
v
=
r2
.
v
*
ri
.
v
;
hi
.
v
=
vec_load
(
Hi
);
hi_inv
.
v
=
vec_rcp
(
hi
.
v
);
hi_inv
.
v
=
hi_inv
.
v
-
hi_inv
.
v
*
(
hi_inv
.
v
*
hi
.
v
-
vec_set1
(
1
.
0
f
));
xi
.
v
=
r
.
v
*
hi_inv
.
v
;
hj
.
v
=
vec_load
(
Hj
);
hj_inv
.
v
=
vec_rcp
(
hj
.
v
);
hj_inv
.
v
=
hj_inv
.
v
-
hj_inv
.
v
*
(
hj_inv
.
v
*
hj
.
v
-
vec_set1
(
1
.
0
f
));
xj
.
v
=
r
.
v
*
hj_inv
.
v
;
kernel_deval_vec
(
&
xi
,
&
wi
,
&
wi_dx
);
kernel_deval_vec
(
&
xj
,
&
wj
,
&
wj_dx
);
/* Compute dv. */
dv
[
0
].
v
=
vi
[
0
].
v
-
vj
[
0
].
v
;
dv
[
1
].
v
=
vi
[
1
].
v
-
vj
[
1
].
v
;
dv
[
2
].
v
=
vi
[
2
].
v
-
vj
[
2
].
v
;
/* Compute dv dot r */
dvdr
.
v
=
(
dv
[
0
].
v
*
dx
[
0
].
v
)
+
(
dv
[
1
].
v
*
dx
[
1
].
v
)
+
(
dv
[
2
].
v
*
dx
[
2
].
v
);
dvdr
.
v
=
dvdr
.
v
*
ri
.
v
;
/* Compute dv cross r */
curlvr
[
0
].
v
=
dv
[
1
].
v
*
dx
[
2
].
v
-
dv
[
2
].
v
*
dx
[
1
].
v
;
curlvr
[
1
].
v
=
dv
[
2
].
v
*
dx
[
0
].
v
-
dv
[
0
].
v
*
dx
[
2
].
v
;
curlvr
[
2
].
v
=
dv
[
0
].
v
*
dx
[
1
].
v
-
dv
[
1
].
v
*
dx
[
0
].
v
;
for
(
k
=
0
;
k
<
3
;
k
++
)
curlvr
[
k
].
v
*=
ri
.
v
;
rhoi
.
v
=
mj
.
v
*
wi
.
v
;
rhoi_dh
.
v
=
mj
.
v
*
(
vec_set1
(
hydro_dimension
)
*
wi
.
v
+
xi
.
v
*
wi_dx
.
v
);
wcounti
.
v
=
wi
.
v
;
wcounti_dh
.
v
=
xi
.
v
*
wi_dx
.
v
;
div_vi
.
v
=
mj
.
v
*
dvdr
.
v
*
wi_dx
.
v
;
for
(
k
=
0
;
k
<
3
;
k
++
)
rot_vi
[
k
].
v
=
mj
.
v
*
curlvr
[
k
].
v
*
wi_dx
.
v
;
rhoj
.
v
=
mi
.
v
*
wj
.
v
;
rhoj_dh
.
v
=
mi
.
v
*
(
vec_set1
(
hydro_dimension
)
*
wj
.
v
+
xj
.
v
*
wj_dx
.
v
);
wcountj
.
v
=
wj
.
v
;
wcountj_dh
.
v
=
xj
.
v
*
wj_dx
.
v
;
div_vj
.
v
=
mi
.
v
*
dvdr
.
v
*
wj_dx
.
v
;
for
(
k
=
0
;
k
<
3
;
k
++
)
rot_vj
[
k
].
v
=
mi
.
v
*
curlvr
[
k
].
v
*
wj_dx
.
v
;
for
(
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
{
pi
[
k
]
->
rho
+=
rhoi
.
f
[
k
];
pi
[
k
]
->
rho_dh
-=
rhoi_dh
.
f
[
k
];
pi
[
k
]
->
density
.
wcount
+=
wcounti
.
f
[
k
];
pi
[
k
]
->
density
.
wcount_dh
-=
wcounti_dh
.
f
[
k
];
pi
[
k
]
->
density
.
div_v
-=
div_vi
.
f
[
k
];
for
(
j
=
0
;
j
<
3
;
j
++
)
pi
[
k
]
->
density
.
rot_v
[
j
]
+=
rot_vi
[
j
].
f
[
k
];
pj
[
k
]
->
rho
+=
rhoj
.
f
[
k
];
pj
[
k
]
->
rho_dh
-=
rhoj_dh
.
f
[
k
];
pj
[
k
]
->
density
.
wcount
+=
wcountj
.
f
[
k
];
pj
[
k
]
->
density
.
wcount_dh
-=
wcountj_dh
.
f
[
k
];
pj
[
k
]
->
density
.
div_v
-=
div_vj
.
f
[
k
];
for
(
j
=
0
;
j
<
3
;
j
++
)
pj
[
k
]
->
density
.
rot_v
[
j
]
+=
rot_vj
[
j
].
f
[
k
];
}
#else
for
(
int
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
runner_iact_density
(
R2
[
k
],
&
Dx
[
3
*
k
],
Hi
[
k
],
Hj
[
k
],
pi
[
k
],
pj
[
k
]);
#endif
}
/**
* @brief Density loop (non-symmetric version)
*/
...
...
@@ -258,98 +144,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
for
(
k
=
0
;
k
<
3
;
k
++
)
pi
->
density
.
rot_v
[
k
]
+=
mj
*
curlvr
[
k
]
*
wi_dx
;
}
/**
* @brief Density loop (non-symmetric vectorized version)
*/
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_vec_density
(
float
*
R2
,
float
*
Dx
,
float
*
Hi
,
float
*
Hj
,
struct
part
**
pi
,
struct
part
**
pj
)
{
#ifdef WITH_VECTORIZATION
vector
r
,
ri
,
r2
,
xi
,
hi
,
hi_inv
,
wi
,
wi_dx
;
vector
rhoi
,
rhoi_dh
,
wcounti
,
wcounti_dh
,
div_vi
;
vector
mj
;
vector
dx
[
3
],
dv
[
3
];
vector
vi
[
3
],
vj
[
3
];
vector
dvdr
;
vector
curlvr
[
3
],
rot_vi
[
3
];
int
k
,
j
;
#if VEC_SIZE == 8
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
,
pj
[
4
]
->
mass
,
pj
[
5
]
->
mass
,
pj
[
6
]
->
mass
,
pj
[
7
]
->
mass
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
vec_set
(
pi
[
0
]
->
v
[
k
],
pi
[
1
]
->
v
[
k
],
pi
[
2
]
->
v
[
k
],
pi
[
3
]
->
v
[
k
],
pi
[
4
]
->
v
[
k
],
pi
[
5
]
->
v
[
k
],
pi
[
6
]
->
v
[
k
],
pi
[
7
]
->
v
[
k
]);
vj
[
k
].
v
=
vec_set
(
pj
[
0
]
->
v
[
k
],
pj
[
1
]
->
v
[
k
],
pj
[
2
]
->
v
[
k
],
pj
[
3
]
->
v
[
k
],
pj
[
4
]
->
v
[
k
],
pj
[
5
]
->
v
[
k
],
pj
[
6
]
->
v
[
k
],
pj
[
7
]
->
v
[
k
]);
}
for
(
k
=
0
;
k
<
3
;
k
++
)
dx
[
k
].
v
=
vec_set
(
Dx
[
0
+
k
],
Dx
[
3
+
k
],
Dx
[
6
+
k
],
Dx
[
9
+
k
],
Dx
[
12
+
k
],
Dx
[
15
+
k
],
Dx
[
18
+
k
],
Dx
[
21
+
k
]);
#elif VEC_SIZE == 4
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
vec_set
(
pi
[
0
]
->
v
[
k
],
pi
[
1
]
->
v
[
k
],
pi
[
2
]
->
v
[
k
],
pi
[
3
]
->
v
[
k
]);
vj
[
k
].
v
=
vec_set
(
pj
[
0
]
->
v
[
k
],
pj
[
1
]
->
v
[
k
],
pj
[
2
]
->
v
[
k
],
pj
[
3
]
->
v
[
k
]);
}
for
(
k
=
0
;
k
<
3
;
k
++
)
dx
[
k
].
v
=
vec_set
(
Dx
[
0
+
k
],
Dx
[
3
+
k
],
Dx
[
6
+
k
],
Dx
[
9
+
k
]);
#endif
/* Get the radius and inverse radius. */
r2
.
v
=
vec_load
(
R2
);
ri
.
v
=
vec_rsqrt
(
r2
.
v
);
ri
.
v
=
ri
.
v
-
vec_set1
(
0
.
5
f
)
*
ri
.
v
*
(
r2
.
v
*
ri
.
v
*
ri
.
v
-
vec_set1
(
1
.
0
f
));
r
.
v
=
r2
.
v
*
ri
.
v
;
hi
.
v
=
vec_load
(
Hi
);
hi_inv
.
v
=
vec_rcp
(
hi
.
v
);
hi_inv
.
v
=
hi_inv
.
v
-
hi_inv
.
v
*
(
hi_inv
.
v
*
hi
.
v
-
vec_set1
(
1
.
0
f
));
xi
.
v
=
r
.
v
*
hi_inv
.
v
;
kernel_deval_vec
(
&
xi
,
&
wi
,
&
wi_dx
);
/* Compute dv. */
dv
[
0
].
v
=
vi
[
0
].
v
-
vj
[
0
].
v
;
dv
[
1
].
v
=
vi
[
1
].
v
-
vj
[
1
].
v
;
dv
[
2
].
v
=
vi
[
2
].
v
-
vj
[
2
].
v
;
/* Compute dv dot r */
dvdr
.
v
=
(
dv
[
0
].
v
*
dx
[
0
].
v
)
+
(
dv
[
1
].
v
*
dx
[
1
].
v
)
+
(
dv
[
2
].
v
*
dx
[
2
].
v
);
dvdr
.
v
=
dvdr
.
v
*
ri
.
v
;
/* Compute dv cross r */
curlvr
[
0
].
v
=
dv
[
1
].
v
*
dx
[
2
].
v
-
dv
[
2
].
v
*
dx
[
1
].
v
;
curlvr
[
1
].
v
=
dv
[
2
].
v
*
dx
[
0
].
v
-
dv
[
0
].
v
*
dx
[
2
].
v
;
curlvr
[
2
].
v
=
dv
[
0
].
v
*
dx
[
1
].
v
-
dv
[
1
].
v
*
dx
[
0
].
v
;
for
(
k
=
0
;
k
<
3
;
k
++
)
curlvr
[
k
].
v
*=
ri
.
v
;
rhoi
.
v
=
mj
.
v
*
wi
.
v
;
rhoi_dh
.
v
=
mj
.
v
*
(
vec_set1
(
hydro_dimension
)
*
wi
.
v
+
xi
.
v
*
wi_dx
.
v
);
wcounti
.
v
=
wi
.
v
;
wcounti_dh
.
v
=
xi
.
v
*
wi_dx
.
v
;
div_vi
.
v
=
mj
.
v
*
dvdr
.
v
*
wi_dx
.
v
;
for
(
k
=
0
;
k
<
3
;
k
++
)
rot_vi
[
k
].
v
=
mj
.
v
*
curlvr
[
k
].
v
*
wi_dx
.
v
;
for
(
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
{
pi
[
k
]
->
rho
+=
rhoi
.
f
[
k
];
pi
[
k
]
->
rho_dh
-=
rhoi_dh
.
f
[
k
];
pi
[
k
]
->
density
.
wcount
+=
wcounti
.
f
[
k
];
pi
[
k
]
->
density
.
wcount_dh
-=
wcounti_dh
.
f
[
k
];
pi
[
k
]
->
density
.
div_v
-=
div_vi
.
f
[
k
];
for
(
j
=
0
;
j
<
3
;
j
++
)
pi
[
k
]
->
density
.
rot_v
[
j
]
+=
rot_vi
[
j
].
f
[
k
];
}
#else
for
(
int
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
runner_iact_nonsym_density
(
R2
[
k
],
&
Dx
[
3
*
k
],
Hi
[
k
],
Hj
[
k
],
pi
[
k
],
pj
[
k
]);
#endif
}
/**
* @brief Force loop
*/
...
...
@@ -445,212 +239,6 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
pj
->
force
.
v_sig
=
max
(
pj
->
force
.
v_sig
,
v_sig
);
}
/**
* @brief Force loop (Vectorized version)
*/
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_vec_force
(
float
*
R2
,
float
*
Dx
,
float
*
Hi
,
float
*
Hj
,
struct
part
**
pi
,
struct
part
**
pj
)
{
#ifdef WITH_VECTORIZATION
vector
r
,
r2
,
ri
;
vector
xi
,
xj
;
vector
hi
,
hj
,
hi_inv
,
hj_inv
;
vector
hid_inv
,
hjd_inv
;
vector
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
dvdr
;
vector
w
;
vector
piPOrho2
,
pjPOrho2
,
pirho
,
pjrho
,
piu
,
pju
;
vector
mi
,
mj
;
vector
f
;
vector
dx
[
3
];
vector
vi
[
3
],
vj
[
3
];
vector
pia
[
3
],
pja
[
3
];
vector
piu_dt
,
pju_dt
;
vector
pih_dt
,
pjh_dt
;
vector
ci
,
cj
,
v_sig
;
vector
omega_ij
,
Pi_ij
,
balsara
;
vector
pialpha
,
pjalpha
,
alpha_ij
,
v_sig_u
,
tc
;
int
j
,
k
;
/* Load stuff. */
#if VEC_SIZE == 8
mi
.
v
=
vec_set
(
pi
[
0
]
->
mass
,
pi
[
1
]
->
mass
,
pi
[
2
]
->
mass
,
pi
[
3
]
->
mass
,
pi
[
4
]
->
mass
,
pi
[
5
]
->
mass
,
pi
[
6
]
->
mass
,
pi
[
7
]
->
mass
);
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
,
pj
[
4
]
->
mass
,
pj
[
5
]
->
mass
,
pj
[
6
]
->
mass
,
pj
[
7
]
->
mass
);
piPOrho2
.
v
=
vec_set
(
pi
[
0
]
->
force
.
P_over_rho2
,
pi
[
1
]
->
force
.
P_over_rho2
,
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
,
pi
[
4
]
->
force
.
P_over_rho2
,
pi
[
5
]
->
force
.
P_over_rho2
,
pi
[
6
]
->
force
.
P_over_rho2
,
pi
[
7
]
->
force
.
P_over_rho2
);
pjPOrho2
.
v
=
vec_set
(
pj
[
0
]
->
force
.
P_over_rho2
,
pj
[
1
]
->
force
.
P_over_rho2
,
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
,
pj
[
4
]
->
force
.
P_over_rho2
,
pj
[
5
]
->
force
.
P_over_rho2
,
pj
[
6
]
->
force
.
P_over_rho2
,
pj
[
7
]
->
force
.
P_over_rho2
);
pirho
.
v
=
vec_set
(
pi
[
0
]
->
rho
,
pi
[
1
]
->
rho
,
pi
[
2
]
->
rho
,
pi
[
3
]
->
rho
,
pi
[
4
]
->
rho
,
pi
[
5
]
->
rho
,
pi
[
6
]
->
rho
,
pi
[
7
]
->
rho
);
pjrho
.
v
=
vec_set
(
pj
[
0
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
3
]
->
rho
,
pj
[
4
]
->
rho
,
pj
[
5
]
->
rho
,
pj
[
6
]
->
rho
,
pj
[
7
]
->
rho
);
piu
.
v
=
vec_set
(
pi
[
0
]
->
u
,
pi
[
1
]
->
u
,
pi
[
2
]
->
u
,
pi
[
3
]
->
u
,
pi
[
4
]
->
u
,
pi
[
5
]
->
u
,
pi
[
6
]
->
u
,
pi
[
7
]
->
u
);
pju
.
v
=
vec_set
(
pj
[
0
]
->
u
,
pj
[
1
]
->
u
,
pj
[
2
]
->
u
,
pj
[
3
]
->
u
,
pj
[
4
]
->
u
,
pj
[
5
]
->
u
,
pj
[
6
]
->
u
,
pj
[
7
]
->
u
);
ci
.
v
=
vec_set
(
pi
[
0
]
->
force
.
soundspeed
,
pi
[
1
]
->
force
.
soundspeed
,
pi
[
2
]
->
force
.
soundspeed
,
pi
[
3
]
->
force
.
soundspeed
,
pi
[
4
]
->
force
.
soundspeed
,
pi
[
5
]
->
force
.
soundspeed
,
pi
[
6
]
->
force
.
soundspeed
,
pi
[
7
]
->
force
.
soundspeed
);
cj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
soundspeed
,
pj
[
1
]
->
force
.
soundspeed
,
pj
[
2
]
->
force
.
soundspeed
,
pj
[
3
]
->
force
.
soundspeed
,
pj
[
4
]
->
force
.
soundspeed
,
pj
[
5
]
->
force
.
soundspeed
,
pj
[
6
]
->
force
.
soundspeed
,
pj
[
7
]
->
force
.
soundspeed
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
vec_set
(
pi
[
0
]
->
v
[
k
],
pi
[
1
]
->
v
[
k
],
pi
[
2
]
->
v
[
k
],
pi
[
3
]
->
v
[
k
],
pi
[
4
]
->
v
[
k
],
pi
[
5
]
->
v
[
k
],
pi
[
6
]
->
v
[
k
],
pi
[
7
]
->
v
[
k
]);
vj
[
k
].
v
=
vec_set
(
pj
[
0
]
->
v
[
k
],
pj
[
1
]
->
v
[
k
],
pj
[
2
]
->
v
[
k
],
pj
[
3
]
->
v
[
k
],
pj
[
4
]
->
v
[
k
],
pj
[
5
]
->
v
[
k
],
pj
[
6
]
->
v
[
k
],
pj
[
7
]
->
v
[
k
]);
}
for
(
k
=
0
;
k
<
3
;
k
++
)
dx
[
k
].
v
=
vec_set
(
Dx
[
0
+
k
],
Dx
[
3
+
k
],
Dx
[
6
+
k
],
Dx
[
9
+
k
],
Dx
[
12
+
k
],
Dx
[
15
+
k
],
Dx
[
18
+
k
],
Dx
[
21
+
k
]);
balsara
.
v
=
vec_set
(
pi
[
0
]
->
force
.
balsara
,
pi
[
1
]
->
force
.
balsara
,
pi
[
2
]
->
force
.
balsara
,
pi
[
3
]
->
force
.
balsara
,
pi
[
4
]
->
force
.
balsara
,
pi
[
5
]
->
force
.
balsara
,
pi
[
6
]
->
force
.
balsara
,
pi
[
7
]
->
force
.
balsara
)
+
vec_set
(
pj
[
0
]
->
force
.
balsara
,
pj
[
1
]
->
force
.
balsara
,
pj
[
2
]
->
force
.
balsara
,
pj
[
3
]
->
force
.
balsara
,
pj
[
4
]
->
force
.
balsara
,
pj
[
5
]
->
force
.
balsara
,
pj
[
6
]
->
force
.
balsara
,
pj
[
7
]
->
force
.
balsara
);
pialpha
.
v
=
vec_set
(
pi
[
0
]
->
alpha
,
pi
[
1
]
->
alpha
,
pi
[
2
]
->
alpha
,
pi
[
3
]
->
alpha
,
pi
[
4
]
->
alpha
,
pi
[
5
]
->
alpha
,
pi
[
6
]
->
alpha
,
pi
[
7
]
->
alpha
);
pjalpha
.
v
=
vec_set
(
pj
[
0
]
->
alpha
,
pj
[
1
]
->
alpha
,
pj
[
2
]
->
alpha
,
pj
[
3
]
->
alpha
,
pj
[
4
]
->
alpha
,
pj
[
5
]
->
alpha
,
pj
[
6
]
->
alpha
,
pj
[
7
]
->
alpha
);
#elif VEC_SIZE == 4
mi
.
v
=
vec_set
(
pi
[
0
]
->
mass
,
pi
[
1
]
->
mass
,
pi
[
2
]
->
mass
,
pi
[
3
]
->
mass
);
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
);
piPOrho2
.
v
=
vec_set
(
pi
[
0
]
->
force
.
P_over_rho2
,
pi
[
1
]
->
force
.
P_over_rho2
,
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
);
pjPOrho2
.
v
=
vec_set
(
pj
[
0
]
->
force
.
P_over_rho2
,
pj
[
1
]
->
force
.
P_over_rho2
,
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
);
pirho
.
v
=
vec_set
(
pi
[
0
]
->
rho
,
pi
[
1
]
->
rho
,
pi
[
2
]
->
rho
,
pi
[
3
]
->
rho
);
pjrho
.
v
=
vec_set
(
pj
[
0
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
3
]
->
rho
);
piu
.
v
=
vec_set
(
pi
[
0
]
->
u
,
pi
[
1
]
->
u
,
pi
[
2
]
->
u
,
pi
[
3
]
->
u
);
pju
.
v
=
vec_set
(
pj
[
0
]
->
u
,
pj
[
1
]
->
u
,
pj
[
2
]
->
u
,
pj
[
3
]
->
u
);
ci
.
v
=
vec_set
(
pi
[
0
]
->
force
.
soundspeed
,
pi
[
1
]
->
force
.
soundspeed
,
pi
[
2
]
->
force
.
soundspeed
,
pi
[
3
]
->
force
.
soundspeed
);
cj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
soundspeed
,
pj
[
1
]
->
force
.
soundspeed
,
pj
[
2
]
->
force
.
soundspeed
,
pj
[
3
]
->
force
.
soundspeed
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
vec_set
(
pi
[
0
]
->
v
[
k
],
pi
[
1
]
->
v
[
k
],
pi
[
2
]
->
v
[
k
],
pi
[
3
]
->
v
[
k
]);
vj
[
k
].
v
=
vec_set
(
pj
[
0
]
->
v
[
k
],
pj
[
1
]
->
v
[
k
],
pj
[
2
]
->
v
[
k
],
pj
[
3
]
->
v
[
k
]);
}
for
(
k
=
0
;
k
<
3
;
k
++
)
dx
[
k
].
v
=
vec_set
(
Dx
[
0
+
k
],
Dx
[
3
+
k
],
Dx
[
6
+
k
],
Dx
[
9
+
k
]);
balsara
.
v
=
vec_set
(
pi
[
0
]
->
force
.
balsara
,
pi
[
1
]
->
force
.
balsara
,
pi
[
2
]
->
force
.
balsara
,
pi
[
3
]
->
force
.
balsara
)
+
vec_set
(
pj
[
0
]
->
force
.
balsara
,
pj
[
1
]
->
force
.
balsara
,
pj
[
2
]
->
force
.
balsara
,
pj
[
3
]
->
force
.
balsara
);
pialpha
.
v
=
vec_set
(
pi
[
0
]
->
alpha
,
pi
[
1
]
->
alpha
,
pi
[
2
]
->
alpha
,
pi
[
3
]
->
alpha
);
pjalpha
.
v
=
vec_set
(
pj
[
0
]
->
alpha
,
pj
[
1
]
->
alpha
,
pj
[
2
]
->
alpha
,
pj
[
3
]
->
alpha
);
#else
#error
#endif
/* Get the radius and inverse radius. */
r2
.
v
=
vec_load
(
R2
);
ri
.
v
=
vec_rsqrt
(
r2
.
v
);
ri
.
v
=
ri
.
v
-
vec_set1
(
0
.
5
f
)
*
ri
.
v
*
(
r2
.
v
*
ri
.
v
*
ri
.
v
-
vec_set1
(
1
.
0
f
));
r
.
v
=
r2
.
v
*
ri
.
v
;
/* Get the kernel for hi. */
hi
.
v
=
vec_load
(
Hi
);
hi_inv
.
v
=
vec_rcp
(
hi
.
v
);
hi_inv
.
v
=
hi_inv
.
v
-
hi_inv
.
v
*
(
hi
.
v
*
hi_inv
.
v
-
vec_set1
(
1
.
0
f
));
hid_inv
=
pow_dimension_plus_one_vec
(
hi_inv
);
xi
.
v
=
r
.
v
*
hi_inv
.
v
;
kernel_deval_vec
(
&
xi
,
&
wi
,
&
wi_dx
);
wi_dr
.
v
=
hid_inv
.
v
*
wi_dx
.
v
;
/* Get the kernel for hj. */
hj
.
v
=
vec_load
(
Hj
);
hj_inv
.
v
=
vec_rcp
(
hj
.
v
);
hj_inv
.
v
=
hj_inv
.
v
-
hj_inv
.
v
*
(
hj
.
v
*
hj_inv
.
v
-
vec_set1
(
1
.
0
f
));
hjd_inv
=
pow_dimension_plus_one_vec
(
hj_inv
);
xj
.
v
=
r
.
v
*
hj_inv
.
v
;
kernel_deval_vec
(
&
xj
,
&
wj
,
&
wj_dx
);
wj_dr
.
v
=
hjd_inv
.
v
*
wj_dx
.
v
;
/* Compute dv dot r. */
dvdr
.
v
=
((
vi
[
0
].
v
-
vj
[
0
].
v
)
*
dx
[
0
].
v
)
+
((
vi
[
1
].
v
-
vj
[
1
].
v
)
*
dx
[
1
].
v
)
+
((
vi
[
2
].
v
-
vj
[
2
].
v
)
*
dx
[
2
].
v
);
dvdr
.
v
=
dvdr
.
v
*
ri
.
v
;
/* Get the time derivative for h. */
pih_dt
.
v
=
mj
.
v
/
pjrho
.
v
*
dvdr
.
v
*
wi_dr
.
v
;
pjh_dt
.
v
=
mi
.
v
/
pirho
.
v
*
dvdr
.
v
*
wj_dr
.
v
;
/* Compute the relative velocity. (This is 0 if the particles move away from
* each other and negative otherwise) */
omega_ij
.
v
=
vec_fmin
(
dvdr
.
v
,
vec_set1
(
0
.
0
f
));
/* Compute signal velocity */
v_sig
.
v
=
ci
.
v
+
cj
.
v
-
vec_set1
(
2
.
0
f
)
*
omega_ij
.
v
;
/* Compute viscosity parameter */
alpha_ij
.
v
=
vec_set1
(
-
0
.
5
f
)
*
(
pialpha
.
v
+
pjalpha
.
v
);
/* Compute viscosity tensor */
Pi_ij
.
v
=
balsara
.
v
*
alpha_ij
.
v
*
v_sig
.
v
*
omega_ij
.
v
/
(
pirho
.
v
+
pjrho
.
v
);
Pi_ij
.
v
*=
(
wi_dr
.
v
+
wj_dr
.
v
);
/* Thermal conductivity */
v_sig_u
.
v
=
vec_sqrt
(
vec_set1
(
2
.
f
*
hydro_gamma_minus_one
)
*
vec_fabs
(
pirho
.
v
*
piu
.
v
-
pjrho
.
v
*
pju
.
v
)
/
(
pirho
.
v
+
pjrho
.
v
));
tc
.
v
=
vec_set1
(
const_conductivity_alpha
)
*
v_sig_u
.
v
/
(
pirho
.
v
+
pjrho
.
v
);
tc
.
v
*=
(
wi_dr
.
v
+
wj_dr
.
v
);
/* Get the common factor out. */
w
.
v
=
ri
.
v
*
((
piPOrho2
.
v
*
wi_dr
.
v
+
pjPOrho2
.
v
*
wj_dr
.
v
)
+
vec_set1
(
0
.
25
f
)
*
Pi_ij
.
v
);
/* Use the force, Luke! */
for
(
k
=
0
;
k
<
3
;
k
++
)
{
f
.
v
=
dx
[
k
].
v
*
w
.
v
;
pia
[
k
].
v
=
mj
.
v
*
f
.
v
;
pja
[
k
].
v
=
mi
.
v
*
f
.
v
;
}
/* Get the time derivative for u. */
piu_dt
.
v
=
mj
.
v
*
dvdr
.
v
*
(
piPOrho2
.
v
*
wi_dr
.
v
+
vec_set1
(
0
.
125
f
)
*
Pi_ij
.
v
);
pju_dt
.
v
=
mi
.
v
*
dvdr
.
v
*
(
pjPOrho2
.
v
*
wj_dr
.
v
+
vec_set1
(
0
.
125
f
)
*
Pi_ij
.
v
);
/* Add the thermal conductivity */
piu_dt
.
v
+=
mj
.
v
*
tc
.
v
*
(
piu
.
v
-
pju
.
v
);
pju_dt
.
v
+=
mi
.
v
*
tc
.
v
*
(
pju
.
v
-
piu
.
v
);
/* Store the forces back on the particles. */
for
(
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
{
pi
[
k
]
->
force
.
u_dt
+=
piu_dt
.
f
[
k
];
pj
[
k
]
->
force
.
u_dt
+=
pju_dt
.
f
[
k
];
pi
[
k
]
->
force
.
h_dt
-=
pih_dt
.
f
[
k
];
pj
[
k
]
->
force
.
h_dt
-=
pjh_dt
.
f
[
k
];
pi
[
k
]
->
force
.
v_sig
=
max
(
pi
[
k
]
->
force
.
v_sig
,
v_sig
.
f
[
k
]);
pj
[
k
]
->
force
.
v_sig
=
max
(
pj
[
k
]
->
force
.
v_sig
,
v_sig
.
f
[
k
]);
for
(
j
=
0
;
j
<
3
;
j
++
)
{
pi
[
k
]
->
a_hydro
[
j
]
-=
pia
[
j
].
f
[
k
];
pj
[
k
]
->
a_hydro
[
j
]
+=
pja
[
j
].
f
[
k
];
}
}
#else
for
(
int
k
=
0
;
k
<
VEC_SIZE
;
k
++
)
runner_iact_force
(
R2
[
k
],
&
Dx
[
3
*
k
],
Hi
[
k
],
Hj
[
k
],
pi
[
k
],
pj
[
k
]);
#endif
}
/**
* @brief Force loop (non-symmetric version)
*/
...
...
@@ -740,196 +328,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
pi
->
force
.
v_sig
=
max
(
pi
->
force
.
v_sig
,
v_sig
);
}
/**
* @brief Force loop (Vectorized non-symmetric version)
*/
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_vec_force
(
float
*
R2
,
float
*
Dx
,
float
*
Hi
,
float
*
Hj
,
struct
part
**
pi
,
struct
part
**
pj
)
{
#ifdef WITH_VECTORIZATION
vector
r
,
r2
,
ri
;
vector
xi
,
xj
;
vector
hi
,
hj
,
hi_inv
,
hj_inv
;
vector
hid_inv
,
hjd_inv
;
vector
wi
,
wj
,
wi_dx
,
wj_dx
,
wi_dr
,
wj_dr
,
dvdr
;
vector
w
;
vector
piPOrho2
,
pjPOrho2
,
pirho
,
pjrho
,
piu
,
pju
;
vector
mj
;
vector
f
;
vector
dx
[
3
];
vector
vi
[
3
],
vj
[
3
];
vector
pia
[
3
];
vector
piu_dt
;
vector
pih_dt
;
vector
ci
,
cj
,
v_sig
;
vector
omega_ij
,
Pi_ij
,
balsara
;
vector
pialpha
,
pjalpha
,
alpha_ij
,
v_sig_u
,
tc
;
int
j
,
k
;
/* Load stuff. */
#if VEC_SIZE == 8
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
,
pj
[
4
]
->
mass
,
pj
[
5
]
->
mass
,
pj
[
6
]
->
mass
,
pj
[
7
]
->
mass
);
piPOrho2
.
v
=
vec_set
(
pi
[
0
]
->
force
.
P_over_rho2
,
pi
[
1
]
->
force
.
P_over_rho2
,
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
,
pi
[
4
]
->
force
.
P_over_rho2
,
pi
[
5
]
->
force
.
P_over_rho2
,
pi
[
6
]
->
force
.
P_over_rho2
,
pi
[
7
]
->
force
.
P_over_rho2
);
pjPOrho2
.
v
=
vec_set
(
pj
[
0
]
->
force
.
P_over_rho2
,
pj
[
1
]
->
force
.
P_over_rho2
,
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
,
pj
[
4
]
->
force
.
P_over_rho2
,
pj
[
5
]
->
force
.
P_over_rho2
,
pj
[
6
]
->
force
.
P_over_rho2
,
pj
[
7
]
->
force
.
P_over_rho2
);
pirho
.
v
=
vec_set
(
pi
[
0
]
->
rho
,
pi
[
1
]
->
rho
,
pi
[
2
]
->
rho
,
pi
[
3
]
->
rho
,
pi
[
4
]
->
rho
,
pi
[
5
]
->
rho
,
pi
[
6
]
->
rho
,
pi
[
7
]
->
rho
);
pjrho
.
v
=
vec_set
(
pj
[
0
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
3
]
->
rho
,
pj
[
4
]
->
rho
,
pj
[
5
]
->
rho
,
pj
[
6
]
->
rho
,
pj
[
7
]
->
rho
);
piu
.
v
=
vec_set
(
pi
[
0
]
->
u
,
pi
[
1
]
->
u
,
pi
[
2
]
->
u
,
pi
[
3
]
->
u
,
pi
[
4
]
->
u
,
pi
[
5
]
->
u
,
pi
[
6
]
->
u
,
pi
[
7
]
->
u
);
pju
.
v
=
vec_set
(
pj
[
0
]
->
u
,
pj
[
1
]
->
u
,
pj
[
2
]
->
u
,
pj
[
3
]
->
u
,
pj
[
4
]
->
u
,
pj
[
5
]
->
u
,
pj
[
6
]
->
u
,
pj
[
7
]
->
u
);
ci
.
v
=
vec_set
(
pi
[
0
]
->
force
.
soundspeed
,
pi
[
1
]
->
force
.
soundspeed
,
pi
[
2
]
->
force
.
soundspeed
,
pi
[
3
]
->
force
.
soundspeed
,
pi
[
4
]
->
force
.
soundspeed
,
pi
[
5
]
->
force
.
soundspeed
,
pi
[
6
]
->
force
.
soundspeed
,
pi
[
7
]
->
force
.
soundspeed
);
cj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
soundspeed
,
pj
[
1
]
->
force
.
soundspeed
,
pj
[
2
]
->
force
.
soundspeed
,
pj
[
3
]
->
force
.
soundspeed
,
pj
[
4
]
->
force
.
soundspeed
,
pj
[
5
]
->
force
.
soundspeed
,
pj
[
6
]
->
force
.
soundspeed
,
pj
[
7
]
->
force
.
soundspeed
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
vec_set
(
pi
[
0
]
->
v
[
k
],
pi
[
1
]
->
v
[
k
],
pi
[
2
]
->
v
[
k
],
pi
[
3
]
->
v
[
k
],
pi
[
4
]
->
v
[
k
],
pi
[
5
]
->
v
[
k
],
pi
[
6
]
->
v
[
k
],
pi
[
7
]
->
v
[
k
]);
vj
[
k
].
v
=
vec_set
(
pj
[
0
]
->
v
[
k
],
pj
[
1
]
->
v
[
k
],
pj
[
2
]
->
v
[
k
],
pj
[
3
]
->
v
[
k
],
pj
[
4
]
->
v
[
k
],
pj
[
5
]
->
v
[
k
],
pj
[
6
]
->
v
[
k
],
pj
[
7
]
->
v
[
k
]);
}
for
(
k
=
0
;
k
<
3
;
k
++
)
dx
[
k
].
v
=
vec_set
(
Dx
[
0
+
k
],
Dx
[
3
+
k
],
Dx
[
6
+
k
],
Dx
[
9
+
k
],
Dx
[
12
+
k
],
Dx
[
15
+
k
],
Dx
[
18
+
k
],
Dx
[
21
+
k
]);
balsara
.
v
=
vec_set
(
pi
[
0
]
->
force
.
balsara
,
pi
[
1
]
->
force
.
balsara
,
pi
[
2
]
->
force
.
balsara
,
pi
[
3
]
->
force
.
balsara
,
pi
[
4
]
->
force
.
balsara
,
pi
[
5
]
->
force
.
balsara
,
pi
[
6
]
->
force
.
balsara
,
pi
[
7
]
->
force
.
balsara
)
+
vec_set
(
pj
[
0
]
->
force
.
balsara
,
pj
[
1
]
->
force
.
balsara
,
pj
[
2
]
->
force
.
balsara
,
pj
[
3
]
->
force
.
balsara
,
pj
[
4
]
->
force
.
balsara
,
pj
[
5
]
->
force
.
balsara
,
pj
[
6
]
->
force
.
balsara
,
pj
[
7
]
->
force
.
balsara
);
pialpha
.
v
=
vec_set
(
pi
[
0
]
->
alpha
,
pi
[
1
]
->
alpha
,
pi
[
2
]
->
alpha
,
pi
[
3
]
->
alpha
,
pi
[
4
]
->
alpha
,
pi
[
5
]
->
alpha
,
pi
[
6
]
->
alpha
,
pi
[
7
]
->
alpha
);
pjalpha
.
v
=
vec_set
(
pj
[
0
]
->
alpha
,
pj
[
1
]
->
alpha
,
pj
[
2
]
->
alpha
,
pj
[
3
]
->
alpha
,
pj
[
4
]
->
alpha
,
pj
[
5
]
->
alpha
,
pj
[
6
]
->
alpha
,
pj
[
7
]
->
alpha
);
#elif VEC_SIZE == 4
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
);
piPOrho2
.
v
=
vec_set
(
pi
[
0
]
->
force
.
P_over_rho2
,
pi
[
1
]
->
force
.
P_over_rho2
,
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
);
pjPOrho2
.
v
=
vec_set
(
pj
[
0
]
->
force
.
P_over_rho2
,
pj
[
1
]
->
force
.
P_over_rho2
,
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
);
pirho
.
v
=
vec_set
(
pi
[
0
]
->
rho
,
pi
[
1
]
->
rho
,
pi
[
2
]
->
rho
,
pi
[
3
]
->
rho
);
pjrho
.
v
=
vec_set
(
pj
[
0
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
3
]
->
rho
);
piu
.
v
=
vec_set
(
pi
[
0
]
->
u
,
pi
[
1
]
->
u
,
pi
[
2
]
->
u
,
pi
[
3
]
->
u
);
pju
.
v
=
vec_set
(
pj
[
0
]
->
u
,
pj
[
1
]
->
u
,
pj
[
2
]
->
u
,
pj
[
3
]
->
u
);
ci
.
v
=
vec_set
(
pi
[
0
]
->
force
.
soundspeed
,
pi
[
1
]
->
force
.
soundspeed
,
pi
[
2
]
->
force
.
soundspeed
,
pi
[
3
]
->
force
.
soundspeed
);
cj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
soundspeed
,
pj
[
1
]
->
force
.
soundspeed
,
pj
[
2
]
->
force
.
soundspeed
,
pj
[
3
]
->
force
.
soundspeed
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
vi
[
k
].
v
=
vec_set
(
pi
[
0
]
->
v
[
k
],
pi
[
1
]
->
v
[
k
],
pi
[
2
]
->
v
[
k
],
pi
[
3
]
->
v
[
k
]);
vj
[
k
].
v
=
vec_set
(
pj
[
0
]
->
v
[
k
],
pj
[
1
]
->
v