Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
4e0393b2
Commit
4e0393b2
authored
Dec 05, 2016
by
James Willis
Browse files
Formatting.
parent
85b1b2cc
Changes
12
Expand all
Hide whitespace changes
Inline
Side-by-side
src/cache.h
View file @
4e0393b2
...
...
@@ -23,22 +23,22 @@
#include
"../config.h"
/* Local headers */
#include
"vector.h"
#include
"part.h"
#include
"cell.h"
#include
"error.h"
#include
"part.h"
#include
"vector.h"
/* Multiplier used to size the secondary cache (presumably the number of
 * vectors processed together -- confirm against the interaction loops). */
#define NUM_VEC_PROC 2
/* Number of floats in each c2_cache array. The expansion is fully
 * parenthesised so the macro behaves as a single operand when it is used
 * inside a larger expression (e.g. 2 * C2_CACHE_SIZE). */
#define C2_CACHE_SIZE \
  ((NUM_VEC_PROC * VEC_SIZE * 6) + (NUM_VEC_PROC * VEC_SIZE))
/* Byte alignment requested for the c2_cache arrays: VEC_SIZE floats.
 * Parenthesised for the same reason as C2_CACHE_SIZE. */
#define C2_CACHE_ALIGN (sizeof(float) * VEC_SIZE)
/* Cache struct to hold a local copy of a cell's particle
/* Cache struct to hold a local copy of a cell's particle
* properties required for density/force calculations.*/
struct
cache
{
struct
cache
{
/* Particle x position. */
float
*
restrict
x
__attribute__
((
aligned
(
sizeof
(
float
)
*
VEC_SIZE
)));
float
*
restrict
x
__attribute__
((
aligned
(
sizeof
(
float
)
*
VEC_SIZE
)));
/* Particle y position. */
float
*
restrict
y
__attribute__
((
aligned
(
sizeof
(
float
)
*
VEC_SIZE
)));
...
...
@@ -62,10 +62,10 @@ struct cache {
/* Cache size. */
int
count
;
};
/* Secondary cache struct to hold a list of interactions between two particles.*/
/* Secondary cache struct to hold a list of interactions between two
* particles.*/
struct
c2_cache
{
/* Separation between two particles squared. */
...
...
@@ -81,11 +81,11 @@ struct c2_cache {
float
dzq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2_CACHE_ALIGN
)));
/* Mass of particle pj. */
float
mq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2_CACHE_ALIGN
)));
float
mq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2_CACHE_ALIGN
)));
/* x velocity of particle pj. */
float
vxq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2_CACHE_ALIGN
)));
/* y velocity of particle pj. */
float
vyq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2_CACHE_ALIGN
)));
...
...
@@ -99,9 +99,11 @@ struct c2_cache {
* @param c The cache.
* @param count Number of particles to allocate space for.
*/
__attribute__
((
always_inline
))
INLINE
void
cache_init
(
struct
cache
*
c
,
size_t
count
)
{
__attribute__
((
always_inline
))
INLINE
void
cache_init
(
struct
cache
*
c
,
size_t
count
)
{
/* Align cache on correct byte boundary and pad cache size to include 2 vector lengths for remainder operations. */
/* Align cache on correct byte boundary and pad cache size to include 2 vector
* lengths for remainder operations. */
unsigned
long
alignment
=
sizeof
(
float
)
*
VEC_SIZE
;
unsigned
int
sizeBytes
=
(
count
+
(
2
*
VEC_SIZE
))
*
sizeof
(
float
);
int
error
=
0
;
...
...
@@ -118,16 +120,17 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c, size_t co
free
(
c
->
h
);
}
error
+=
posix_memalign
((
void
**
)
&
c
->
x
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
y
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
z
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
m
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vx
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vy
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vz
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
h
,
alignment
,
sizeBytes
);
if
(
error
!=
0
)
error
(
"Couldn't allocate cache, no. of particles: %d"
,
(
int
)
count
);
error
+=
posix_memalign
((
void
**
)
&
c
->
x
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
y
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
z
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
m
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vx
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vy
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vz
,
alignment
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
h
,
alignment
,
sizeBytes
);
if
(
error
!=
0
)
error
(
"Couldn't allocate cache, no. of particles: %d"
,
(
int
)
count
);
c
->
count
=
count
;
}
...
...
@@ -137,22 +140,22 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c, size_t co
* @param ci The #cell.
* @param ci_cache The cache.
*/
__attribute__((always_inline)) INLINE void cache_read_particles(
    const struct cell *const ci, struct cache *const ci_cache) {

  /* Copy each of the cell's ci->count particles into the per-property arrays
   * of the cache. Positions are shifted by the cell location (ci->loc) into a
   * local frame so single precision can be used instead of double
   * precision. */
  int i = 0;
  while (i < ci->count) {

    /* Position relative to the cell location. */
    ci_cache->x[i] = ci->parts[i].x[0] - ci->loc[0];
    ci_cache->y[i] = ci->parts[i].x[1] - ci->loc[1];
    ci_cache->z[i] = ci->parts[i].x[2] - ci->loc[2];

    /* Velocity components, copied unchanged. */
    ci_cache->vx[i] = ci->parts[i].v[0];
    ci_cache->vy[i] = ci->parts[i].v[1];
    ci_cache->vz[i] = ci->parts[i].v[2];

    /* Smoothing length and mass, copied unchanged. */
    ci_cache->h[i] = ci->parts[i].h;
    ci_cache->m[i] = ci->parts[i].mass;

    ++i;
  }
}
#endif
/* SWIFT_CACHE_H */
src/hydro/Gadget2/hydro_iact.h
View file @
4e0393b2
...
...
@@ -384,11 +384,16 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj,
#ifdef WITH_VECTORIZATION
/**
* @brief Density interaction computed using 2 interleaved vectors (non-symmetric vectorized version).
* @brief Density interaction computed using 2 interleaved vectors
* (non-symmetric vectorized version).
*/
__attribute__
((
always_inline
))
INLINE
static
void
runner_iact_nonsym_2_vec_density
(
float
*
R2
,
float
*
Dx
,
float
*
Dy
,
float
*
Dz
,
vector
hi_inv
,
vector
vix
,
vector
viy
,
vector
viz
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Mj
,
vector
*
rhoSum
,
vector
*
rho_dhSum
,
vector
*
wcountSum
,
vector
*
wcount_dhSum
,
vector
*
div_vSum
,
vector
*
curlvxSum
,
vector
*
curlvySum
,
vector
*
curlvzSum
,
vector
mask
,
vector
mask2
,
int
knlMask
,
int
knlMask2
)
{
runner_iact_nonsym_2_vec_density
(
float
*
R2
,
float
*
Dx
,
float
*
Dy
,
float
*
Dz
,
vector
hi_inv
,
vector
vix
,
vector
viy
,
vector
viz
,
float
*
Vjx
,
float
*
Vjy
,
float
*
Vjz
,
float
*
Mj
,
vector
*
rhoSum
,
vector
*
rho_dhSum
,
vector
*
wcountSum
,
vector
*
wcount_dhSum
,
vector
*
div_vSum
,
vector
*
curlvxSum
,
vector
*
curlvySum
,
vector
*
curlvzSum
,
vector
mask
,
vector
mask2
,
int
knlMask
,
int
knlMask2
)
{
vector
r
,
ri
,
r2
,
xi
,
wi
,
wi_dx
;
vector
mj
;
...
...
@@ -431,7 +436,7 @@ runner_iact_nonsym_2_vec_density(float *R2, float *Dx, float *Dy, float *Dz, vec
xi2
.
v
=
vec_mul
(
r_2
.
v
,
hi_inv
.
v
);
/* Calculate the kernel for two particles. */
kernel_deval_2_vec
(
&
xi
,
&
wi
,
&
wi_dx
,
&
xi2
,
&
wi2
,
&
wi_dx2
);
kernel_deval_2_vec
(
&
xi
,
&
wi
,
&
wi_dx
,
&
xi2
,
&
wi2
,
&
wi_dx2
);
/* Compute dv. */
dvx
.
v
=
vec_sub
(
vix
.
v
,
vjx
.
v
);
...
...
@@ -443,66 +448,106 @@ runner_iact_nonsym_2_vec_density(float *R2, float *Dx, float *Dy, float *Dz, vec
/* Compute dv dot r */
dvdr
.
v
=
vec_fma
(
dvx
.
v
,
dx
.
v
,
vec_fma
(
dvy
.
v
,
dy
.
v
,
vec_mul
(
dvz
.
v
,
dz
.
v
)));
dvdr2
.
v
=
vec_fma
(
dvx2
.
v
,
dx2
.
v
,
vec_fma
(
dvy2
.
v
,
dy2
.
v
,
vec_mul
(
dvz2
.
v
,
dz2
.
v
)));
dvdr2
.
v
=
vec_fma
(
dvx2
.
v
,
dx2
.
v
,
vec_fma
(
dvy2
.
v
,
dy2
.
v
,
vec_mul
(
dvz2
.
v
,
dz2
.
v
)));
dvdr
.
v
=
vec_mul
(
dvdr
.
v
,
ri
.
v
);
dvdr2
.
v
=
vec_mul
(
dvdr2
.
v
,
ri2
.
v
);
/* Compute dv cross r */
curlvrx
.
v
=
vec_fma
(
dvy
.
v
,
dz
.
v
,
vec_mul
(
vec_set1
(
-
1
.
0
f
),
vec_mul
(
dvz
.
v
,
dy
.
v
)));
curlvrx2
.
v
=
vec_fma
(
dvy2
.
v
,
dz2
.
v
,
vec_mul
(
vec_set1
(
-
1
.
0
f
),
vec_mul
(
dvz2
.
v
,
dy2
.
v
)));
curlvry
.
v
=
vec_fma
(
dvz
.
v
,
dx
.
v
,
vec_mul
(
vec_set1
(
-
1
.
0
f
),
vec_mul
(
dvx
.
v
,
dz
.
v
)));
curlvry2
.
v
=
vec_fma
(
dvz2
.
v
,
dx2
.
v
,
vec_mul
(
vec_set1
(
-
1
.
0
f
),
vec_mul
(
dvx2
.
v
,
dz2
.
v
)));
curlvrz
.
v
=
vec_fma
(
dvx
.
v
,
dy
.
v
,
vec_mul
(
vec_set1
(
-
1
.
0
f
),
vec_mul
(
dvy
.
v
,
dx
.
v
)));
curlvrz2
.
v
=
vec_fma
(
dvx2
.
v
,
dy2
.
v
,
vec_mul
(
vec_set1
(
-
1
.
0
f
),
vec_mul
(
dvy2
.
v
,
dx2
.
v
)));
curlvrx
.
v
=
vec_mul
(
curlvrx
.
v
,
ri
.
v
);
curlvrx2
.
v
=
vec_mul
(
curlvrx2
.
v
,
ri2
.
v
);
curlvry
.
v
=
vec_mul
(
curlvry
.
v
,
ri
.
v
);
curlvry2
.
v
=
vec_mul
(
curlvry2
.
v
,
ri2
.
v
);
curlvrz
.
v
=
vec_mul
(
curlvrz
.
v
,
ri
.
v
);
curlvrz2
.
v
=
vec_mul
(
curlvrz2
.
v
,
ri2
.
v
);
/* Mask updates to intermediate vector sums for particle pi. */
curlvrx
.
v
=
vec_fma
(
dvy
.
v
,
dz
.
v
,
vec_mul
(
vec_set1
(
-
1
.
0
f
),
vec_mul
(
dvz
.
v
,
dy
.
v
)));
curlvrx2
.
v
=
vec_fma
(
dvy2
.
v
,
dz2
.
v
,
vec_mul
(
vec_set1
(
-
1
.
0
f
),
vec_mul
(
dvz2
.
v
,
dy2
.
v
)));
curlvry
.
v
=
vec_fma
(
dvz
.
v
,
dx
.
v
,
vec_mul
(
vec_set1
(
-
1
.
0
f
),
vec_mul
(
dvx
.
v
,
dz
.
v
)));
curlvry2
.
v
=
vec_fma
(
dvz2
.
v
,
dx2
.
v
,
vec_mul
(
vec_set1
(
-
1
.
0
f
),
vec_mul
(
dvx2
.
v
,
dz2
.
v
)));
curlvrz
.
v
=
vec_fma
(
dvx
.
v
,
dy
.
v
,
vec_mul
(
vec_set1
(
-
1
.
0
f
),
vec_mul
(
dvy
.
v
,
dx
.
v
)));
curlvrz2
.
v
=
vec_fma
(
dvx2
.
v
,
dy2
.
v
,
vec_mul
(
vec_set1
(
-
1
.
0
f
),
vec_mul
(
dvy2
.
v
,
dx2
.
v
)));
curlvrx
.
v
=
vec_mul
(
curlvrx
.
v
,
ri
.
v
);
curlvrx2
.
v
=
vec_mul
(
curlvrx2
.
v
,
ri2
.
v
);
curlvry
.
v
=
vec_mul
(
curlvry
.
v
,
ri
.
v
);
curlvry2
.
v
=
vec_mul
(
curlvry2
.
v
,
ri2
.
v
);
curlvrz
.
v
=
vec_mul
(
curlvrz
.
v
,
ri
.
v
);
curlvrz2
.
v
=
vec_mul
(
curlvrz2
.
v
,
ri2
.
v
);
/* Mask updates to intermediate vector sums for particle pi. */
#ifdef HAVE_AVX512_F
rhoSum
->
v
=
_mm512_mask_add_ps
(
rhoSum
->
v
,
knlMask
,
vec_mul
(
mj
.
v
,
wi
.
v
),
rhoSum
->
v
);
rhoSum
->
v
=
_mm512_mask_add_ps
(
rhoSum
->
v
,
knlMask2
,
vec_mul
(
mj2
.
v
,
wi2
.
v
),
rhoSum
->
v
);
rho_dhSum
->
v
=
_mm512_mask_sub_ps
(
rho_dhSum
->
v
,
knlMask
,
rho_dhSum
->
v
,
vec_mul
(
mj
.
v
,
vec_fma
(
vec_set1
(
hydro_dimension
),
wi
.
v
,
vec_mul
(
xi
.
v
,
wi_dx
.
v
))));
rho_dhSum
->
v
=
_mm512_mask_sub_ps
(
rho_dhSum
->
v
,
knlMask2
,
rho_dhSum
->
v
,
vec_mul
(
mj2
.
v
,
vec_fma
(
vec_set1
(
hydro_dimension
),
wi2
.
v
,
vec_mul
(
xi2
.
v
,
wi_dx2
.
v
))));
rhoSum
->
v
=
_mm512_mask_add_ps
(
rhoSum
->
v
,
knlMask
,
vec_mul
(
mj
.
v
,
wi
.
v
),
rhoSum
->
v
);
rhoSum
->
v
=
_mm512_mask_add_ps
(
rhoSum
->
v
,
knlMask2
,
vec_mul
(
mj2
.
v
,
wi2
.
v
),
rhoSum
->
v
);
rho_dhSum
->
v
=
_mm512_mask_sub_ps
(
rho_dhSum
->
v
,
knlMask
,
rho_dhSum
->
v
,
vec_mul
(
mj
.
v
,
vec_fma
(
vec_set1
(
hydro_dimension
),
wi
.
v
,
vec_mul
(
xi
.
v
,
wi_dx
.
v
))));
rho_dhSum
->
v
=
_mm512_mask_sub_ps
(
rho_dhSum
->
v
,
knlMask2
,
rho_dhSum
->
v
,
vec_mul
(
mj2
.
v
,
vec_fma
(
vec_set1
(
hydro_dimension
),
wi2
.
v
,
vec_mul
(
xi2
.
v
,
wi_dx2
.
v
))));
wcountSum
->
v
=
_mm512_mask_add_ps
(
wcountSum
->
v
,
knlMask
,
wi
.
v
,
wcountSum
->
v
);
wcountSum
->
v
=
_mm512_mask_add_ps
(
wcountSum
->
v
,
knlMask2
,
wi2
.
v
,
wcountSum
->
v
);
wcount_dhSum
->
v
=
_mm512_mask_sub_ps
(
wcount_dhSum
->
v
,
knlMask
,
wcount_dhSum
->
v
,
vec_mul
(
xi
.
v
,
wi_dx
.
v
));
wcount_dhSum
->
v
=
_mm512_mask_sub_ps
(
wcount_dhSum
->
v
,
knlMask2
,
wcount_dhSum
->
v
,
vec_mul
(
xi2
.
v
,
wi_dx2
.
v
));
div_vSum
->
v
=
_mm512_mask_sub_ps
(
div_vSum
->
v
,
knlMask
,
div_vSum
->
v
,
vec_mul
(
mj
.
v
,
vec_mul
(
dvdr
.
v
,
wi_dx
.
v
)));
div_vSum
->
v
=
_mm512_mask_sub_ps
(
div_vSum
->
v
,
knlMask2
,
div_vSum
->
v
,
vec_mul
(
mj2
.
v
,
vec_mul
(
dvdr2
.
v
,
wi_dx2
.
v
)));
curlvxSum
->
v
=
_mm512_mask_add_ps
(
curlvxSum
->
v
,
knlMask
,
vec_mul
(
mj
.
v
,
vec_mul
(
curlvrx
.
v
,
wi_dx
.
v
)),
curlvxSum
->
v
);
curlvxSum
->
v
=
_mm512_mask_add_ps
(
curlvxSum
->
v
,
knlMask2
,
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvrx2
.
v
,
wi_dx2
.
v
)),
curlvxSum
->
v
);
curlvySum
->
v
=
_mm512_mask_add_ps
(
curlvySum
->
v
,
knlMask
,
vec_mul
(
mj
.
v
,
vec_mul
(
curlvry
.
v
,
wi_dx
.
v
)),
curlvySum
->
v
);
curlvySum
->
v
=
_mm512_mask_add_ps
(
curlvySum
->
v
,
knlMask2
,
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvry2
.
v
,
wi_dx2
.
v
)),
curlvySum
->
v
);
curlvzSum
->
v
=
_mm512_mask_add_ps
(
curlvzSum
->
v
,
knlMask
,
vec_mul
(
mj
.
v
,
vec_mul
(
curlvrz
.
v
,
wi_dx
.
v
)),
curlvzSum
->
v
);
curlvzSum
->
v
=
_mm512_mask_add_ps
(
curlvzSum
->
v
,
knlMask2
,
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvrz2
.
v
,
wi_dx2
.
v
)),
curlvzSum
->
v
);
wcountSum
->
v
=
_mm512_mask_add_ps
(
wcountSum
->
v
,
knlMask2
,
wi2
.
v
,
wcountSum
->
v
);
wcount_dhSum
->
v
=
_mm512_mask_sub_ps
(
wcount_dhSum
->
v
,
knlMask
,
wcount_dhSum
->
v
,
vec_mul
(
xi
.
v
,
wi_dx
.
v
));
wcount_dhSum
->
v
=
_mm512_mask_sub_ps
(
wcount_dhSum
->
v
,
knlMask2
,
wcount_dhSum
->
v
,
vec_mul
(
xi2
.
v
,
wi_dx2
.
v
));
div_vSum
->
v
=
_mm512_mask_sub_ps
(
div_vSum
->
v
,
knlMask
,
div_vSum
->
v
,
vec_mul
(
mj
.
v
,
vec_mul
(
dvdr
.
v
,
wi_dx
.
v
)));
div_vSum
->
v
=
_mm512_mask_sub_ps
(
div_vSum
->
v
,
knlMask2
,
div_vSum
->
v
,
vec_mul
(
mj2
.
v
,
vec_mul
(
dvdr2
.
v
,
wi_dx2
.
v
)));
curlvxSum
->
v
=
_mm512_mask_add_ps
(
curlvxSum
->
v
,
knlMask
,
vec_mul
(
mj
.
v
,
vec_mul
(
curlvrx
.
v
,
wi_dx
.
v
)),
curlvxSum
->
v
);
curlvxSum
->
v
=
_mm512_mask_add_ps
(
curlvxSum
->
v
,
knlMask2
,
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvrx2
.
v
,
wi_dx2
.
v
)),
curlvxSum
->
v
);
curlvySum
->
v
=
_mm512_mask_add_ps
(
curlvySum
->
v
,
knlMask
,
vec_mul
(
mj
.
v
,
vec_mul
(
curlvry
.
v
,
wi_dx
.
v
)),
curlvySum
->
v
);
curlvySum
->
v
=
_mm512_mask_add_ps
(
curlvySum
->
v
,
knlMask2
,
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvry2
.
v
,
wi_dx2
.
v
)),
curlvySum
->
v
);
curlvzSum
->
v
=
_mm512_mask_add_ps
(
curlvzSum
->
v
,
knlMask
,
vec_mul
(
mj
.
v
,
vec_mul
(
curlvrz
.
v
,
wi_dx
.
v
)),
curlvzSum
->
v
);
curlvzSum
->
v
=
_mm512_mask_add_ps
(
curlvzSum
->
v
,
knlMask2
,
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvrz2
.
v
,
wi_dx2
.
v
)),
curlvzSum
->
v
);
#else
rhoSum
->
v
+=
vec_and
(
vec_mul
(
mj
.
v
,
wi
.
v
),
mask
.
v
);
rhoSum
->
v
+=
vec_and
(
vec_mul
(
mj2
.
v
,
wi2
.
v
),
mask2
.
v
);
rho_dhSum
->
v
-=
vec_and
(
vec_mul
(
mj
.
v
,
vec_fma
(
vec_set1
(
hydro_dimension
),
wi
.
v
,
vec_mul
(
xi
.
v
,
wi_dx
.
v
))),
mask
.
v
);
rho_dhSum
->
v
-=
vec_and
(
vec_mul
(
mj2
.
v
,
vec_fma
(
vec_set1
(
hydro_dimension
),
wi2
.
v
,
vec_mul
(
xi2
.
v
,
wi_dx2
.
v
))),
mask2
.
v
);
wcountSum
->
v
+=
vec_and
(
wi
.
v
,
mask
.
v
);
wcountSum
->
v
+=
vec_and
(
wi2
.
v
,
mask2
.
v
);
wcount_dhSum
->
v
-=
vec_and
(
vec_mul
(
xi
.
v
,
wi_dx
.
v
),
mask
.
v
);
wcount_dhSum
->
v
-=
vec_and
(
vec_mul
(
xi2
.
v
,
wi_dx2
.
v
),
mask2
.
v
);
div_vSum
->
v
-=
vec_and
(
vec_mul
(
mj
.
v
,
vec_mul
(
dvdr
.
v
,
wi_dx
.
v
)),
mask
.
v
);
div_vSum
->
v
-=
vec_and
(
vec_mul
(
mj2
.
v
,
vec_mul
(
dvdr2
.
v
,
wi_dx2
.
v
)),
mask2
.
v
);
curlvxSum
->
v
+=
vec_and
(
vec_mul
(
mj
.
v
,
vec_mul
(
curlvrx
.
v
,
wi_dx
.
v
)),
mask
.
v
);
curlvxSum
->
v
+=
vec_and
(
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvrx2
.
v
,
wi_dx2
.
v
)),
mask2
.
v
);
curlvySum
->
v
+=
vec_and
(
vec_mul
(
mj
.
v
,
vec_mul
(
curlvry
.
v
,
wi_dx
.
v
)),
mask
.
v
);
curlvySum
->
v
+=
vec_and
(
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvry2
.
v
,
wi_dx2
.
v
)),
mask2
.
v
);
curlvzSum
->
v
+=
vec_and
(
vec_mul
(
mj
.
v
,
vec_mul
(
curlvrz
.
v
,
wi_dx
.
v
)),
mask
.
v
);
curlvzSum
->
v
+=
vec_and
(
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvrz2
.
v
,
wi_dx2
.
v
)),
mask2
.
v
);
rhoSum
->
v
+=
vec_and
(
vec_mul
(
mj
.
v
,
wi
.
v
),
mask
.
v
);
rhoSum
->
v
+=
vec_and
(
vec_mul
(
mj2
.
v
,
wi2
.
v
),
mask2
.
v
);
rho_dhSum
->
v
-=
vec_and
(
vec_mul
(
mj
.
v
,
vec_fma
(
vec_set1
(
hydro_dimension
),
wi
.
v
,
vec_mul
(
xi
.
v
,
wi_dx
.
v
))),
mask
.
v
);
rho_dhSum
->
v
-=
vec_and
(
vec_mul
(
mj2
.
v
,
vec_fma
(
vec_set1
(
hydro_dimension
),
wi2
.
v
,
vec_mul
(
xi2
.
v
,
wi_dx2
.
v
))),
mask2
.
v
);
wcountSum
->
v
+=
vec_and
(
wi
.
v
,
mask
.
v
);
wcountSum
->
v
+=
vec_and
(
wi2
.
v
,
mask2
.
v
);
wcount_dhSum
->
v
-=
vec_and
(
vec_mul
(
xi
.
v
,
wi_dx
.
v
),
mask
.
v
);
wcount_dhSum
->
v
-=
vec_and
(
vec_mul
(
xi2
.
v
,
wi_dx2
.
v
),
mask2
.
v
);
div_vSum
->
v
-=
vec_and
(
vec_mul
(
mj
.
v
,
vec_mul
(
dvdr
.
v
,
wi_dx
.
v
)),
mask
.
v
);
div_vSum
->
v
-=
vec_and
(
vec_mul
(
mj2
.
v
,
vec_mul
(
dvdr2
.
v
,
wi_dx2
.
v
)),
mask2
.
v
);
curlvxSum
->
v
+=
vec_and
(
vec_mul
(
mj
.
v
,
vec_mul
(
curlvrx
.
v
,
wi_dx
.
v
)),
mask
.
v
);
curlvxSum
->
v
+=
vec_and
(
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvrx2
.
v
,
wi_dx2
.
v
)),
mask2
.
v
);
curlvySum
->
v
+=
vec_and
(
vec_mul
(
mj
.
v
,
vec_mul
(
curlvry
.
v
,
wi_dx
.
v
)),
mask
.
v
);
curlvySum
->
v
+=
vec_and
(
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvry2
.
v
,
wi_dx2
.
v
)),
mask2
.
v
);
curlvzSum
->
v
+=
vec_and
(
vec_mul
(
mj
.
v
,
vec_mul
(
curlvrz
.
v
,
wi_dx
.
v
)),
mask
.
v
);
curlvzSum
->
v
+=
vec_and
(
vec_mul
(
mj2
.
v
,
vec_mul
(
curlvrz2
.
v
,
wi_dx2
.
v
)),
mask2
.
v
);
#endif
}
#endif
...
...
@@ -639,17 +684,19 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
,
pj
[
4
]
->
mass
,
pj
[
5
]
->
mass
,
pj
[
6
]
->
mass
,
pj
[
7
]
->
mass
);
piPOrho2
.
v
=
vec_set
(
pi
[
0
]
->
force
.
P_over_rho2
,
pi
[
1
]
->
force
.
P_over_rho2
,
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
,
pi
[
4
]
->
force
.
P_over_rho2
,
pi
[
5
]
->
force
.
P_over_rho2
,
pi
[
6
]
->
force
.
P_over_rho2
,
pi
[
7
]
->
force
.
P_over_rho2
);
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
,
pi
[
4
]
->
force
.
P_over_rho2
,
pi
[
5
]
->
force
.
P_over_rho2
,
pi
[
6
]
->
force
.
P_over_rho2
,
pi
[
7
]
->
force
.
P_over_rho2
);
pjPOrho2
.
v
=
vec_set
(
pj
[
0
]
->
force
.
P_over_rho2
,
pj
[
1
]
->
force
.
P_over_rho2
,
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
,
pj
[
4
]
->
force
.
P_over_rho2
,
pj
[
5
]
->
force
.
P_over_rho2
,
pj
[
6
]
->
force
.
P_over_rho2
,
pj
[
7
]
->
force
.
P_over_rho2
);
grad_hi
.
v
=
vec_set
(
pi
[
0
]
->
force
.
f
,
pi
[
1
]
->
force
.
f
,
pi
[
2
]
->
force
.
f
,
pi
[
3
]
->
force
.
f
,
pi
[
4
]
->
force
.
f
,
pi
[
5
]
->
force
.
f
,
pi
[
6
]
->
force
.
f
,
pi
[
7
]
->
force
.
f
);
grad_hj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
f
,
pj
[
1
]
->
force
.
f
,
pj
[
2
]
->
force
.
f
,
pj
[
3
]
->
force
.
f
,
pj
[
4
]
->
force
.
f
,
pj
[
5
]
->
force
.
f
,
pj
[
6
]
->
force
.
f
,
pj
[
7
]
->
force
.
f
);
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
,
pj
[
4
]
->
force
.
P_over_rho2
,
pj
[
5
]
->
force
.
P_over_rho2
,
pj
[
6
]
->
force
.
P_over_rho2
,
pj
[
7
]
->
force
.
P_over_rho2
);
grad_hi
.
v
=
vec_set
(
pi
[
0
]
->
force
.
f
,
pi
[
1
]
->
force
.
f
,
pi
[
2
]
->
force
.
f
,
pi
[
3
]
->
force
.
f
,
pi
[
4
]
->
force
.
f
,
pi
[
5
]
->
force
.
f
,
pi
[
6
]
->
force
.
f
,
pi
[
7
]
->
force
.
f
);
grad_hj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
f
,
pj
[
1
]
->
force
.
f
,
pj
[
2
]
->
force
.
f
,
pj
[
3
]
->
force
.
f
,
pj
[
4
]
->
force
.
f
,
pj
[
5
]
->
force
.
f
,
pj
[
6
]
->
force
.
f
,
pj
[
7
]
->
force
.
f
);
pirho
.
v
=
vec_set
(
pi
[
0
]
->
rho
,
pi
[
1
]
->
rho
,
pi
[
2
]
->
rho
,
pi
[
3
]
->
rho
,
pi
[
4
]
->
rho
,
pi
[
5
]
->
rho
,
pi
[
6
]
->
rho
,
pi
[
7
]
->
rho
);
pjrho
.
v
=
vec_set
(
pj
[
0
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
3
]
->
rho
,
pj
[
4
]
->
rho
,
...
...
@@ -682,11 +729,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
mi
.
v
=
vec_set
(
pi
[
0
]
->
mass
,
pi
[
1
]
->
mass
,
pi
[
2
]
->
mass
,
pi
[
3
]
->
mass
);
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
);
piPOrho2
.
v
=
vec_set
(
pi
[
0
]
->
force
.
P_over_rho2
,
pi
[
1
]
->
force
.
P_over_rho2
,
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
);
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
);
pjPOrho2
.
v
=
vec_set
(
pj
[
0
]
->
force
.
P_over_rho2
,
pj
[
1
]
->
force
.
P_over_rho2
,
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
);
grad_hi
.
v
=
vec_set
(
pi
[
0
]
->
force
.
f
,
pi
[
1
]
->
force
.
f
,
pi
[
2
]
->
force
.
f
,
pi
[
3
]
->
force
.
f
);
grad_hj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
f
,
pj
[
1
]
->
force
.
f
,
pj
[
2
]
->
force
.
f
,
pj
[
3
]
->
force
.
f
);
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
);
grad_hi
.
v
=
vec_set
(
pi
[
0
]
->
force
.
f
,
pi
[
1
]
->
force
.
f
,
pi
[
2
]
->
force
.
f
,
pi
[
3
]
->
force
.
f
);
grad_hj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
f
,
pj
[
1
]
->
force
.
f
,
pj
[
2
]
->
force
.
f
,
pj
[
3
]
->
force
.
f
);
pirho
.
v
=
vec_set
(
pi
[
0
]
->
rho
,
pi
[
1
]
->
rho
,
pi
[
2
]
->
rho
,
pi
[
3
]
->
rho
);
pjrho
.
v
=
vec_set
(
pj
[
0
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
3
]
->
rho
);
ci
.
v
=
vec_set
(
pi
[
0
]
->
force
.
soundspeed
,
pi
[
1
]
->
force
.
soundspeed
,
...
...
@@ -748,7 +797,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force(
/* Now, convolve with the kernel */
visc_term
.
v
=
vec_set1
(
0
.
5
f
)
*
visc
.
v
*
(
wi_dr
.
v
+
wj_dr
.
v
)
*
ri
.
v
;
sph_term
.
v
=
(
grad_hi
.
v
*
piPOrho2
.
v
*
wi_dr
.
v
+
grad_hj
.
v
*
pjPOrho2
.
v
*
wj_dr
.
v
)
*
ri
.
v
;
sph_term
.
v
=
(
grad_hi
.
v
*
piPOrho2
.
v
*
wi_dr
.
v
+
grad_hj
.
v
*
pjPOrho2
.
v
*
wj_dr
.
v
)
*
ri
.
v
;
/* Eventually get the acceleration */
acc
.
v
=
visc_term
.
v
+
sph_term
.
v
;
...
...
@@ -913,17 +964,19 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
,
pj
[
4
]
->
mass
,
pj
[
5
]
->
mass
,
pj
[
6
]
->
mass
,
pj
[
7
]
->
mass
);
piPOrho2
.
v
=
vec_set
(
pi
[
0
]
->
force
.
P_over_rho2
,
pi
[
1
]
->
force
.
P_over_rho2
,
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
,
pi
[
4
]
->
force
.
P_over_rho2
,
pi
[
5
]
->
force
.
P_over_rho2
,
pi
[
6
]
->
force
.
P_over_rho2
,
pi
[
7
]
->
force
.
P_over_rho2
);
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
,
pi
[
4
]
->
force
.
P_over_rho2
,
pi
[
5
]
->
force
.
P_over_rho2
,
pi
[
6
]
->
force
.
P_over_rho2
,
pi
[
7
]
->
force
.
P_over_rho2
);
pjPOrho2
.
v
=
vec_set
(
pj
[
0
]
->
force
.
P_over_rho2
,
pj
[
1
]
->
force
.
P_over_rho2
,
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
,
pj
[
4
]
->
force
.
P_over_rho2
,
pj
[
5
]
->
force
.
P_over_rho2
,
pj
[
6
]
->
force
.
P_over_rho2
,
pj
[
7
]
->
force
.
P_over_rho2
);
grad_hi
.
v
=
vec_set
(
pi
[
0
]
->
force
.
f
,
pi
[
1
]
->
force
.
f
,
pi
[
2
]
->
force
.
f
,
pi
[
3
]
->
force
.
f
,
pi
[
4
]
->
force
.
f
,
pi
[
5
]
->
force
.
f
,
pi
[
6
]
->
force
.
f
,
pi
[
7
]
->
force
.
f
);
grad_hj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
f
,
pj
[
1
]
->
force
.
f
,
pj
[
2
]
->
force
.
f
,
pj
[
3
]
->
force
.
f
,
pj
[
4
]
->
force
.
f
,
pj
[
5
]
->
force
.
f
,
pj
[
6
]
->
force
.
f
,
pj
[
7
]
->
force
.
f
);
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
,
pj
[
4
]
->
force
.
P_over_rho2
,
pj
[
5
]
->
force
.
P_over_rho2
,
pj
[
6
]
->
force
.
P_over_rho2
,
pj
[
7
]
->
force
.
P_over_rho2
);
grad_hi
.
v
=
vec_set
(
pi
[
0
]
->
force
.
f
,
pi
[
1
]
->
force
.
f
,
pi
[
2
]
->
force
.
f
,
pi
[
3
]
->
force
.
f
,
pi
[
4
]
->
force
.
f
,
pi
[
5
]
->
force
.
f
,
pi
[
6
]
->
force
.
f
,
pi
[
7
]
->
force
.
f
);
grad_hj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
f
,
pj
[
1
]
->
force
.
f
,
pj
[
2
]
->
force
.
f
,
pj
[
3
]
->
force
.
f
,
pj
[
4
]
->
force
.
f
,
pj
[
5
]
->
force
.
f
,
pj
[
6
]
->
force
.
f
,
pj
[
7
]
->
force
.
f
);
pirho
.
v
=
vec_set
(
pi
[
0
]
->
rho
,
pi
[
1
]
->
rho
,
pi
[
2
]
->
rho
,
pi
[
3
]
->
rho
,
pi
[
4
]
->
rho
,
pi
[
5
]
->
rho
,
pi
[
6
]
->
rho
,
pi
[
7
]
->
rho
);
pjrho
.
v
=
vec_set
(
pj
[
0
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
3
]
->
rho
,
pj
[
4
]
->
rho
,
...
...
@@ -955,11 +1008,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
#elif VEC_SIZE == 4
mj
.
v
=
vec_set
(
pj
[
0
]
->
mass
,
pj
[
1
]
->
mass
,
pj
[
2
]
->
mass
,
pj
[
3
]
->
mass
);
piPOrho2
.
v
=
vec_set
(
pi
[
0
]
->
force
.
P_over_rho2
,
pi
[
1
]
->
force
.
P_over_rho2
,
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
);
pi
[
2
]
->
force
.
P_over_rho2
,
pi
[
3
]
->
force
.
P_over_rho2
);
pjPOrho2
.
v
=
vec_set
(
pj
[
0
]
->
force
.
P_over_rho2
,
pj
[
1
]
->
force
.
P_over_rho2
,
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
);
grad_hi
.
v
=
vec_set
(
pi
[
0
]
->
force
.
f
,
pi
[
1
]
->
force
.
f
,
pi
[
2
]
->
force
.
f
,
pi
[
3
]
->
force
.
f
);
grad_hj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
f
,
pj
[
1
]
->
force
.
f
,
pj
[
2
]
->
force
.
f
,
pj
[
3
]
->
force
.
f
);
pj
[
2
]
->
force
.
P_over_rho2
,
pj
[
3
]
->
force
.
P_over_rho2
);
grad_hi
.
v
=
vec_set
(
pi
[
0
]
->
force
.
f
,
pi
[
1
]
->
force
.
f
,
pi
[
2
]
->
force
.
f
,
pi
[
3
]
->
force
.
f
);
grad_hj
.
v
=
vec_set
(
pj
[
0
]
->
force
.
f
,
pj
[
1
]
->
force
.
f
,
pj
[
2
]
->
force
.
f
,
pj
[
3
]
->
force
.
f
);
pirho
.
v
=
vec_set
(
pi
[
0
]
->
rho
,
pi
[
1
]
->
rho
,
pi
[
2
]
->
rho
,
pi
[
3
]
->
rho
);
pjrho
.
v
=
vec_set
(
pj
[
0
]
->
rho
,
pj
[
1
]
->
rho
,
pj
[
2
]
->
rho
,
pj
[
3
]
->
rho
);
ci
.
v
=
vec_set
(
pi
[
0
]
->
force
.
soundspeed
,
pi
[
1
]
->
force
.
soundspeed
,
...
...
@@ -1021,7 +1076,9 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force(
/* Now, convolve with the kernel */
visc_term
.
v
=
vec_set1
(
0
.
5
f
)
*
visc
.
v
*
(
wi_dr
.
v
+
wj_dr
.
v
)
*
ri
.
v
;
sph_term
.
v
=
(
grad_hi
.
v
*
piPOrho2
.
v
*
wi_dr
.
v
+
grad_hj
.
v
*
pjPOrho2
.
v
*
wj_dr
.
v
)
*
ri
.
v
;
sph_term
.
v
=
(
grad_hi
.
v
*
piPOrho2
.
v
*
wi_dr
.
v
+
grad_hj
.
v
*
pjPOrho2
.
v
*
wj_dr
.
v
)
*
ri
.
v
;
/* Eventually get the acceleration */
acc
.
v
=
visc_term
.
v
+
sph_term
.
v
;
...
...
src/kernel_hydro.h
View file @
4e0393b2
...
...
@@ -373,19 +373,24 @@ static const vector c5 = FILL_VEC(1.f);
#endif
/**
* @brief Computes the kernel function and its derivative for two particles using interleaved vectors.
* @brief Computes the kernel function and its derivative for two particles
* using interleaved vectors.
*
* Return 0 if $u > \\gamma = H/h$
*
* @param u The ratio of the distance to the smoothing length $u = x/h$.
* @param w (return) The value of the kernel function $W(x,h)$.
* @param dw_dx (return) The norm of the gradient of $|\\nabla W(x,h)|$.
* @param u2 The ratio of the distance to the smoothing length $u = x/h$ for second particle.
* @param w2 (return) The value of the kernel function $W(x,h)$ for second particle.
* @param dw_dx2 (return) The norm of the gradient of $|\\nabla W(x,h)|$ for second particle.
* @param u2 The ratio of the distance to the smoothing length $u = x/h$ for
* second particle.
* @param w2 (return) The value of the kernel function $W(x,h)$ for second
* particle.
* @param dw_dx2 (return) The norm of the gradient of $|\\nabla W(x,h)|$ for
* second particle.
*/
__attribute__
((
always_inline
))
INLINE
static
void
kernel_deval_2_vec
(
vector
*
u
,
vector
*
w
,
vector
*
dw_dx
,
vector
*
u2
,
vector
*
w2
,
vector
*
dw_dx2
)
{
vector
*
u
,
vector
*
w
,
vector
*
dw_dx
,
vector
*
u2
,
vector
*
w2
,
vector
*
dw_dx2
)
{
/* Go to the range [0,1[ from [0,H[ */
vector
x
,
x2
;
...
...
@@ -414,17 +419,21 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec(
dw_dx2
->
v
=
vec_fma
(
dw_dx2
->
v
,
x2
.
v
,
w2
->
v
);
w
->
v
=
vec_fma
(
x
.
v
,
w
->
v
,
c4
.
v
);
w2
->
v
=
vec_fma
(
x2
.
v
,
w2
->
v
,
c4
.
v
);
dw_dx
->
v
=
vec_fma
(
dw_dx
->
v
,
x
.
v
,
w
->
v
);
dw_dx2
->
v
=
vec_fma
(
dw_dx2
->
v
,
x2
.
v
,
w2
->
v
);
w
->
v
=
vec_fma
(
x
.
v
,
w
->
v
,
c5
.
v
);
w2
->
v
=
vec_fma
(
x2
.
v
,
w2
->
v
,
c5
.
v
);
/* Return everything */
w
->
v
=
vec_mul
(
w
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_vec
.
v
));
w2
->
v
=
vec_mul
(
w2
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_vec
.
v
));
dw_dx
->
v
=
vec_mul
(
dw_dx
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_plus_one_vec
.
v
));
dw_dx2
->
v
=
vec_mul
(
dw_dx2
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_plus_one_vec
.
v
));
w
->
v
=
vec_mul
(
w
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_vec
.
v
));
w2
->
v
=
vec_mul
(
w2
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_vec
.
v
));
dw_dx
->
v
=
vec_mul
(
dw_dx
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_plus_one_vec
.
v
));
dw_dx2
->
v
=
vec_mul
(
dw_dx2
->
v
,
vec_mul
(
kernel_constant_vec
.
v
,
kernel_gamma_inv_dim_plus_one_vec
.
v
));
#else
/* Load x and get the interval id. */
...
...
@@ -462,7 +471,6 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec(
dw_dx2
->
v
*
kernel_constant_vec
.
v
*
kernel_gamma_inv_dim_plus_one_vec
.
v
;
#endif
}
#endif
...
...
src/runner.c
View file @
4e0393b2
...
...
@@ -53,13 +53,13 @@
#include
"hydro_properties.h"
#include
"kick.h"
#include
"minmax.h"
#include
"runner_doiact_vec.h"
#include
"scheduler.h"
#include
"sourceterms.h"
#include
"space.h"
#include
"task.h"
#include
"timers.h"
#include
"timestep.h"
#include
"runner_doiact_vec.h"
/**
* @brief Entry in a list of sorted indices.
...
...
src/runner.h
View file @
4e0393b2
...
...
@@ -47,7 +47,7 @@ struct runner {
/*! The engine owning this runner. */
struct
engine
*
e
;
/*! The particle cache of this runner. */
struct
cache
par_cache
;
};
...
...
src/runner_doiact_vec.c
View file @
4e0393b2
This diff is collapsed.
Click to expand it.
src/runner_doiact_vec.h
View file @
4e0393b2
...
...
@@ -24,13 +24,13 @@
#include
"../config.h"
/* Local headers */
#include
"vector.h"
#include
"part.h"
#include
"cell.h"
#include
"
run
ne
r
.h"
#include
"
engi
ne.h"
#include
"hydro.h"
#include
"part.h"
#include
"runner.h"
#include
"timers.h"
#include
"
engine
.h"
#include
"
vector
.h"
/* Function prototypes. */
void
runner_doself1_density_vec
(
struct
runner
*
r
,
struct
cell
*
restrict
c
);
...
...
src/swift.h
View file @
4e0393b2
...
...
@@ -24,6 +24,7 @@
/* Local headers. */
#include
"atomic.h"
#include
"cache.h"
#include
"cell.h"
#include
"clocks.h"
#include
"const.h"
...
...
@@ -56,6 +57,5 @@
#include
"tools.h"
#include
"units.h"
#include
"version.h"
#include
"cache.h"