Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
e828d7bc
Commit
e828d7bc
authored
Aug 31, 2017
by
James Willis
Browse files
Bug inside debug checks.
parent
3d0eb1c0
Changes
1
Hide whitespace changes
Inline
Side-by-side
src/runner_doiact_vec.c
View file @
e828d7bc
...
@@ -273,7 +273,8 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache(
...
@@ -273,7 +273,8 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache(
while
(
first_pi
>
0
&&
sort_i
[
first_pi
-
1
].
d
+
dx_max
+
hi_max
>
dj_min
)
{
while
(
first_pi
>
0
&&
sort_i
[
first_pi
-
1
].
d
+
dx_max
+
hi_max
>
dj_min
)
{
first_pi
--
;
first_pi
--
;
/* Store the index of the particle if it is active. */
/* Store the index of the particle if it is active. */
if
(
part_is_active_no_debug
(
&
parts_i
[
sort_i
[
first_pi
].
i
],
max_active_bin
))
active_id
=
first_pi
;
if
(
part_is_active_no_debug
(
&
parts_i
[
sort_i
[
first_pi
].
i
],
max_active_bin
))
active_id
=
first_pi
;
}
}
/* Set the first active pi in range of any particle in cell j. */
/* Set the first active pi in range of any particle in cell j. */
...
@@ -320,7 +321,8 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache(
...
@@ -320,7 +321,8 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache(
sort_j
[
last_pj
+
1
].
d
-
hj_max
-
dx_max
<
di_max
)
{
sort_j
[
last_pj
+
1
].
d
-
hj_max
-
dx_max
<
di_max
)
{
last_pj
++
;
last_pj
++
;
/* Store the index of the particle if it is active. */
/* Store the index of the particle if it is active. */
if
(
part_is_active_no_debug
(
&
parts_j
[
sort_j
[
last_pj
].
i
],
max_active_bin
))
active_id
=
last_pj
;
if
(
part_is_active_no_debug
(
&
parts_j
[
sort_j
[
last_pj
].
i
],
max_active_bin
))
active_id
=
last_pj
;
}
}
/* Set the last active pj in range of any particle in cell i. */
/* Set the last active pj in range of any particle in cell i. */
...
@@ -603,14 +605,14 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
...
@@ -603,14 +605,14 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
const
int
num_vec_proc
=
1
;
const
int
num_vec_proc
=
1
;
const
timebin_t
max_active_bin
=
e
->
max_active_bin
;
const
timebin_t
max_active_bin
=
e
->
max_active_bin
;
struct
part
*
restrict
parts
=
c
->
parts
;
struct
part
*
restrict
parts
=
c
->
parts
;
const
int
count
=
c
->
count
;
const
int
count
=
c
->
count
;
vector
v_hi
,
v_vix
,
v_viy
,
v_viz
,
v_hig2
,
v_r2
;
vector
v_hi
,
v_vix
,
v_viy
,
v_viz
,
v_hig2
,
v_r2
;
vector
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
;
vector
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
;
TIMER_TIC
TIMER_TIC
;
if
(
!
cell_is_active
(
c
,
e
))
return
;
if
(
!
cell_is_active
(
c
,
e
))
return
;
...
@@ -634,145 +636,146 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
...
@@ -634,145 +636,146 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
if
(
pi
->
ti_drift
!=
e
->
ti_current
)
if
(
pi
->
ti_drift
!=
e
->
ti_current
)
error
(
"Particle pi not drifted to current time"
);
error
(
"Particle pi not drifted to current time"
);
}
}
}
#endif
#endif
/* Loop over the particles in the cell. */
/* Loop over the particles in the cell. */
for
(
int
pid
=
0
;
pid
<
count
;
pid
++
)
{
for
(
int
pid
=
0
;
pid
<
count
;
pid
++
)
{
/* Get a pointer to the ith particle. */
/* Get a pointer to the ith particle. */
pi
=
&
parts
[
pid
];
pi
=
&
parts
[
pid
];
/* Is the ith particle active? */
/* Is the ith particle active? */
if
(
!
part_is_active_no_debug
(
pi
,
max_active_bin
))
continue
;
if
(
!
part_is_active_no_debug
(
pi
,
max_active_bin
))
continue
;
vector
pix
,
piy
,
piz
;
vector
pix
,
piy
,
piz
;
const
float
hi
=
cell_cache
->
h
[
pid
];
const
float
hi
=
cell_cache
->
h
[
pid
];
/* Fill particle pi vectors. */
/* Fill particle pi vectors. */
pix
.
v
=
vec_set1
(
cell_cache
->
x
[
pid
]);
pix
.
v
=
vec_set1
(
cell_cache
->
x
[
pid
]);
piy
.
v
=
vec_set1
(
cell_cache
->
y
[
pid
]);
piy
.
v
=
vec_set1
(
cell_cache
->
y
[
pid
]);
piz
.
v
=
vec_set1
(
cell_cache
->
z
[
pid
]);
piz
.
v
=
vec_set1
(
cell_cache
->
z
[
pid
]);
v_hi
.
v
=
vec_set1
(
hi
);
v_hi
.
v
=
vec_set1
(
hi
);
v_vix
.
v
=
vec_set1
(
cell_cache
->
vx
[
pid
]);
v_vix
.
v
=
vec_set1
(
cell_cache
->
vx
[
pid
]);
v_viy
.
v
=
vec_set1
(
cell_cache
->
vy
[
pid
]);
v_viy
.
v
=
vec_set1
(
cell_cache
->
vy
[
pid
]);
v_viz
.
v
=
vec_set1
(
cell_cache
->
vz
[
pid
]);
v_viz
.
v
=
vec_set1
(
cell_cache
->
vz
[
pid
]);
v_rhoi
.
v
=
vec_set1
(
cell_cache
->
rho
[
pid
]);
v_rhoi
.
v
=
vec_set1
(
cell_cache
->
rho
[
pid
]);
v_grad_hi
.
v
=
vec_set1
(
cell_cache
->
grad_h
[
pid
]);
v_grad_hi
.
v
=
vec_set1
(
cell_cache
->
grad_h
[
pid
]);
v_pOrhoi2
.
v
=
vec_set1
(
cell_cache
->
pOrho2
[
pid
]);
v_pOrhoi2
.
v
=
vec_set1
(
cell_cache
->
pOrho2
[
pid
]);
v_balsara_i
.
v
=
vec_set1
(
cell_cache
->
balsara
[
pid
]);
v_balsara_i
.
v
=
vec_set1
(
cell_cache
->
balsara
[
pid
]);
v_ci
.
v
=
vec_set1
(
cell_cache
->
soundspeed
[
pid
]);
v_ci
.
v
=
vec_set1
(
cell_cache
->
soundspeed
[
pid
]);
const
float
hig2
=
hi
*
hi
*
kernel_gamma2
;
const
float
hig2
=
hi
*
hi
*
kernel_gamma2
;
v_hig2
.
v
=
vec_set1
(
hig2
);
v_hig2
.
v
=
vec_set1
(
hig2
);
/* Reset cumulative sums of update vectors. */
/* Reset cumulative sums of update vectors. */
vector
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
vector
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
;
entropy_dtSum
;
/* Get the inverse of hi. */
/* Get the inverse of hi. */
vector
v_hi_inv
;
vector
v_hi_inv
;
v_hi_inv
=
vec_reciprocal
(
v_hi
);
v_hi_inv
=
vec_reciprocal
(
v_hi
);
a_hydro_xSum
.
v
=
vec_setzero
();
a_hydro_xSum
.
v
=
vec_setzero
();
a_hydro_ySum
.
v
=
vec_setzero
();
a_hydro_ySum
.
v
=
vec_setzero
();
a_hydro_zSum
.
v
=
vec_setzero
();
a_hydro_zSum
.
v
=
vec_setzero
();
h_dtSum
.
v
=
vec_setzero
();
h_dtSum
.
v
=
vec_setzero
();
v_sigSum
.
v
=
vec_set1
(
pi
->
force
.
v_sig
);
v_sigSum
.
v
=
vec_set1
(
pi
->
force
.
v_sig
);
entropy_dtSum
.
v
=
vec_setzero
();
entropy_dtSum
.
v
=
vec_setzero
();
/* Pad cache if there is a serial remainder. */
/* Pad cache if there is a serial remainder. */
count_align
=
count
;
count_align
=
count
;
int
rem
=
count
%
(
num_vec_proc
*
VEC_SIZE
);
int
rem
=
count
%
(
num_vec_proc
*
VEC_SIZE
);
if
(
rem
!=
0
)
{
if
(
rem
!=
0
)
{
int
pad
=
(
num_vec_proc
*
VEC_SIZE
)
-
rem
;
int
pad
=
(
num_vec_proc
*
VEC_SIZE
)
-
rem
;
count_align
+=
pad
;
count_align
+=
pad
;
/* Set positions to the same as particle pi so when the r2 > 0 mask is
/* Set positions to the same as particle pi so when the r2 > 0 mask is
* applied these extra contributions are masked out.*/
* applied these extra contributions are masked out.*/
for
(
int
i
=
count
;
i
<
count_align
;
i
++
)
{
for
(
int
i
=
count
;
i
<
count_align
;
i
++
)
{
cell_cache
->
x
[
i
]
=
pix
.
f
[
0
];
cell_cache
->
x
[
i
]
=
pix
.
f
[
0
];
cell_cache
->
y
[
i
]
=
piy
.
f
[
0
];
cell_cache
->
y
[
i
]
=
piy
.
f
[
0
];
cell_cache
->
z
[
i
]
=
piz
.
f
[
0
];
cell_cache
->
z
[
i
]
=
piz
.
f
[
0
];
cell_cache
->
h
[
i
]
=
1
.
f
;
cell_cache
->
h
[
i
]
=
1
.
f
;
}
}
}
}
vector
pjx
,
pjy
,
pjz
,
hj
,
hjg2
;
vector
pjx
,
pjy
,
pjz
,
hj
,
hjg2
;
/* Find all of particle pi's interacions and store needed values in the
/* Find all of particle pi's interacions and store needed values in the
* secondary cache.*/
* secondary cache.*/
for
(
int
pjd
=
0
;
pjd
<
count_align
;
pjd
+=
(
num_vec_proc
*
VEC_SIZE
))
{
for
(
int
pjd
=
0
;
pjd
<
count_align
;
pjd
+=
(
num_vec_proc
*
VEC_SIZE
))
{
/* Load 1 set of vectors from the particle cache. */
/* Load 1 set of vectors from the particle cache. */
pjx
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
]);
pjx
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
]);
pjy
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
]);
pjy
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
]);
pjz
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
]);
pjz
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
]);
hj
.
v
=
vec_load
(
&
cell_cache
->
h
[
pjd
]);
hj
.
v
=
vec_load
(
&
cell_cache
->
h
[
pjd
]);
hjg2
.
v
=
vec_mul
(
vec_mul
(
hj
.
v
,
hj
.
v
),
kernel_gamma2_vec
.
v
);
hjg2
.
v
=
vec_mul
(
vec_mul
(
hj
.
v
,
hj
.
v
),
kernel_gamma2_vec
.
v
);
/* Compute the pairwise distance. */
/* Compute the pairwise distance. */
vector
v_dx
,
v_dy
,
v_dz
;
vector
v_dx
,
v_dy
,
v_dz
;
v_dx
.
v
=
vec_sub
(
pix
.
v
,
pjx
.
v
);
v_dx
.
v
=
vec_sub
(
pix
.
v
,
pjx
.
v
);
v_dy
.
v
=
vec_sub
(
piy
.
v
,
pjy
.
v
);
v_dy
.
v
=
vec_sub
(
piy
.
v
,
pjy
.
v
);
v_dz
.
v
=
vec_sub
(
piz
.
v
,
pjz
.
v
);
v_dz
.
v
=
vec_sub
(
piz
.
v
,
pjz
.
v
);
v_r2
.
v
=
vec_mul
(
v_dx
.
v
,
v_dx
.
v
);
v_r2
.
v
=
vec_mul
(
v_dx
.
v
,
v_dx
.
v
);
v_r2
.
v
=
vec_fma
(
v_dy
.
v
,
v_dy
.
v
,
v_r2
.
v
);
v_r2
.
v
=
vec_fma
(
v_dy
.
v
,
v_dy
.
v
,
v_r2
.
v
);
v_r2
.
v
=
vec_fma
(
v_dz
.
v
,
v_dz
.
v
,
v_r2
.
v
);
v_r2
.
v
=
vec_fma
(
v_dz
.
v
,
v_dz
.
v
,
v_r2
.
v
);
/* Form r2 > 0 mask, r2 < hig2 mask and r2 < hjg2 mask. */
/* Form r2 > 0 mask, r2 < hig2 mask and r2 < hjg2 mask. */
mask_t
v_doi_mask
,
v_doi_mask_self_check
;
mask_t
v_doi_mask
,
v_doi_mask_self_check
;
int
doi_mask
;
int
doi_mask
;
/* Form r2 > 0 mask.*/
/* Form r2 > 0 mask.*/
vec_create_mask
(
v_doi_mask_self_check
,
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
()));
vec_create_mask
(
v_doi_mask_self_check
,
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
()));
/* Form a mask from r2 < hig2 mask and r2 < hjg2 mask. */
/* Form a mask from r2 < hig2 mask and r2 < hjg2 mask. */
vector
v_h2
;
vector
v_h2
;
v_h2
.
v
=
vec_fmax
(
v_hig2
.
v
,
hjg2
.
v
);
v_h2
.
v
=
vec_fmax
(
v_hig2
.
v
,
hjg2
.
v
);
vec_create_mask
(
v_doi_mask
,
vec_cmp_lt
(
v_r2
.
v
,
v_h2
.
v
));
vec_create_mask
(
v_doi_mask
,
vec_cmp_lt
(
v_r2
.
v
,
v_h2
.
v
));
/* Combine all 3 masks and form integer mask. */
/* Combine all 3 masks and form integer mask. */
v_doi_mask
.
v
=
vec_and
(
v_doi_mask
.
v
,
v_doi_mask_self_check
.
v
);
v_doi_mask
.
v
=
vec_and
(
v_doi_mask
.
v
,
v_doi_mask_self_check
.
v
);
doi_mask
=
vec_form_int_mask
(
v_doi_mask
);
doi_mask
=
vec_form_int_mask
(
v_doi_mask
);
/* If there are any interactions perform them. */
/* If there are any interactions perform them. */
if
(
doi_mask
)
{
if
(
doi_mask
)
{
vector
v_hj_inv
;
vector
v_hj_inv
;
v_hj_inv
=
vec_reciprocal
(
hj
);
v_hj_inv
=
vec_reciprocal
(
hj
);
/* To stop floating point exceptions for when particle separations are 0.
/* To stop floating point exceptions for when particle separations are
*/
* 0.
v_r2
.
v
=
vec_add
(
v_r2
.
v
,
vec_set1
(
FLT_MIN
));
*/
v_r2
.
v
=
vec_add
(
v_r2
.
v
,
vec_set1
(
FLT_MIN
));
runner_iact_nonsym_1_vec_force
(
&
v_r2
,
&
v_dx
,
&
v_dy
,
&
v_dz
,
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
runner_iact_nonsym_1_vec_force
(
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
cell_cache
->
vx
[
pjd
],
&
v_r2
,
&
v_dx
,
&
v_dy
,
&
v_dz
,
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
&
cell_cache
->
vy
[
pjd
],
&
cell_cache
->
vz
[
pjd
],
&
cell_cache
->
rho
[
pjd
],
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
cell_cache
->
vx
[
pjd
],
&
cell_cache
->
grad_h
[
pjd
],
&
cell_cache
->
pOrho2
[
pjd
],
&
cell_cache
->
vy
[
pjd
],
&
cell_cache
->
vz
[
pjd
],
&
cell_cache
->
rho
[
pjd
],
&
cell_cache
->
balsara
[
pjd
],
&
cell_cache
->
soundspeed
[
pjd
],
&
cell_cache
->
grad_h
[
pjd
],
&
cell_cache
->
pOrho2
[
pjd
],
&
cell_cache
->
m
[
pjd
],
v_hi_inv
,
v_hj_inv
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
cell_cache
->
balsara
[
pjd
],
&
cell_cache
->
soundspeed
[
pjd
],
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
v_doi_mask
);
&
cell_cache
->
m
[
pjd
],
v_hi_inv
,
v_hj_inv
,
&
a_hydro_xSum
,
}
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
v_doi_mask
);
}
}
/* Loop over all other particles. */
}
/* Loop over all other particles. */
VEC_HADD
(
a_hydro_xSum
,
pi
->
a_hydro
[
0
]);
VEC_HADD
(
a_hydro_xSum
,
pi
->
a_hydro
[
0
]);
VEC_HADD
(
a_hydro_ySum
,
pi
->
a_hydro
[
1
]);
VEC_HADD
(
a_hydro_ySum
,
pi
->
a_hydro
[
1
]);
VEC_HADD
(
a_hydro_zSum
,
pi
->
a_hydro
[
2
]);
VEC_HADD
(
a_hydro_zSum
,
pi
->
a_hydro
[
2
]);
VEC_HADD
(
h_dtSum
,
pi
->
force
.
h_dt
);
VEC_HADD
(
h_dtSum
,
pi
->
force
.
h_dt
);
VEC_HMAX
(
v_sigSum
,
pi
->
force
.
v_sig
);
VEC_HMAX
(
v_sigSum
,
pi
->
force
.
v_sig
);
VEC_HADD
(
entropy_dtSum
,
pi
->
entropy_dt
);
VEC_HADD
(
entropy_dtSum
,
pi
->
entropy_dt
);
}
/* loop over all particles. */
}
/* loop over all particles. */
TIMER_TOC
(
timer_doself_force
);
TIMER_TOC
(
timer_doself_force
);
#endif
/* WITH_VECTORIZATION */
#endif
/* WITH_VECTORIZATION */
}
}
...
@@ -894,7 +897,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
...
@@ -894,7 +897,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
max_index_j
=
r
->
cj_cache
.
max_index
;
max_index_j
=
r
->
cj_cache
.
max_index
;
/* Find particles maximum index into cj, max_index_i[] and ci, max_index_j[].
/* Find particles maximum index into cj, max_index_i[] and ci, max_index_j[].
*/
*/
/* Also find the first pi that interacts with any particle in cj and the last
/* Also find the first pi that interacts with any particle in cj and the last
* pj that interacts with any particle in ci. */
* pj that interacts with any particle in ci. */
populate_max_index_no_cache
(
ci
,
cj
,
sort_i
,
sort_j
,
dx_max
,
rshift
,
hi_max
,
populate_max_index_no_cache
(
ci
,
cj
,
sort_i
,
sort_j
,
dx_max
,
rshift
,
hi_max
,
...
@@ -1170,7 +1173,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
...
@@ -1170,7 +1173,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
}
/* loop over the parts in ci. */
}
/* loop over the parts in ci. */
/* Perform horizontal adds on vector sums and store result in particle pj.
/* Perform horizontal adds on vector sums and store result in particle pj.
*/
*/
VEC_HADD
(
rhoSum
,
pj
->
rho
);
VEC_HADD
(
rhoSum
,
pj
->
rho
);
VEC_HADD
(
rho_dhSum
,
pj
->
density
.
rho_dh
);
VEC_HADD
(
rho_dhSum
,
pj
->
density
.
rho_dh
);
VEC_HADD
(
wcountSum
,
pj
->
density
.
wcount
);
VEC_HADD
(
wcountSum
,
pj
->
density
.
wcount
);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment