Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
SWIFTsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SWIFT
SWIFTsim
Commits
e828d7bc
Commit
e828d7bc
authored
7 years ago
by
James Willis
Browse files
Options
Downloads
Patches
Plain Diff
Bug inside debug checks.
parent
3d0eb1c0
No related branches found
No related tags found
1 merge request
!413
Bug inside debug checks.
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/runner_doiact_vec.c
+116
-113
116 additions, 113 deletions
src/runner_doiact_vec.c
with
116 additions
and
113 deletions
src/runner_doiact_vec.c
+
116
−
113
View file @
e828d7bc
...
...
@@ -273,7 +273,8 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache(
while
(
first_pi
>
0
&&
sort_i
[
first_pi
-
1
].
d
+
dx_max
+
hi_max
>
dj_min
)
{
first_pi
--
;
/* Store the index of the particle if it is active. */
if
(
part_is_active_no_debug
(
&
parts_i
[
sort_i
[
first_pi
].
i
],
max_active_bin
))
active_id
=
first_pi
;
if
(
part_is_active_no_debug
(
&
parts_i
[
sort_i
[
first_pi
].
i
],
max_active_bin
))
active_id
=
first_pi
;
}
/* Set the first active pi in range of any particle in cell j. */
...
...
@@ -320,7 +321,8 @@ __attribute__((always_inline)) INLINE static void populate_max_index_no_cache(
sort_j
[
last_pj
+
1
].
d
-
hj_max
-
dx_max
<
di_max
)
{
last_pj
++
;
/* Store the index of the particle if it is active. */
if
(
part_is_active_no_debug
(
&
parts_j
[
sort_j
[
last_pj
].
i
],
max_active_bin
))
active_id
=
last_pj
;
if
(
part_is_active_no_debug
(
&
parts_j
[
sort_j
[
last_pj
].
i
],
max_active_bin
))
active_id
=
last_pj
;
}
/* Set the last active pj in range of any particle in cell i. */
...
...
@@ -603,14 +605,14 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
const
int
num_vec_proc
=
1
;
const
timebin_t
max_active_bin
=
e
->
max_active_bin
;
struct
part
*
restrict
parts
=
c
->
parts
;
const
int
count
=
c
->
count
;
vector
v_hi
,
v_vix
,
v_viy
,
v_viz
,
v_hig2
,
v_r2
;
vector
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
;
TIMER_TIC
TIMER_TIC
;
if
(
!
cell_is_active
(
c
,
e
))
return
;
...
...
@@ -634,145 +636,146 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
if
(
pi
->
ti_drift
!=
e
->
ti_current
)
error
(
"Particle pi not drifted to current time"
);
}
}
#endif
/* Loop over the particles in the cell. */
for
(
int
pid
=
0
;
pid
<
count
;
pid
++
)
{
/* Loop over the particles in the cell. */
for
(
int
pid
=
0
;
pid
<
count
;
pid
++
)
{
/* Get a pointer to the ith particle. */
pi
=
&
parts
[
pid
];
/* Get a pointer to the ith particle. */
pi
=
&
parts
[
pid
];
/* Is the ith particle active? */
if
(
!
part_is_active_no_debug
(
pi
,
max_active_bin
))
continue
;
/* Is the ith particle active? */
if
(
!
part_is_active_no_debug
(
pi
,
max_active_bin
))
continue
;
vector
pix
,
piy
,
piz
;
vector
pix
,
piy
,
piz
;
const
float
hi
=
cell_cache
->
h
[
pid
];
const
float
hi
=
cell_cache
->
h
[
pid
];
/* Fill particle pi vectors. */
pix
.
v
=
vec_set1
(
cell_cache
->
x
[
pid
]);
piy
.
v
=
vec_set1
(
cell_cache
->
y
[
pid
]);
piz
.
v
=
vec_set1
(
cell_cache
->
z
[
pid
]);
v_hi
.
v
=
vec_set1
(
hi
);
v_vix
.
v
=
vec_set1
(
cell_cache
->
vx
[
pid
]);
v_viy
.
v
=
vec_set1
(
cell_cache
->
vy
[
pid
]);
v_viz
.
v
=
vec_set1
(
cell_cache
->
vz
[
pid
]);
/* Fill particle pi vectors. */
pix
.
v
=
vec_set1
(
cell_cache
->
x
[
pid
]);
piy
.
v
=
vec_set1
(
cell_cache
->
y
[
pid
]);
piz
.
v
=
vec_set1
(
cell_cache
->
z
[
pid
]);
v_hi
.
v
=
vec_set1
(
hi
);
v_vix
.
v
=
vec_set1
(
cell_cache
->
vx
[
pid
]);
v_viy
.
v
=
vec_set1
(
cell_cache
->
vy
[
pid
]);
v_viz
.
v
=
vec_set1
(
cell_cache
->
vz
[
pid
]);
v_rhoi
.
v
=
vec_set1
(
cell_cache
->
rho
[
pid
]);
v_grad_hi
.
v
=
vec_set1
(
cell_cache
->
grad_h
[
pid
]);
v_pOrhoi2
.
v
=
vec_set1
(
cell_cache
->
pOrho2
[
pid
]);
v_balsara_i
.
v
=
vec_set1
(
cell_cache
->
balsara
[
pid
]);
v_ci
.
v
=
vec_set1
(
cell_cache
->
soundspeed
[
pid
]);
v_rhoi
.
v
=
vec_set1
(
cell_cache
->
rho
[
pid
]);
v_grad_hi
.
v
=
vec_set1
(
cell_cache
->
grad_h
[
pid
]);
v_pOrhoi2
.
v
=
vec_set1
(
cell_cache
->
pOrho2
[
pid
]);
v_balsara_i
.
v
=
vec_set1
(
cell_cache
->
balsara
[
pid
]);
v_ci
.
v
=
vec_set1
(
cell_cache
->
soundspeed
[
pid
]);
const
float
hig2
=
hi
*
hi
*
kernel_gamma2
;
v_hig2
.
v
=
vec_set1
(
hig2
);
const
float
hig2
=
hi
*
hi
*
kernel_gamma2
;
v_hig2
.
v
=
vec_set1
(
hig2
);
/* Reset cumulative sums of update vectors. */
vector
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
;
/* Reset cumulative sums of update vectors. */
vector
a_hydro_xSum
,
a_hydro_ySum
,
a_hydro_zSum
,
h_dtSum
,
v_sigSum
,
entropy_dtSum
;
/* Get the inverse of hi. */
vector
v_hi_inv
;
/* Get the inverse of hi. */
vector
v_hi_inv
;
v_hi_inv
=
vec_reciprocal
(
v_hi
);
v_hi_inv
=
vec_reciprocal
(
v_hi
);
a_hydro_xSum
.
v
=
vec_setzero
();
a_hydro_ySum
.
v
=
vec_setzero
();
a_hydro_zSum
.
v
=
vec_setzero
();
h_dtSum
.
v
=
vec_setzero
();
v_sigSum
.
v
=
vec_set1
(
pi
->
force
.
v_sig
);
entropy_dtSum
.
v
=
vec_setzero
();
a_hydro_xSum
.
v
=
vec_setzero
();
a_hydro_ySum
.
v
=
vec_setzero
();
a_hydro_zSum
.
v
=
vec_setzero
();
h_dtSum
.
v
=
vec_setzero
();
v_sigSum
.
v
=
vec_set1
(
pi
->
force
.
v_sig
);
entropy_dtSum
.
v
=
vec_setzero
();
/* Pad cache if there is a serial remainder. */
count_align
=
count
;
int
rem
=
count
%
(
num_vec_proc
*
VEC_SIZE
);
if
(
rem
!=
0
)
{
int
pad
=
(
num_vec_proc
*
VEC_SIZE
)
-
rem
;
/* Pad cache if there is a serial remainder. */
count_align
=
count
;
int
rem
=
count
%
(
num_vec_proc
*
VEC_SIZE
);
if
(
rem
!=
0
)
{
int
pad
=
(
num_vec_proc
*
VEC_SIZE
)
-
rem
;
count_align
+=
pad
;
count_align
+=
pad
;
/* Set positions to the same as particle pi so when the r2 > 0 mask is
* applied these extra contributions are masked out.*/
for
(
int
i
=
count
;
i
<
count_align
;
i
++
)
{
cell_cache
->
x
[
i
]
=
pix
.
f
[
0
];
cell_cache
->
y
[
i
]
=
piy
.
f
[
0
];
cell_cache
->
z
[
i
]
=
piz
.
f
[
0
];
cell_cache
->
h
[
i
]
=
1
.
f
;
/* Set positions to the same as particle pi so when the r2 > 0 mask is
* applied these extra contributions are masked out.*/
for
(
int
i
=
count
;
i
<
count_align
;
i
++
)
{
cell_cache
->
x
[
i
]
=
pix
.
f
[
0
];
cell_cache
->
y
[
i
]
=
piy
.
f
[
0
];
cell_cache
->
z
[
i
]
=
piz
.
f
[
0
];
cell_cache
->
h
[
i
]
=
1
.
f
;
}
}
}
vector
pjx
,
pjy
,
pjz
,
hj
,
hjg2
;
vector
pjx
,
pjy
,
pjz
,
hj
,
hjg2
;
/* Find all of particle pi's interacions and store needed values in the
* secondary cache.*/
for
(
int
pjd
=
0
;
pjd
<
count_align
;
pjd
+=
(
num_vec_proc
*
VEC_SIZE
))
{
/* Find all of particle pi's interacions and store needed values in the
* secondary cache.*/
for
(
int
pjd
=
0
;
pjd
<
count_align
;
pjd
+=
(
num_vec_proc
*
VEC_SIZE
))
{
/* Load 1 set of vectors from the particle cache. */
pjx
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
]);
pjy
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
]);
pjz
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
]);
hj
.
v
=
vec_load
(
&
cell_cache
->
h
[
pjd
]);
hjg2
.
v
=
vec_mul
(
vec_mul
(
hj
.
v
,
hj
.
v
),
kernel_gamma2_vec
.
v
);
/* Load 1 set of vectors from the particle cache. */
pjx
.
v
=
vec_load
(
&
cell_cache
->
x
[
pjd
]);
pjy
.
v
=
vec_load
(
&
cell_cache
->
y
[
pjd
]);
pjz
.
v
=
vec_load
(
&
cell_cache
->
z
[
pjd
]);
hj
.
v
=
vec_load
(
&
cell_cache
->
h
[
pjd
]);
hjg2
.
v
=
vec_mul
(
vec_mul
(
hj
.
v
,
hj
.
v
),
kernel_gamma2_vec
.
v
);
/* Compute the pairwise distance. */
vector
v_dx
,
v_dy
,
v_dz
;
/* Compute the pairwise distance. */
vector
v_dx
,
v_dy
,
v_dz
;
v_dx
.
v
=
vec_sub
(
pix
.
v
,
pjx
.
v
);
v_dy
.
v
=
vec_sub
(
piy
.
v
,
pjy
.
v
);
v_dz
.
v
=
vec_sub
(
piz
.
v
,
pjz
.
v
);
v_dx
.
v
=
vec_sub
(
pix
.
v
,
pjx
.
v
);
v_dy
.
v
=
vec_sub
(
piy
.
v
,
pjy
.
v
);
v_dz
.
v
=
vec_sub
(
piz
.
v
,
pjz
.
v
);
v_r2
.
v
=
vec_mul
(
v_dx
.
v
,
v_dx
.
v
);
v_r2
.
v
=
vec_fma
(
v_dy
.
v
,
v_dy
.
v
,
v_r2
.
v
);
v_r2
.
v
=
vec_fma
(
v_dz
.
v
,
v_dz
.
v
,
v_r2
.
v
);
v_r2
.
v
=
vec_mul
(
v_dx
.
v
,
v_dx
.
v
);
v_r2
.
v
=
vec_fma
(
v_dy
.
v
,
v_dy
.
v
,
v_r2
.
v
);
v_r2
.
v
=
vec_fma
(
v_dz
.
v
,
v_dz
.
v
,
v_r2
.
v
);
/* Form r2 > 0 mask, r2 < hig2 mask and r2 < hjg2 mask. */
mask_t
v_doi_mask
,
v_doi_mask_self_check
;
int
doi_mask
;
/* Form r2 > 0 mask, r2 < hig2 mask and r2 < hjg2 mask. */
mask_t
v_doi_mask
,
v_doi_mask_self_check
;
int
doi_mask
;
/* Form r2 > 0 mask.*/
vec_create_mask
(
v_doi_mask_self_check
,
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
()));
/* Form r2 > 0 mask.*/
vec_create_mask
(
v_doi_mask_self_check
,
vec_cmp_gt
(
v_r2
.
v
,
vec_setzero
()));
/* Form a mask from r2 < hig2 mask and r2 < hjg2 mask. */
vector
v_h2
;
v_h2
.
v
=
vec_fmax
(
v_hig2
.
v
,
hjg2
.
v
);
vec_create_mask
(
v_doi_mask
,
vec_cmp_lt
(
v_r2
.
v
,
v_h2
.
v
));
/* Form a mask from r2 < hig2 mask and r2 < hjg2 mask. */
vector
v_h2
;
v_h2
.
v
=
vec_fmax
(
v_hig2
.
v
,
hjg2
.
v
);
vec_create_mask
(
v_doi_mask
,
vec_cmp_lt
(
v_r2
.
v
,
v_h2
.
v
));
/* Combine all 3 masks and form integer mask. */
v_doi_mask
.
v
=
vec_and
(
v_doi_mask
.
v
,
v_doi_mask_self_check
.
v
);
doi_mask
=
vec_form_int_mask
(
v_doi_mask
);
/* Combine all 3 masks and form integer mask. */
v_doi_mask
.
v
=
vec_and
(
v_doi_mask
.
v
,
v_doi_mask_self_check
.
v
);
doi_mask
=
vec_form_int_mask
(
v_doi_mask
);
/* If there are any interactions perform them. */
if
(
doi_mask
)
{
vector
v_hj_inv
;
v_hj_inv
=
vec_reciprocal
(
hj
);
/* To stop floating point exceptions for when particle separations are 0.
*/
v_r2
.
v
=
vec_add
(
v_r2
.
v
,
vec_set1
(
FLT_MIN
));
runner_iact_nonsym_1_vec_force
(
&
v_r2
,
&
v_dx
,
&
v_dy
,
&
v_dz
,
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
cell_cache
->
vx
[
pjd
],
&
cell_cache
->
vy
[
pjd
],
&
cell_cache
->
vz
[
pjd
],
&
cell_cache
->
rho
[
pjd
],
&
cell_cache
->
grad_h
[
pjd
],
&
cell_cache
->
pOrho2
[
pjd
],
&
cell_cache
->
balsara
[
pjd
],
&
cell_cache
->
soundspeed
[
pjd
],
&
cell_cache
->
m
[
pjd
],
v_hi_inv
,
v_hj_inv
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
v_doi_mask
);
}
/* If there are any interactions perform them. */
if
(
doi_mask
)
{
vector
v_hj_inv
;
v_hj_inv
=
vec_reciprocal
(
hj
);
/* To stop floating point exceptions for when particle separations are
* 0.
*/
v_r2
.
v
=
vec_add
(
v_r2
.
v
,
vec_set1
(
FLT_MIN
));
runner_iact_nonsym_1_vec_force
(
&
v_r2
,
&
v_dx
,
&
v_dy
,
&
v_dz
,
v_vix
,
v_viy
,
v_viz
,
v_rhoi
,
v_grad_hi
,
v_pOrhoi2
,
v_balsara_i
,
v_ci
,
&
cell_cache
->
vx
[
pjd
],
&
cell_cache
->
vy
[
pjd
],
&
cell_cache
->
vz
[
pjd
],
&
cell_cache
->
rho
[
pjd
],
&
cell_cache
->
grad_h
[
pjd
],
&
cell_cache
->
pOrho2
[
pjd
],
&
cell_cache
->
balsara
[
pjd
],
&
cell_cache
->
soundspeed
[
pjd
],
&
cell_cache
->
m
[
pjd
],
v_hi_inv
,
v_hj_inv
,
&
a_hydro_xSum
,
&
a_hydro_ySum
,
&
a_hydro_zSum
,
&
h_dtSum
,
&
v_sigSum
,
&
entropy_dtSum
,
v_doi_mask
);
}
}
/* Loop over all other particles. */
}
/* Loop over all other particles. */
VEC_HADD
(
a_hydro_xSum
,
pi
->
a_hydro
[
0
]);
VEC_HADD
(
a_hydro_ySum
,
pi
->
a_hydro
[
1
]);
VEC_HADD
(
a_hydro_zSum
,
pi
->
a_hydro
[
2
]);
VEC_HADD
(
h_dtSum
,
pi
->
force
.
h_dt
);
VEC_HMAX
(
v_sigSum
,
pi
->
force
.
v_sig
);
VEC_HADD
(
entropy_dtSum
,
pi
->
entropy_dt
);
VEC_HADD
(
a_hydro_xSum
,
pi
->
a_hydro
[
0
]);
VEC_HADD
(
a_hydro_ySum
,
pi
->
a_hydro
[
1
]);
VEC_HADD
(
a_hydro_zSum
,
pi
->
a_hydro
[
2
]);
VEC_HADD
(
h_dtSum
,
pi
->
force
.
h_dt
);
VEC_HMAX
(
v_sigSum
,
pi
->
force
.
v_sig
);
VEC_HADD
(
entropy_dtSum
,
pi
->
entropy_dt
);
}
/* loop over all particles. */
}
/* loop over all particles. */
TIMER_TOC
(
timer_doself_force
);
TIMER_TOC
(
timer_doself_force
);
#endif
/* WITH_VECTORIZATION */
}
...
...
@@ -894,7 +897,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
max_index_j
=
r
->
cj_cache
.
max_index
;
/* Find particles maximum index into cj, max_index_i[] and ci, max_index_j[].
*/
*/
/* Also find the first pi that interacts with any particle in cj and the last
* pj that interacts with any particle in ci. */
populate_max_index_no_cache
(
ci
,
cj
,
sort_i
,
sort_j
,
dx_max
,
rshift
,
hi_max
,
...
...
@@ -1170,7 +1173,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
}
/* loop over the parts in ci. */
/* Perform horizontal adds on vector sums and store result in particle pj.
*/
*/
VEC_HADD
(
rhoSum
,
pj
->
rho
);
VEC_HADD
(
rho_dhSum
,
pj
->
density
.
rho_dh
);
VEC_HADD
(
wcountSum
,
pj
->
density
.
wcount
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment