SWIFT / SWIFTsim · Commits · bcffe08f

Commit bcffe08f authored Oct 19, 2017 by Matthieu Schaller

Code formatting. Small style changes. Updates some comments in the cache construction.

parent 52507e8b
Changes: 2
src/cache.h (view file @ bcffe08f)
...
...
@@ -198,8 +198,8 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
  swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);

  const struct part *restrict parts = ci->parts;
  const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};

  /* Shift the particles positions to a local frame so single precision can be
   * used instead of double precision. */
  for (int i = 0; i < ci->count; i++) {
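Note on the hunk above: the cached coordinates are stored relative to the cell location, so the small local offsets fit comfortably in single precision even when the absolute positions need doubles. A minimal stand-alone illustration of that local-frame shift (plain C; shift_to_local_frame, cell_corner and the example values are illustrative names, not SWIFT's API):

#include <stdio.h>

/* Illustrative only: store absolute double-precision positions relative to a
 * local origin so they can be kept as floats without losing resolution. */
static void shift_to_local_frame(const double *pos_abs,
                                 const double *cell_corner, float *pos_local,
                                 int count) {
  for (int i = 0; i < count; i++) {
    /* The difference is at most a few cell widths, so the cast to float keeps
     * far more relative precision than casting pos_abs itself would. */
    pos_local[3 * i + 0] = (float)(pos_abs[3 * i + 0] - cell_corner[0]);
    pos_local[3 * i + 1] = (float)(pos_abs[3 * i + 1] - cell_corner[1]);
    pos_local[3 * i + 2] = (float)(pos_abs[3 * i + 2] - cell_corner[2]);
  }
}

int main(void) {
  const double corner[3] = {1000.0, 2000.0, 3000.0};
  const double pos[3] = {1000.25, 2000.5, 3000.75};
  float local[3];
  shift_to_local_frame(pos, corner, local, 1);
  printf("%f %f %f\n", local[0], local[1], local[2]); /* 0.25 0.5 0.75 */
  return 0;
}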
...
...
@@ -250,7 +250,7 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
                            SWIFT_CACHE_ALIGNMENT);

  const struct part *restrict parts = ci->parts;
  const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};

  /* Shift the particles positions to a local frame so single precision can be
   * used instead of double precision. */
...
...
@@ -296,7 +296,6 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
    const struct entry *restrict sort_j, const double *restrict const shift,
    int *first_pi, int *last_pj) {

-  int idx;
  /* Pad number of particles read to the vector size. */
  int rem = (ci->count - *first_pi) % VEC_SIZE;
  if (rem != 0) {
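The remainder handling that starts here rounds the number of cached particles up to a multiple of the vector width, so the SIMD loops can always load whole vectors. A small sketch of that rounding rule, assuming a vector width of 8 and a hypothetical pad_to_vector helper:

#include <stdio.h>

#define VEC_SIZE 8 /* assumed vector width for this example */

/* Round count up to the next multiple of VEC_SIZE, mirroring how the particle
 * caches are sized so that whole vectors can always be read. */
static int pad_to_vector(int count) {
  const int rem = count % VEC_SIZE;
  return (rem == 0) ? count : count + (VEC_SIZE - rem);
}

int main(void) {
  printf("%d\n", pad_to_vector(13)); /* 16 */
  printf("%d\n", pad_to_vector(16)); /* 16 */
  return 0;
}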
...
...
@@ -312,17 +311,17 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
    if (*last_pj + pad < cj->count) *last_pj += pad;
  }

  /* Get some local pointers */
  const int first_pi_align = *first_pi;
  const int last_pj_align = *last_pj;
  const struct part *restrict parts_i = ci->parts;
  const struct part *restrict parts_j = cj->parts;

  /* Shift particles to the local frame and account for boundary conditions.*/
  const double total_ci_shift[3] = {
      cj->loc[0] + shift[0], cj->loc[1] + shift[1], cj->loc[2] + shift[2]};
  const double total_cj_shift[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};

  /* Let the compiler know that the data is aligned and create pointers to the
   * arrays inside the cache. */
  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
...
...
@@ -335,16 +334,11 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
  swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);

  int ci_cache_count = ci->count - first_pi_align;

  /* Shift the particles positions to a local frame (ci frame) so single
-   * precision
-   * can be
-   * used instead of double precision. Also shift the cell ci, particles
-   * positions
-   * due to BCs but leave cell cj. */
+   * precision can be used instead of double precision. */
  for (int i = 0; i < ci_cache_count; i++) {

    /* Make sure ci_cache is filled from the first element. */
-    idx = sort_i[i + first_pi_align].i;
+    const int idx = sort_i[i + first_pi_align].i;

    x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
    y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
    z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
...
...
@@ -371,30 +365,31 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
"is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. x=%f, ci->width[0]=%f"
,
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
],
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
],
i
,
x
[
i
],
ci
->
width
[
0
]);
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
],
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
],
i
,
x
[
i
],
ci
->
width
[
0
]);
if
(
y
[
i
]
>
shift_threshold_y
||
y
[
i
]
<
-
shift_threshold_y
)
error
(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d y pos "
"is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. y=%f, ci->width[1]=%f"
,
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
],
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
],
i
,
y
[
i
],
ci
->
width
[
1
]);
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
],
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
],
i
,
y
[
i
],
ci
->
width
[
1
]);
if
(
z
[
i
]
>
shift_threshold_z
||
z
[
i
]
<
-
shift_threshold_z
)
error
(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d z pos "
"is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. z=%f, ci->width[2]=%f"
,
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
],
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
],
i
,
z
[
i
],
ci
->
width
[
2
]);
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
],
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
],
i
,
z
[
i
],
ci
->
width
[
2
]);
}
#endif
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
const
float
max_dx
=
max
(
ci
->
dx_max_part
,
cj
->
dx_max_part
);
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const
double
max_dx
=
max
(
ci
->
dx_max_part
,
cj
->
dx_max_part
);
const
float
pos_padded
[
3
]
=
{
-
(
2
.
*
ci
->
width
[
0
]
+
max_dx
),
-
(
2
.
*
ci
->
width
[
1
]
+
max_dx
),
-
(
2
.
*
ci
->
width
[
2
]
+
max_dx
)};
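The padding entries introduced above sit outside the cell (negative coordinates of roughly the same magnitude as the real cell-local positions), so every pairwise distance test rejects them while r2 stays far from overflowing a float. A rough sketch of the idea; pad_cache and its arguments are illustrative names rather than the SWIFT cache interface:

#include <stdio.h>

/* Illustrative only: fill the tail of a padded cache with fake particles
 * placed just outside the cell, so vectorised distance tests reject them
 * without a separate scalar remainder loop. */
static void pad_cache(float *x, float *y, float *z, int count,
                      int padded_count, const float width[3], float max_dx) {
  const float pad_x = -(2.f * width[0] + max_dx);
  const float pad_y = -(2.f * width[1] + max_dx);
  const float pad_z = -(2.f * width[2] + max_dx);
  for (int i = count; i < padded_count; i++) {
    x[i] = pad_x; /* outside the cell: every pairwise r2 exceeds h2 */
    y[i] = pad_y;
    z[i] = pad_z;
  }
}

int main(void) {
  float x[8] = {0.1f, 0.2f, 0.3f}, y[8] = {0.f}, z[8] = {0.f};
  const float width[3] = {1.f, 1.f, 1.f};
  pad_cache(x, y, z, 3, 8, width, 0.05f);
  printf("padding value: %f\n", x[7]); /* -2.05 */
  return 0;
}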
...
...
@@ -425,7 +420,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
  swift_declare_aligned_ptr(float, vzj, cj_cache->vz, SWIFT_CACHE_ALIGNMENT);

  for (int i = 0; i <= last_pj_align; i++) {
-    idx = sort_j[i].i;
+    const int idx = sort_j[i].i;
    xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
    yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
    zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
...
...
@@ -445,29 +440,30 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
"pos is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. xj=%f, ci->width[0]=%f"
,
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
],
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
],
i
,
xj
[
i
],
ci
->
width
[
0
]);
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
],
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
],
i
,
xj
[
i
],
ci
->
width
[
0
]);
if
(
yj
[
i
]
>
shift_threshold_y
||
yj
[
i
]
<
-
shift_threshold_y
)
error
(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d yj "
"pos is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. yj=%f, ci->width[1]=%f"
,
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
],
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
],
i
,
yj
[
i
],
ci
->
width
[
1
]);
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
],
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
],
i
,
yj
[
i
],
ci
->
width
[
1
]);
if
(
zj
[
i
]
>
shift_threshold_z
||
zj
[
i
]
<
-
shift_threshold_z
)
error
(
"Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d zj "
"pos is not within "
"[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
"2*space_maxreldx)]. zj=%f, ci->width[2]=%f"
,
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
],
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
],
i
,
zj
[
i
],
ci
->
width
[
2
]);
ci
->
loc
[
0
],
ci
->
loc
[
1
],
ci
->
loc
[
2
],
cj
->
loc
[
0
],
cj
->
loc
[
1
],
cj
->
loc
[
2
],
i
,
zj
[
i
],
ci
->
width
[
2
]);
}
#endif
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
* interact. We use values of the same magnitude (but negative!) as the real
* particles to avoid overflow problems. */
const
float
pos_padded_j
[
3
]
=
{
-
(
2
.
*
cj
->
width
[
0
]
+
max_dx
),
-
(
2
.
*
cj
->
width
[
1
]
+
max_dx
),
-
(
2
.
*
cj
->
width
[
2
]
+
max_dx
)};
...
...
@@ -508,7 +504,6 @@ cache_read_two_partial_cells_sorted_force(
    const struct entry *restrict sort_i, const struct entry *restrict sort_j,
    const double *const shift, int *first_pi, int *last_pj) {

-  int idx;
  /* Pad number of particles read to the vector size. */
  int rem = (ci->count - *first_pi) % VEC_SIZE;
  if (rem != 0) {
...
...
@@ -524,16 +519,16 @@ cache_read_two_partial_cells_sorted_force(
    if (*last_pj + pad < cj->count) *last_pj += pad;
  }

  /* Get some local pointers */
  const int first_pi_align = *first_pi;
  const int last_pj_align = *last_pj;
  const struct part *restrict parts_i = ci->parts;
  const struct part *restrict parts_j = cj->parts;

  /* Shift particles to the local frame and account for boundary conditions.*/
  const double total_ci_shift[3] = {
      cj->loc[0] + shift[0], cj->loc[1] + shift[1], cj->loc[2] + shift[2]};
  const double total_cj_shift[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};

  /* Let the compiler know that the data is aligned and create pointers to the
   * arrays inside the cache. */
...
...
@@ -557,14 +552,10 @@ cache_read_two_partial_cells_sorted_force(
  int ci_cache_count = ci->count - first_pi_align;

  /* Shift the particles positions to a local frame (ci frame) so single
-   * precision
-   * can be
-   * used instead of double precision. Also shift the cell ci, particles
-   * positions
-   * due to BCs but leave cell cj. */
+   * precision can be used instead of double precision. */
  for (int i = 0; i < ci_cache_count; i++) {

    /* Make sure ci_cache is filled from the first element. */
-    idx = sort_i[i + first_pi_align].i;
+    const int idx = sort_i[i + first_pi_align].i;

    x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
    y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
    z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
...
...
@@ -581,8 +572,9 @@ cache_read_two_partial_cells_sorted_force(
  }

  /* Pad cache with fake particles that exist outside the cell so will not
-   * interact.*/
-  const float max_dx = max(ci->dx_max_part, cj->dx_max_part);
+   * interact. We use values of the same magnitude (but negative!) as the real
+   * particles to avoid overflow problems. */
+  const double max_dx = max(ci->dx_max_part, cj->dx_max_part);
  const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
                               -(2. * ci->width[1] + max_dx),
                               -(2. * ci->width[2] + max_dx)};
...
...
@@ -626,7 +618,7 @@ cache_read_two_partial_cells_sorted_force(
                            SWIFT_CACHE_ALIGNMENT);

  for (int i = 0; i <= last_pj_align; i++) {
-    idx = sort_j[i].i;
+    const int idx = sort_j[i].i;
    xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
    yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
    zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
...
...
@@ -643,7 +635,8 @@ cache_read_two_partial_cells_sorted_force(
  }

  /* Pad cache with fake particles that exist outside the cell so will not
-   * interact.*/
+   * interact. We use values of the same magnitude (but negative!) as the real
+   * particles to avoid overflow problems. */
  const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
                                 -(2. * cj->width[1] + max_dx),
                                 -(2. * cj->width[2] + max_dx)};
...
...
src/runner_doiact_vec.c (view file @ bcffe08f)
...
...
@@ -41,9 +41,11 @@ static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
 * gradient update on pi.
 * @param v_wcountSum (return) #vector holding the cumulative sum of the wcount
 * update on pi.
- * @param v_wcount_dhSum (return) #vector holding the cumulative sum of the wcount
+ * @param v_wcount_dhSum (return) #vector holding the cumulative sum of the
+ * wcount
 * gradient update on pi.
- * @param v_div_vSum (return) #vector holding the cumulative sum of the divergence
+ * @param v_div_vSum (return) #vector holding the cumulative sum of the
+ * divergence
 * update on pi.
 * @param v_curlvxSum (return) #vector holding the cumulative sum of the curl of
 * vx update on pi.
...
...
@@ -61,9 +63,9 @@ static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
__attribute__((always_inline)) INLINE static void calcRemInteractions(
    struct c2_cache *const int_cache, const int icount, vector *v_rhoSum,
    vector *v_rho_dhSum, vector *v_wcountSum, vector *v_wcount_dhSum,
    vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum,
    vector *v_curlvzSum, vector v_hi_inv, vector v_vix, vector v_viy,
    vector v_viz, int *icount_align) {

  mask_t int_mask, int_mask2;
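calcRemInteractions deals with the interactions left over once the secondary cache no longer holds a full vector's worth of entries: the tail is still processed in vector-shaped chunks, with a lane mask switching off the padded slots. A simplified, scalar stand-in for that pattern (masked_sum and VEC_SIZE = 8 are assumptions for illustration only, not the SWIFT routines):

#include <stdio.h>

#define VEC_SIZE 8 /* assumed vector width for this example */

/* Illustrative only: process a list in full "vectors" of VEC_SIZE entries and
 * handle the tail with a lane mask, mimicking how leftover interactions are
 * padded and masked instead of being handled by a scalar loop. */
static float masked_sum(const float *values, int count) {
  float sum = 0.f;
  const int aligned = (count / VEC_SIZE) * VEC_SIZE;

  /* Full vectors: every lane is active. */
  for (int i = 0; i < aligned; i += VEC_SIZE)
    for (int lane = 0; lane < VEC_SIZE; lane++) sum += values[i + lane];

  /* Remainder: same vector-shaped loop, but the mask disables padded lanes. */
  for (int lane = 0; lane < VEC_SIZE; lane++) {
    const int active = (aligned + lane < count); /* lane mask */
    sum += active ? values[aligned + lane] : 0.f;
  }
  return sum;
}

int main(void) {
  float v[11] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
  printf("%f\n", masked_sum(v, 11)); /* 11.0 */
  return 0;
}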
...
...
@@ -108,8 +110,8 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
        v_hi_inv, v_vix, v_viy, v_viz, &int_cache->vxq[*icount_align],
        &int_cache->vyq[*icount_align], &int_cache->vzq[*icount_align],
        &int_cache->mq[*icount_align], v_rhoSum, v_rho_dhSum, v_wcountSum,
        v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum,
        int_mask, int_mask2, 1);
  }
}
...
...
@@ -127,20 +129,25 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
 * @param int_cache (return) secondary #cache of interactions between two
 * particles.
 * @param icount Interaction count.
- * @param v_rhoSum #vector holding the cumulative sum of the density update on pi.
+ * @param v_rhoSum #vector holding the cumulative sum of the density update on
+ * pi.
 * @param v_rho_dhSum #vector holding the cumulative sum of the density gradient
 * update on pi.
 * @param v_wcountSum #vector holding the cumulative sum of the wcount update on
 * pi.
- * @param v_wcount_dhSum #vector holding the cumulative sum of the wcount gradient
+ * @param v_wcount_dhSum #vector holding the cumulative sum of the wcount
+ * gradient
 * update on pi.
 * @param v_div_vSum #vector holding the cumulative sum of the divergence update
 * on pi.
- * @param v_curlvxSum #vector holding the cumulative sum of the curl of vx update
+ * @param v_curlvxSum #vector holding the cumulative sum of the curl of vx
+ * update
 * on pi.
- * @param v_curlvySum #vector holding the cumulative sum of the curl of vy update
+ * @param v_curlvySum #vector holding the cumulative sum of the curl of vy
+ * update
 * on pi.
- * @param v_curlvzSum #vector holding the cumulative sum of the curl of vz update
+ * @param v_curlvzSum #vector holding the cumulative sum of the curl of vz
+ * update
 * on pi.
 * @param v_hi_inv #vector of 1/h for pi.
 * @param v_vix #vector of x velocity of pi.
...
...
@@ -152,8 +159,9 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
    vector *v_dz, const struct cache *const cell_cache,
    struct c2_cache *const int_cache, int *icount, vector *v_rhoSum,
    vector *v_rho_dhSum, vector *v_wcountSum, vector *v_wcount_dhSum,
    vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum,
    vector *v_curlvzSum, vector v_hi_inv, vector v_vix, vector v_viy,
    vector v_viz) {

  /* Left-pack values needed into the secondary cache using the interaction mask.
   */
...
...
@@ -203,8 +211,9 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
    /* Peform remainder interactions. */
    calcRemInteractions(int_cache, *icount, v_rhoSum, v_rho_dhSum, v_wcountSum,
                        v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum,
                        v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz,
                        &icount_align);

    mask_t int_mask, int_mask2;
    vec_init_mask_true(int_mask);
...
...
@@ -216,8 +225,8 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
          &int_cache->r2q[j], &int_cache->dxq[j], &int_cache->dyq[j],
          &int_cache->dzq[j], v_hi_inv, v_vix, v_viy, v_viz,
          &int_cache->vxq[j], &int_cache->vyq[j], &int_cache->vzq[j],
          &int_cache->mq[j], v_rhoSum, v_rho_dhSum, v_wcountSum,
          v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum,
          int_mask, int_mask2, 0);
    }

    /* Reset interaction count. */
...
...
@@ -574,8 +583,8 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
  v_hig2.v = vec_set1(hig2);

  /* Reset cumulative sums of update vectors. */
  vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
      v_curlvxSum, v_curlvySum, v_curlvzSum;

  /* Get the inverse of hi. */
  vector v_hi_inv;
...
...
@@ -671,24 +680,25 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
       * cache. */
      if (doi_mask) {
        storeInteractions(doi_mask, pjd, &v_r2, &v_dx, &v_dy, &v_dz,
                          cell_cache, &int_cache, &icount, &v_rhoSum,
                          &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
                          &v_div_vSum, &v_curlvxSum, &v_curlvySum,
                          &v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz);
      }
      if (doi_mask2) {
        storeInteractions(doi_mask2, pjd + VEC_SIZE, &v_r2_2, &v_dx_2, &v_dy_2,
                          &v_dz_2, cell_cache, &int_cache, &icount, &v_rhoSum,
                          &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
                          &v_div_vSum, &v_curlvxSum, &v_curlvySum,
                          &v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz);
      }
    }

    /* Perform padded vector remainder interactions if any are present. */
    calcRemInteractions(&int_cache, icount, &v_rhoSum, &v_rho_dhSum,
                        &v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
                        &v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv,
                        v_vix, v_viy, v_viz, &icount_align);

    /* Initialise masks to true in case remainder interactions have been
     * performed. */
...
...
@@ -702,9 +712,9 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
          &int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd],
          &int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz,
          &int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd],
          &int_cache.mq[pjd], &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
          &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
          &v_curlvzSum, int_mask, int_mask2, 0);
    }

    /* Perform horizontal adds on vector sums and store result in particle pi.
...
...
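The update vectors named in these calls are per-lane accumulators; only after the loop are they collapsed by a horizontal add into the scalar values written back to particle pi. A tiny stand-alone sketch of such a horizontal add (plain C, not the SWIFT vector macros):

#include <stdio.h>

#define VEC_SIZE 8 /* assumed vector width for this example */

/* Illustrative only: collapse a per-lane accumulator into one scalar, the way
 * a horizontal add folds a SIMD sum vector into the value stored on pi. */
static float horizontal_add(const float lanes[VEC_SIZE]) {
  float sum = 0.f;
  for (int k = 0; k < VEC_SIZE; k++) sum += lanes[k];
  return sum;
}

int main(void) {
  /* Each lane holds the partial density contribution it accumulated. */
  float rho_lanes[VEC_SIZE] = {0.1f, 0.2f, 0.f, 0.3f, 0.f, 0.1f, 0.f, 0.3f};
  printf("rho update: %f\n", horizontal_add(rho_lanes)); /* ~1.0 */
  return 0;
}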
@@ -903,8 +913,8 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
            &cell_cache->grad_h[pjd], &cell_cache->pOrho2[pjd],
            &cell_cache->balsara[pjd], &cell_cache->soundspeed[pjd],
            &cell_cache->m[pjd], v_hi_inv, v_hj_inv, &v_a_hydro_xSum,
            &v_a_hydro_ySum, &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum,
            &v_entropy_dtSum, v_doi_mask);
      }
    }

    /* Loop over all other particles. */
...
...
@@ -1030,8 +1040,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
  /* Read the needed particles into the two caches. */
  cache_read_two_partial_cells_sorted(ci, cj, ci_cache, cj_cache, sort_i,
                                      sort_j, shift, &first_pi, &last_pj);

  /* Get the number of particles read into the ci cache. */
  int ci_cache_count = count_i - first_pi;
...
...
@@ -1073,8 +1082,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
    v_hig2.v = vec_set1(hig2);

    /* Reset cumulative sums of update vectors. */
    vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
        v_curlvxSum, v_curlvySum, v_curlvzSum;

    /* Get the inverse of hi. */
    vector v_hi_inv;
...
...
@@ -1113,8 +1122,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
#ifdef SWIFT_DEBUG_CHECKS
      if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 ||
          cj_cache_idx + (VEC_SIZE - 1) > (last_pj + 1 + VEC_SIZE)) {
        error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d", cj_cache_idx,
              last_pj);
      }
#endif
...
...
@@ -1146,9 +1155,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
      runner_iact_nonsym_1_vec_density(
          &v_r2, &v_dx, &v_dy, &v_dz, v_hi_inv, v_vix, v_viy, v_viz,
          &cj_cache->vx[cj_cache_idx], &cj_cache->vy[cj_cache_idx],
          &cj_cache->vz[cj_cache_idx], &cj_cache->m[cj_cache_idx],
          &v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
          &v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_doi_mask);

    } /* loop over the parts in cj. */
...
...
@@ -1203,8 +1213,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
    v_hjg2.v = vec_set1(hjg2);

    /* Reset cumulative sums of update vectors. */
    vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
        v_curlvxSum, v_curlvySum, v_curlvzSum;

    /* Get the inverse of hj. */
    vector v_hj_inv;
...
...
@@ -1238,8 +1248,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
#ifdef SWIFT_DEBUG_CHECKS
      if (ci_cache_idx % VEC_SIZE != 0 || ci_cache_idx < 0 ||
          ci_cache_idx + (VEC_SIZE - 1) > (count_i - first_pi + VEC_SIZE)) {
        error(
            "Unaligned read!!! ci_cache_idx=%d, first_pi=%d, "
            "count_i=%d",
...
...
@@ -1277,9 +1286,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
      runner_iact_nonsym_1_vec_density(
          &v_r2, &v_dx, &v_dy, &v_dz, v_hj_inv, v_vjx, v_vjy, v_vjz,
          &ci_cache->vx[ci_cache_idx], &ci_cache->vy[ci_cache_idx],
          &ci_cache->vz[ci_cache_idx], &ci_cache->m[ci_cache_idx],
          &v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
          &v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_doj_mask);

    } /* loop over the parts in ci. */
...
...
@@ -1351,7 +1361,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
  /* Check if any particles are active and return if there are none. */
  int numActive = 0;

-  /* Use the largest smoothing length to make sure that no interactions are missed. */
+  /* Use the largest smoothing length to make sure that no interactions are
+   * missed. */
  const double h_max = max(hi_max, hj_max);

  if (active_ci) {
...
...
@@ -1416,8 +1427,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
  /* Read the needed particles into the two caches. */
  cache_read_two_partial_cells_sorted_force(ci, cj, ci_cache, cj_cache, sort_i,
                                            sort_j, shift, &first_pi,
                                            &last_pj);

  /* Get the number of particles read into the ci cache. */
  int ci_cache_count = count_i - first_pi;
...
...
@@ -1465,8 +1475,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
    v_hig2.v = vec_set1(hig2);

    /* Reset cumulative sums of update vectors. */
    vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum, v_sigSum,
        v_entropy_dtSum;

    /* Get the inverse of hi. */
    vector v_hi_inv;
...
...
@@ -1502,8 +1512,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
#ifdef SWIFT_DEBUG_CHECKS
      if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 ||
          cj_cache_idx + (VEC_SIZE - 1) > (last_pj + 1 + VEC_SIZE)) {
        error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d", cj_cache_idx,
              last_pj);
      }
#endif
...
...
@@ -1547,8 +1557,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
            &cj_cache->grad_h[cj_cache_idx], &cj_cache->pOrho2[cj_cache_idx],
            &cj_cache->balsara[cj_cache_idx],
            &cj_cache->soundspeed[cj_cache_idx], &cj_cache->m[cj_cache_idx],
            v_hi_inv, v_hj_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
            &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
            v_doi_mask);
      }
    } /* loop over the parts in cj. */
...
...
@@ -1610,8 +1621,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
    v_hjg2.v = vec_set1(hjg2);

    /* Reset cumulative sums of update vectors. */
    vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum,
        v_sigSum, v_entropy_dtSum;

    /* Get the inverse of hj. */
    vector v_hj_inv;
...
...
@@ -1689,8 +1700,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
            &ci_cache->grad_h[ci_cache_idx], &ci_cache->pOrho2[ci_cache_idx],
            &ci_cache->balsara[ci_cache_idx],
            &ci_cache->soundspeed[ci_cache_idx], &ci_cache->m[ci_cache_idx],
            v_hj_inv, v_hi_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
            &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
            v_doj_mask);
      }
    } /* loop over the parts in ci. */
...
...