Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
24be404f
Commit
24be404f
authored
Oct 07, 2017
by
James Willis
Browse files
Merge branch 'dopair2-vectorisation' into debug_interactions
parents
ea50f018
c62da58f
Changes
16
Expand all
Hide whitespace changes
Inline
Side-by-side
.gitignore
View file @
24be404f
...
...
@@ -55,7 +55,9 @@ tests/brute_force_125_standard.dat
tests/swift_dopair_125_standard.dat
tests/brute_force_125_perturbed.dat
tests/swift_dopair_125_perturbed.dat
tests/brute_force_active.dat
tests/brute_force_pair_active.dat
tests/brute_force_dopair2_active.dat
tests/swift_dopair2_force_active.dat
tests/brute_force_periodic_BC_perturbed.dat
tests/swift_dopair_active.dat
tests/test_nonsym_density_serial.dat
...
...
src/cache.h
View file @
24be404f
...
...
@@ -30,6 +30,8 @@
#include
"sort_part.h"
#include
"vector.h"
#include
<float.h>
/* Value used below to size C2_CACHE_SIZE. */
#define NUM_VEC_PROC 2
/* Sum of (NUM_VEC_PROC * VEC_SIZE * 6) and (NUM_VEC_PROC * VEC_SIZE). The
 * whole replacement list is parenthesised so the sum binds as a single value
 * when the macro appears inside a larger expression such as
 * `2 * C2_CACHE_SIZE` or `C2_CACHE_SIZE * sizeof(float)`. */
#define C2_CACHE_SIZE \
  ((NUM_VEC_PROC * VEC_SIZE * 6) + (NUM_VEC_PROC * VEC_SIZE))
...
...
@@ -407,12 +409,15 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
float
fake_pix
=
2
.
0
f
*
parts_i
[
sort_i
[
ci
->
count
-
1
].
i
].
x
[
0
];
const
float
max_dx
=
max
(
ci
->
dx_max_part
,
cj
->
dx_max_part
);
const
float
pos_padded
[
3
]
=
{
-
(
2
.
*
ci
->
width
[
0
]
+
max_dx
),
-
(
2
.
*
ci
->
width
[
1
]
+
max_dx
),
-
(
2
.
*
ci
->
width
[
2
]
+
max_dx
)};
for
(
int
i
=
ci
->
count
-
first_pi_align
;
i
<
ci
->
count
-
first_pi_align
+
VEC_SIZE
;
i
++
)
{
x
[
i
]
=
fake_pix
;
y
[
i
]
=
1
.
f
;
z
[
i
]
=
1
.
f
;
x
[
i
]
=
pos_padded
[
0
]
;
y
[
i
]
=
pos_padded
[
1
]
;
z
[
i
]
=
pos_padded
[
2
]
;
h
[
i
]
=
1
.
f
;
m
[
i
]
=
1
.
f
;
...
...
@@ -477,11 +482,12 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
/* Pad cache with fake particles that exist outside the cell so will not
* interact.*/
float
fake_pjx
=
2
.
0
f
*
cj
->
parts
[
sort_j
[
cj
->
count
-
1
].
i
].
x
[
0
];
const
float
pos_padded_j
[
3
]
=
{
-
(
2
.
*
cj
->
width
[
0
]
+
max_dx
),
-
(
2
.
*
cj
->
width
[
1
]
+
max_dx
),
-
(
2
.
*
cj
->
width
[
2
]
+
max_dx
)};
for
(
int
i
=
last_pj_align
+
1
;
i
<
last_pj_align
+
1
+
VEC_SIZE
;
i
++
)
{
xj
[
i
]
=
fake_pjx
;
yj
[
i
]
=
1
.
f
;
zj
[
i
]
=
1
.
f
;
xj
[
i
]
=
pos_padded_j
[
0
]
;
yj
[
i
]
=
pos_padded_j
[
1
]
;
zj
[
i
]
=
pos_padded_j
[
2
]
;
hj
[
i
]
=
1
.
f
;
mj
[
i
]
=
1
.
f
;
...
...
@@ -491,6 +497,194 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
}
}
/**
 * @brief Fill the two caches used by a force interaction between a pair of
 * cells, reading only the particles that are within range of the adjoining
 * cell. Particles are copied into the caches in sorted order and each cache
 * is then topped up with VEC_SIZE fake particles.
 *
 * @param ci The i #cell.
 * @param cj The j #cell.
 * @param ci_cache The #cache for cell ci.
 * @param cj_cache The #cache for cell cj.
 * @param sort_i The array of sorted particle indices for cell ci.
 * @param sort_j The array of sorted particle indices for cell cj.
 * @param shift The amount to shift the particle positions to account for BCs
 * @param first_pi The first particle in cell ci that is in range. May be
 * lowered on return so that the number of ci particles read is a multiple of
 * (num_vec_proc * VEC_SIZE).
 * @param last_pj The last particle in cell cj that is in range. May be raised
 * on return to the next multiple of (num_vec_proc * VEC_SIZE), provided that
 * index is still inside cell cj.
 * @param num_vec_proc Number of vectors that will be used to process the
 * interaction.
 */
__attribute__((always_inline)) INLINE void
cache_read_two_partial_cells_sorted_force(
    const struct cell *const ci, const struct cell *const cj,
    struct cache *const ci_cache, struct cache *const cj_cache,
    const struct entry *restrict sort_i, const struct entry *restrict sort_j,
    const double *const shift, int *first_pi, int *last_pj,
    const int num_vec_proc) {

  /* Number of particles consumed per processing step. */
  const int block_size = num_vec_proc * VEC_SIZE;

  /* Widen the ci range downwards to a whole number of blocks, unless that
   * would run off the start of the sorted list. */
  const int rem_i = (ci->count - *first_pi) % block_size;
  if (rem_i != 0) {
    const int pad_i = block_size - rem_i;
    if (*first_pi - pad_i >= 0) *first_pi -= pad_i;
  }

  /* Widen the cj range upwards in the same way, unless that would run off
   * the end of the cell. */
  const int rem_j = *last_pj % block_size;
  if (rem_j != 0) {
    const int pad_j = block_size - rem_j;
    if (*last_pj + pad_j < cj->count) *last_pj += pad_j;
  }

  const int first_pi_align = *first_pi;
  const int last_pj_align = *last_pj;

  const struct part *restrict parts_i = ci->parts;
  const struct part *restrict parts_j = cj->parts;

  /* Origin of the local frame: the location of cell ci. */
  const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};

  /* Aligned views of the arrays held by the ci cache. */
  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, h, ci_cache->h, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, m, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vx, ci_cache->vx, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vy, ci_cache->vy, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, rho, ci_cache->rho, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, grad_h, ci_cache->grad_h,
                            SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, pOrho2, ci_cache->pOrho2,
                            SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, balsara, ci_cache->balsara,
                            SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, soundspeed, ci_cache->soundspeed,
                            SWIFT_CACHE_ALIGNMENT);

  /* Number of real ci particles that go into the cache. */
  const int ci_cache_count = ci->count - first_pi_align;

  /* Copy the ci particles, starting at cache slot 0. Positions are moved
   * into the ci frame so that single precision is sufficient, and the
   * boundary-condition shift is applied to ci only (cj is left unshifted). */
  for (int k = 0; k < ci_cache_count; k++) {
    const int idx = sort_i[k + first_pi_align].i;
    const struct part *pi = &parts_i[idx];

    x[k] = (float)(pi->x[0] - loc[0] - shift[0]);
    y[k] = (float)(pi->x[1] - loc[1] - shift[1]);
    z[k] = (float)(pi->x[2] - loc[2] - shift[2]);
    h[k] = pi->h;
    m[k] = pi->mass;
    vx[k] = pi->v[0];
    vy[k] = pi->v[1];
    vz[k] = pi->v[2];
    rho[k] = pi->rho;
    grad_h[k] = pi->force.f;
    pOrho2[k] = pi->force.P_over_rho2;
    balsara[k] = pi->force.balsara;
    soundspeed[k] = pi->force.soundspeed;
  }

  /* Fake particles are placed outside the cell so that they do not
   * interact. */
  const float max_dx = max(ci->dx_max_part, cj->dx_max_part);
  const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
                               -(2. * ci->width[1] + max_dx),
                               -(2. * ci->width[2] + max_dx)};
  const float h_padded = ci->parts[0].h;

  /* Append VEC_SIZE fake particles after the real ci entries. */
  for (int k = ci_cache_count; k < ci_cache_count + VEC_SIZE; k++) {
    x[k] = pos_padded[0];
    y[k] = pos_padded[1];
    z[k] = pos_padded[2];
    h[k] = h_padded;
    m[k] = 1.f;
    vx[k] = 1.f;
    vy[k] = 1.f;
    vz[k] = 1.f;
    rho[k] = 1.f;
    grad_h[k] = 1.f;
    pOrho2[k] = 1.f;
    balsara[k] = 1.f;
    soundspeed[k] = 1.f;
  }

  /* Aligned views of the arrays held by the cj cache. */
  swift_declare_aligned_ptr(float, xj, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, yj, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, zj, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, hj, cj_cache->h, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, mj, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vxj, cj_cache->vx, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vyj, cj_cache->vy, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, vzj, cj_cache->vz, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, rhoj, cj_cache->rho, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, grad_hj, cj_cache->grad_h,
                            SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, pOrho2j, cj_cache->pOrho2,
                            SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, balsaraj, cj_cache->balsara,
                            SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, soundspeedj, cj_cache->soundspeed,
                            SWIFT_CACHE_ALIGNMENT);

  /* Number of real cj particles that go into the cache (indices 0 up to and
   * including last_pj_align). */
  const int cj_cache_count = last_pj_align + 1;

  /* Copy the cj particles, expressed in the ci frame but without the
   * boundary-condition shift. */
  for (int k = 0; k < cj_cache_count; k++) {
    const int idx = sort_j[k].i;
    const struct part *pj = &parts_j[idx];

    xj[k] = (float)(pj->x[0] - loc[0]);
    yj[k] = (float)(pj->x[1] - loc[1]);
    zj[k] = (float)(pj->x[2] - loc[2]);
    hj[k] = pj->h;
    mj[k] = pj->mass;
    vxj[k] = pj->v[0];
    vyj[k] = pj->v[1];
    vzj[k] = pj->v[2];
    rhoj[k] = pj->rho;
    grad_hj[k] = pj->force.f;
    pOrho2j[k] = pj->force.P_over_rho2;
    balsaraj[k] = pj->force.balsara;
    soundspeedj[k] = pj->force.soundspeed;
  }

  /* Fake cj particles, again placed outside the cell so that they do not
   * interact. */
  const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
                                 -(2. * cj->width[1] + max_dx),
                                 -(2. * cj->width[2] + max_dx)};
  const float h_padded_j = cj->parts[0].h;

  /* Append VEC_SIZE fake particles after the real cj entries. */
  for (int k = cj_cache_count; k < cj_cache_count + VEC_SIZE; k++) {
    xj[k] = pos_padded_j[0];
    yj[k] = pos_padded_j[1];
    zj[k] = pos_padded_j[2];
    hj[k] = h_padded_j;
    mj[k] = 1.f;
    vxj[k] = 1.f;
    vyj[k] = 1.f;
    vzj[k] = 1.f;
    rhoj[k] = 1.f;
    grad_hj[k] = 1.f;
    pOrho2j[k] = 1.f;
    balsaraj[k] = 1.f;
    soundspeedj[k] = 1.f;
  }
}
/* @brief Clean the memory allocated by a #cache object.
*
* @param c The #cache to clean.
...
...
src/hydro/Gadget2/hydro.h
View file @
24be404f
...
...
@@ -175,6 +175,11 @@ __attribute__((always_inline)) INLINE static void hydro_timestep_extra(
__attribute__
((
always_inline
))
INLINE
static
void
hydro_init_part
(
struct
part
*
restrict
p
,
const
struct
hydro_space
*
hs
)
{
#ifdef DEBUG_INTERACTIONS
for
(
int
i
=
0
;
i
<
NUM_OF_NEIGHBOURS
;
++
i
)
p
->
ids_ngbs_density
[
i
]
=
-
1
;
p
->
num_ngb_density
=
0
;
#endif
p
->
rho
=
0
.
f
;
p
->
density
.
wcount
=
0
.
f
;
p
->
density
.
wcount_dh
=
0
.
f
;
...
...
@@ -311,6 +316,11 @@ __attribute__((always_inline)) INLINE static void hydro_prepare_force(
__attribute__
((
always_inline
))
INLINE
static
void
hydro_reset_acceleration
(
struct
part
*
restrict
p
)
{
#ifdef DEBUG_INTERACTIONS
for
(
int
i
=
0
;
i
<
NUM_OF_NEIGHBOURS
;
++
i
)
p
->
ids_ngbs_force
[
i
]
=
-
1
;
p
->
num_ngb_force
=
0
;
#endif
/* Reset the acceleration. */
p
->
a_hydro
[
0
]
=
0
.
0
f
;
p
->
a_hydro
[
1
]
=
0
.
0
f
;
...
...
src/hydro/Gadget2/hydro_iact.h
View file @
24be404f
...
...
@@ -103,6 +103,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
pj
->
density
.
rot_v
[
0
]
+=
facj
*
curlvr
[
0
];
pj
->
density
.
rot_v
[
1
]
+=
facj
*
curlvr
[
1
];
pj
->
density
.
rot_v
[
2
]
+=
facj
*
curlvr
[
2
];
/* Update ngb counters */
#ifdef DEBUG_INTERACTIONS
if
(
pi
->
id
==
CHECK_PART_ID
)
pi
->
ids_ngbs_density
[
pi
->
num_ngb_density
]
=
pj
->
id
;
++
pi
->
num_ngb_density
;
if
(
pj
->
id
==
CHECK_PART_ID
)
pj
->
ids_ngbs_density
[
pj
->
num_ngb_density
]
=
pi
->
id
;
++
pj
->
num_ngb_density
;
#endif
}
/**
...
...
@@ -151,6 +160,13 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density(
pi
->
density
.
rot_v
[
0
]
+=
fac
*
curlvr
[
0
];
pi
->
density
.
rot_v
[
1
]
+=
fac
*
curlvr
[
1
];
pi
->
density
.
rot_v
[
2
]
+=
fac
*
curlvr
[
2
];
#ifdef DEBUG_INTERACTIONS
/* Update ngb counters */
if
(
pi
->
id
==
CHECK_PART_ID
)
pi
->
ids_ngbs_density
[
pi
->
num_ngb_density
]
=
pj
->
id
;
++
pi
->
num_ngb_density
;
#endif
}
#ifdef WITH_VECTORIZATION
...
...
@@ -480,6 +496,19 @@ __attribute__((always_inline)) INLINE static void runner_iact_force(
/* Change in entropy */
pi
->
entropy_dt
+=
mj
*
visc_term
*
dvdr
;
pj
->
entropy_dt
+=
mi
*
visc_term
*
dvdr
;
#ifdef DEBUG_INTERACTIONS
/* Update ngb counters */
if
(
pi
->
id
==
CHECK_PART_ID
)
{
pi
->
ids_ngbs_force
[
pi
->
num_ngb_force
]
=
pj
->
id
;
}
++
pi
->
num_ngb_force
;
if
(
pj
->
id
==
CHECK_PART_ID
)
{
pj
->
ids_ngbs_force
[
pj
->
num_ngb_force
]
=
pi
->
id
;
}
++
pj
->
num_ngb_force
;
#endif
}
/**
...
...
@@ -569,6 +598,15 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force(
/* Change in entropy */
pi
->
entropy_dt
+=
mj
*
visc_term
*
dvdr
;
#ifdef DEBUG_INTERACTIONS
/* Update ngb counters */
if
(
pi
->
id
==
CHECK_PART_ID
)
{
pi
->
ids_ngbs_force
[
pi
->
num_ngb_force
]
=
pj
->
id
;
}
++
pi
->
num_ngb_force
;
#endif
}
#ifdef WITH_VECTORIZATION
...
...
@@ -634,7 +672,7 @@ runner_iact_nonsym_1_vec_force(
hjd_inv
=
pow_dimension_plus_one_vec
(
hj_inv
);
xj
.
v
=
vec_mul
(
r
.
v
,
hj_inv
.
v
);
/* Calculate the kernel
for two particles
. */
/* Calculate the kernel. */
kernel_eval_dWdx_force_vec
(
&
xj
,
&
wj_dx
);
wj_dr
.
v
=
vec_mul
(
hjd_inv
.
v
,
wj_dx
.
v
);
...
...
src/hydro/Gadget2/hydro_io.h
View file @
24be404f
...
...
@@ -77,6 +77,10 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
*
num_fields
=
10
;
#ifdef DEBUG_INTERACTIONS
*
num_fields
=
14
;
#endif
/* List what we want to write */
list
[
0
]
=
io_make_output_field
(
"Coordinates"
,
DOUBLE
,
3
,
UNIT_CONV_LENGTH
,
parts
,
x
);
...
...
@@ -99,6 +103,17 @@ void hydro_write_particles(struct part* parts, struct io_props* list,
parts
,
rho
,
convert_u
);
list
[
9
]
=
io_make_output_field_convert_part
(
"Pressure"
,
FLOAT
,
1
,
UNIT_CONV_PRESSURE
,
parts
,
rho
,
convert_P
);
#ifdef DEBUG_INTERACTIONS
list
[
10
]
=
io_make_output_field
(
"Num_ngb_density"
,
INT
,
1
,
UNIT_CONV_NO_UNITS
,
parts
,
num_ngb_density
);
list
[
11
]
=
io_make_output_field
(
"Num_ngb_force"
,
INT
,
1
,
UNIT_CONV_NO_UNITS
,
parts
,
num_ngb_force
);
list
[
12
]
=
io_make_output_field
(
"Ids_ngb_density"
,
LONGLONG
,
NUM_OF_NEIGHBOURS
,
UNIT_CONV_NO_UNITS
,
parts
,
ids_ngbs_density
);
list
[
13
]
=
io_make_output_field
(
"Ids_ngb_force"
,
LONGLONG
,
NUM_OF_NEIGHBOURS
,
UNIT_CONV_NO_UNITS
,
parts
,
ids_ngbs_force
);
#endif
}
/**
...
...
src/hydro/Gadget2/hydro_part.h
View file @
24be404f
...
...
@@ -33,6 +33,10 @@
#include
"cooling_struct.h"
/* Length of the per-particle ids_ngbs_density / ids_ngbs_force arrays that
 * exist when DEBUG_INTERACTIONS is defined. */
#define NUM_OF_NEIGHBOURS 128
/* Particle ID that the DEBUG_INTERACTIONS code compares against before
 * recording neighbour IDs. The value must be representable as a long long
 * (the element type of the ids_ngbs_* arrays); a constant that is too large
 * for every integer type is not valid C. The default is the largest long long
 * value, written out as a literal so no extra header is needed.
 * NOTE(review): presumably the default is a placeholder that matches no real
 * particle -- confirm, and set a real ID (see example below) to trace one. */
#define CHECK_PART_ID 9223372036854775807LL
//#define CHECK_PART_ID 5673486482283
/* Extra particle data not needed during the SPH loops over neighbours. */
struct
xpart
{
...
...
@@ -143,6 +147,20 @@ struct part {
#endif
#ifdef DEBUG_INTERACTIONS
/*! List of interacting particles in the density SELF and PAIR */
long
long
ids_ngbs_density
[
NUM_OF_NEIGHBOURS
];
/*! List of interacting particles in the force SELF and PAIR */
long
long
ids_ngbs_force
[
NUM_OF_NEIGHBOURS
];
/*! Number of interactions in the density SELF and PAIR */
int
num_ngb_density
;
/*! Number of interactions in the force SELF and PAIR */
int
num_ngb_force
;
#endif
}
SWIFT_STRUCT_ALIGN
;
#endif
/* SWIFT_GADGET2_HYDRO_PART_H */
src/runner.c
View file @
24be404f
...
...
@@ -1885,7 +1885,7 @@ void *runner_main(void *data) {
runner_dopair1_branch_gradient
(
r
,
ci
,
cj
);
#endif
else
if
(
t
->
subtype
==
task_subtype_force
)
runner_dopair2_force
(
r
,
ci
,
cj
);
runner_dopair2_
branch_
force
(
r
,
ci
,
cj
);
else
if
(
t
->
subtype
==
task_subtype_grav
)
runner_dopair_grav
(
r
,
ci
,
cj
,
1
);
else
...
...
src/runner_doiact.h
View file @
24be404f
...
...
@@ -32,6 +32,9 @@
#define _DOPAIR1(f) PASTE(runner_dopair1, f)
#define DOPAIR1 _DOPAIR1(FUNCTION)
#define _DOPAIR2_BRANCH(f) PASTE(runner_dopair2_branch, f)
#define DOPAIR2_BRANCH _DOPAIR2_BRANCH(FUNCTION)
#define _DOPAIR2(f) PASTE(runner_dopair2, f)
#define DOPAIR2 _DOPAIR2(FUNCTION)
...
...
@@ -778,37 +781,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
const
struct
entry
*
restrict
sort_j
=
cj
->
sort
[
sid
];
#ifdef SWIFT_DEBUG_CHECKS
/* Check that the dx_max_sort values in the cell are indeed an upper
bound on particle movement. */
for
(
int
pid
=
0
;
pid
<
ci
->
count
;
pid
++
)
{
const
struct
part
*
p
=
&
ci
->
parts
[
sort_i
[
pid
].
i
];
const
float
d
=
p
->
x
[
0
]
*
runner_shift
[
sid
][
0
]
+
p
->
x
[
1
]
*
runner_shift
[
sid
][
1
]
+
p
->
x
[
2
]
*
runner_shift
[
sid
][
2
];
if
(
fabsf
(
d
-
sort_i
[
pid
].
d
)
-
ci
->
dx_max_sort
>
1.0e-4
*
max
(
fabsf
(
d
),
ci
->
dx_max_sort_old
))
error
(
"particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d "
"cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e "
"ci->dx_max_sort_old=%e"
,
ci
->
nodeID
,
cj
->
nodeID
,
d
,
sort_i
[
pid
].
d
,
ci
->
dx_max_sort
,
ci
->
dx_max_sort_old
);
}
for
(
int
pjd
=
0
;
pjd
<
cj
->
count
;
pjd
++
)
{
const
struct
part
*
p
=
&
cj
->
parts
[
sort_j
[
pjd
].
i
];
const
float
d
=
p
->
x
[
0
]
*
runner_shift
[
sid
][
0
]
+
p
->
x
[
1
]
*
runner_shift
[
sid
][
1
]
+
p
->
x
[
2
]
*
runner_shift
[
sid
][
2
];
if
(
fabsf
(
d
-
sort_j
[
pjd
].
d
)
-
cj
->
dx_max_sort
>
1.0e-4
*
max
(
fabsf
(
d
),
cj
->
dx_max_sort_old
))
error
(
"particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d "
"ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e "
"cj->dx_max_sort_old=%e"
,
cj
->
nodeID
,
ci
->
nodeID
,
d
,
sort_j
[
pjd
].
d
,
cj
->
dx_max_sort
,
cj
->
dx_max_sort_old
);
}
/* Some constants used to checks that the parts are in the right frame */
const
float
shift_threshold_x
=
2
.
*
ci
->
width
[
0
]
+
2
.
*
max
(
ci
->
dx_max_part
,
cj
->
dx_max_part
);
...
...
@@ -816,7 +788,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
2
.
*
ci
->
width
[
1
]
+
2
.
*
max
(
ci
->
dx_max_part
,
cj
->
dx_max_part
);
const
float
shift_threshold_z
=
2
.
*
ci
->
width
[
2
]
+
2
.
*
max
(
ci
->
dx_max_part
,
cj
->
dx_max_part
);
#endif
/* SWIFT_DEBUG_CHECKS */
/* Get some other useful values. */
...
...
@@ -1025,6 +996,43 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
cj
->
dx_max_sort_old
>
space_maxreldx
*
cj
->
dmin
)
error
(
"Interacting unsorted cells."
);
#ifdef SWIFT_DEBUG_CHECKS
/* Pick-out the sorted lists. */
const
struct
entry
*
restrict
sort_i
=
ci
->
sort
[
sid
];
const
struct
entry
*
restrict
sort_j
=
cj
->
sort
[
sid
];
/* Check that the dx_max_sort values in the cell are indeed an upper
bound on particle movement. */
for
(
int
pid
=
0
;
pid
<
ci
->
count
;
pid
++
)
{
const
struct
part
*
p
=
&
ci
->
parts
[
sort_i
[
pid
].
i
];
const
float
d
=
p
->
x
[
0
]
*
runner_shift
[
sid
][
0
]
+
p
->
x
[
1
]
*
runner_shift
[
sid
][
1
]
+
p
->
x
[
2
]
*
runner_shift
[
sid
][
2
];
if
(
fabsf
(
d
-
sort_i
[
pid
].
d
)
-
ci
->
dx_max_sort
>
1.0e-4
*
max
(
fabsf
(
d
),
ci
->
dx_max_sort_old
))
error
(
"particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d "
"cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e "
"ci->dx_max_sort_old=%e"
,
ci
->
nodeID
,
cj
->
nodeID
,
d
,
sort_i
[
pid
].
d
,
ci
->
dx_max_sort
,
ci
->
dx_max_sort_old
);
}
for
(
int
pjd
=
0
;
pjd
<
cj
->
count
;
pjd
++
)
{
const
struct
part
*
p
=
&
cj
->
parts
[
sort_j
[
pjd
].
i
];
const
float
d
=
p
->
x
[
0
]
*
runner_shift
[
sid
][
0
]
+
p
->
x
[
1
]
*
runner_shift
[
sid
][
1
]
+
p
->
x
[
2
]
*
runner_shift
[
sid
][
2
];
if
(
fabsf
(
d
-
sort_j
[
pjd
].
d
)
-
cj
->
dx_max_sort
>
1.0e-4
*
max
(
fabsf
(
d
),
cj
->
dx_max_sort_old
))
error
(
"particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d "
"ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e "
"cj->dx_max_sort_old=%e"
,
cj
->
nodeID
,
ci
->
nodeID
,
d
,
sort_j
[
pjd
].
d
,
cj
->
dx_max_sort
,
cj
->
dx_max_sort_old
);
}
#endif
/* SWIFT_DEBUG_CHECKS */
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \
(DOPAIR1_BRANCH == runner_dopair1_density_branch)
if
(
!
sort_is_corner
(
sid
))
...
...
@@ -1043,7 +1051,8 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {
* @param ci The first #cell.
* @param cj The second #cell.
*/
void
DOPAIR2
(
struct
runner
*
r
,
struct
cell
*
ci
,
struct
cell
*
cj
)
{
void
DOPAIR2
(
struct
runner
*
r
,
struct
cell
*
ci
,
struct
cell
*
cj
,
const
int
sid
,
const
double
*
shift
)
{
struct
engine
*
restrict
e
=
r
->
e
;
...
...
@@ -1054,25 +1063,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
TIMER_TIC
;
/* Anything to do here? */
if
(
!
cell_is_active
(
ci
,
e
)
&&
!
cell_is_active
(
cj
,
e
))
return
;
if
(
!
cell_are_part_drifted
(
ci
,
e
)
||
!
cell_are_part_drifted
(
cj
,
e
))
error
(
"Interacting undrifted cells."
);
/* Get the shift ID. */
double
shift
[
3
]
=
{
0
.
0
,
0
.
0
,
0
.
0
};
const
int
sid
=
space_getsid
(
e
->
s
,
&
ci
,
&
cj
,
shift
);
/* Have the cells been sorted? */
if
(
!
(
ci
->
sorted
&
(
1
<<
sid
))
||
ci
->
dx_max_sort_old
>
space_maxreldx
*
ci
->
dmin
)
error
(
"Interacting unsorted cells."
);
if
(
!
(
cj
->
sorted
&
(
1
<<
sid
))
||
cj
->
dx_max_sort_old
>
space_maxreldx
*
cj
->
dmin
)
error
(
"Interacting unsorted cells."
);
/* Get the cutoff shift. */
/* Get the cutoff shift. */
double
rshift
=
0
.
0
;
for
(
int
k
=
0
;
k
<
3
;
k
++
)
rshift
+=
shift
[
k
]
*
runner_shift
[
sid
][
k
];
...
...
@@ -1490,6 +1481,87 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
TIMER_TOC
(
TIMER_DOPAIR
);
}
/**
* @brief Determine which version of DOPAIR2 needs to be called depending on the
* orientation of the cells or whether DOPAIR2 needs to be called at all.
*
* @param r #runner
* @param ci #cell ci
* @param cj #cell cj
*
*/
void
DOPAIR2_BRANCH
(
struct
runner
*
r
,
struct
cell
*
ci
,
struct
cell
*
cj
)
{
const
struct
engine
*
restrict
e
=
r
->
e
;
/* Anything to do here? */
if
(
!
cell_is_active
(
ci
,
e
)
&&
!
cell_is_active
(
cj
,
e
))
return
;
/* Check that cells are drifted. */
if
(
!
cell_are_part_drifted
(
ci
,
e
)
||
!
cell_are_part_drifted
(
cj
,
e
))
error
(
"Interacting undrifted cells."
);
/* Get the sort ID. */
double
shift
[
3
]
=
{
0
.
0
,
0
.
0
,
0
.
0
};
const
int
sid
=
space_getsid
(
e
->
s
,
&
ci
,
&
cj
,
shift
);
/* Have the cells been sorted? */
if
(
!
(
ci
->
sorted
&
(
1
<<
sid
))
||
ci
->
dx_max_sort_old
>
space_maxreldx
*
ci
->
dmin
)
error
(
"Interacting unsorted cells."
);
if
(
!
(
cj
->
sorted
&
(
1
<<
sid
))
||
cj
->
dx_max_sort_old
>
space_maxreldx
*
cj
->
dmin
)
error
(
"Interacting unsorted cells."
);
#ifdef SWIFT_DEBUG_CHECKS
/* Pick-out the sorted lists. */
const
struct
entry
*
restrict
sort_i
=
ci
->
sort
[
sid
];
const
struct
entry
*
restrict
sort_j
=
cj
->
sort
[
sid
];
/* Check that the dx_max_sort values in the cell are indeed an upper
bound on particle movement. */
for
(
int
pid
=
0
;
pid
<
ci
->
count
;
pid
++
)
{
const
struct
part
*
p
=
&
ci
->
parts
[
sort_i
[
pid
].
i
];