Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
699d0b20
Commit
699d0b20
authored
Aug 22, 2017
by
Peter W. Draper
Browse files
Merge remote-tracking branch 'origin/master' into cells-per-task
parents
92b0509c
06771ad2
Changes
52
Hide whitespace changes
Inline
Side-by-side
.gitignore
View file @
699d0b20
...
...
@@ -34,7 +34,7 @@ examples/*/*/*.txt
examples/*/*/used_parameters.yml
examples/*/gravity_checks_*.dat
tests/testPair
tests/test
Active
Pair
tests/brute_force_periodic_BC_standard.dat
tests/swift_periodic_BC_standard.dat
tests/brute_force_periodic_BC_pertrubed.dat
...
...
@@ -54,6 +54,11 @@ tests/brute_force_125_standard.dat
tests/swift_dopair_125_standard.dat
tests/brute_force_125_perturbed.dat
tests/swift_dopair_125_perturbed.dat
tests/brute_force_active.dat
tests/brute_force_periodic_BC_perturbed.dat
tests/swift_dopair_active.dat
tests/test_nonsym_density_serial.dat
tests/test_nonsym_density_vec.dat
tests/testGreetings
tests/testReading
tests/input.hdf5
...
...
@@ -75,8 +80,6 @@ tests/test27cells.sh
tests/test27cellsPerturbed.sh
tests/test125cells.sh
tests/test125cellsPerturbed.sh
tests/testPair.sh
tests/testPairPerturbed.sh
tests/testParser.sh
tests/testReading.sh
tests/testAdiabaticIndex
...
...
configure.ac
View file @
699d0b20
...
...
@@ -861,14 +861,14 @@ AM_CONDITIONAL([HAVE_DOXYGEN], [test "$ac_cv_path_ac_pt_DX_DOXYGEN" != ""])
# Handle .in files.
AC_CONFIG_FILES([Makefile src/Makefile examples/Makefile doc/Makefile doc/Doxyfile tests/Makefile])
AC_CONFIG_FILES([tests/testReading.sh], [chmod +x tests/testReading.sh])
AC_CONFIG_FILES([tests/testPair.sh], [chmod +x tests/testPair.sh])
AC_CONFIG_FILES([tests/testPairPerturbed.sh], [chmod +x tests/testPairPerturbed.sh])
AC_CONFIG_FILES([tests/testActivePair.sh], [chmod +x tests/testActivePair.sh])
AC_CONFIG_FILES([tests/test27cells.sh], [chmod +x tests/test27cells.sh])
AC_CONFIG_FILES([tests/test27cellsPerturbed.sh], [chmod +x tests/test27cellsPerturbed.sh])
AC_CONFIG_FILES([tests/test125cells.sh], [chmod +x tests/test125cells.sh])
AC_CONFIG_FILES([tests/test125cellsPerturbed.sh], [chmod +x tests/test125cellsPerturbed.sh])
AC_CONFIG_FILES([tests/testPeriodicBC.sh], [chmod +x tests/testPeriodicBC.sh])
AC_CONFIG_FILES([tests/testPeriodicBCPerturbed.sh], [chmod +x tests/testPeriodicBCPerturbed.sh])
AC_CONFIG_FILES([tests/testInteractions.sh], [chmod +x tests/testInteractions.sh])
AC_CONFIG_FILES([tests/testParser.sh], [chmod +x tests/testParser.sh])
# Save the compilation options
...
...
examples/EAGLE_12/eagle_12.yml
View file @
699d0b20
...
...
@@ -26,7 +26,7 @@ Statistics:
# Parameters for the self-gravity scheme
Gravity
:
eta
:
0.025
# Constant dimensionless multiplier for time integration.
epsilon
:
0.00
0
1
# Softening length (in internal units).
epsilon
:
0.001
# Softening length (in internal units).
theta
:
0.7
# Opening angle (Multipole acceptance criterion)
# Parameters for the hydrodynamics scheme
...
...
examples/EAGLE_6/eagle_6.yml
View file @
699d0b20
...
...
@@ -12,6 +12,9 @@ TimeIntegration:
time_end
:
1e-2
# The end time of the simulation (in internal units).
dt_min
:
1e-10
# The minimal time-step size of the simulation (in internal units).
dt_max
:
1e-4
# The maximal time-step size of the simulation (in internal units).
Scheduler
:
cell_split_size
:
64
# Parameters governing the snapshots
Snapshots
:
...
...
examples/PerturbedBox_2D/perturbedPlane.yml
View file @
699d0b20
...
...
@@ -9,7 +9,7 @@ InternalUnitSystem:
# Parameters governing the time integration
TimeIntegration
:
time_begin
:
0.
# The starting time of the simulation (in internal units).
time_end
:
10
.
# The end time of the simulation (in internal units).
time_end
:
10
00.
# The end time of the simulation (in internal units).
dt_min
:
1e-6
# The minimal time-step size of the simulation (in internal units).
dt_max
:
1e-2
# The maximal time-step size of the simulation (in internal units).
...
...
@@ -21,12 +21,11 @@ Snapshots:
# Parameters governing the conserved quantities statistics
Statistics
:
delta_time
:
1
e-3
# Time between statistics output
delta_time
:
1
.
# Time between statistics output
# Parameters for the hydrodynamics scheme
SPH
:
resolution_eta
:
1.2348
# Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
delta_neighbours
:
0.1
# The tolerance for the targeted number of neighbours.
CFL_condition
:
0.1
# Courant-Friedrich-Levy condition for time integration.
# Parameters related to the initial conditions
...
...
examples/PerturbedBox_3D/perturbedBox.yml
View file @
699d0b20
...
...
@@ -9,9 +9,9 @@ InternalUnitSystem:
# Parameters governing the time integration
TimeIntegration
:
time_begin
:
0.
# The starting time of the simulation (in internal units).
time_end
:
1
.
# The end time of the simulation (in internal units).
time_end
:
1
000
# The end time of the simulation (in internal units).
dt_min
:
1e-6
# The minimal time-step size of the simulation (in internal units).
dt_max
:
1e-
3
# The maximal time-step size of the simulation (in internal units).
dt_max
:
1e-
2
# The maximal time-step size of the simulation (in internal units).
# Parameters governing the snapshots
Snapshots
:
...
...
@@ -21,12 +21,11 @@ Snapshots:
# Parameters governing the conserved quantities statistics
Statistics
:
delta_time
:
1
e-3
# Time between statistics output
delta_time
:
1
.
# Time between statistics output
# Parameters for the hydrodynamics scheme
SPH
:
resolution_eta
:
1.2348
# Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
delta_neighbours
:
0.1
# The tolerance for the targeted number of neighbours.
CFL_condition
:
0.1
# Courant-Friedrich-Levy condition for time integration.
# Parameters related to the initial conditions
...
...
examples/main.c
View file @
699d0b20
...
...
@@ -190,7 +190,11 @@ int main(int argc, char *argv[]) {
while
((
c
=
getopt
(
argc
,
argv
,
"acCdDef:FgGhMn:P:sSt:Tv:y:Y:"
))
!=
-
1
)
switch
(
c
)
{
case
'a'
:
#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA)
with_aff
=
1
;
#else
error
(
"Need NUMA support for thread affinity"
);
#endif
break
;
case
'c'
:
with_cosmology
=
1
;
...
...
@@ -392,8 +396,12 @@ int main(int argc, char *argv[]) {
parser_read_file
(
paramFileName
,
params
);
/* Handle any command-line overrides. */
if
(
nparams
>
0
)
if
(
nparams
>
0
)
{
message
(
"Overwriting values read from the YAML file with command-line "
"values."
);
for
(
int
k
=
0
;
k
<
nparams
;
k
++
)
parser_set_param
(
params
,
cmdparams
[
k
]);
}
/* And dump the parameters as used. */
// parser_print_params(&params);
...
...
@@ -565,6 +573,11 @@ int main(int argc, char *argv[]) {
message
(
"nr of cells at depth %i is %i."
,
data
[
0
],
data
[
1
]);
}
/* Initialise the table of Ewald corrections for the gravity checks */
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
if
(
periodic
)
gravity_exact_force_ewald_init
(
dim
[
0
]);
#endif
/* Initialise the external potential properties */
struct
external_potential
potential
;
if
(
with_external_gravity
)
...
...
src/Makefile.am
View file @
699d0b20
...
...
@@ -64,7 +64,7 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h
kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h
\
runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h
\
dimension.h equation_of_state.h part_type.h periodic.h
\
gravity.h gravity_io.h
\
gravity.h gravity_io.h
gravity_cache.h
\
gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h
\
gravity/Default/gravity_debug.h gravity/Default/gravity_part.h
\
sourceterms.h
\
...
...
src/align.h
View file @
699d0b20
...
...
@@ -23,9 +23,71 @@
* @brief The default struct alignment in SWIFT.
*/
#define SWIFT_STRUCT_ALIGNMENT 32
/**
* @brief Defines alignment of structures
*/
#define SWIFT_STRUCT_ALIGN __attribute__((aligned(SWIFT_STRUCT_ALIGNMENT)))
/**
* @brief The default cache alignment in SWIFT.
*/
#define SWIFT_CACHE_ALIGNMENT 64
/**
* @brief Defines alignment of caches
*/
#define SWIFT_CACHE_ALIGN __attribute__((aligned(SWIFT_CACHE_ALIGNMENT)))
/**
* @brief Macro to tell the compiler that a given array has the specified
* alignment.
*
* Note that this turns into a no-op but gives information to the compiler.
*
* @param array The array.
* @param alignment The alignment in bytes of the array.
*/
#if defined(__ICC)
#define swift_align_information(array, alignment) \
__assume_aligned(array, alignment);
#elif defined(__GNUC__)
#define swift_align_information(array, alignment) \
array = __builtin_assume_aligned(array, alignment);
#else
#define swift_align_information(array, alignment) ;
#endif
/**
* @brief Macro to declare a restrict pointer to an array and tell the compiler
* that the array has the specified alignment.
*
* Note that the alignment hint itself turns into a no-op, but the macro also
* declares the new pointer variable named by @c array in the enclosing scope.
*
* @param type The type of the elements of the array.
* @param array The name of the restrict pointer to declare.
* @param ptr The existing pointer used to initialise @c array.
* @param alignment The alignment in bytes of the array.
*/
#define swift_declare_aligned_ptr(type, array, ptr, alignment) \
type *restrict array = ptr; \
swift_align_information(array, alignment);
/**
* @brief Macro to tell the compiler that a given number is 0 modulo a given
* size.
*
* Note that this turns into a no-op but gives information to the compiler.
* GCC does not have the equivalent built-in so defaults to nothing.
*
* @param var The variable
* @param size The modulo of interest.
*/
#if defined(__ICC)
#define swift_assume_size(var, size) __assume(var % size == 0);
#else
#define swift_assume_size(var, size) ;
#endif
#endif
/* SWIFT_ALIGN_H */
src/cache.h
View file @
699d0b20
...
...
@@ -23,6 +23,7 @@
#include
"../config.h"
/* Local headers */
#include
"align.h"
#include
"cell.h"
#include
"error.h"
#include
"part.h"
...
...
@@ -30,9 +31,7 @@
#include
"vector.h"
#define NUM_VEC_PROC 2
#define CACHE_ALIGN 64
#define C2_CACHE_SIZE (NUM_VEC_PROC * VEC_SIZE * 6) + (NUM_VEC_PROC * VEC_SIZE)
#define C2_CACHE_ALIGN sizeof(float) * VEC_SIZE
#ifdef WITH_VECTORIZATION
/* Cache struct to hold a local copy of a cell's particle
...
...
@@ -40,31 +39,31 @@
struct
cache
{
/* Particle x position. */
float
*
restrict
x
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
x
SWIFT_
CACHE_ALIGN
;
/* Particle y position. */
float
*
restrict
y
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
y
SWIFT_
CACHE_ALIGN
;
/* Particle z position. */
float
*
restrict
z
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
z
SWIFT_
CACHE_ALIGN
;
/* Particle smoothing length. */
float
*
restrict
h
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
h
SWIFT_
CACHE_ALIGN
;
/* Particle mass. */
float
*
restrict
m
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
m
SWIFT_
CACHE_ALIGN
;
/* Particle x velocity. */
float
*
restrict
vx
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
vx
SWIFT_
CACHE_ALIGN
;
/* Particle y velocity. */
float
*
restrict
vy
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
vy
SWIFT_
CACHE_ALIGN
;
/* Particle z velocity. */
float
*
restrict
vz
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
float
*
restrict
vz
SWIFT_
CACHE_ALIGN
;
/* Maximum
distance of particles into neighbouring cell
. */
floa
t
*
restrict
max_
d
__attribute__
((
aligned
(
CACHE_ALIGN
)))
;
/* Maximum
index into neighbouring cell for particles that are in range
. */
in
t
*
restrict
max_
index
SWIFT_
CACHE_ALIGN
;
/* Cache size. */
int
count
;
...
...
@@ -75,28 +74,28 @@ struct cache {
struct
c2_cache
{
/* Separation between two particles squared. */
float
r2q
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
r2q
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* x separation between two particles. */
float
dxq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
dxq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* y separation between two particles. */
float
dyq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
dyq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* z separation between two particles. */
float
dzq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
dzq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* Mass of particle pj. */
float
mq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
mq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* x velocity of particle pj. */
float
vxq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
vxq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* y velocity of particle pj. */
float
vyq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
vyq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
/* z velocity of particle pj. */
float
vzq
[
C2_CACHE_SIZE
]
__attribute__
((
aligned
(
C2
_CACHE_ALIGN
)))
;
float
vzq
[
C2_CACHE_SIZE
]
SWIFT
_CACHE_ALIGN
;
};
/**
...
...
@@ -111,9 +110,10 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
/* Align cache on correct byte boundary and pad cache size to be a multiple of
* the vector size
* and include 2 vector lengths for remainder operations. */
unsigned
in
t
pad
=
2
*
VEC_SIZE
,
rem
=
count
%
VEC_SIZE
;
size_
t
pad
=
2
*
VEC_SIZE
,
rem
=
count
%
VEC_SIZE
;
if
(
rem
>
0
)
pad
+=
VEC_SIZE
-
rem
;
unsigned
int
sizeBytes
=
(
count
+
pad
)
*
sizeof
(
float
);
size_t
sizeBytes
=
(
count
+
pad
)
*
sizeof
(
float
);
size_t
sizeIntBytes
=
(
count
+
pad
)
*
sizeof
(
int
);
int
error
=
0
;
/* Free memory if cache has already been allocated. */
...
...
@@ -126,18 +126,19 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
free
(
c
->
vy
);
free
(
c
->
vz
);
free
(
c
->
h
);
free
(
c
->
max_
d
);
free
(
c
->
max_
index
);
}
error
+=
posix_memalign
((
void
**
)
&
c
->
x
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
y
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
z
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
m
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vx
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vy
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vz
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
h
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
max_d
,
CACHE_ALIGN
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
x
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
y
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
z
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
m
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vx
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vy
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
vz
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
h
,
SWIFT_CACHE_ALIGNMENT
,
sizeBytes
);
error
+=
posix_memalign
((
void
**
)
&
c
->
max_index
,
SWIFT_CACHE_ALIGNMENT
,
sizeIntBytes
);
if
(
error
!=
0
)
error
(
"Couldn't allocate cache, no. of particles: %d"
,
(
int
)
count
);
...
...
@@ -151,156 +152,43 @@ __attribute__((always_inline)) INLINE void cache_init(struct cache *c,
* @param ci_cache The cache.
*/
__attribute__
((
always_inline
))
INLINE
void
cache_read_particles
(
const
struct
cell
*
const
ci
,
struct
cache
*
const
ci_cache
)
{
const
struct
cell
*
restrict
const
ci
,
struct
cache
*
restrict
const
ci_cache
)
{
#if defined(GADGET2_SPH)
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
#if defined(WITH_VECTORIZATION) && defined(__ICC)
#pragma vector aligned
#endif
for
(
int
i
=
0
;
i
<
ci
->
count
;
i
++
)
{
ci_cache
->
x
[
i
]
=
ci
->
parts
[
i
].
x
[
0
]
-
ci
->
loc
[
0
];
ci_cache
->
y
[
i
]
=
ci
->
parts
[
i
].
x
[
1
]
-
ci
->
loc
[
1
];
ci_cache
->
z
[
i
]
=
ci
->
parts
[
i
].
x
[
2
]
-
ci
->
loc
[
2
];
ci_cache
->
h
[
i
]
=
ci
->
parts
[
i
].
h
;
ci_cache
->
m
[
i
]
=
ci
->
parts
[
i
].
mass
;
ci_cache
->
vx
[
i
]
=
ci
->
parts
[
i
].
v
[
0
];
ci_cache
->
vy
[
i
]
=
ci
->
parts
[
i
].
v
[
1
];
ci_cache
->
vz
[
i
]
=
ci
->
parts
[
i
].
v
[
2
];
}
#endif
}
/**
* @brief Populate cache by reading in the particles from two cells in unsorted
* order.
*
* @param ci The i #cell.
* @param cj The j #cell.
* @param ci_cache The cache for cell ci.
* @param cj_cache The cache for cell cj.
* @param shift The amount to shift the particle positions to account for BCs
*/
__attribute__
((
always_inline
))
INLINE
void
cache_read_two_cells
(
const
struct
cell
*
const
ci
,
const
struct
cell
*
const
cj
,
struct
cache
*
const
ci_cache
,
struct
cache
*
const
cj_cache
,
const
double
*
const
shift
)
{
/* Shift the particles positions to a local frame (ci frame) so single
* precision can be
* used instead of double precision. Also shift the cell ci, particles
* positions due to BCs but leave cell cj. */
for
(
int
i
=
0
;
i
<
ci
->
count
;
i
++
)
{
ci_cache
->
x
[
i
]
=
ci
->
parts
[
i
].
x
[
0
]
-
ci
->
loc
[
0
]
-
shift
[
0
];
ci_cache
->
y
[
i
]
=
ci
->
parts
[
i
].
x
[
1
]
-
ci
->
loc
[
1
]
-
shift
[
1
];
ci_cache
->
z
[
i
]
=
ci
->
parts
[
i
].
x
[
2
]
-
ci
->
loc
[
2
]
-
shift
[
2
];
ci_cache
->
h
[
i
]
=
ci
->
parts
[
i
].
h
;
ci_cache
->
m
[
i
]
=
ci
->
parts
[
i
].
mass
;
ci_cache
->
vx
[
i
]
=
ci
->
parts
[
i
].
v
[
0
];
ci_cache
->
vy
[
i
]
=
ci
->
parts
[
i
].
v
[
1
];
ci_cache
->
vz
[
i
]
=
ci
->
parts
[
i
].
v
[
2
];
}
for
(
int
i
=
0
;
i
<
cj
->
count
;
i
++
)
{
cj_cache
->
x
[
i
]
=
cj
->
parts
[
i
].
x
[
0
]
-
ci
->
loc
[
0
];
cj_cache
->
y
[
i
]
=
cj
->
parts
[
i
].
x
[
1
]
-
ci
->
loc
[
1
];
cj_cache
->
z
[
i
]
=
cj
->
parts
[
i
].
x
[
2
]
-
ci
->
loc
[
2
];
cj_cache
->
h
[
i
]
=
cj
->
parts
[
i
].
h
;
cj_cache
->
m
[
i
]
=
cj
->
parts
[
i
].
mass
;
cj_cache
->
vx
[
i
]
=
cj
->
parts
[
i
].
v
[
0
];
cj_cache
->
vy
[
i
]
=
cj
->
parts
[
i
].
v
[
1
];
cj_cache
->
vz
[
i
]
=
cj
->
parts
[
i
].
v
[
2
];
}
}
__attribute__
((
always_inline
))
INLINE
void
cache_read_cell_sorted
(
const
struct
cell
*
const
ci
,
struct
cache
*
const
ci_cache
,
const
struct
entry
*
restrict
sort_i
,
double
*
const
loc
,
double
*
const
shift
)
{
int
idx
;
/* Shift the particles positions to a local frame (ci frame) so single precision
* can be
* used instead of double precision. Also shift the cell ci, particles positions
* due to BCs but leave cell cj. */
#if defined(WITH_VECTORIZATION) && defined(__ICC)
#pragma simd
#endif
for
(
int
i
=
0
;
i
<
ci
->
count
;
i
++
)
{
idx
=
sort_i
[
i
].
i
;
ci_cache
->
x
[
i
]
=
ci
->
parts
[
idx
].
x
[
0
]
-
loc
[
0
]
-
shift
[
0
];
ci_cache
->
y
[
i
]
=
ci
->
parts
[
idx
].
x
[
1
]
-
loc
[
1
]
-
shift
[
1
];
ci_cache
->
z
[
i
]
=
ci
->
parts
[
idx
].
x
[
2
]
-
loc
[
2
]
-
shift
[
2
];
ci_cache
->
h
[
i
]
=
ci
->
parts
[
idx
].
h
;
ci_cache
->
m
[
i
]
=
ci
->
parts
[
idx
].
mass
;
ci_cache
->
vx
[
i
]
=
ci
->
parts
[
idx
].
v
[
0
];
ci_cache
->
vy
[
i
]
=
ci
->
parts
[
idx
].
v
[
1
];
ci_cache
->
vz
[
i
]
=
ci
->
parts
[
idx
].
v
[
2
];
}
}
/**
* @brief Populate cache by reading in the particles from two cells in sorted
* order.
*
* @param ci The i #cell.
* @param cj The j #cell.
* @param ci_cache The #cache for cell ci.
* @param cj_cache The #cache for cell cj.
* @param sort_i The array of sorted particle indices for cell ci.
* @param sort_j The array of sorted particle indices for cell cj.
* @param shift The amount to shift the particle positions to account for BCs
*/
__attribute__
((
always_inline
))
INLINE
void
cache_read_two_cells_sorted
(
const
struct
cell
*
const
ci
,
const
struct
cell
*
const
cj
,
struct
cache
*
const
ci_cache
,
struct
cache
*
const
cj_cache
,
const
struct
entry
*
restrict
sort_i
,
const
struct
entry
*
restrict
sort_j
,
const
double
*
const
shift
)
{
int
idx
;
/* Shift the particles positions to a local frame (ci frame) so single precision
* can be
* used instead of double precision. Also shift the cell ci, particles positions
* due to BCs but leave cell cj. */
#if defined(WITH_VECTORIZATION) && defined(__ICC)
#pragma simd
#endif
/* Let the compiler know that the data is aligned and create pointers to the
* arrays inside the cache. */
swift_declare_aligned_ptr
(
float
,
x
,
ci_cache
->
x
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
y
,
ci_cache
->
y
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
z
,
ci_cache
->
z
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
h
,
ci_cache
->
h
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
m
,
ci_cache
->
m
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vx
,
ci_cache
->
vx
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vy
,
ci_cache
->
vy
,
SWIFT_CACHE_ALIGNMENT
);
swift_declare_aligned_ptr
(
float
,
vz
,
ci_cache
->
vz
,
SWIFT_CACHE_ALIGNMENT
);
const
struct
part
*
restrict
parts
=
ci
->
parts
;
double
loc
[
3
];
loc
[
0
]
=
ci
->
loc
[
0
];
loc
[
1
]
=
ci
->
loc
[
1
];
loc
[
2
]
=
ci
->
loc
[
2
];
/* Shift the particles positions to a local frame so single precision can be
* used instead of double precision. */
for
(
int
i
=
0
;
i
<
ci
->
count
;
i
++
)
{
idx
=
sort_i
[
i
].
i
;
ci_cache
->
x
[
i
]
=
ci
->
parts
[
idx
].
x
[
0
]
-
ci
->
loc
[
0
]
-
shift
[
0
];
ci_cache
->
y
[
i
]
=
ci
->
parts
[
idx
].
x
[
1
]
-
ci
->
loc
[
1
]
-
shift
[
1
];
ci_cache
->
z
[
i
]
=
ci
->
parts
[
idx
].
x
[
2
]
-
ci
->
loc
[
2
]
-
shift
[
2
];
ci_cache
->
h
[
i
]
=
ci
->
parts
[
idx
].
h
;
ci_cache
->
m
[
i
]
=
ci
->
parts
[
idx
].
mass
;
ci_cache
->
vx
[
i
]
=
ci
->
parts
[
idx
].
v
[
0
];
ci_cache
->
vy
[
i
]
=
ci
->
parts
[
idx
].
v
[
1
];
ci_cache
->
vz
[
i
]
=
ci
->
parts
[
idx
].
v
[
2
];
x
[
i
]
=
(
float
)(
parts
[
i
].
x
[
0
]
-
loc
[
0
]);
y
[
i
]
=
(
float
)(
parts
[
i
].
x
[
1
]
-
loc
[
1
]);
z
[
i
]
=
(
float
)(
parts
[
i
].
x
[
2
]
-
loc
[
2
]);
h
[
i
]
=
parts
[
i
].
h
;
m
[
i
]
=
parts
[
i
].
mass
;
vx
[
i
]
=
parts
[
i
].
v
[
0
];
vy
[
i
]
=
parts
[
i
].
v
[
1
];
vz
[
i
]
=
parts
[
i
].
v
[
2
];
}
#if defined(WITH_VECTORIZATION) && defined(__ICC)
#pragma simd
#endif
for
(
int
i
=
0
;
i
<
cj
->
count
;
i
++
)
{
idx
=
sort_j
[
i
].
i
;
cj_cache
->
x
[
i
]
=
cj
->
parts
[
idx
].
x
[
0
]
-
ci
->
loc
[
0
];
cj_cache
->
y
[
i
]
=
cj
->
parts
[
idx
].
x
[
1
]
-
ci
->
loc
[
1
];
cj_cache
->
z
[
i
]
=
cj
->
parts
[
idx
].
x
[
2
]
-
ci
->
loc
[
2
];
cj_cache
->
h
[
i
]
=
cj
->
parts
[
idx
].
h
;
cj_cache
->
m
[
i
]
=
cj
->
parts
[
idx
].
mass
;
cj_cache
->
vx
[
i
]
=
cj
->
parts
[
idx
].
v
[
0
];
cj_cache
->
vy
[
i
]
=
cj
->
parts
[
idx
].
v
[
1
];
cj_cache
->
vz
[
i
]
=
cj
->
parts
[
idx
].
v
[
2
];
}
}
/**
...
...
@@ -321,13 +209,13 @@ __attribute__((always_inline)) INLINE void cache_read_two_cells_sorted(
* interaction.
*/
__attribute__
((
always_inline
))
INLINE
void
cache_read_two_partial_cells_sorted
(
const
struct
cell
*
const
ci
,
const
struct
cell
*
const
cj
,
struct
cache
*
const
ci_cache
,
str
u
ct
cache
*
const
c
j
_cache
,
const
struct
entry
*
restrict
sort_i
,
const
struct
entry
*
restrict
sort_
j
,
const
double
*
const
shift
,
int
*
first_pi
,
int
*
last_pj
,
const
int
num_vec_proc
)
{
const
struct
cell
*
restrict
const
ci
,
const
struct
cell
*
restrict
const
cj
,
struct
cache
*
re
str
i
ct
const
c
i
_cache
,
struct
cache
*
restrict
const
cj_cache
,
const
struct
entry
*
restrict
sort_
i
,
const
struct
entry
*
restrict
sort_j
,
const
double
*
restrict
const
shift
,
int
*
first_pi
,
int
*
last_pj
,
const
int
num_vec_proc
)
{
int
idx
,
ci_cache_
idx
;
int
idx
;
/* Pad number of particles read to the vector size. */
int
rem
=
(
ci
->
count
-
*
first_pi
)
%
(
num_vec_proc
*
VEC_SIZE
);
if
(
rem
!=
0
)
{
...
...
@@ -345,74 +233,97 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
int
first_pi_align
=
*
first_pi
;
int
last_pj_align
=
*
last_pj
;
/* Shift the particles positions to a local frame (ci frame) so single precision
* can be
* used instead of double precision. Also shift the cell ci, particles positions
* due to BCs but leave cell cj. */
#if defined(WITH_VECTORIZATION) && defined(__ICC)
#pragma vector aligned
#endif
for
(
int
i
=
first_pi_align
;
i
<
ci
->
count
;
i
++
)
{
/* Make sure ci_cache is filled from the first element. */
ci_cache_idx
=
i
-
first_pi_align
;
idx
=
sort_i
[
i
].
i
;
ci_cache
->
x
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
x
[
0
]
-
ci
->
loc
[
0
]
-
shift
[
0
];
ci_cache
->
y
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
x
[
1
]
-
ci
->
loc
[
1
]
-
shift
[
1
];
ci_cache
->
z
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
x
[
2
]
-
ci
->
loc
[
2
]
-
shift
[
2
];
ci_cache
->
h
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
h
;
ci_cache
->
m
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
mass
;
ci_cache
->
vx
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
v
[
0
];
ci_cache
->
vy
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
v
[
1
];
ci_cache
->
vz
[
ci_cache_idx
]
=
ci
->
parts
[
idx
].
v
[
2
];
const
struct
part
*
restrict
parts_i
=
ci
->
parts
;
const
struct
part
*
restrict
parts_j
=
cj
->
parts
;
double
loc
[
3
];
loc
[
0
]
=
ci
->
loc
[
0
];