Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
d6590e40
Commit
d6590e40
authored
Aug 16, 2017
by
James Willis
Browse files
Merge branch 'master' into AVX512-Fixes
Conflicts: tests/testPair.c
parents
75d4e1e9
23710c75
Changes
24
Expand all
Hide whitespace changes
Inline
Side-by-side
examples/EAGLE_12/eagle_12.yml
View file @
d6590e40
...
...
@@ -26,7 +26,7 @@ Statistics:
# Parameters for the self-gravity scheme
Gravity
:
eta
:
0.025
# Constant dimensionless multiplier for time integration.
epsilon
:
0.00
0
1
# Softening length (in internal units).
epsilon
:
0.001
# Softening length (in internal units).
theta
:
0.7
# Opening angle (Multipole acceptance criterion)
# Parameters for the hydrodynamics scheme
...
...
examples/EAGLE_6/eagle_6.yml
View file @
d6590e40
...
...
@@ -12,6 +12,9 @@ TimeIntegration:
time_end
:
1e-2
# The end time of the simulation (in internal units).
dt_min
:
1e-10
# The minimal time-step size of the simulation (in internal units).
dt_max
:
1e-4
# The maximal time-step size of the simulation (in internal units).
Scheduler
:
cell_split_size
:
64
# Parameters governing the snapshots
Snapshots
:
...
...
examples/PerturbedBox_2D/perturbedPlane.yml
View file @
d6590e40
...
...
@@ -9,7 +9,7 @@ InternalUnitSystem:
# Parameters governing the time integration
TimeIntegration
:
time_begin
:
0.
# The starting time of the simulation (in internal units).
time_end
:
10
.
# The end time of the simulation (in internal units).
time_end
:
10
00.
# The end time of the simulation (in internal units).
dt_min
:
1e-6
# The minimal time-step size of the simulation (in internal units).
dt_max
:
1e-2
# The maximal time-step size of the simulation (in internal units).
...
...
@@ -21,12 +21,11 @@ Snapshots:
# Parameters governing the conserved quantities statistics
Statistics
:
delta_time
:
1
e-3
# Time between statistics output
delta_time
:
1
.
# Time between statistics output
# Parameters for the hydrodynamics scheme
SPH
:
resolution_eta
:
1.2348
# Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
delta_neighbours
:
0.1
# The tolerance for the targetted number of neighbours.
CFL_condition
:
0.1
# Courant-Friedrich-Levy condition for time integration.
# Parameters related to the initial conditions
...
...
examples/PerturbedBox_3D/perturbedBox.yml
View file @
d6590e40
...
...
@@ -9,9 +9,9 @@ InternalUnitSystem:
# Parameters governing the time integration
TimeIntegration
:
time_begin
:
0.
# The starting time of the simulation (in internal units).
time_end
:
1
.
# The end time of the simulation (in internal units).
time_end
:
1
000
# The end time of the simulation (in internal units).
dt_min
:
1e-6
# The minimal time-step size of the simulation (in internal units).
dt_max
:
1e-
3
# The maximal time-step size of the simulation (in internal units).
dt_max
:
1e-
2
# The maximal time-step size of the simulation (in internal units).
# Parameters governing the snapshots
Snapshots
:
...
...
@@ -21,12 +21,11 @@ Snapshots:
# Parameters governing the conserved quantities statistics
Statistics
:
delta_time
:
1
e-3
# Time between statistics output
delta_time
:
1
.
# Time between statistics output
# Parameters for the hydrodynamics scheme
SPH
:
resolution_eta
:
1.2348
# Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
delta_neighbours
:
0.1
# The tolerance for the targetted number of neighbours.
CFL_condition
:
0.1
# Courant-Friedrich-Levy condition for time integration.
# Parameters related to the initial conditions
...
...
examples/main.c
View file @
d6590e40
...
...
@@ -190,7 +190,11 @@ int main(int argc, char *argv[]) {
while
((
c
=
getopt
(
argc
,
argv
,
"acCdDef:FgGhMn:P:sSt:Tv:y:Y:"
))
!=
-
1
)
switch
(
c
)
{
case
'a'
:
#if defined(HAVE_SETAFFINITY) && defined(HAVE_LIBNUMA)
with_aff
=
1
;
#else
error
(
"Need NUMA support for thread affinity"
);
#endif
break
;
case
'c'
:
with_cosmology
=
1
;
...
...
@@ -392,8 +396,12 @@ int main(int argc, char *argv[]) {
parser_read_file
(
paramFileName
,
params
);
/* Handle any command-line overrides. */
if
(
nparams
>
0
)
if
(
nparams
>
0
)
{
message
(
"Overwriting values read from the YAML file with command-line "
"values."
);
for
(
int
k
=
0
;
k
<
nparams
;
k
++
)
parser_set_param
(
params
,
cmdparams
[
k
]);
}
/* And dump the parameters as used. */
// parser_print_params(¶ms);
...
...
@@ -565,6 +573,11 @@ int main(int argc, char *argv[]) {
message
(
"nr of cells at depth %i is %i."
,
data
[
0
],
data
[
1
]);
}
/* Initialise the table of Ewald corrections for the gravity checks */
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
if
(
periodic
)
gravity_exact_force_ewald_init
(
dim
[
0
]);
#endif
/* Initialise the external potential properties */
struct
external_potential
potential
;
if
(
with_external_gravity
)
...
...
src/Makefile.am
View file @
d6590e40
...
...
@@ -64,7 +64,7 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h cycle.h error.h inline.h
kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h
\
runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h
\
dimension.h equation_of_state.h part_type.h periodic.h
\
gravity.h gravity_io.h
\
gravity.h gravity_io.h
gravity_cache.h
\
gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h
\
gravity/Default/gravity_debug.h gravity/Default/gravity_part.h
\
sourceterms.h
\
...
...
src/align.h
View file @
d6590e40
...
...
@@ -23,9 +23,55 @@
* @brief The default struct alignment in SWIFT.
*/
#define SWIFT_STRUCT_ALIGNMENT 32
/**
* @brief Defines alignment of structures
*/
#define SWIFT_STRUCT_ALIGN __attribute__((aligned(SWIFT_STRUCT_ALIGNMENT)))
/**
* @brief The default cache alignment in SWIFT.
*/
#define SWIFT_CACHE_ALIGNMENT 64
/**
* @brief Defines alignment of caches
*/
#define SWIFT_CACHE_ALIGN __attribute__((aligned(SWIFT_CACHE_ALIGNMENT)))
/**
* @brief Macro to tell the compiler that a given array has the specified
* alignment.
*
* Note that this turns into a no-op but gives information to the compiler.
*
* @param array The array.
* @param alignment The alignment in bytes of the array.
*/
#if defined(__ICC)
#define swift_align_information(array, alignment) \
__assume_aligned(array, alignment);
#elif defined(__GNUC__)
#define swift_align_information(array, alignment) \
array = __builtin_assume_aligned(array, alignment);
#else
#define swift_align_information(array, alignment) ;
#endif
/**
* @brief Macro to tell the compiler that a given number is 0 modulo a given
* size.
*
* Note that this turns into a no-op but gives information to the compiler.
* GCC does not have the equivalent built-in so defaults to nothing.
*
* @param var The variable
* @param size The modulo of interest.
*/
#if defined(__ICC)
#define swift_assume_size(var, size) __assume(var % size == 0);
#else
#define swift_assume_size(var, size) ;
#endif
#endif
/* SWIFT_ALIGN_H */
src/debug.c
View file @
d6590e40
...
...
@@ -26,6 +26,7 @@
/* Some standard headers. */
#include
<float.h>
#include
<stdio.h>
#include
<unistd.h>
/* This object's header. */
#include
"debug.h"
...
...
@@ -450,3 +451,69 @@ void dumpCellRanks(const char *prefix, struct cell *cells_top, int nr_cells) {
}
#endif
/* HAVE_MPI */
/**
* @brief parse the process /proc/self/statm file to get the process
* memory use (in KB). Top field in ().
*
* @param size total virtual memory (VIRT)
* @param resident resident non-swapped memory (RES)
* @param share shared (mmap'd) memory (SHR)
* @param trs text (exe) resident set (CODE)
* @param lrs library resident set
* @param drs data+stack resident set (DATA)
* @param dt dirty pages (nDRT)
*/
void
getProcMemUse
(
long
*
size
,
long
*
resident
,
long
*
share
,
long
*
trs
,
long
*
lrs
,
long
*
drs
,
long
*
dt
)
{
/* Open the file. */
FILE
*
file
=
fopen
(
"/proc/self/statm"
,
"r"
);
if
(
file
!=
NULL
)
{
int
nscan
=
fscanf
(
file
,
"%ld %ld %ld %ld %ld %ld %ld"
,
size
,
resident
,
share
,
trs
,
lrs
,
drs
,
dt
);
if
(
nscan
==
7
)
{
/* Convert pages into bytes. Usually 4096, but could be 512 on some
* systems so take care in conversion to KB. */
long
sz
=
sysconf
(
_SC_PAGESIZE
);
*
size
*=
sz
;
*
resident
*=
sz
;
*
share
*=
sz
;
*
trs
*=
sz
;
*
lrs
*=
sz
;
*
drs
*=
sz
;
*
dt
*=
sz
;
*
size
/=
1024
;
*
resident
/=
1024
;
*
share
/=
1024
;
*
trs
/=
1024
;
*
lrs
/=
1024
;
*
drs
/=
1024
;
*
dt
/=
1024
;
}
else
{
error
(
"Failed to read sufficient fields from /proc/self/statm"
);
}
fclose
(
file
);
}
else
{
error
(
"Failed to open /proc/self/statm"
);
}
}
/**
* @brief Print the current memory use of the process. A la "top".
*/
void
printProcMemUse
()
{
long
size
;
long
resident
;
long
share
;
long
trs
;
long
lrs
;
long
drs
;
long
dt
;
getProcMemUse
(
&
size
,
&
resident
,
&
share
,
&
trs
,
&
lrs
,
&
drs
,
&
dt
);
printf
(
"## VIRT = %ld , RES = %ld , SHR = %ld , CODE = %ld, DATA = %ld
\n
"
,
size
,
resident
,
share
,
trs
,
drs
);
fflush
(
stdout
);
}
src/debug.h
View file @
d6590e40
...
...
@@ -44,4 +44,7 @@ void dumpMETISGraph(const char *prefix, idx_t nvtxs, idx_t ncon, idx_t *xadj,
void
dumpCellRanks
(
const
char
*
prefix
,
struct
cell
*
cells_top
,
int
nr_cells
);
#endif
void
getProcMemUse
(
long
*
size
,
long
*
resident
,
long
*
share
,
long
*
trs
,
long
*
lrs
,
long
*
drs
,
long
*
dt
);
void
printProcMemUse
();
#endif
/* SWIFT_DEBUG_H */
src/engine.c
View file @
d6590e40
...
...
@@ -57,6 +57,7 @@
#include
"error.h"
#include
"gravity.h"
#include
"hydro.h"
#include
"map.h"
#include
"minmax.h"
#include
"parallel_io.h"
#include
"part.h"
...
...
@@ -153,7 +154,7 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
struct
scheduler
*
s
=
&
e
->
sched
;
const
int
periodic
=
e
->
s
->
periodic
;
const
int
is_hydro
=
(
e
->
policy
&
engine_policy_hydro
);
const
int
is_
with_
hydro
=
(
e
->
policy
&
engine_policy_hydro
);
const
int
is_self_gravity
=
(
e
->
policy
&
engine_policy_self_gravity
);
const
int
is_with_cooling
=
(
e
->
policy
&
engine_policy_cooling
);
const
int
is_with_sourceterms
=
(
e
->
policy
&
engine_policy_sourceterms
);
...
...
@@ -162,15 +163,19 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
if
(
c
->
super
==
c
)
{
/* Add the sort task. */
c
->
sorts
=
scheduler_addtask
(
s
,
task_type_sort
,
task_subtype_none
,
0
,
0
,
c
,
NULL
);
if
(
is_with_hydro
)
{
c
->
sorts
=
scheduler_addtask
(
s
,
task_type_sort
,
task_subtype_none
,
0
,
0
,
c
,
NULL
);
}
/* Local tasks only... */
if
(
c
->
nodeID
==
e
->
nodeID
)
{
/* Add the drift task. */
c
->
drift_part
=
scheduler_addtask
(
s
,
task_type_drift_part
,
task_subtype_none
,
0
,
0
,
c
,
NULL
);
if
(
is_with_hydro
)
{
c
->
drift_part
=
scheduler_addtask
(
s
,
task_type_drift_part
,
task_subtype_none
,
0
,
0
,
c
,
NULL
);
}
/* Add the two half kicks */
c
->
kick1
=
scheduler_addtask
(
s
,
task_type_kick1
,
task_subtype_none
,
0
,
0
,
...
...
@@ -207,7 +212,7 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
}
/* Generate the ghost tasks. */
if
(
is_hydro
)
{
if
(
is_
with_
hydro
)
{
c
->
ghost_in
=
scheduler_addtask
(
s
,
task_type_ghost
,
task_subtype_none
,
0
,
/* implicit = */
1
,
c
,
NULL
);
...
...
@@ -215,14 +220,13 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {
scheduler_addtask
(
s
,
task_type_ghost
,
task_subtype_none
,
0
,
/* implicit = */
1
,
c
,
NULL
);
engine_add_ghosts
(
e
,
c
,
c
->
ghost_in
,
c
->
ghost_out
);
}
#ifdef EXTRA_HYDRO_LOOP
/* Generate the extra ghost task. */
if
(
is_hydro
)
/* Generate the extra ghost task. */
c
->
extra_ghost
=
scheduler_addtask
(
s
,
task_type_extra_ghost
,
task_subtype_none
,
0
,
0
,
c
,
NULL
);
#endif
}
/* Cooling task */
if
(
is_with_cooling
)
{
...
...
@@ -898,6 +902,16 @@ void engine_repartition(struct engine *e) {
partition_repartition
(
e
->
reparttype
,
e
->
nodeID
,
e
->
nr_nodes
,
e
->
s
,
e
->
sched
.
tasks
,
e
->
sched
.
nr_tasks
);
/* Partitioning requires copies of the particles, so we need to reduce the
* memory in use to the minimum, we can free the sorting indices and the
* tasks as these will be regenerated at the next rebuild. */
/* Sorting indices. */
if
(
e
->
s
->
cells_top
!=
NULL
)
space_free_cells
(
e
->
s
);
/* Task arrays. */
scheduler_free_tasks
(
&
e
->
sched
);
/* Now comes the tricky part: Exchange particles between all nodes.
This is done in two steps, first allreducing a matrix of
how many particles go from where to where, then re-allocating
...
...
@@ -1731,7 +1745,7 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
/* If the cells is local build a self-interaction */
scheduler_addtask
(
sched
,
task_type_self
,
task_subtype_grav
,
0
,
0
,
ci
,
NULL
);
/* Deal with periodicity dependencies */
/* Deal with periodicity
FFT task
dependencies */
const
int
ghost_id
=
cell_getid
(
cdim_ghost
,
i
/
4
,
j
/
4
,
k
/
4
);
if
(
ghost_id
>
n_ghosts
)
error
(
"Invalid ghost_id"
);
if
(
periodic
)
{
...
...
@@ -1739,6 +1753,10 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
ci
->
grav_ghost
[
1
]
=
ghosts
[
2
*
ghost_id
+
1
];
}
/* Recover the multipole information */
struct
gravity_tensors
*
const
multi_i
=
ci
->
multipole
;
const
double
CoM_i
[
3
]
=
{
multi_i
->
CoM
[
0
],
multi_i
->
CoM
[
1
],
multi_i
->
CoM
[
2
]};
/* Loop over every other cell */
for
(
int
ii
=
0
;
ii
<
cdim
[
0
];
ii
++
)
{
for
(
int
jj
=
0
;
jj
<
cdim
[
1
];
jj
++
)
{
...
...
@@ -1757,11 +1775,27 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements,
/* Is that neighbour local ? */
if
(
cj
->
nodeID
!=
nodeID
)
continue
;
// MATTHIEU
/* Are the cells to close for a MM interaction ? */
if
(
!
gravity_multipole_accept_rebuild
(
ci
->
multipole
,
cj
->
multipole
,
theta_crit_inv
,
periodic
,
dim
))
{
/* Recover the multipole information */
struct
gravity_tensors
*
const
multi_j
=
cj
->
multipole
;
/* Get the distance between the CoMs */
double
dx
=
CoM_i
[
0
]
-
multi_j
->
CoM
[
0
];
double
dy
=
CoM_i
[
1
]
-
multi_j
->
CoM
[
1
];
double
dz
=
CoM_i
[
2
]
-
multi_j
->
CoM
[
2
];
/* Apply BC */
if
(
periodic
)
{
dx
=
nearest
(
dx
,
dim
[
0
]);
dy
=
nearest
(
dy
,
dim
[
1
]);
dz
=
nearest
(
dz
,
dim
[
2
]);
}
const
double
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
/* Are the cells too close for a MM interaction ? */
if
(
!
gravity_multipole_accept_rebuild
(
multi_i
,
multi_j
,
theta_crit_inv
,
r2
))
{
/* Ok, we need to add a direct pair calculation */
scheduler_addtask
(
sched
,
task_type_pair
,
task_subtype_grav
,
0
,
0
,
ci
,
cj
);
}
...
...
@@ -2796,8 +2830,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
/* Kick/Drift/init ? */
if
(
t
->
type
==
task_type_kick1
||
t
->
type
==
task_type_kick2
||
t
->
type
==
task_type_drift_part
||
t
->
type
==
task_type_drift_gpart
||
t
->
type
==
task_type_init_grav
)
{
t
->
type
==
task_type_drift_gpart
||
t
->
type
==
task_type_init_grav
)
{
if
(
cell_is_active
(
t
->
ci
,
e
))
scheduler_activate
(
s
,
t
);
}
...
...
@@ -4517,8 +4550,12 @@ void engine_init(struct engine *e, struct space *s,
e
->
runners
[
k
].
qid
=
k
*
nr_queues
/
e
->
nr_threads
;
}
#ifdef WITH_VECTORIZATION
/* Allocate particle caches. */
e
->
runners
[
k
].
ci_gravity_cache
.
count
=
0
;
e
->
runners
[
k
].
cj_gravity_cache
.
count
=
0
;
gravity_cache_init
(
&
e
->
runners
[
k
].
ci_gravity_cache
,
space_splitsize
);
gravity_cache_init
(
&
e
->
runners
[
k
].
cj_gravity_cache
,
space_splitsize
);
#ifdef WITH_VECTORIZATION
e
->
runners
[
k
].
ci_cache
.
count
=
0
;
e
->
runners
[
k
].
cj_cache
.
count
=
0
;
cache_init
(
&
e
->
runners
[
k
].
ci_cache
,
CACHE_SIZE
);
...
...
@@ -4609,8 +4646,12 @@ void engine_compute_next_snapshot_time(struct engine *e) {
void
engine_clean
(
struct
engine
*
e
)
{
#ifdef WITH_VECTORIZATION
for
(
int
i
=
0
;
i
<
e
->
nr_threads
;
++
i
)
cache_clean
(
&
e
->
runners
[
i
].
ci_cache
);
for
(
int
i
=
0
;
i
<
e
->
nr_threads
;
++
i
)
cache_clean
(
&
e
->
runners
[
i
].
cj_cache
);
for
(
int
i
=
0
;
i
<
e
->
nr_threads
;
++
i
)
{
cache_clean
(
&
e
->
runners
[
i
].
ci_cache
);
cache_clean
(
&
e
->
runners
[
i
].
cj_cache
);
gravity_cache_clean
(
&
e
->
runners
[
i
].
ci_gravity_cache
);
gravity_cache_clean
(
&
e
->
runners
[
i
].
cj_gravity_cache
);
}
#endif
free
(
e
->
runners
);
free
(
e
->
snapshotUnits
);
...
...
src/gravity.c
View file @
d6590e40
...
...
@@ -21,9 +21,15 @@
#include
"../config.h"
/* Some standard headers. */
#include
<float.h>
#include
<stdio.h>
#include
<stdlib.h>
#include
<unistd.h>
#ifdef HAVE_HDF5
#include
<hdf5.h>
#endif
/* This object's header. */
#include
"gravity.h"
...
...
@@ -39,6 +45,256 @@ struct exact_force_data {
double
const_G
;
};
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
/* Size of the Ewald table */
#define Newald 64
/* Components of the Ewald correction */
static
float
fewald_x
[
Newald
+
1
][
Newald
+
1
][
Newald
+
1
];
static
float
fewald_y
[
Newald
+
1
][
Newald
+
1
][
Newald
+
1
];
static
float
fewald_z
[
Newald
+
1
][
Newald
+
1
][
Newald
+
1
];
/* Factor used to normalize the access to the Ewald table */
float
ewald_fac
;
#endif
/**
* @brief Allocates the memory and computes one octant of the
* Ewald correction table.
*
* We follow Hernquist, Bouchet & Suto, 1991, ApJS, Volume 75, p.231-240,
* equations (2.14a) and (2.14b) with alpha = 2. We consider all terms with
* |x - nL| < 4L and |h|^2 < 16.
*
* @param boxSize The side-length (L) of the volume.
*/
void
gravity_exact_force_ewald_init
(
double
boxSize
)
{
#ifdef SWIFT_GRAVITY_FORCE_CHECKS
const
ticks
tic
=
getticks
();
message
(
"Computing Ewald correction table..."
);
/* Level of correction (Hernquist et al. 1991)*/
const
float
alpha
=
2
.
f
;
/* some useful constants */
const
float
alpha2
=
alpha
*
alpha
;
const
float
factor_exp1
=
2
.
f
*
alpha
/
sqrt
(
M_PI
);
const
float
factor_exp2
=
-
M_PI
*
M_PI
/
alpha2
;
const
float
factor_sin
=
2
.
f
*
M_PI
;
const
float
boxSize_inv2
=
1
.
f
/
(
boxSize
*
boxSize
);
/* Ewald factor to access the table */
ewald_fac
=
(
double
)(
2
*
Newald
)
/
boxSize
;
/* Zero everything */
bzero
(
fewald_x
,
(
Newald
+
1
)
*
(
Newald
+
1
)
*
(
Newald
+
1
)
*
sizeof
(
float
));
bzero
(
fewald_y
,
(
Newald
+
1
)
*
(
Newald
+
1
)
*
(
Newald
+
1
)
*
sizeof
(
float
));
bzero
(
fewald_z
,
(
Newald
+
1
)
*
(
Newald
+
1
)
*
(
Newald
+
1
)
*
sizeof
(
float
));
/* Compute the values in one of the octants */
for
(
int
i
=
0
;
i
<=
Newald
;
++
i
)
{
for
(
int
j
=
0
;
j
<=
Newald
;
++
j
)
{
for
(
int
k
=
0
;
k
<=
Newald
;
++
k
)
{
if
(
i
==
0
&&
j
==
0
&&
k
==
0
)
continue
;
/* Distance vector */
const
float
r_x
=
0
.
5
f
*
((
float
)
i
)
/
Newald
;
const
float
r_y
=
0
.
5
f
*
((
float
)
j
)
/
Newald
;
const
float
r_z
=
0
.
5
f
*
((
float
)
k
)
/
Newald
;
/* Norm of distance vector */
const
float
r2
=
r_x
*
r_x
+
r_y
*
r_y
+
r_z
*
r_z
;
const
float
r_inv
=
1
.
f
/
sqrtf
(
r2
);
const
float
r_inv3
=
r_inv
*
r_inv
*
r_inv
;
/* Normal gravity potential term */
float
f_x
=
r_x
*
r_inv3
;
float
f_y
=
r_y
*
r_inv3
;
float
f_z
=
r_z
*
r_inv3
;
for
(
int
n_i
=
-
4
;
n_i
<=
4
;
++
n_i
)
{
for
(
int
n_j
=
-
4
;
n_j
<=
4
;
++
n_j
)
{
for
(
int
n_k
=
-
4
;
n_k
<=
4
;
++
n_k
)
{
const
float
d_x
=
r_x
-
n_i
;
const
float
d_y
=
r_y
-
n_j
;
const
float
d_z
=
r_z
-
n_k
;
/* Discretised distance */
const
float
r_tilde2
=
d_x
*
d_x
+
d_y
*
d_y
+
d_z
*
d_z
;
const
float
r_tilde_inv
=
1
.
f
/
sqrtf
(
r_tilde2
);
const
float
r_tilde
=
r_tilde_inv
*
r_tilde2
;
const
float
r_tilde_inv3
=
r_tilde_inv
*
r_tilde_inv
*
r_tilde_inv
;
const
float
val
=
erfcf
(
alpha
*
r_tilde
)
+
factor_exp1
*
r_tilde
*
expf
(
-
alpha2
*
r_tilde2
);
/* First correction term */
const
float
f
=
val
*
r_tilde_inv3
;
f_x
-=
f
*
d_x
;
f_y
-=
f
*
d_y
;
f_z
-=
f
*
d_z
;
}
}
}
for
(
int
h_i
=
-
4
;
h_i
<=
4
;
++
h_i
)
{
for
(
int
h_j
=
-
4
;
h_j
<=
4
;
++
h_j
)
{
for
(
int
h_k
=
-
4
;
h_k
<=
4
;
++
h_k
)
{
const
float
h2
=
h_i
*
h_i
+
h_j
*
h_j
+
h_k
*
h_k
;
const
float
h2_inv
=
1
.
f
/
(
h2
+
FLT_MIN
);
const
float
h_dot_x
=
h_i
*
r_x
+
h_j
*
r_y
+
h_k
*
r_z
;
const
float
val
=
2
.
f
*
h2_inv
*
expf
(
h2
*
factor_exp2
)
*
sinf
(
factor_sin
*
h_dot_x
);
/* Second correction term */
f_x
-=
val
*
h_i
;
f_y
-=
val
*
h_j
;
f_z
-=
val
*
h_k
;
}
}
}
/* Save back to memory */
fewald_x
[
i
][
j
][
k
]
=
f_x
;
fewald_y
[
i
][
j
][
k
]
=
f_y
;
fewald_z
[
i
][
j
][
k
]
=
f_z
;
}
}
}
/* Dump the Ewald table to a file */
#ifdef HAVE_HDF5
hid_t
h_file
=
H5Fcreate
(
"Ewald.hdf5"
,
H5F_ACC_TRUNC
,
H5P_DEFAULT
,
H5P_DEFAULT
);
if
(
h_file
<
0
)
error
(
"Error while opening file for Ewald dump."
);
/* Create dataspace */
hsize_t
dim
[
3
]
=
{
Newald
+
1
,
Newald
+
1
,
Newald
+
1
};
hid_t
h_space
=
H5Screate_simple
(
3
,
dim
,
NULL
);
hid_t
h_data
;
h_data
=
H5Dcreate
(
h_file
,
"Ewald_x"
,
H5T_NATIVE_FLOAT
,
h_space
,
H5P_DEFAULT
,
H5P_DEFAULT
,
H5P_DEFAULT
);
H5Dwrite
(
h_data
,
H5T_NATIVE_FLOAT
,
h_space
,
H5S_ALL
,
H5P_DEFAULT
,
&
(
fewald_x
[
0
][
0
][
0
]));
H5Dclose
(
h_data
);
h_data
=
H5Dcreate
(
h_file
,
"Ewald_y"
,
H5T_NATIVE_FLOAT
,
h_space
,
H5P_DEFAULT
,
H5P_DEFAULT
,
H5P_DEFAULT
);
H5Dwrite
(
h_data
,
H5T_NATIVE_FLOAT
,
h_space
,
H5S_ALL
,
H5P_DEFAULT
,
&
(
fewald_y
[
0
][
0
][
0
]));
H5Dclose
(
h_data
);
h_data
=
H5Dcreate
(
h_file
,
"Ewald_z"
,
H5T_NATIVE_FLOAT
,
h_space
,
H5P_DEFAULT
,
H5P_DEFAULT
,
H5P_DEFAULT
);
H5Dwrite
(
h_data
,
H5T_NATIVE_FLOAT
,
h_space
,
H5S_ALL
,
H5P_DEFAULT
,
&
(
fewald_z
[
0
][
0
][
0
]));
H5Dclose
(
h_data
);
H5Sclose
(
h_space
);
H5Fclose
(
h_file
);
#endif
/* Apply the box-size correction */
for
(
int
i
=
0
;
i
<=
Newald
;
++
i
)
{
for
(
int
j
=
0
;
j
<=
Newald
;
++
j
)
{
for
(
int
k
=
0
;
k
<=
Newald
;
++
k
)
{
fewald_x
[
i
][
j
][
k
]
*=
boxSize_inv2
;
fewald_y
[
i
][
j
][
k
]
*=
boxSize_inv2
;
fewald_z
[
i
][
j
][
k
]
*=
boxSize_inv2
;
}
}
}
/* Say goodbye */
message
(
"Ewald correction table computed (took %.3f %s). "
,
clocks_from_ticks
(
getticks
()
-
tic
),
clocks_getunit
());
#else