Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
05645d2a
Commit
05645d2a
authored
Nov 13, 2017
by
Matthieu Schaller
Browse files
Merge branch 'master' into task_graph
parents
25b1580f
bee23d1c
Changes
33
Expand all
Hide whitespace changes
Inline
Side-by-side
INSTALL.swift
View file @
05645d2a
...
...
@@ -75,6 +75,15 @@ also be switched off for benchmarking purposes. To do so, you can use:
./
configure
--
disable
-
vec
Please
note
that
to
build
SWIFT
on
MacOS
,
you
will
need
to
configure
using
./
configure
--
disable
-
compiler
-
warnings
due
to
the
incorrect
behaviour
of
the
LLVM
compiler
on
this
platform
that
raises
warnings
when
the
pthread
flags
are
passed
to
the
linker
.
Dependencies
============
...
...
configure.ac
View file @
05645d2a
...
...
@@ -379,6 +379,19 @@ AX_PTHREAD([LIBS="$PTHREAD_LIBS $LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
or use CPPFLAGS and LDFLAGS if the library is installed in a
non-standard location.]))
# Check whether POSIX thread barriers are implemented (e.g. OSX does not have them)
have_pthread_barrier="no"
AC_CHECK_LIB(pthread, pthread_barrier_init,
	     have_pthread_barrier="yes",
	     AC_MSG_WARN(POSIX implementation does not have barriers. SWIFT will use home-made ones.))
# Use "=" for the string comparison: POSIX test(1) does not specify "==",
# which is a bash extension and fails on stricter /bin/sh implementations.
if test "x$have_pthread_barrier" = "xyes"; then
  AC_DEFINE([HAVE_PTHREAD_BARRIERS], [1], [The posix library implements barriers])
fi
# Check whether POSIX file allocation functions exist (e.g. OSX does not have them)
AC_CHECK_LIB(pthread, posix_fallocate,
AC_DEFINE([HAVE_POSIX_FALLOCATE], [1], [The posix library implements file allocation functions.]),
AC_MSG_WARN(POSIX implementation does not have file allocation functions.))
# Check for METIS. Note AX_LIB_METIS exists, but cannot be configured
# to be default off (i.e. given no option it tries to locate METIS), so we
...
...
@@ -548,6 +561,10 @@ if test "$with_hdf5" = "yes"; then
fi
AM_CONDITIONAL([HAVEPARALLELHDF5],[test "$have_parallel_hdf5" = "yes"])
# Check for floating-point exceptions
AC_CHECK_FUNC(feenableexcept, AC_DEFINE([HAVE_FE_ENABLE_EXCEPT],[1],
[Defined if the floating-point exception can be enabled using non-standard GNU functions.]))
# Check for setaffinity.
AC_CHECK_FUNC(pthread_setaffinity_np, AC_DEFINE([HAVE_SETAFFINITY],[1],
[Defined if pthread_setaffinity_np exists.]) )
...
...
@@ -901,19 +918,20 @@ AC_MSG_RESULT([
$PACKAGE_NAME v.$PACKAGE_VERSION
Compiler : $CC
- vendor : $ax_cv_c_compiler_vendor
- version : $ax_cv_c_compiler_version
- flags : $CFLAGS
MPI enabled : $enable_mpi
HDF5 enabled : $with_hdf5
- parallel : $have_parallel_hdf5
Metis enabled : $have_metis
FFTW3 enabled : $have_fftw3
libNUMA enabled : $have_numa
Using tcmalloc : $have_tcmalloc
Using jemalloc : $have_jemalloc
CPU profiler : $have_profiler
Compiler : $CC
- vendor : $ax_cv_c_compiler_vendor
- version : $ax_cv_c_compiler_version
- flags : $CFLAGS
MPI enabled : $enable_mpi
HDF5 enabled : $with_hdf5
- parallel : $have_parallel_hdf5
Metis enabled : $have_metis
FFTW3 enabled : $have_fftw3
libNUMA enabled : $have_numa
Using tcmalloc : $have_tcmalloc
Using jemalloc : $have_jemalloc
CPU profiler : $have_profiler
Pthread barriers : $have_pthread_barrier
Hydro scheme : $with_hydro
Dimensionality : $with_dimension
...
...
m4/ax_cc_maxopt.m4
View file @
05645d2a
...
...
@@ -146,6 +146,22 @@ if test "$ac_test_CFLAGS" != "set"; then
fi
;;
clang)
# default optimization flags for clang on all systems
CFLAGS="-O3 -fomit-frame-pointer"
# Always good optimisation to have
AX_CHECK_COMPILE_FLAG(-fstrict-aliasing, CFLAGS="$CFLAGS -fstrict-aliasing")
# note that we enable "unsafe" fp optimization with other compilers, too
AX_CHECK_COMPILE_FLAG(-ffast-math, CFLAGS="$CFLAGS -ffast-math")
# not all codes will benefit from this.
AX_CHECK_COMPILE_FLAG(-funroll-loops, CFLAGS="$CFLAGS -funroll-loops")
AX_GCC_ARCHFLAG($acx_maxopt_portable)
;;
gnu)
# default optimization flags for gcc on all systems
CFLAGS="-O3 -fomit-frame-pointer"
...
...
@@ -155,7 +171,7 @@ if test "$ac_test_CFLAGS" != "set"; then
# -fstrict-aliasing for gcc-2.95+
AX_CHECK_COMPILE_FLAG(-fstrict-aliasing,
CFLAGS="$CFLAGS -fstrict-aliasing")
CFLAGS="$CFLAGS -fstrict-aliasing")
# note that we enable "unsafe" fp optimization with other compilers, too
AX_CHECK_COMPILE_FLAG(-ffast-math, CFLAGS="$CFLAGS -ffast-math")
...
...
m4/ax_gcc_archflag.m4
View file @
05645d2a
...
...
@@ -108,7 +108,8 @@ case $host_cpu in
*3?6[[ae]]?:*:*:*) ax_gcc_arch="ivybridge core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
*3?6[[cf]]?:*:*:*|*4?6[[56]]?:*:*:*) ax_gcc_arch="haswell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
*3?6d?:*:*:*|*4?6f?:*:*:*) ax_gcc_arch="broadwell core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
*9?6[[de]]?:*:*:*) ax_gcc_arch="kabylake core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
*4?6[[de]]?:*:*:*) ax_gcc_arch="skylake core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
*9?6[[de]]?:*:*:*) ax_gcc_arch="kabylake core-avx2 core-avx-i corei7-avx corei7 core2 pentium-m pentium3 pentiumpro" ;;
*1?6c?:*:*:*|*2?6[[67]]?:*:*:*|*3?6[[56]]?:*:*:*) ax_gcc_arch="bonnell atom core2 pentium-m pentium3 pentiumpro" ;;
*3?67?:*:*:*|*[[45]]?6[[ad]]?:*:*:*) ax_gcc_arch="silvermont atom core2 pentium-m pentium3 pentiumpro" ;;
*000?f[[012]]?:*:*:*|?f[[012]]?:*:*:*|f[[012]]?:*:*:*) ax_gcc_arch="pentium4 pentiumpro" ;;
...
...
m4/ax_pthread.m4
View file @
05645d2a
This diff is collapsed.
Click to expand it.
src/Makefile.am
View file @
05645d2a
...
...
@@ -60,7 +60,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
collectgroup.c hydro_space.c equation_of_state.c
# Include files for distribution, not installation.
nobase_noinst_HEADERS
=
align.h approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h
\
nobase_noinst_HEADERS
=
align.h approx_math.h atomic.h
barrier.h
cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h
\
kernel_long_gravity.h vector.h cache.h runner_doiact.h runner_doiact_vec.h runner_doiact_grav.h runner_doiact_fft.h
\
runner_doiact_nosort.h units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h
\
dimension.h equation_of_state.h part_type.h periodic.h
\
...
...
src/barrier.h
0 → 100644
View file @
05645d2a
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2017 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef SWIFT_BARRIER_H
#define SWIFT_BARRIER_H
/**
* @file barrier.h
* @brief Define the thread barriers if the POSIX implementation on this system
* does not.
*
* The pthread barriers are only an option of the POSIX norm and they are not
* necessarily implemented. One example is OSX where all the rest of POSIX
* exists but not the barriers.
* We implement them here in a simple way to allow for SWIFT to run on such
* systems but this may lead to poorer performance.
*
* Note that we only define the three functions we need. This is a textbook
* implementation of a barrier that uses the common POSIX features (mutex,
* conditions and broadcasts).
*
* If the pthread barriers exist (Linux systems), we default to them.
*/
/* Config parameters. */
#include
"../config.h"
/* Standard headers */
#include
<pthread.h>
/* Does this POSIX implementation provide barriers? */
#ifdef HAVE_PTHREAD_BARRIERS
#define swift_barrier_t pthread_barrier_t
#define swift_barrier_wait pthread_barrier_wait
#define swift_barrier_init pthread_barrier_init
#define swift_barrier_destroy pthread_barrier_destroy
#else
/* Local headers */
#include
"error.h"
#include
"inline.h"
/**
* @brief An ersatz of POSIX barriers to be used on systems that don't provide
* the good ones.
*/
typedef
struct
{
/*! Barrier mutex */
pthread_mutex_t
mutex
;
/*! Condition to open the barrier */
pthread_cond_t
condition
;
/*! Total number of threads */
int
limit
;
/*! Number of threads that reached the barrier */
int
count
;
}
swift_barrier_t
;
/**
* @brief Initialise a barrier object.
*
* @param barrier The #swift_barrier_t to initialise
* @param unused Unused parameter (NULL) as we don't support barrier attributes.
* @param count The number of threads that will wait at the barrier.
*/
static
INLINE
int
swift_barrier_init
(
swift_barrier_t
*
barrier
,
void
*
unused
,
unsigned
int
count
)
{
/* Initialise the mutex */
if
(
pthread_mutex_init
(
&
barrier
->
mutex
,
0
)
!=
0
)
error
(
"Error initializing the barrier mutex"
);
/* Initialise the condition */
if
(
pthread_cond_init
(
&
barrier
->
condition
,
0
)
!=
0
)
error
(
"Error initializing the barrier condition"
);
barrier
->
limit
=
count
;
barrier
->
count
=
0
;
/* All is good */
return
0
;
}
/**
* @brief Make a set of threads wait at the barrier
*
* Note that once all threads have reached the barrier, we also
* reset the barrier to state where it is ready to be re-used
* without calling swift_barrier_init.
*
* @param barrier The (initialised) #swift_barrier_t to wait at.
*/
static
INLINE
int
swift_barrier_wait
(
swift_barrier_t
*
barrier
)
{
/* Start by locking the barrier */
pthread_mutex_lock
(
&
barrier
->
mutex
);
/* One more thread has gone home*/
barrier
->
count
++
;
/* Are threads still running? */
if
(
barrier
->
count
<
barrier
->
limit
)
{
/* We need to make the thread wait until everyone is back */
pthread_cond_wait
(
&
barrier
->
condition
,
&
(
barrier
->
mutex
));
/* Release the mutex */
pthread_mutex_unlock
(
&
barrier
->
mutex
);
/* Say that this was not the last thread */
return
0
;
}
else
{
/* Everybody is home */
/* Open the barrier (i.e. release the threads blocked in the while loop) */
pthread_cond_broadcast
(
&
barrier
->
condition
);
/* Re-initialize the barrier */
barrier
->
count
=
0
;
/* Release the mutex */
pthread_mutex_unlock
(
&
barrier
->
mutex
);
/* Say that we are all done */
return
1
;
}
}
/**
 * @brief Destroy a barrier object
 *
 * Note that we do not attempt to wait for the barrier to open before
 * destroying it. Whatever the underlying pthread destroy calls report
 * (for instance when the objects are still in use) is passed back to
 * the caller.
 *
 * @param barrier The #swift_barrier_t object to destroy.
 *
 * @return 0 on success, otherwise the first non-zero error code returned by
 * pthread_cond_destroy() or pthread_mutex_destroy().
 */
static INLINE int swift_barrier_destroy(swift_barrier_t *barrier) {

  /* Destroy the pthread things. Both calls are always made so that a failure
   * of the first one does not leave the mutex behind. */
  const int cond_err = pthread_cond_destroy(&barrier->condition);
  const int mutex_err = pthread_mutex_destroy(&barrier->mutex);

  /* Report the first problem encountered, if any */
  return (cond_err != 0) ? cond_err : mutex_err;
}
#endif
/* HAVE_PTHREAD_BARRIERS */
#endif
/* SWIFT_BARRIER_H */
src/cell.c
View file @
05645d2a
...
...
@@ -50,6 +50,7 @@
#include
"active.h"
#include
"atomic.h"
#include
"drift.h"
#include
"engine.h"
#include
"error.h"
#include
"gravity.h"
#include
"hydro.h"
...
...
@@ -1862,6 +1863,7 @@ void cell_activate_subcell_external_grav_tasks(struct cell *ci,
int
cell_unskip_tasks
(
struct
cell
*
c
,
struct
scheduler
*
s
)
{
struct
engine
*
e
=
s
->
space
->
e
;
const
int
nodeID
=
e
->
nodeID
;
int
rebuild
=
0
;
/* Un-skip the density tasks involved with this cell. */
...
...
@@ -1873,13 +1875,13 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
const
int
cj_active
=
(
cj
!=
NULL
)
?
cell_is_active
(
cj
,
e
)
:
0
;
/* Only activate tasks that involve a local active cell. */
if
((
ci_active
&&
ci
->
nodeID
==
engine_rank
)
||
(
cj_active
&&
cj
->
nodeID
==
engine_rank
))
{
if
((
ci_active
&&
ci
->
nodeID
==
nodeID
)
||
(
cj_active
&&
cj
->
nodeID
==
nodeID
))
{
scheduler_activate
(
s
,
t
);
/* Activate hydro drift */
if
(
t
->
type
==
task_type_self
)
{
if
(
ci
->
nodeID
==
engine_rank
)
cell_activate_drift_part
(
ci
,
s
);
if
(
ci
->
nodeID
==
nodeID
)
cell_activate_drift_part
(
ci
,
s
);
}
/* Set the correct sorting flags and activate hydro drifts */
...
...
@@ -1891,8 +1893,8 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
cj
->
dx_max_sort_old
=
cj
->
dx_max_sort
;
/* Activate the drift tasks. */
if
(
ci
->
nodeID
==
engine_rank
)
cell_activate_drift_part
(
ci
,
s
);
if
(
cj
->
nodeID
==
engine_rank
)
cell_activate_drift_part
(
cj
,
s
);
if
(
ci
->
nodeID
==
nodeID
)
cell_activate_drift_part
(
ci
,
s
);
if
(
cj
->
nodeID
==
nodeID
)
cell_activate_drift_part
(
cj
,
s
);
/* Check the sorts and activate them if needed. */
cell_activate_sorts
(
ci
,
t
->
flags
,
s
);
...
...
@@ -1913,7 +1915,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
#ifdef WITH_MPI
/* Activate the send/recv tasks. */
if
(
ci
->
nodeID
!=
engine_rank
)
{
if
(
ci
->
nodeID
!=
nodeID
)
{
/* If the local cell is active, receive data from the foreign cell. */
if
(
cj_active
)
{
...
...
@@ -1951,7 +1953,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
/* If the local cell is active, send its ti_end values. */
if
(
cj_active
)
scheduler_activate_send
(
s
,
cj
->
send_ti
,
ci
->
nodeID
);
}
else
if
(
cj
->
nodeID
!=
engine_rank
)
{
}
else
if
(
cj
->
nodeID
!=
nodeID
)
{
/* If the local cell is active, receive data from the foreign cell. */
if
(
ci_active
)
{
...
...
@@ -2001,8 +2003,8 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
struct
cell
*
cj
=
t
->
cj
;
/* Only activate tasks that involve a local active cell. */
if
((
cell_is_active
(
ci
,
e
)
&&
ci
->
nodeID
==
engine_rank
)
||
(
cj
!=
NULL
&&
cell_is_active
(
cj
,
e
)
&&
cj
->
nodeID
==
engine_rank
))
{
if
((
cell_is_active
(
ci
,
e
)
&&
ci
->
nodeID
==
nodeID
)
||
(
cj
!=
NULL
&&
cell_is_active
(
cj
,
e
)
&&
cj
->
nodeID
==
nodeID
))
{
scheduler_activate
(
s
,
t
);
/* Set the drifting flags */
...
...
@@ -2018,7 +2020,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) {
}
/* Unskip all the other task types. */
if
(
c
->
nodeID
==
engine_rank
&&
cell_is_active
(
c
,
e
))
{
if
(
c
->
nodeID
==
nodeID
&&
cell_is_active
(
c
,
e
))
{
for
(
struct
link
*
l
=
c
->
gradient
;
l
!=
NULL
;
l
=
l
->
next
)
scheduler_activate
(
s
,
l
->
t
);
...
...
src/common_io.c
View file @
05645d2a
...
...
@@ -386,6 +386,7 @@ void io_write_code_description(hid_t h_file) {
configuration_options
());
io_write_attribute_s
(
h_grpcode
,
"CFLAGS"
,
compilation_cflags
());
io_write_attribute_s
(
h_grpcode
,
"HDF5 library version"
,
hdf5_version
());
io_write_attribute_s
(
h_grpcode
,
"Thread barriers"
,
thread_barrier_version
());
#ifdef HAVE_FFTW
io_write_attribute_s
(
h_grpcode
,
"FFTW library version"
,
fftw3_version
());
#endif
...
...
src/dump.c
View file @
05645d2a
...
...
@@ -20,6 +20,15 @@
/* Config parameters. */
#include
"../config.h"
#ifdef HAVE_POSIX_FALLOCATE
/* This object's header. */
#include
"dump.h"
/* Local headers. */
#include
"atomic.h"
#include
"error.h"
/* Some standard headers. */
#include
<errno.h>
#include
<fcntl.h>
...
...
@@ -29,13 +38,6 @@
#include
<sys/types.h>
#include
<unistd.h>
/* This object's header. */
#include
"dump.h"
/* Local headers. */
#include
"atomic.h"
#include
"error.h"
/**
* @brief Obtain a chunk of memory from a dump.
*
...
...
@@ -44,7 +46,6 @@
* @param offset The offset of the returned memory address within the dump file.
* @return A pointer to the memory-mapped chunk of data.
*/
void
*
dump_get
(
struct
dump
*
d
,
size_t
count
,
size_t
*
offset
)
{
size_t
local_offset
=
atomic_add
(
&
d
->
count
,
count
);
*
offset
=
local_offset
+
d
->
file_offset
;
...
...
@@ -54,7 +55,6 @@ void *dump_get(struct dump *d, size_t count, size_t *offset) {
/**
* @brief Ensure that at least size bytes are available in the #dump.
*/
void
dump_ensure
(
struct
dump
*
d
,
size_t
size
)
{
/* If we have enough space already, just bail. */
...
...
@@ -88,7 +88,6 @@ void dump_ensure(struct dump *d, size_t size) {
/**
* @brief Flush the #dump to disk.
*/
void
dump_sync
(
struct
dump
*
d
)
{
if
(
msync
(
d
->
data
,
d
->
count
,
MS_SYNC
)
!=
0
)
error
(
"Failed to sync memory-mapped data."
);
...
...
@@ -97,7 +96,6 @@ void dump_sync(struct dump *d) {
/**
* @brief Finalize the #dump.
*/
void
dump_close
(
struct
dump
*
d
)
{
/* Unmap the data in memory. */
if
(
munmap
(
d
->
data
,
d
->
count
)
!=
0
)
{
...
...
@@ -121,7 +119,6 @@ void dump_close(struct dump *d) {
* note that it will be overwritten.
* @param size The initial buffer size for this #dump.
*/
void
dump_init
(
struct
dump
*
d
,
const
char
*
filename
,
size_t
size
)
{
/* Create the output file. */
...
...
@@ -151,3 +148,5 @@ void dump_init(struct dump *d, const char *filename, size_t size) {
d
->
file_offset
=
0
;
d
->
page_mask
=
page_mask
;
}
#endif
src/dump.h
View file @
05645d2a
...
...
@@ -19,8 +19,13 @@
#ifndef SWIFT_DUMP_H
#define SWIFT_DUMP_H
/* Includes. */
#include
"lock.h"
/* Config parameters. */
#include
"../config.h"
#ifdef HAVE_POSIX_FALLOCATE
/* Are we on a sensible platform? */
/* Standard headers */
#include
<stdlib.h>
/* Some constants. */
#define dump_grow_ensure_factor 10
...
...
@@ -54,4 +59,6 @@ void dump_sync(struct dump *d);
void
dump_close
(
struct
dump
*
d
);
void
*
dump_get
(
struct
dump
*
d
,
size_t
count
,
size_t
*
offset
);
#endif
/* HAVE_POSIX_FALLOCATE */
#endif
/* SWIFT_DUMP_H */
src/engine.c
View file @
05645d2a
...
...
@@ -880,6 +880,9 @@ void engine_redistribute(struct engine *e) {
nodeID
,
nr_parts
,
nr_sparts
,
nr_gparts
,
my_cells
);
}
/* Flag that a redistribute has taken place */
e
->
step_props
|=
engine_step_prop_redistribute
;
if
(
e
->
verbose
)
message
(
"took %.3f %s."
,
clocks_from_ticks
(
getticks
()
-
tic
),
clocks_getunit
());
...
...
@@ -945,6 +948,9 @@ void engine_repartition(struct engine *e) {
/* Tell the engine it should re-build whenever possible */
e
->
forcerebuild
=
1
;
/* Flag that a repartition has taken place */
e
->
step_props
|=
engine_step_prop_repartition
;
if
(
e
->
verbose
)
message
(
"took %.3f %s."
,
clocks_from_ticks
(
getticks
()
-
tic
),
clocks_getunit
());
...
...
@@ -3098,6 +3104,9 @@ void engine_rebuild(struct engine *e, int clean_h_values) {
/* Print the status of the system */
// if (e->verbose) engine_print_task_counts(e);
/* Flag that a rebuild has taken place */
e
->
step_props
|=
engine_step_prop_rebuild
;
if
(
e
->
verbose
)
message
(
"took %.3f %s."
,
clocks_from_ticks
(
getticks
()
-
tic
),
clocks_getunit
());
...
...
@@ -3153,10 +3162,10 @@ void engine_prepare(struct engine *e) {
void
engine_barrier
(
struct
engine
*
e
)
{
/* Wait at the wait barrier. */
pthread
_barrier_wait
(
&
e
->
wait_barrier
);
swift
_barrier_wait
(
&
e
->
wait_barrier
);
/* Wait at the run barrier. */
pthread
_barrier_wait
(
&
e
->
run_barrier
);
swift
_barrier_wait
(
&
e
->
run_barrier
);
}
/**
...
...
@@ -3472,7 +3481,7 @@ void engine_launch(struct engine *e) {
atomic_inc
(
&
e
->
sched
.
waiting
);
/* Cry havoc and let loose the dogs of war. */
pthread
_barrier_wait
(
&
e
->
run_barrier
);
swift
_barrier_wait
(
&
e
->
run_barrier
);
/* Load the tasks. */
scheduler_start
(
&
e
->
sched
);
...
...
@@ -3484,7 +3493,7 @@ void engine_launch(struct engine *e) {
pthread_mutex_unlock
(
&
e
->
sched
.
sleep_mutex
);
/* Sit back and wait for the runners to come home. */
pthread
_barrier_wait
(
&
e
->
wait_barrier
);
swift
_barrier_wait
(
&
e
->
wait_barrier
);
if
(
e
->
verbose
)
message
(
"took %.3f %s."
,
clocks_from_ticks
(
getticks
()
-
tic
),
...
...
@@ -3699,14 +3708,14 @@ void engine_step(struct engine *e) {
if
(
e
->
nodeID
==
0
)
{
/* Print some information to the screen */
printf
(
" %6d %14e %14e %1
0
zu %1
0
zu %1
0
zu %21.3f
\n
"
,
e
->
step
,
e
->
time
,
printf
(
" %6d %14e %14e %1
2
zu %1
2
zu %1
2
zu %21.3f
%6d
\n
"
,
e
->
step
,
e
->
time
,
e
->
timeStep
,
e
->
updates
,
e
->
g_updates
,
e
->
s_updates
,
e
->
wallclock_time
);
e
->
wallclock_time
,
e
->
step_props
);
fflush
(
stdout
);
fprintf
(
e
->
file_timesteps
,
" %6d %14e %14e %1
0
zu %1
0
zu %1
0
zu %21.3f
\n
"
,
fprintf
(
e
->
file_timesteps
,
" %6d %14e %14e %1
2
zu %1
2
zu %1
2
zu %21.3f
%6d
\n
"
,
e
->
step
,
e
->
time
,
e
->
timeStep
,
e
->
updates
,
e
->
g_updates
,
e
->
s_updates
,
e
->
wallclock_time
);
e
->
s_updates
,
e
->
wallclock_time
,
e
->
step_props
);
fflush
(
e
->
file_timesteps
);
}
...
...
@@ -3718,6 +3727,7 @@ void engine_step(struct engine *e) {
e
->
time
=
e
->
ti_current
*
e
->
timeBase
+
e
->
timeBegin
;
e
->
timeOld
=
e
->
ti_old
*
e
->
timeBase
+
e
->
timeBegin
;
e
->
timeStep
=
(
e
->
ti_current
-
e
->
ti_old
)
*
e
->
timeBase
;
e
->
step_props
=
engine_step_prop_none
;
/* Prepare the tasks to be launched, rebuild or repartition if needed. */
engine_prepare
(
e
);
...
...
@@ -3807,6 +3817,9 @@ void engine_step(struct engine *e) {
/* ... and find the next output time */
engine_compute_next_snapshot_time
(
e
);
/* Flag that we dumped a snapshot */
e
->
step_props
|=
engine_step_prop_snapshot
;
}
/* Save some statistics */
...
...
@@ -3817,6 +3830,9 @@ void engine_step(struct engine *e) {
/* and move on */
e
->
timeLastStatistics
+=
e
->
deltaTimeStatistics
;
/* Flag that we dumped some statistics */
e
->
step_props
|=
engine_step_prop_statistics
;
}
/* Now apply all the collected time step updates and particle counts. */
...
...
@@ -4355,6 +4371,7 @@ void engine_init(struct engine *e, struct space *s,
e
->
reparttype
=
reparttype
;
e
->
dump_snapshot
=
0
;
e
->
save_stats
=
0
;
e
->
step_props
=
engine_step_prop_none
;
e
->
links
=
NULL
;
e
->
nr_links
=
0
;
e
->
timeBegin
=
parser_get_param_double
(
params
,
"TimeIntegration:time_begin"
);
...
...
@@ -4578,9 +4595,16 @@ void engine_init(struct engine *e, struct space *s,
e
->
hydro_properties
->
delta_neighbours
,
e
->
hydro_properties
->
eta_neighbours
);
fprintf
(
e
->
file_timesteps
,
"# %6s %14s %14s %10s %10s %10s %16s [%s]
\n
"
,
fprintf
(
e
->
file_timesteps
,
"# Step Properties: Rebuild=%d, Redistribute=%d, Repartition=%d, "
"Statistics=%d, Snapshot=%d
\n
"
,
engine_step_prop_rebuild
,
engine_step_prop_redistribute
,
engine_step_prop_repartition
,
engine_step_prop_statistics
,
engine_step_prop_snapshot
);
fprintf
(
e
->
file_timesteps
,
"# %6s %14s %14s %12s %12s %12s %16s [%s] %6s
\n
"
,
"Step"
,
"Time"
,
"Time-step"
,
"Updates"
,
"g-Updates"
,
"s-Updates"
,
"Wall-clock time"
,
clocks_getunit
());
"Wall-clock time"
,
clocks_getunit
()
,
"Props"
);
fflush
(
e
->
file_timesteps
);
}
...
...
@@ -4665,8 +4689,8 @@ void engine_init(struct engine *e, struct space *s,
threadpool_init
(
&
e
->
threadpool
,
e
->
nr_threads
);
/* First of all, init the barrier and lock it. */
if
(
pthread
_barrier_init
(
&
e
->
wait_barrier
,
NULL
,
e
->
nr_threads
+
1
)
!=
0
||
pthread
_barrier_init
(
&
e
->
run_barrier
,
NULL
,
e
->
nr_threads
+
1
)
!=
0
)
if
(
swift
_barrier_init
(
&
e
->
wait_barrier
,
NULL
,
e
->
nr_threads
+
1
)
!=
0
||
swift
_barrier_init
(
&
e
->
run_barrier
,
NULL
,
e
->
nr_threads
+
1
)
!=
0
)
error
(
"Failed to initialize barrier."
);
/* Expected average for tasks per cell. If set to zero we use a heuristic
...
...
@@ -4752,7 +4776,7 @@ void engine_init(struct engine *e, struct space *s,
#endif
/* Wait for the runner threads to be in place. */
pthread
_barrier_wait
(
&
e
->
wait_barrier
);
swift
_barrier_wait
(
&
e
->
wait_barrier
);
}
/**
...
...
src/engine.h
View file @
05645d2a
...
...
@@ -32,11 +32,8 @@