SWIFT / SWIFTsim / Commits / b20c2a01

Commit b20c2a01, authored Mar 05, 2016 by Matthieu Schaller

    Added a command-line flag to increase verbosity

Parent: 1555aefd
Changes: 6
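Before the per-file diffs, a distilled view of the change may help: the new -v option sets a verbose flag in examples/main.c, engine_init() now takes it as a trailing argument and stores it in e->verbose, and each expensive phase is bracketed by getticks() and a timing message that only fires when verbosity is on. Below is a minimal self-contained sketch of that pattern; the ticks/clocks helpers, the message() macro, and the engine struct are stand-ins for SWIFT's real definitions, and engine_phase() is a hypothetical name.

    #include <stdio.h>
    #include <time.h>

    /* Stand-ins for SWIFT's cycle/clocks helpers (getticks, clocks_from_ticks,
     * clocks_getunit) and its message() macro -- purely illustrative. */
    typedef long long ticks;
    static ticks getticks(void) { return (ticks)clock(); }
    static double clocks_from_ticks(ticks t) { return 1000. * t / CLOCKS_PER_SEC; }
    static const char *clocks_getunit(void) { return "ms"; }
    #define message(fmt, ...) printf("%s: " fmt "\n", __func__, __VA_ARGS__)

    struct engine { int verbose; };

    /* The instrumentation pattern this commit applies to engine_redistribute(),
     * engine_maketasks(), engine_prepare(), etc. */
    static void engine_phase(struct engine *e) {
      const ticks tic = getticks();

      /* ... the actual work of the phase would go here ... */

      if (e->verbose)
        message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
                clocks_getunit());
    }

    int main(void) {
      struct engine e = {1}; /* verbose on, as if -v had been given */
      engine_phase(&e);      /* prints one timing line */
      return 0;
    }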
examples/main.c
@@ -77,6 +77,7 @@ int main(int argc, char *argv[]) {
   int nr_nodes = 1, myrank = 0;
   FILE *file_thread;
   int with_outputs = 1;
+  int verbose = 0;
   unsigned long long cpufreq = 0;
 #ifdef WITH_MPI
@@ -150,7 +151,7 @@ int main(int argc, char *argv[]) {
   bzero(&s, sizeof(struct space));

   /* Parse the options */
-  while ((c = getopt(argc, argv, "a:c:d:e:f:h:m:oP:q:R:s:t:w:y:z:")) != -1)
+  while ((c = getopt(argc, argv, "a:c:d:e:f:h:m:oP:q:R:s:t:vw:y:z:")) != -1)
     switch (c) {
       case 'a':
         if (sscanf(optarg, "%lf", &scaling) != 1)
@@ -226,8 +227,8 @@ int main(int argc, char *argv[]) {
           error("Error parsing number of queues.");
         break;
       case 'R':
-        /* Repartition type "n", "b", "v", "e" or "x".
-         * Note only none is available without METIS. */
+        /* Repartition type "n", "b", "v", "e" or "x".
+         * Note only none is available without METIS. */
 #ifdef WITH_MPI
         switch (optarg[0]) {
           case 'n':
@@ -261,6 +262,9 @@ int main(int argc, char *argv[]) {
         if (sscanf(optarg, "%d", &nr_threads) != 1)
           error("Error parsing number of threads.");
         break;
+      case 'v':
+        verbose = 1;
+        break;
       case 'w':
         if (sscanf(optarg, "%d", &space_subsize) != 1)
           error("Error parsing sub size.");
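A note on the optstring change above: in getopt(), a letter followed by ':' takes an argument (delivered via optarg), while a bare letter like the new 'v' is a plain flag, which is why case 'v' just flips verbose. A minimal sketch with a hypothetical two-option parser, not SWIFT code:

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    /* Minimal getopt() example: "t:" takes a value, "v" is a bare flag,
     * mirroring how the commit slots 'v' into SWIFT's optstring. */
    int main(int argc, char *argv[]) {
      int verbose = 0, nr_threads = 1;
      int c;
      while ((c = getopt(argc, argv, "t:v")) != -1) switch (c) {
          case 't':
            if (sscanf(optarg, "%d", &nr_threads) != 1) {
              fprintf(stderr, "Error parsing number of threads.\n");
              return EXIT_FAILURE;
            }
            break;
          case 'v':
            verbose = 1; /* no optarg: 'v' has no ':' in the optstring */
            break;
          default:
            return EXIT_FAILURE;
        }
      if (verbose) printf("running with %d threads, verbose on\n", nr_threads);
      return 0;
    }

Under this sketch, an invocation like ./a.out -t 16 -v turns the extra output on.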
@@ -329,7 +333,7 @@ int main(int argc, char *argv[]) {
   clocks_set_cpufreq(cpufreq);
   cpufreq = clocks_get_cpufreq();
-  if (myrank == 0) message("CPU frequency used for tick conversion: %llu Hz", cpufreq);
+  message("CPU frequency used for tick conversion: %llu Hz", cpufreq);

   /* Check we have sensible time step bounds */
   if (dt_min > dt_max)
@@ -341,8 +345,7 @@ int main(int argc, char *argv[]) {
   /* Read particles and space information from (GADGET) IC */
-  if (myrank == 0)
-    clocks_gettime(&tic);
+  if (myrank == 0) clocks_gettime(&tic);
 #if defined(WITH_MPI)
 #if defined(HAVE_PARALLEL_HDF5)
   read_ic_parallel(ICfileName, dim, &parts, &N, &periodic, myrank, nr_nodes,
@@ -386,10 +389,9 @@ int main(int argc, char *argv[]) {
   if (nr_queues < 0) nr_queues = nr_threads;

   /* Initialize the space with this data. */
-  if (myrank == 0)
-    clocks_gettime(&tic);
+  if (myrank == 0) clocks_gettime(&tic);
   space_init(&s, dim, parts, N, periodic, h_max, myrank == 0);
-  if (myrank == 0) {
+  if (myrank == 0 && verbose) {
     clocks_gettime(&toc);
     message("space_init took %.3f %s.", clocks_diff(&tic, &toc),
             clocks_getunit());
@@ -423,13 +425,12 @@ int main(int argc, char *argv[]) {
   }

   /* Initialize the engine with this space. */
-  if (myrank == 0)
-    clocks_gettime(&tic);
+  if (myrank == 0) clocks_gettime(&tic);
   if (myrank == 0) message("nr_nodes is %i.", nr_nodes);
   engine_init(&e, &s, dt_max, nr_threads, nr_queues, nr_nodes, myrank,
               ENGINE_POLICY | engine_policy_steal | engine_policy_hydro, 0,
-              time_end, dt_min, dt_max);
-  if (myrank == 0) {
+              time_end, dt_min, dt_max, (myrank == 0 && verbose));
+  if (myrank == 0 && verbose) {
     clocks_gettime(&toc);
     message("engine_init took %.3f %s.", clocks_diff(&tic, &toc),
             clocks_getunit());
@@ -445,8 +446,7 @@ int main(int argc, char *argv[]) {
   if (with_outputs) {
     /* Write the state of the system as it is before starting time integration.
      */
-    if (myrank == 0)
-      clocks_gettime(&tic);
+    if (myrank == 0) clocks_gettime(&tic);
 #if defined(WITH_MPI)
 #if defined(HAVE_PARALLEL_HDF5)
     write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD,
@@ -458,7 +458,7 @@ int main(int argc, char *argv[]) {
 #else
     write_output_single(&e, &us);
 #endif
-    if (myrank == 0) {
+    if (myrank == 0 && verbose) {
       clocks_gettime(&toc);
       message("writing particle properties took %.3f %s.",
               clocks_diff(&tic, &toc), clocks_getunit());
@@ -486,7 +486,8 @@ int main(int argc, char *argv[]) {
   if (myrank == 0)
     printf("# Step Time time-step Number of updates CPU Wall-clock time "
-           "[%s]\n", clocks_getunit());
+           "[%s]\n",
+           clocks_getunit());

   /* Let loose a runner on the space. */
   for (j = 0; !engine_is_done(&e); j++) {
@@ -506,6 +507,7 @@ int main(int argc, char *argv[]) {
     if (with_outputs && j % 100 == 0) {
+      if (myrank == 0) clocks_gettime(&tic);
 #if defined(WITH_MPI)
 #if defined(HAVE_PARALLEL_HDF5)
       write_output_parallel(&e, &us, myrank, nr_nodes, MPI_COMM_WORLD,
@@ -517,6 +519,12 @@ int main(int argc, char *argv[]) {
 #else
       write_output_single(&e, &us);
 #endif
+      if (myrank == 0 && verbose) {
+        clocks_gettime(&toc);
+        message("writing particle properties took %.3f %s.",
+                clocks_diff(&tic, &toc), clocks_getunit());
+        fflush(stdout);
+      }
     }

     /* Dump the task data using the given frequency. */
@@ -617,6 +625,8 @@ int main(int argc, char *argv[]) {
 #endif

   if (with_outputs) {
+    if (myrank == 0) clocks_gettime(&tic);
+
     /* Write final output. */
 #if defined(WITH_MPI)
 #if defined(HAVE_PARALLEL_HDF5)
@@ -629,6 +639,12 @@ int main(int argc, char *argv[]) {
 #else
     write_output_single(&e, &us);
 #endif
+    if (myrank == 0 && verbose) {
+      clocks_gettime(&toc);
+      message("writing particle properties took %.3f %s.",
+              clocks_diff(&tic, &toc), clocks_getunit());
+      fflush(stdout);
+    }
   }

 #ifdef WITH_MPI
src/engine.c
@@ -149,6 +149,7 @@ void engine_redistribute(struct engine *e) {
   int *cdim = s->cdim;
   struct cell *cells = s->cells;
   int nr_cells = s->nr_cells;
+  ticks tic = getticks();

   /* Start by sorting the particles according to their nodes and
      getting the counts. The counts array is indexed as
@@ -274,13 +275,18 @@ void engine_redistribute(struct engine *e) {
   /* Be verbose about what just happened. */
   for (int k = 0; k < nr_cells; k++)
     if (cells[k].nodeID == nodeID) my_cells += 1;
-  message("node %i now has %i parts in %i cells.", nodeID, nr_parts, my_cells);
+  if (e->verbose)
+    message("node %i now has %i parts in %i cells.", nodeID, nr_parts,
+            my_cells);

   /* Clean up other stuff. */
   free(reqs);
   free(counts);
   free(dest);

+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 #else
   error("SWIFT was not compiled with MPI support.");
 #endif
@@ -296,6 +302,8 @@ void engine_repartition(struct engine *e) {
 #if defined(WITH_MPI) && defined(HAVE_METIS)

+  ticks tic = getticks();
+
   /* Clear the repartition flag. */
   enum repartition_type reparttype = e->forcerepart;
   e->forcerepart = REPART_NONE;
@@ -323,6 +331,9 @@ void engine_repartition(struct engine *e) {
   /* Tell the engine it should re-build whenever possible */
   e->forcerebuild = 1;

+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 #else
   error("SWIFT was not compiled with MPI and METIS support.");
 #endif
@@ -472,6 +483,7 @@ void engine_exchange_cells(struct engine *e) {
   MPI_Request reqs_in[engine_maxproxies];
   MPI_Request reqs_out[engine_maxproxies];
   MPI_Status status;
+  ticks tic = getticks();

   /* Run through the cells and get the size of the ones that will be sent off.
    */
@@ -564,6 +576,10 @@ void engine_exchange_cells(struct engine *e) {
   /* Free the pcell buffer. */
   free(pcells);

+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+
 #else
   error("SWIFT was not compiled with MPI support.");
 #endif
@@ -591,6 +607,7 @@ int engine_exchange_strays(struct engine *e, int offset, int *ind, int N) {
   MPI_Status status;
   struct proxy *p;
   struct space *s = e->s;
+  ticks tic = getticks();

   /* Re-set the proxies. */
   for (k = 0; k < e->nr_proxies; k++) e->proxies[k].nr_parts_out = 0;
@@ -635,7 +652,7 @@ int engine_exchange_strays(struct engine *e, int offset, int *ind, int N) {
      enough space to accommodate them. */
   int count_in = 0;
   for (k = 0; k < e->nr_proxies; k++) count_in += e->proxies[k].nr_parts_in;
-  message("sent out %i particles, got %i back.", N, count_in);
+  if (e->verbose) message("sent out %i particles, got %i back.", N, count_in);
   if (offset + count_in > s->size_parts) {
     s->size_parts = (offset + count_in) * 1.05;
     struct part *parts_new = NULL;
@@ -704,6 +721,10 @@ int engine_exchange_strays(struct engine *e, int offset, int *ind, int N) {
       MPI_SUCCESS)
     error("MPI_Waitall on sends failed.");

+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+
   /* Return the number of harvested parts. */
   return count;
@@ -730,6 +751,7 @@ void engine_maketasks(struct engine *e) {
   int *cdim = s->cdim;
   struct task *t, *t2;
   struct cell *ci, *cj;
+  ticks tic = getticks();

   /* Re-set the scheduler. */
   scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell);
@@ -984,6 +1006,10 @@ void engine_maketasks(struct engine *e) {
   /* Set the tasks age. */
   e->tasks_age = 0;

+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
+
 }

 /**
@@ -999,7 +1025,7 @@ int engine_marktasks(struct engine *e) {
   struct task *t, *tasks = s->tasks;
   float ti_end = e->ti_current;
   struct cell *ci, *cj;
-  // ticks tic = getticks();
+  ticks tic = getticks();

   /* Much less to do here if we're on a fixed time-step. */
   if ((e->policy & engine_policy_fixdt) == engine_policy_fixdt) {
@@ -1116,8 +1142,9 @@ int engine_marktasks(struct engine *e) {
     }
   }

-  // message( "took %.3f %s." , clocks_from_ticks(getticks() - tic),
-  // clocks_getunit());
+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());

   /* All is well... */
   return 0;
@@ -1163,38 +1190,33 @@ void engine_print(struct engine *e) {
  */
 void engine_rebuild(struct engine *e) {

+  ticks tic = getticks();
+
   /* Clear the forcerebuild flag, whatever it was. */
   e->forcerebuild = 0;

   /* Re-build the space. */
-  // tic = getticks();
-  space_rebuild(e->s, 0.0, e->nodeID == 0);
-  // message( "space_rebuild took %.3f %s." ,
-  // clocks_from_ticks(getticks() - tic), clocks_getunit());
+  space_rebuild(e->s, 0.0, e->verbose);

   /* If in parallel, exchange the cell structure. */
 #ifdef WITH_MPI
-  // tic = getticks();
   engine_exchange_cells(e);
-  // message( "engine_exchange_cells took %.3f %s." ,
-  // clocks_from_ticks(getticks() - tic), clocks_getunit());
 #endif

   /* Re-build the tasks. */
-  // tic = getticks();
   engine_maketasks(e);
-  // message( "engine_maketasks took %.3f %s." ,
-  // clocks_from_ticks(getticks() - tic), clocks_getunit());

   /* Run through the tasks and mark as skip or not. */
-  // tic = getticks();
   if (engine_marktasks(e)) error("engine_marktasks failed after space_rebuild.");
-  // message( "engine_marktasks took %.3f %s." ,
-  // clocks_from_ticks(getticks() - tic), clocks_getunit());

   /* Print the status of the system */
   engine_print(e);

+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 }

 /**
@@ -1207,45 +1229,37 @@ void engine_prepare(struct engine *e) {
   int rebuild;

-  TIMER_TIC
+  TIMER_TIC;

   /* Run through the tasks and mark as skip or not. */
-  // tic = getticks();
   rebuild = (e->forcerebuild || engine_marktasks(e));
-  // message( "space_marktasks took %.3f %s." ,
-  // clocks_from_ticks(getticks() - tic), clocks_getunit());

   /* Collect the values of rebuild from all nodes. */
 #ifdef WITH_MPI
-  // tic = getticks();
   int buff;
   if (MPI_Allreduce(&rebuild, &buff, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD) !=
       MPI_SUCCESS)
     error("Failed to aggregate the rebuild flag across nodes.");
   rebuild = buff;
-  // message( "rebuild allreduce took %.3f %s." ,
-  // clocks_from_ticks(getticks() - tic), clocks_getunit());
 #endif
   e->tic_step = getticks();

   /* Did this not go through? */
   if (rebuild) {
-    // tic = getticks();
     engine_rebuild(e);
-    // message( "engine_rebuild took %.3f %s." ,
-    // clocks_from_ticks(getticks() - tic), clocks_getunit());
   }

   /* Re-rank the tasks every now and then. */
   if (e->tasks_age % engine_tasksreweight == 1) {
-    // tic = getticks();
     scheduler_reweight(&e->sched);
-    // message( "scheduler_reweight took %.3f %s." ,
-    // clocks_from_ticks(getticks() -tic), clocks_getunit());
   }
   e->tasks_age += 1;

   TIMER_TOC(timer_prepare);

+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 }

 /**
@@ -1653,6 +1667,7 @@ void engine_makeproxies(struct engine *e) {
   struct space *s = e->s;
   struct cell *cells = s->cells;
   struct proxy *proxies = e->proxies;
+  ticks tic = getticks();

   /* Prepare the proxies and the proxy index. */
   if (e->proxy_ind == NULL)
@@ -1735,6 +1750,9 @@ void engine_makeproxies(struct engine *e) {
     }
   }

+  if (e->verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 #else
   error("SWIFT was not compiled with MPI support.");
 #endif
@@ -1817,24 +1835,55 @@ static bool hyperthreads_present(void) {
 void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
                  int nr_queues, int nr_nodes, int nodeID, int policy,
-                 float timeBegin, float timeEnd, float dt_min, float dt_max) {
+                 float timeBegin, float timeEnd, float dt_min, float dt_max,
+                 int verbose) {

+  /* Store the values. */
+  e->s = s;
+  e->nr_threads = nr_threads;
+  e->policy = policy;
+  e->step = 0;
+  e->nullstep = 0;
+  e->nr_nodes = nr_nodes;
+  e->nodeID = nodeID;
+  e->proxy_ind = NULL;
+  e->nr_proxies = 0;
+  e->forcerebuild = 1;
+  e->forcerepart = REPART_NONE;
+  e->links = NULL;
+  e->nr_links = 0;
+  e->timeBegin = timeBegin;
+  e->timeEnd = timeEnd;
+  e->timeOld = timeBegin;
+  e->time = timeBegin;
+  e->ti_old = 0;
+  e->ti_current = 0;
+  e->timeStep = 0.;
+  e->dt_min = dt_min;
+  e->dt_max = dt_max;
+  e->file_stats = NULL;
+  e->verbose = verbose;
+  e->wallclock_time = 0.f;
+  engine_rank = nodeID;
+
+  /* Make the space link back to the engine. */
+  s->e = e;
+
-  int i, k;
 #if defined(HAVE_SETAFFINITY)
-  int nr_cores = sysconf(_SC_NPROCESSORS_ONLN);
-  int j, cpuid[nr_cores];
+  const int nr_cores = sysconf(_SC_NPROCESSORS_ONLN);
+  int cpuid[nr_cores];
   cpu_set_t cpuset;
   if ((policy & engine_policy_cputight) == engine_policy_cputight) {
-    for (k = 0; k < nr_cores; k++) cpuid[k] = k;
+    for (int k = 0; k < nr_cores; k++) cpuid[k] = k;
   } else {
     /* Get next highest power of 2. */
     int maxint = 1;
     while (maxint < nr_cores) maxint *= 2;

     cpuid[0] = 0;
-    k = 1;
-    for (i = 1; i < maxint; i *= 2)
-      for (j = maxint / i / 2; j < maxint; j += maxint / i)
+    int k = 1;
+    for (int i = 1; i < maxint; i *= 2)
+      for (int j = maxint / i / 2; j < maxint; j += maxint / i)
         if (j < nr_cores && j != 0) cpuid[k++] = j;

 #if defined(HAVE_LIBNUMA) && defined(_GNU_SOURCE)
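An aside on the loop nest just rewritten with C99 declarations: when engine_policy_cputight is not set, the else branch fills cpuid[] in a bit-reversal-style order so that successive runners land as far apart as possible across the cores. A standalone sketch of that ordering (nr_cores is hard-coded to 8 here purely for illustration):

    #include <stdio.h>

    int main(void) {
      const int nr_cores = 8; /* hard-coded for the demo */
      int cpuid[8];

      /* Get next highest power of 2. */
      int maxint = 1;
      while (maxint < nr_cores) maxint *= 2;

      /* Same interleaving as the else branch above. */
      cpuid[0] = 0;
      int k = 1;
      for (int i = 1; i < maxint; i *= 2)
        for (int j = maxint / i / 2; j < maxint; j += maxint / i)
          if (j < nr_cores && j != 0) cpuid[k++] = j;

      for (int i = 0; i < nr_cores; i++) printf("%i ", cpuid[i]);
      printf("\n"); /* prints: 0 4 2 6 1 3 5 7 */
      return 0;
    }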
@@ -1842,22 +1891,24 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
   if (numa_available() >= 0) {
     if (nodeID == 0) message("prefer NUMA-local CPUs");
-    int home = numa_node_of_cpu(sched_getcpu()), half = nr_cores / 2;
-    bool done = false, swap_hyperthreads = hyperthreads_present();
+    const int home = numa_node_of_cpu(sched_getcpu());
+    const int half = nr_cores / 2;
+    const int swap_hyperthreads = hyperthreads_present();
+    bool done = false;
     if (swap_hyperthreads && nodeID == 0)
       message("prefer physical cores to hyperthreads");

     while (!done) {
       done = true;
-      for (i = 1; i < nr_cores; i++) {
-        int node_a = numa_node_of_cpu(cpuid[i - 1]);
-        int node_b = numa_node_of_cpu(cpuid[i]);
+      for (int i = 1; i < nr_cores; i++) {
+        const int node_a = numa_node_of_cpu(cpuid[i - 1]);
+        const int node_b = numa_node_of_cpu(cpuid[i]);

         /* Avoid using local hyperthreads over unused remote physical cores.
          * Assume two hyperthreads, and that cpuid >= half partitions them. */
-        int thread_a = swap_hyperthreads && cpuid[i - 1] >= half;
-        int thread_b = swap_hyperthreads && cpuid[i] >= half;
+        const int thread_a = swap_hyperthreads && cpuid[i - 1] >= half;
+        const int thread_b = swap_hyperthreads && cpuid[i] >= half;

         bool swap = thread_a > thread_b;
         if (thread_a == thread_b)
@@ -1881,42 +1932,12 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
 #else
     printf("%s engine_init: cpu map is [ ", clocks_get_timeofday());
 #endif
-    for (i = 0; i < nr_cores; i++) printf("%i ", cpuid[i]);
+    for (int i = 0; i < nr_cores; i++) printf("%i ", cpuid[i]);
     printf("].\n");
   }
 }
 #endif

-  /* Store the values. */
-  e->s = s;
-  e->nr_threads = nr_threads;
-  e->policy = policy;
-  e->step = 0;
-  e->nullstep = 0;
-  e->nr_nodes = nr_nodes;
-  e->nodeID = nodeID;
-  e->proxy_ind = NULL;
-  e->nr_proxies = 0;
-  e->forcerebuild = 1;
-  e->forcerepart = REPART_NONE;
-  e->links = NULL;
-  e->nr_links = 0;
-  e->timeBegin = timeBegin;
-  e->timeEnd = timeEnd;
-  e->timeOld = timeBegin;
-  e->time = timeBegin;
-  e->ti_old = 0;
-  e->ti_current = 0;
-  e->timeStep = 0.;
-  e->dt_min = dt_min;
-  e->dt_max = dt_max;
-  e->file_stats = NULL;
-  e->wallclock_time = 0.f;
-  engine_rank = nodeID;
-
-  /* Make the space link back to the engine. */
-  s->e = e;
-
   /* Are we doing stuff in parallel? */
   if (nr_nodes > 1) {
 #ifndef WITH_MPI
@@ -2023,7 +2044,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
   s->nr_queues = nr_queues;

   /* Create the sorting tasks. */
-  for (i = 0; i < e->nr_threads; i++)
+  for (int i = 0; i < e->nr_threads; i++)
     scheduler_addtask(&e->sched, task_type_psort, task_subtype_none, i, 0, NULL,
                       NULL, 0);
@@ -2033,7 +2054,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
   if ((e->runners = (struct runner *)malloc(sizeof(struct runner) *
                                             nr_threads)) == NULL)
     error("Failed to allocate threads array.");
-  for (k = 0; k < nr_threads; k++) {
+  for (int k = 0; k < nr_threads; k++) {
     e->runners[k].id = k;
     e->runners[k].e = e;
     e->barrier_running += 1;
src/engine.h
@@ -158,6 +158,9 @@ struct engine {
   struct link *links;
   int nr_links, size_links;

+  /* Are we talkative ? */
+  int verbose;
+
 #ifdef WITH_MPI
   /* MPI data type for the particle transfers */
   MPI_Datatype part_mpi_type;
@@ -169,7 +172,8 @@ struct engine {
 void engine_barrier(struct engine *e, int tid);
 void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
                  int nr_queues, int nr_nodes, int nodeID, int policy,
-                 float timeBegin, float timeEnd, float dt_min, float dt_max);
+                 float timeBegin, float timeEnd, float dt_min, float dt_max,
+                 int verbose);
 void engine_launch(struct engine *e, int nr_runners, unsigned int mask,
                    unsigned int submask);
 void engine_prepare(struct engine *e);
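The new trailing int verbose parameter is what main.c threads through: every rank parses -v, but the call site passes (myrank == 0 && verbose), so the per-phase timing chatter is not multiplied across MPI ranks. A hypothetical call site for the extended prototype, assuming SWIFT's own headers are available; every numeric value is a placeholder:

    #include "engine.h" /* declares the new engine_init() prototype above */

    /* start_engine() is an illustrative wrapper, not part of SWIFT. */
    void start_engine(struct engine *e, struct space *s, int myrank,
                      int verbose) {
      engine_init(e, s, /*dt=*/1e-3f, /*nr_threads=*/16, /*nr_queues=*/16,
                  /*nr_nodes=*/1, /*nodeID=*/myrank, /*policy=*/0,
                  /*timeBegin=*/0.f, /*timeEnd=*/1.f, /*dt_min=*/1e-7f,
                  /*dt_max=*/1e-2f, /*verbose=*/(myrank == 0 && verbose));
    }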
src/runner.c
@@ -1061,7 +1061,7 @@ void *runner_main(void *data) {
         space_do_parts_sort();
         break;
       case task_type_split_cell:
-        space_split(e->s, t->ci);
+        space_do_split(e->s, t->ci);
         break;
       case task_type_rewait:
         scheduler_do_rewait((struct task *)t->ci, (struct task *)t->cj,
src/space.c
@@ -161,7 +161,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
   float h_max = s->cell_min / kernel_gamma / space_stretch, dmin;
   int i, j, k, cdim[3], nr_parts = s->nr_parts;
   struct cell *restrict c;
-  // ticks tic;
+  ticks tic = getticks();

   /* Run through the parts and get the current h_max. */
   // tic = getticks();
@@ -291,6 +291,10 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
     }
     s->maxdepth = 0;
   }

+  if (verbose)
+    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+            clocks_getunit());
 }

 /**
@@ -309,7 +313,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   struct part *restrict p;
   int *ind;
   double ih[3], dim[3];
-  // ticks tic;
+  ticks tic = getticks();

   /* Be verbose about this. */
   // message( "re)building space..." ); fflush(stdout);
@@ -394,10 +398,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 #endif

   /* Sort the parts according to their cells. */
-  // tic = getticks();
-  space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1);
-  // message( "parts_sort took %.3f %s." ,
-  // clocks_from_ticks(getticks() - tic), clocks_getunit());
+  space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose);

   /* Re-link the gparts. */
   for (k = 0; k < nr_parts; k++)
@@ -437,10 +438,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   /* TODO: Here we should exchange the gparts as well! */

   /* Sort the parts according to their cells. */
-  // tic = getticks();
-  gparts_sort(s->gparts, ind, nr_gparts, 0, s->nr_cells - 1);
-  // message( "gparts_sort took %.3f %s." ,
-  // clocks_from_ticks(getticks() - tic), clocks_getunit());
+  space_gparts_sort(s->gparts, ind, nr_gparts, 0, s->nr_cells - 1);

   /* Re-link the parts. */
   for (k = 0; k < nr_gparts; k++)
@@ -468,15 +466,32 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {