Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
SWIFT
SWIFTsim
Commits
1a631e1d
Commit
1a631e1d
authored
Nov 23, 2015
by
Peter W. Draper
Browse files
Merge branch 'master' into parallel_scheduler
Conflicts: src/runner.c src/space.c src/task.c src/task.h
parents
de72d25a
0689a0cd
Changes
9
Hide whitespace changes
Inline
Side-by-side
src/debug.c
View file @
1a631e1d
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2013 Matthieu Schaller (matthieu.schaller@durham.ac.uk),
* Pedro Gonnet (pedro.gonnet@durham.ac.uk).
* Copyright (c) 2013- 2015:
* Matthieu Schaller (matthieu.schaller@durham.ac.uk),
* Pedro Gonnet (pedro.gonnet@durham.ac.uk),
* Peter W. Draper (p.w.draper@durham.ac.uk).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
...
...
@@ -20,8 +22,10 @@
#include
<stdio.h>
#include
"config.h"
#include
"const.h"
#include
"part.h"
#include
"debug.h"
/**
* @brief Looks for the particle with the given id and prints its information to
...
...
@@ -98,3 +102,140 @@ void printParticle_single(struct part *p) {
p
->
rho_dh
,
p
->
density
.
div_v
,
p
->
u
,
p
->
force
.
u_dt
,
p
->
force
.
balsara
,
p
->
force
.
POrho2
,
p
->
force
.
v_sig
,
p
->
dt
);
}
#ifdef HAVE_METIS
/**
* @brief Dump the METIS graph in standard format, simple format and weights
* only, to a file.
*
* @description The standard format output can be read into the METIS
* command-line tools. The simple format is just the cell connectivity (this
* should not change between calls). The weights format is the standard one,
* minus the cell connectivity.
*
* The output filenames are generated from the prefix and the sequence number
* of calls. So the first is called <prefix>_std_001.dat, <prefix>_simple_001.dat,
* <prefix>_weights_001.dat, etc.
*
* @param prefix base output filename
* @param nvertices the number of vertices
* @param nvertexweights the number vertex weights
* @param cellconruns first part of cell connectivity info (CSR)
* @param cellcon second part of cell connectivity info (CSR)
* @param vertexweights weights of vertices
* @param vertexsizes size of vertices
* @param edgeweights weights of edges
*/
void
dumpMETISGraph
(
const
char
*
prefix
,
idx_t
nvertices
,
idx_t
nvertexweights
,
idx_t
*
cellconruns
,
idx_t
*
cellcon
,
idx_t
*
vertexweights
,
idx_t
*
vertexsizes
,
idx_t
*
edgeweights
)
{
FILE
*
stdfile
=
NULL
;
FILE
*
simplefile
=
NULL
;
FILE
*
weightfile
=
NULL
;
char
fname
[
200
];
idx_t
i
;
idx_t
j
;
int
haveedgeweight
=
0
;
int
havevertexsize
=
0
;
int
havevertexweight
=
0
;
static
int
nseq
=
0
;
nseq
++
;
if
(
vertexweights
!=
NULL
)
{
for
(
i
=
0
;
i
<
nvertices
*
nvertexweights
;
i
++
)
{
if
(
vertexweights
[
i
]
!=
1
)
{
havevertexweight
=
1
;
break
;
}
}
}
if
(
vertexsizes
!=
NULL
)
{
for
(
i
=
0
;
i
<
nvertices
;
i
++
)
{
if
(
vertexsizes
[
i
]
!=
1
)
{
havevertexsize
=
1
;
break
;
}
}
}
if
(
edgeweights
!=
NULL
)
{
for
(
i
=
0
;
i
<
cellconruns
[
nvertices
];
i
++
)
{
if
(
edgeweights
[
i
]
!=
1
)
{
haveedgeweight
=
1
;
break
;
}
}
}
/* Open output files. */
sprintf
(
fname
,
"%s_std_%03d.dat"
,
prefix
,
nseq
);
stdfile
=
fopen
(
fname
,
"w"
);
sprintf
(
fname
,
"%s_simple_%03d.dat"
,
prefix
,
nseq
);
simplefile
=
fopen
(
fname
,
"w"
);
if
(
havevertexweight
||
havevertexsize
||
haveedgeweight
)
{
sprintf
(
fname
,
"%s_weights_%03d.dat"
,
prefix
,
nseq
);
weightfile
=
fopen
(
fname
,
"w"
);
}
/* Write the header lines. */
fprintf
(
stdfile
,
"%"
PRIDX
" %"
PRIDX
,
nvertices
,
cellconruns
[
nvertices
]
/
2
);
fprintf
(
simplefile
,
"%"
PRIDX
" %"
PRIDX
,
nvertices
,
cellconruns
[
nvertices
]
/
2
);
if
(
havevertexweight
||
havevertexsize
||
haveedgeweight
)
{
fprintf
(
weightfile
,
"%"
PRIDX
" %"
PRIDX
,
nvertices
,
cellconruns
[
nvertices
]
/
2
);
fprintf
(
stdfile
,
" %d%d%d"
,
havevertexsize
,
havevertexweight
,
haveedgeweight
);
fprintf
(
weightfile
,
" %d%d%d"
,
havevertexsize
,
havevertexweight
,
haveedgeweight
);
if
(
havevertexweight
)
{
fprintf
(
stdfile
,
" %d"
,
(
int
)
nvertexweights
);
fprintf
(
weightfile
,
" %d"
,
(
int
)
nvertexweights
);
}
}
/* Write the rest of the graph. */
for
(
i
=
0
;
i
<
nvertices
;
i
++
)
{
fprintf
(
stdfile
,
"
\n
"
);
fprintf
(
simplefile
,
"
\n
"
);
if
(
weightfile
!=
NULL
)
{
fprintf
(
weightfile
,
"
\n
"
);
}
if
(
havevertexsize
)
{
fprintf
(
stdfile
,
" %"
PRIDX
,
vertexsizes
[
i
]);
fprintf
(
weightfile
,
" %"
PRIDX
,
vertexsizes
[
i
]);
}
if
(
havevertexweight
)
{
for
(
j
=
0
;
j
<
nvertexweights
;
j
++
)
{
fprintf
(
stdfile
,
" %"
PRIDX
,
vertexweights
[
i
*
nvertexweights
+
j
]);
fprintf
(
weightfile
,
" %"
PRIDX
,
vertexweights
[
i
*
nvertexweights
+
j
]);
}
}
for
(
j
=
cellconruns
[
i
];
j
<
cellconruns
[
i
+
1
];
j
++
)
{
fprintf
(
stdfile
,
" %"
PRIDX
,
cellcon
[
j
]
+
1
);
fprintf
(
simplefile
,
" %"
PRIDX
,
cellcon
[
j
]
+
1
);
if
(
haveedgeweight
)
{
fprintf
(
stdfile
,
" %"
PRIDX
,
edgeweights
[
j
]);
fprintf
(
weightfile
,
" %"
PRIDX
,
edgeweights
[
j
]);
}
}
}
fprintf
(
stdfile
,
"
\n
"
);
fprintf
(
simplefile
,
"
\n
"
);
if
(
weightfile
!=
NULL
)
{
fprintf
(
weightfile
,
"
\n
"
);
}
fclose
(
stdfile
);
fclose
(
simplefile
);
if
(
weightfile
!=
NULL
)
{
fclose
(
weightfile
);
}
}
#endif
src/debug.h
View file @
1a631e1d
...
...
@@ -27,4 +27,11 @@ void printParticle(struct part *parts, long long int i, int N);
void
printgParticle
(
struct
gpart
*
parts
,
long
long
int
i
,
int
N
);
void
printParticle_single
(
struct
part
*
p
);
#ifdef HAVE_METIS
#include
"metis.h"
void
dumpMETISGraph
(
const
char
*
prefix
,
idx_t
nvtxs
,
idx_t
ncon
,
idx_t
*
xadj
,
idx_t
*
adjncy
,
idx_t
*
vwgt
,
idx_t
*
vsize
,
idx_t
*
adjwgt
);
#endif
#endif
/* SWIFT_DEBUG_H */
src/engine.c
View file @
1a631e1d
...
...
@@ -169,6 +169,9 @@ void engine_redistribute(struct engine *e) {
}
const
int
cid
=
cell_getid
(
cdim
,
parts
[
k
].
x
[
0
]
*
ih
[
0
],
parts
[
k
].
x
[
1
]
*
ih
[
1
],
parts
[
k
].
x
[
2
]
*
ih
[
2
]);
/* if (cid < 0 || cid >= s->nr_cells)
error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].",
cid, k, parts[k].x[0], parts[k].x[1], parts[k].x[2]); */
dest
[
k
]
=
cells
[
cid
].
nodeID
;
counts
[
nodeID
*
nr_nodes
+
dest
[
k
]]
+=
1
;
}
...
...
@@ -303,7 +306,8 @@ void engine_repartition(struct engine *e) {
int
nr_nodes
=
e
->
nr_nodes
,
nodeID
=
e
->
nodeID
;
float
wscale
=
1e-3
,
vscale
=
1e-3
,
wscale_buff
;
idx_t
wtot
=
0
;
const
idx_t
wmax
=
1e9
/
e
->
nr_nodes
;
idx_t
wmax
=
1e9
/
e
->
nr_nodes
;
idx_t
wmin
;
/* Clear the repartition flag. */
e
->
forcerepart
=
0
;
...
...
@@ -486,6 +490,24 @@ void engine_repartition(struct engine *e) {
/* As of here, only one node needs to compute the partition. */
if
(
nodeID
==
0
)
{
/* Final rescale of all weights to avoid a large range. Large ranges have
* been seen to cause an incomplete graph. */
wmin
=
wmax
;
wmax
=
0
.
0
;
for
(
k
=
0
;
k
<
26
*
nr_cells
;
k
++
)
{
wmax
=
weights_e
[
k
]
>
wmax
?
weights_e
[
k
]
:
wmax
;
wmin
=
weights_e
[
k
]
<
wmin
?
weights_e
[
k
]
:
wmin
;
}
if
((
wmax
-
wmin
)
>
engine_maxmetisweight
)
{
wscale
=
engine_maxmetisweight
/
(
wmax
-
wmin
);
for
(
k
=
0
;
k
<
26
*
nr_cells
;
k
++
)
{
weights_e
[
k
]
=
(
weights_e
[
k
]
-
wmin
)
*
wscale
+
1
;
}
for
(
k
=
0
;
k
<
nr_cells
;
k
++
)
{
weights_v
[
k
]
=
(
weights_v
[
k
]
-
wmin
)
*
wscale
+
1
;
}
}
/* Check that the edge weights are fully symmetric. */
/* for ( cid = 0 ; cid < nr_cells ; cid++ )
for ( k = 0 ; k < 26 ; k++ ) {
...
...
@@ -544,16 +566,47 @@ void engine_repartition(struct engine *e) {
/* Call METIS. */
idx_t
one
=
1
,
idx_nr_cells
=
nr_cells
,
idx_nr_nodes
=
nr_nodes
;
idx_t
objval
;
/* Dump graph in METIS format */
/*dumpMETISGraph("metis_graph", idx_nr_cells, one, offsets, inds,
weights_v, NULL, weights_e);*/
if
(
METIS_PartGraphRecursive
(
&
idx_nr_cells
,
&
one
,
offsets
,
inds
,
weights_v
,
NULL
,
weights_e
,
&
idx_nr_nodes
,
NULL
,
NULL
,
options
,
&
objval
,
nodeIDs
)
!=
METIS_OK
)
error
(
"Call to METIS_PartGraph
Kway
failed."
);
error
(
"Call to METIS_PartGraph
Recursive
failed."
);
/* Dump the 3d array of cell IDs. */
/* printf( "engine_repartition: nodeIDs = reshape( [" );
for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ )
printf( "%i " , (int)nodeIDs[ i ] );
printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */
/* Check that the nodeIDs are ok. */
for
(
k
=
0
;
k
<
nr_cells
;
k
++
)
if
(
nodeIDs
[
k
]
<
0
||
nodeIDs
[
k
]
>=
nr_nodes
)
error
(
"Got bad nodeID %"
PRIDX
" for cell %i."
,
nodeIDs
[
k
],
k
);
/* Check that the partition is complete and all nodes have some work. */
int
present
[
nr_nodes
];
int
failed
=
0
;
for
(
i
=
0
;
i
<
nr_nodes
;
i
++
)
present
[
i
]
=
0
;
for
(
i
=
0
;
i
<
nr_cells
;
i
++
)
present
[
nodeIDs
[
i
]]
++
;
for
(
i
=
0
;
i
<
nr_nodes
;
i
++
)
{
if
(
!
present
[
i
])
{
failed
=
1
;
message
(
"Node %d is not present after repartition"
,
i
);
}
}
/* If partition failed continue with the current one, but make this
* clear. */
if
(
failed
)
{
message
(
"WARNING: METIS repartition has failed, continuing with "
"the current partition, load balance will not be optimal"
);
for
(
k
=
0
;
k
<
nr_cells
;
k
++
)
nodeIDs
[
k
]
=
cells
[
k
].
nodeID
;
}
}
/* Broadcast the result of the partition. */
...
...
@@ -2168,7 +2221,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads,
s
->
nr_queues
=
nr_queues
;
/* Append a kick1 task to each cell. */
scheduler_reset
(
&
e
->
sched
,
s
->
tot_cells
+
e
->
nr_threads
);
scheduler_reset
(
&
e
->
sched
,
2
*
s
->
tot_cells
+
e
->
nr_threads
);
for
(
k
=
0
;
k
<
s
->
nr_cells
;
k
++
)
s
->
cells
[
k
].
kick1
=
scheduler_addtask
(
&
e
->
sched
,
task_type_kick1
,
task_subtype_none
,
0
,
0
,
...
...
src/engine.h
View file @
1a631e1d
...
...
@@ -47,6 +47,8 @@
#define engine_maxproxies 64
#define engine_tasksreweight 10
#define engine_maxmetisweight 10000.0f
/* The rank of the engine as a global variable (for messages). */
extern
int
engine_rank
;
...
...
src/runner.c
View file @
1a631e1d
...
...
@@ -1291,6 +1291,9 @@ void *runner_main(void *data) {
case
task_type_psort
:
space_do_parts_sort
();
break
;
case
task_type_split_cell
:
space_split
(
e
->
s
,
t
->
ci
);
break
;
case
task_type_rewait
:
for
(
struct
task
*
t2
=
(
struct
task
*
)
t
->
ci
;
t2
!=
(
struct
task
*
)
t
->
cj
;
t2
++
)
{
...
...
src/scheduler.c
View file @
1a631e1d
...
...
@@ -130,6 +130,9 @@ void scheduler_splittasks(struct scheduler *s) {
break
;
}
/* Skip sorting tasks. */
if
(
t
->
type
==
task_type_psort
)
continue
;
/* Empty task? */
if
(
t
->
ci
==
NULL
||
(
t
->
type
==
task_type_pair
&&
t
->
cj
==
NULL
))
{
t
->
type
=
task_type_none
;
...
...
@@ -1069,7 +1072,7 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) {
}
}
/* Task definitely done. */
/* Task definitely done
, signal any sleeping runners
. */
if
(
!
t
->
implicit
)
{
t
->
toc
=
getticks
();
pthread_mutex_lock
(
&
s
->
sleep_mutex
);
...
...
src/space.c
View file @
1a631e1d
...
...
@@ -466,7 +466,11 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
/* At this point, we have the upper-level cells, old or new. Now make
sure that the parts in each cell are ok. */
// tic = getticks();
for
(
k
=
0
;
k
<
s
->
nr_cells
;
k
++
)
space_split
(
s
,
&
cells
[
k
]);
// for (k = 0; k < s->nr_cells; k++) space_split(s, &cells[k]);
for
(
k
=
0
;
k
<
s
->
nr_cells
;
k
++
)
scheduler_addtask
(
&
s
->
e
->
sched
,
task_type_split_cell
,
task_subtype_none
,
k
,
0
,
&
cells
[
k
],
NULL
,
0
);
engine_launch
(
s
->
e
,
s
->
e
->
nr_threads
,
1
<<
task_type_split_cell
);
// message( "space_split took %.3f ms." , (double)(getticks() - tic) / CPU_TPS
// * 1000 );
...
...
@@ -476,6 +480,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
* @brief Sort the particles and condensed particles according to the given
*indices.
*
* @param s The #space.
* @param ind The indices with respect to which the parts are sorted.
* @param N The number of parts
* @param min Lowest index.
...
...
@@ -483,11 +488,11 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
*/
void
space_parts_sort
(
struct
space
*
s
,
int
*
ind
,
int
N
,
int
min
,
int
max
)
{
// Populate
a
parallel_sort structure with the input data.
// Populate
the global
parallel_sort structure with the input data.
space_sort_struct
.
parts
=
s
->
parts
;
space_sort_struct
.
xparts
=
s
->
xparts
;
space_sort_struct
.
ind
=
ind
;
space_sort_struct
.
stack_size
=
2
*
(
max
-
min
)
+
10
;
space_sort_struct
.
stack_size
=
2
*
(
max
-
min
+
1
)
+
10
+
s
->
e
->
nr_threads
;
if
((
space_sort_struct
.
stack
=
malloc
(
sizeof
(
struct
qstack
)
*
space_sort_struct
.
stack_size
))
==
NULL
)
error
(
"Failed to allocate sorting stack."
);
...
...
@@ -510,8 +515,9 @@ void space_parts_sort(struct space *s, int *ind, int N, int min, int max) {
/* Verify space_sort_struct. */
/* for (int i = 1; i < N; i++)
if (ind[i - 1] > ind[i])
error("Sorting failed (ind[%i]=%i,ind[%i]=%i).", i - 1, ind[i - 1], i,
ind[i]); */
error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1, ind[i - 1], i,
ind[i], min, max);
message("Sorting succeeded."); */
// Clean up.
free
(
space_sort_struct
.
stack
);
...
...
@@ -525,15 +531,17 @@ void space_do_parts_sort() {
struct
xpart
*
xparts
=
space_sort_struct
.
xparts
;
/* Main loop. */
while
(
space_sort_struct
.
waiting
>
0
)
{
while
(
space_sort_struct
.
waiting
)
{
/* Grab an interval off the queue. */
int
qid
=
atomic_inc
(
&
space_sort_struct
.
first
)
%
space_sort_struct
.
stack_size
;
/*
Get the stack entry
. */
/*
Wait for the entry to be ready, or for the sorting do be done
. */
while
(
!
space_sort_struct
.
stack
[
qid
].
ready
)
if
(
!
space_sort_struct
.
waiting
)
return
;
/* Get the stack entry. */
int
i
=
space_sort_struct
.
stack
[
qid
].
i
;
int
j
=
space_sort_struct
.
stack
[
qid
].
j
;
int
min
=
space_sort_struct
.
stack
[
qid
].
min
;
...
...
@@ -545,6 +553,8 @@ void space_do_parts_sort() {
/* Bring beer. */
const
int
pivot
=
(
min
+
max
)
/
2
;
/* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.",
i, j, min, max, pivot); */
/* One pass of QuickSort's partitioning. */
int
ii
=
i
;
...
...
@@ -566,18 +576,18 @@ void space_do_parts_sort() {
}
/* Verify space_sort_struct. */
/* for (
int k = i
; k <= jj
; k++
)
if (
ind[k] > pivot
) {
message(
"sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
N=%i." , k ,
ind[k]
, pivot
, i
, j
, N
);
error(
"Partition failed (<=pivot)."
);
}
for (
int k = jj
+1
; k <= j
; k++
)
if (
ind[k] <= pivot
) {
message(
"sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
N=%i." , k ,
ind[k]
, pivot
, i
, j
, N
);
error(
"Partition failed (>pivot)."
);
} */
/* for (int k = i; k <= jj; k++)
if (ind[k] > pivot) {
message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i
.", k
,
ind[k], pivot, i, j);
error("Partition failed (<=pivot).");
}
for (int k = jj
+ 1
; k <= j; k++)
if (ind[k] <= pivot) {
message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i
.", k
,
ind[k], pivot, i, j);
error("Partition failed (>pivot).");
} */
/* Split-off largest interval. */
if
(
jj
-
i
>
j
-
jj
+
1
)
{
...
...
@@ -586,14 +596,15 @@ void space_do_parts_sort() {
if
(
jj
>
i
&&
pivot
>
min
)
{
qid
=
atomic_inc
(
&
space_sort_struct
.
last
)
%
space_sort_struct
.
stack_size
;
while
(
space_sort_struct
.
stack
[
qid
].
ready
);
space_sort_struct
.
stack
[
qid
].
i
=
i
;
space_sort_struct
.
stack
[
qid
].
j
=
jj
;
space_sort_struct
.
stack
[
qid
].
min
=
min
;
space_sort_struct
.
stack
[
qid
].
max
=
pivot
;
space_sort_struct
.
stack
[
qid
].
ready
=
1
;
if
(
atomic_inc
(
&
space_sort_struct
.
waiting
)
>=
space_sort_struct
.
stack_size
)
error
(
"Qstack overflow."
);
space_sort_struct
.
stack
[
qid
].
ready
=
1
;
}
/* Recurse on the right? */
...
...
@@ -606,17 +617,18 @@ void space_do_parts_sort() {
}
else
{
/* Recurse on the right? */
if
(
jj
+
1
<
j
&&
pivot
+
1
<
max
)
{
if
(
pivot
+
1
<
max
)
{
qid
=
atomic_inc
(
&
space_sort_struct
.
last
)
%
space_sort_struct
.
stack_size
;
while
(
space_sort_struct
.
stack
[
qid
].
ready
);
space_sort_struct
.
stack
[
qid
].
i
=
jj
+
1
;
space_sort_struct
.
stack
[
qid
].
j
=
j
;
space_sort_struct
.
stack
[
qid
].
min
=
pivot
+
1
;
space_sort_struct
.
stack
[
qid
].
max
=
max
;
space_sort_struct
.
stack
[
qid
].
ready
=
1
;
if
(
atomic_inc
(
&
space_sort_struct
.
waiting
)
>=
space_sort_struct
.
stack_size
)
error
(
"Qstack overflow."
);
space_sort_struct
.
stack
[
qid
].
ready
=
1
;
}
/* Recurse on the left? */
...
...
src/task.c
View file @
1a631e1d
...
...
@@ -46,7 +46,7 @@ const char *taskID_names[task_type_count] = {
"none"
,
"sort"
,
"self"
,
"pair"
,
"sub"
,
"ghost"
,
"kick1"
,
"kick2"
,
"send"
,
"recv"
,
"link"
,
"grav_pp"
,
"grav_mm"
,
"grav_up"
,
"grav_down"
,
"psort"
,
"rewait"
};
"psort"
,
"split_cell"
,
"rewait"
};
/**
* @brief Unlock the cell held by this task.
...
...
src/task.h
View file @
1a631e1d
...
...
@@ -45,6 +45,7 @@ enum task_types {
task_type_grav_up
,
task_type_grav_down
,
task_type_psort
,
task_type_split_cell
,
task_type_rewait
,
task_type_count
};
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment