Commit aee41060 — SWIFT / SWIFTsim
Authored 9 years ago by Pedro Gonnet
various fixes to the merge, making sure things compile.
Parent: 5f3bd99c
Merge requests: !136 Master, !113 Local variable cleanup
Showing 2 changed files with 12 additions and 337 deletions:
  src/debug.c:  2 additions, 2 deletions
  src/engine.c: 10 additions, 335 deletions
src/debug.c (+2 −2)
@@ -58,7 +58,7 @@ void printParticle(struct part *parts, struct xpart *xparts, long long int id,

   /* Look for the particle. */
   for (size_t i = 0; i < N; i++)
     if (parts[i].id == id) {
-      printf("## Particle[%d]:\n id=%lld", i, parts[i].id);
+      printf("## Particle[%zd]:\n id=%lld", i, parts[i].id);
       hydro_debug_particle(&parts[i], &xparts[i]);
       found = 1;
       break;
@@ -75,7 +75,7 @@ void printgParticle(struct gpart *parts, long long int id, int N) {

   for (size_t i = 0; i < N; i++)
     if (parts[i].id == -id ||
         (parts[i].id > 0 && parts[i].part->id == id)) {
-      printf("## gParticle[%d]: id=%lld, x=[%.16e,%.16e,%.16e], "
+      printf("## gParticle[%zd]: id=%lld, x=[%.16e,%.16e,%.16e], "
             "v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], m=%.3e, t_begin=%d, "
             "t_end=%d\n",
             i, parts[i].part->id, parts[i].x[0], parts[i].x[1], parts[i].x[2],
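The debug.c change above only swaps the printf length modifier: the loop index i is a size_t, so printing it with plain %d both trips -Wformat and is undefined where size_t is wider than int. A minimal standalone sketch of the same fix (illustrative only, not SWIFT code; %zu is shown for the unsigned case, while the commit itself uses %zd):

#include <stdio.h>
#include <stddef.h>

/* Why the %d -> %z fix matters: a size_t index needs the 'z' length
 * modifier, otherwise printf reads the argument with the wrong width on
 * LP64 systems (64-bit size_t, 32-bit int) and the behaviour is undefined. */
int main(void) {
  size_t i = 3000000000u;       /* does not fit in a 32-bit int */
  printf("index = %zu\n", i);   /* correct: 'z' matches size_t */
  /* printf("index = %d\n", i);    would warn under -Wformat and is UB */
  return 0;
}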
src/engine.c (+10 −335)
@@ -295,344 +295,17 @@ void engine_repartition(struct engine *e) {

#if defined(WITH_MPI) && defined(HAVE_METIS)

  int res;
  idx_t *inds, *nodeIDs;
  idx_t *weights_v = NULL, *weights_e = NULL;
  struct space *s = e->s;
  int nr_cells = s->nr_cells, my_cells = 0;
  struct cell *cells = s->cells;
  int ind[3], *cdim = s->cdim;
  struct task *tasks = e->sched.tasks;
  int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
  float wscale = 1e-3, vscale = 1e-3, wscale_buff;
  idx_t wtot = 0;
  idx_t wmax = 1e9 / e->nr_nodes;
  idx_t wmin;

  /* Clear the repartition flag. */
  enum repartition_type reparttype = e->forcerepart;
  e->forcerepart = REPART_NONE;

  /* Nothing to do if only using a single node. Also avoids METIS
   * bug that doesn't handle this case well. */
  if (nr_nodes == 1) return;

  /* Allocate the inds and weights. */
  if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 * nr_cells)) == NULL ||
      (weights_v = (idx_t *)malloc(sizeof(idx_t) * nr_cells)) == NULL ||
      (weights_e = (idx_t *)malloc(sizeof(idx_t) * 26 * nr_cells)) == NULL ||
      (nodeIDs = (idx_t *)malloc(sizeof(idx_t) * nr_cells)) == NULL)
    error("Failed to allocate inds and weights arrays.");

  /* Fill the inds array. */
  for (int cid = 0; cid < nr_cells; cid++) {
    ind[0] = cells[cid].loc[0] / s->cells[cid].h[0] + 0.5;
    ind[1] = cells[cid].loc[1] / s->cells[cid].h[1] + 0.5;
    ind[2] = cells[cid].loc[2] / s->cells[cid].h[2] + 0.5;
    int l = 0;
    for (int i = -1; i <= 1; i++) {
      int ii = ind[0] + i;
      if (ii < 0)
        ii += cdim[0];
      else if (ii >= cdim[0])
        ii -= cdim[0];
      for (int j = -1; j <= 1; j++) {
        int jj = ind[1] + j;
        if (jj < 0)
          jj += cdim[1];
        else if (jj >= cdim[1])
          jj -= cdim[1];
        for (int k = -1; k <= 1; k++) {
          int kk = ind[2] + k;
          if (kk < 0)
            kk += cdim[2];
          else if (kk >= cdim[2])
            kk -= cdim[2];
          if (i || j || k) {
            inds[cid * 26 + l] = cell_getid(cdim, ii, jj, kk);
            l += 1;
          }
        }
      }
    }
  }

  /* Init the weights arrays. */
  bzero(weights_e, sizeof(idx_t) * 26 * nr_cells);
  bzero(weights_v, sizeof(idx_t) * nr_cells);

  /* Loop over the tasks... */
  for (int j = 0; j < e->sched.nr_tasks; j++) {

    /* Get a pointer to the kth task. */
    const struct task *t = &tasks[j];

    /* Skip un-interesting tasks. */
    if (t->type != task_type_self && t->type != task_type_pair &&
        t->type != task_type_sub && t->type != task_type_ghost &&
        t->type != task_type_kick1 && t->type != task_type_kick2)
      continue;

    /* Get the task weight. */
    idx_t w = (t->toc - t->tic) * wscale;
    if (w < 0) error("Bad task weight (%" SCIDX ").", w);

    /* Do we need to re-scale? */
    wtot += w;
    while (wtot > wmax) {
      wscale /= 2;
      wtot /= 2;
      w /= 2;
      for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= 0.5;
      for (int k = 0; k < nr_cells; k++) weights_v[k] *= 0.5;
    }

    /* Get the top-level cells involved. */
    const struct cell *ci, *cj = NULL;
    for (ci = t->ci; ci->parent != NULL; ci = ci->parent)
      ;
    if (t->cj != NULL)
      for (cj = t->cj; cj->parent != NULL; cj = cj->parent)
        ;

    /* Get the cell IDs. */
    const int cid = ci - cells;

    /* Different weights for different tasks. */
    if (t->type == task_type_ghost || t->type == task_type_kick1 ||
        t->type == task_type_kick2) {

      /* Particle updates add only to vertex weight. */
      weights_v[cid] += w;
    }

    /* Self interaction? */
    else if ((t->type == task_type_self && ci->nodeID == nodeID) ||
             (t->type == task_type_sub && cj == NULL &&
              ci->nodeID == nodeID)) {

      /* Self interactions add only to vertex weight. */
      weights_v[cid] += w;
    }

    /* Pair? */
    else if (t->type == task_type_pair ||
             (t->type == task_type_sub && cj != NULL)) {

      /* In-cell pair? */
      if (ci == cj) {

        /* Add weight to vertex for ci. */
        weights_v[cid] += w;
      }

      /* Distinct cells with local ci? */
      else if (ci->nodeID == nodeID) {

        /* Index of the jth cell. */
        const int cjd = cj - cells;

        /* Add half of weight to each cell. */
        if (ci->nodeID == nodeID) weights_v[cid] += 0.5 * w;
        if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w;

        /* Add Weight to edge. */
        int k = 26 * cid;
        while (inds[k] != cjd) k++;
        weights_e[k] += w;
        k = 26 * cjd;
        while (inds[k] != cid) k++;
        weights_e[k] += w;
      }
    }
  }

  if (e->nr_nodes == 1) return;

  /* Get the minimum scaling and re-scale if necessary. */
  if ((res = MPI_Allreduce(&wscale, &wscale_buff, 1, MPI_FLOAT, MPI_MIN,
                           MPI_COMM_WORLD)) != MPI_SUCCESS) {
    char buff[MPI_MAX_ERROR_STRING];
    int len;
    MPI_Error_string(res, buff, &len);
    error("Failed to allreduce the weight scale (%s).", buff);
  }
  if (wscale_buff != wscale) {
    float scale = wscale_buff / wscale;
    for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= scale;
    for (int k = 0; k < nr_cells; k++) weights_v[k] *= scale;
  }

  /* Merge the weights arrays across all nodes. */
#if IDXTYPEWIDTH == 32
  if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v,
                        nr_cells, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) !=
      MPI_SUCCESS) {
#else
  if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v,
                        nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0,
                        MPI_COMM_WORLD)) != MPI_SUCCESS) {
#endif
    char buff[MPI_MAX_ERROR_STRING];
    int len;
    MPI_Error_string(res, buff, &len);
    error("Failed to allreduce vertex weights (%s).", buff);
  }
#if IDXTYPEWIDTH == 32
  if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e,
                 26 * nr_cells, MPI_INT, MPI_SUM, 0,
                 MPI_COMM_WORLD) != MPI_SUCCESS)
#else
  if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e,
                 26 * nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0,
                 MPI_COMM_WORLD) != MPI_SUCCESS)
#endif
    error("Failed to allreduce edge weights.");

  /* As of here, only one node needs to compute the partition. */
  if (nodeID == 0) {

    /* Final rescale of all weights to avoid a large range. Large ranges have
     * been seen to cause an incomplete graph. */
    wmin = wmax;
    wmax = 0.0;
    for (int k = 0; k < 26 * nr_cells; k++) {
      wmax = weights_e[k] > wmax ? weights_e[k] : wmax;
      wmin = weights_e[k] < wmin ? weights_e[k] : wmin;
    }
    if ((wmax - wmin) > engine_maxmetisweight) {
      wscale = engine_maxmetisweight / (wmax - wmin);
      for (int k = 0; k < 26 * nr_cells; k++) {
        weights_e[k] = (weights_e[k] - wmin) * wscale + 1;
      }
      for (int k = 0; k < nr_cells; k++) {
        weights_v[k] = (weights_v[k] - wmin) * wscale + 1;
      }
    }

    /* Check that the edge weights are fully symmetric. */
    /* for ( cid = 0 ; cid < nr_cells ; cid++ )
        for ( k = 0 ; k < 26 ; k++ ) {
          cjd = inds[ cid*26 + k ];
          for ( j = 26*cjd ; inds[j] != cid ; j++ );
          if ( weights_e[ cid*26+k ] != weights_e[ j ] )
            error( "Unsymmetric edge weights detected (%i vs %i)." ,
              weights_e[ cid*26+k ] , weights_e[ j ] );
        } */
    /* int w_min = weights_e[0], w_max = weights_e[0], w_tot = weights_e[0];
      for ( k = 1 ; k < 26*nr_cells ; k++ ) {
        w_tot += weights_e[k];
        if ( weights_e[k] < w_min )
          w_min = weights_e[k];
        else if ( weights_e[k] > w_max )
          w_max = weights_e[k];
      }
      message( "edge weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot );
      w_min = weights_e[0], w_max = weights_e[0]; w_tot = weights_v[0];
      for ( k = 1 ; k < nr_cells ; k++ ) {
        w_tot += weights_v[k];
        if ( weights_v[k] < w_min )
          w_min = weights_v[k];
        else if ( weights_v[k] > w_max )
          w_max = weights_v[k];
      }
      message( "vertex weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot );
    */

    /* Make sure there are no zero weights. */
    for (int k = 0; k < 26 * nr_cells; k++)
      if (weights_e[k] == 0) weights_e[k] = 1;
    for (int k = 0; k < nr_cells; k++)
      if ((weights_v[k] *= vscale) == 0) weights_v[k] = 1;

    /* Allocate and fill the connection array. */
    idx_t *offsets;
    if ((offsets = (idx_t *)malloc(sizeof(idx_t) * (nr_cells + 1))) == NULL)
      error("Failed to allocate offsets buffer.");
    offsets[0] = 0;
    for (int k = 0; k < nr_cells; k++) offsets[k + 1] = offsets[k] + 26;

    /* Set the METIS options. +1 to keep the GCC sanitizer happy. */
    idx_t options[METIS_NOPTIONS + 1];
    METIS_SetDefaultOptions(options);
    options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT;
    options[METIS_OPTION_NUMBERING] = 0;
    options[METIS_OPTION_CONTIG] = 1;
    options[METIS_OPTION_NCUTS] = 10;
    options[METIS_OPTION_NITER] = 20;
    // options[ METIS_OPTION_UFACTOR ] = 1;

    /* Set the initial partition, although this is probably ignored. */
    for (int k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID;

    /* Call METIS. */
    idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes;
    idx_t objval;

    /* Dump graph in METIS format */
    /*dumpMETISGraph("metis_graph", idx_nr_cells, one, offsets, inds,
                     weights_v, NULL, weights_e);*/

    if (METIS_PartGraphRecursive(&idx_nr_cells, &one, offsets, inds, weights_v,
                                 NULL, weights_e, &idx_nr_nodes, NULL, NULL,
                                 options, &objval, nodeIDs) != METIS_OK)
      error("Call to METIS_PartGraphRecursive failed.");

    /* Dump the 3d array of cell IDs. */
    /* printf( "engine_repartition: nodeIDs = reshape( [" );
      for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ )
        printf( "%i " , (int)nodeIDs[ i ] );
      printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */

    /* Check that the nodeIDs are ok. */
    for (int k = 0; k < nr_cells; k++) {
      if (nodeIDs[k] < 0 || nodeIDs[k] >= nr_nodes)
        error("Got bad nodeID %" PRIDX " for cell %i.", nodeIDs[k], k);
    }

    /* Check that the partition is complete and all nodes have some work. */
    int present[nr_nodes];
    int failed = 0;
    for (int i = 0; i < nr_nodes; i++) present[i] = 0;
    for (int i = 0; i < nr_cells; i++) present[nodeIDs[i]]++;
    for (int i = 0; i < nr_nodes; i++) {
      if (!present[i]) {
        failed = 1;
        message("Node %d is not present after repartition", i);
      }
    }

    /* If partition failed continue with the current one, but make this
     * clear. */
    if (failed) {
      message(
          "WARNING: METIS repartition has failed, continuing with "
          "the current partition, load balance will not be optimal");
      for (int k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID;
    }
  }

  /* Broadcast the result of the partition. */
#if IDXTYPEWIDTH == 32
  if (MPI_Bcast(nodeIDs, nr_cells, MPI_INT, 0, MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to bcast the node IDs.");
#else
  if (MPI_Bcast(nodeIDs, nr_cells, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD) !=
      MPI_SUCCESS)
    error("Failed to bcast the node IDs.");
#endif

  /* Set the cell nodeIDs and clear any non-local parts. */
  for (int k = 0; k < nr_cells; k++) {
    cells[k].nodeID = nodeIDs[k];
    if (nodeIDs[k] == nodeID) my_cells += 1;
  }

  /* Clean up. */
  free(inds);
  free(weights_v);
  free(weights_e);
  free(nodeIDs);

  /* Do the repartitioning. */
  partition_repartition(reparttype, e->nodeID, e->nr_nodes, e->s,
                        e->sched.tasks, e->sched.nr_tasks);

  /* Now comes the tricky part: Exchange particles between all nodes.
     This is done in two steps, first allreducing a matrix of ...
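The deleted body above builds a compressed-sparse-row graph (offsets/inds, 26 neighbours per top-level cell) plus vertex and edge weights and hands it to METIS_PartGraphRecursive; the surviving code instead delegates all of this to partition_repartition(). For reference, a minimal self-contained sketch of the same METIS call pattern on a toy 4-vertex ring graph (the toy data is illustrative only, not SWIFT's cell graph):

#include <metis.h>
#include <stdio.h>

/* CSR graph + METIS_PartGraphRecursive, as in the removed code: 4 vertices
 * in a ring, two neighbours each, partitioned into 2 parts. */
int main(void) {
  idx_t nvtxs = 4, ncon = 1, nparts = 2, objval;
  idx_t xadj[5] = {0, 2, 4, 6, 8};            /* offsets: 2 edges per vertex */
  idx_t adjncy[8] = {1, 3, 0, 2, 1, 3, 0, 2}; /* ring adjacency */
  idx_t part[4];

  idx_t options[METIS_NOPTIONS];
  METIS_SetDefaultOptions(options);
  options[METIS_OPTION_NUMBERING] = 0; /* C-style 0-based indexing */

  if (METIS_PartGraphRecursive(&nvtxs, &ncon, xadj, adjncy,
                               /*vwgt=*/NULL, /*vsize=*/NULL, /*adjwgt=*/NULL,
                               &nparts, /*tpwgts=*/NULL, /*ubvec=*/NULL,
                               options, &objval, part) != METIS_OK)
    return 1;

  for (idx_t k = 0; k < nvtxs; k++)
    printf("vertex %d -> part %d\n", (int)k, (int)part[k]);
  return 0;
}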
@@ -687,6 +360,7 @@ void engine_addtasks_grav(struct engine *e, struct cell *c, struct task *up,

void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj) {

#ifdef WITH_MPI
  struct link *l = NULL;
  struct scheduler *s = &e->sched;
@@ -740,6 +414,7 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj) {

void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
                          struct task *t_rho) {

#ifdef WITH_MPI
  struct scheduler *s = &e->sched;

  /* Do we need to construct a recv task? */
@@ -1321,7 +996,7 @@ int engine_marktasks(struct engine *e) {

  struct scheduler *s = &e->sched;
  const int nr_tasks = s->nr_tasks, *ind = s->tasks_ind;
  struct task *tasks = s->tasks;
  const float dt_step = e->dt_step;
  float ti_end = e->ti_current;

  /* Much less to do here if we're on a fixed time-step. */
  if ((e->policy & engine_policy_fixdt) == engine_policy_fixdt) {
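The engine_marktasks() context above ends by testing the fixed time-step policy with a bitwise AND against the full flag value, the usual bit-flag idiom. A tiny illustrative sketch of that idiom (the enum values below are made up for illustration, not SWIFT's actual policy bits):

#include <stdio.h>

/* Bit-flag idiom: each policy is one bit, and a policy is "on" when masking
 * the policy word with the flag returns the flag itself. */
enum policy { policy_none = 0, policy_mpi = (1 << 0), policy_fixdt = (1 << 1) };

int main(void) {
  int policy = policy_mpi | policy_fixdt;
  if ((policy & policy_fixdt) == policy_fixdt)
    printf("fixed time-step policy is active\n");
  return 0;
}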
@@ -1456,8 +1131,8 @@ void engine_print(struct engine *e) {

  /* Count and print the number of each task type. */
  int counts[task_type_count + 1];
-  for (k = 0; k <= task_type_count; k++) counts[k] = 0;
-  for (k = 0; k < sched->nr_tasks; k++)
+  for (int k = 0; k <= task_type_count; k++) counts[k] = 0;
+  for (int k = 0; k < sched->nr_tasks; k++)
    if (!sched->tasks[k].skip)
      counts[(int)sched->tasks[k].type] += 1;
    else
@@ -1468,7 +1143,7 @@ void engine_print(struct engine *e) {

#else
  printf("engine_print: task counts are [ %s=%i", taskID_names[0], counts[0]);
#endif
-  for (k = 1; k < task_type_count; k++)
+  for (int k = 1; k < task_type_count; k++)
    printf(" %s=%i", taskID_names[k], counts[k]);
  printf(" skipped=%i ]\n", counts[task_type_count]);
  fflush(stdout);