SWIFT / SWIFTsim · Commits

Commit 7949ef0f, authored Feb 11, 2020 by Matthieu Schaller

Merge branch 'partition-memory-edges' into 'master'

Initial partition with memory weighted edges

See merge request !1009

Parents: 80de01e8, 9e0d044a
Changes: 4
doc/RTD/source/ParameterFiles/parameter_description.rst
@@ -986,26 +986,34 @@ should contain. The type of partitioning attempted is controlled by the::

   DomainDecomposition:
     initial_type:

-parameter, which can have the values *memory*, *region*, *grid* or
+parameter, which can have the values *memory*, *edgememory*, *region*, *grid* or
 *vectorized*:

+    * *edgememory*
+
+      This is the default if METIS or ParMETIS is available. It performs a
+      partition based on the memory use of all the particles in each cell.
+      The total memory per cell is used to weight the cell vertex and all the
+      associated edges. This attempts to equalize the memory used by all the
+      ranks, but with some consideration given to the need to not cut dense
+      regions (by also minimizing the edge cut). How successful this attempt
+      is depends on the granularity of cells and particles and the number of
+      ranks: clearly, if most of the particles are in one cell, or in a small
+      region of the volume, balance is difficult or impossible. Having more
+      top-level cells makes it easier to calculate a good distribution (but
+      this comes at the cost of greater overheads).
+
     * *memory*

-      This is the default if METIS or ParMETIS is available. It performs a
-      partition based on the memory use of all the particles in each cell,
-      attempting to equalize the memory used by all the ranks. How successful
-      this attempt is depends on the granularity of cells and particles and
-      the number of ranks: clearly, if most of the particles are in one cell,
-      or in a small region of the volume, balance is difficult or impossible.
-      Having more top-level cells makes it easier to calculate a good
-      distribution (but this comes at the cost of greater overheads).
+      This is like *edgememory*, but doesn't include any edge weights. It
+      should balance the particle memory use per rank more exactly (though
+      note that effects like the number of cells per rank also play a role,
+      as that changes the need for foreign cells).

     * *region*

       The one other METIS/ParMETIS option is "region". This attempts to assign
       equal numbers of cells to each rank, with the surface area of the regions
       minimised (so we get blobs, rather than rectangular volumes of cells).

 If ParMETIS and METIS are not available two other options are possible, but
 will give a poorer partition:
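For reference, selecting the new scheme from a parameter file then reads as
follows (a minimal sketch; all other DomainDecomposition options are left at
their defaults)::

  DomainDecomposition:
    initial_type: edgememory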
src/debug.c
@@ -375,31 +375,11 @@ static void dumpCells_map(struct cell *c, void *data) {

   size_t *ldata = (size_t *)data;
   FILE *file = (FILE *)ldata[0];
   struct engine *e = (struct engine *)ldata[1];
-  float ntasks = c->nr_tasks;
   int super = (int)ldata[2];
   int active = (int)ldata[3];
   int mpiactive = (int)ldata[4];
   int pactive = (int)ldata[5];

-#if SWIFT_DEBUG_CHECKS
-  /* The c->nr_tasks field does not include all the tasks. So let's check this
-   * the hard way. Note pairs share the task 50/50 with the other cell. */
-  ntasks = 0.0f;
-  struct task *tasks = e->sched.tasks;
-  int nr_tasks = e->sched.nr_tasks;
-  for (int k = 0; k < nr_tasks; k++) {
-    if (tasks[k].cj == NULL) {
-      if (c == tasks[k].ci) {
-        ntasks = ntasks + 1.0f;
-      }
-    } else {
-      if (c == tasks[k].ci || c == tasks[k].cj) {
-        ntasks = ntasks + 0.5f;
-      }
-    }
-  }
-#endif
-
   /* Only cells with particles are dumped. */
   if (c->hydro.count > 0 || c->grav.count > 0 || c->stars.count > 0) {
@@ -429,6 +409,36 @@ static void dumpCells_map(struct cell *c, void *data) {

         (!super || ((super && c->super == c) || (c->parent == NULL))) &&
         active && mpiactive) {

+      /* The c->nr_tasks field does not include all the tasks. So let's check
+       * this the hard way. Note pairs share the task 50/50 with the other
+       * cell. Also accumulate all the time used by tasks of this cell and
+       * form some idea of the effective task depth. */
+      float ntasks = 0.0f;
+      struct task *tasks = e->sched.tasks;
+      int nr_tasks = e->sched.nr_tasks;
+      double ticsum = 0.0; /* Sum of work for this cell. */
+      double dsum = 0.0;
+      for (int k = 0; k < nr_tasks; k++) {
+        if (tasks[k].cj == NULL) {
+          if (tasks[k].ci != NULL) {
+            if (c == tasks[k].ci || c == tasks[k].ci->super) {
+              ntasks = ntasks + 1.0f;
+              ticsum += (tasks[k].toc - tasks[k].tic);
+              dsum += tasks[k].ci->depth;
+            }
+          }
+        } else {
+          if (c == tasks[k].ci || c == tasks[k].ci->super ||
+              c == tasks[k].cj || c == tasks[k].cj->super) {
+            ntasks = ntasks + 0.5f;
+            ticsum += 0.5 * (tasks[k].toc - tasks[k].tic);
+            if (tasks[k].ci != NULL) dsum += (tasks[k].ci->depth * 0.5);
+            dsum += (tasks[k].cj->depth * 0.5);
+          }
+        }
+      }
+      dsum /= (double)ntasks;
+
       /* If requested we work out how many particles are active in this cell. */
       int pactcount = 0;
       if (pactive) {
@@ -443,15 +453,16 @@ static void dumpCells_map(struct cell *c, void *data) {

           if (spart_is_active(&sparts[k], e)) pactcount++;
       }

-      fprintf(file,
-              " %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d %6d %6d %6d %6d %6d "
-              "%6.1f %20lld %6d %6d %6d %6d %6d %6d %6d\n",
-              c->loc[0], c->loc[1], c->loc[2], c->width[0], c->width[1],
-              c->width[2], e->step, c->hydro.count, c->grav.count,
-              c->stars.count, pactcount, c->depth, ntasks,
-              c->hydro.ti_end_min, get_time_bin(c->hydro.ti_end_min),
-              (c->super == c), (c->parent == NULL),
-              cell_is_active_hydro(c, e), c->nodeID,
-              c->nodeID == e->nodeID, ismpiactive);
+      fprintf(file,
+              " %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d %6d %6d %6d %6d %6d %6d "
+              "%6.1f %20lld %6d %6d %6d %6d %6d %6d %6d %f %f\n",
+              c->loc[0], c->loc[1], c->loc[2], c->width[0], c->width[1],
+              c->width[2], e->step, c->hydro.count, c->grav.count,
+              c->stars.count, pactcount, c->depth, c->maxdepth, ntasks,
+              c->hydro.ti_end_min, get_time_bin(c->hydro.ti_end_min),
+              (c->super == c), (c->parent == NULL),
+              cell_is_active_hydro(c, e), c->nodeID,
+              c->nodeID == e->nodeID, ismpiactive, ticsum, dsum);
     }
   }
 }
@@ -483,11 +494,12 @@ void dumpCells(const char *prefix, int super, int active, int mpiactive,

   /* Header. */
-  fprintf(file,
-          "# %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s "
-          "%20s %6s %6s %6s %6s %6s %6s %6s\n",
-          "x", "y", "z", "xw", "yw", "zw", "step", "count", "gcount",
-          "scount", "actcount", "depth", "tasks", "ti_end_min", "timebin",
-          "issuper", "istop", "active", "rank", "local", "mpiactive");
+  fprintf(file,
+          "# %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s "
+          "%20s %6s %6s %6s %6s %6s %6s %6s %6s %6s\n",
+          "x", "y", "z", "xw", "yw", "zw", "step", "count", "gcount",
+          "scount", "actcount", "depth", "maxdepth", "tasks", "ti_end_min",
+          "timebin", "issuper", "istop", "active", "rank", "local",
+          "mpiactive", "ticsum", "avedepth");

   size_t data[6];
   data[0] = (size_t)file;
src/partition.c
@@ -66,7 +66,8 @@

 const char *initial_partition_name[] = {
     "axis aligned grids of cells", "vectorized point associated cells",
     "memory balanced, using particle weighted cells",
-    "similar sized regions, using unweighted cells"};
+    "similar sized regions, using unweighted cells",
+    "memory and edge balanced cells using particle weights"};

 /* Simple descriptions of repartition types for reports. */
 const char *repartition_name[] = {
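These strings have to stay in step with the initial-partition enum used
further down (INITPART_METIS_WEIGHT, INITPART_METIS_WEIGHT_EDGE and
INITPART_METIS_NOWEIGHT all appear in later hunks). A sketch of the assumed
correspondence — the first two enumerator names are illustrative guesses, not
taken from this diff:

/* Sketch only: the METIS_* names appear in the hunks below; INITPART_GRID
 * and INITPART_VECTORIZE are assumed names for the first two entries. */
enum partition_type {
  INITPART_GRID = 0,          /* "axis aligned grids of cells" */
  INITPART_VECTORIZE,         /* "vectorized point associated cells" */
  INITPART_METIS_WEIGHT,      /* "memory balanced, using particle weighted cells" */
  INITPART_METIS_NOWEIGHT,    /* "similar sized regions, using unweighted cells" */
  INITPART_METIS_WEIGHT_EDGE  /* "memory and edge balanced cells using particle weights" */
};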
@@ -399,55 +400,158 @@ static void ACCUMULATE_SIZES_MAPPER(gpart);
  */
 static void ACCUMULATE_SIZES_MAPPER(spart);

+/* qsort support. */
+static int ptrcmp(const void *p1, const void *p2) {
+  const double *v1 = *(const double **)p1;
+  const double *v2 = *(const double **)p2;
+  return (*v1) - (*v2);
+}
+
 /**
  * @brief Accumulate total memory size in particles per cell.
  *
  * @param s the space containing the cells.
+ * @param verbose whether to report any clipped cell counts.
  * @param counts the number of bytes in particles per cell. Should be
  *               allocated as size s->nr_cells.
  */
-static void accumulate_sizes(struct space *s, double *counts) {
+static void accumulate_sizes(struct space *s, int verbose, double *counts) {

   bzero(counts, sizeof(double) * s->nr_cells);

   struct counts_mapper_data mapper_data;
-  mapper_data.counts = counts;
   mapper_data.s = s;

-  double hsize = (double)sizeof(struct part);
-  if (s->nr_parts > 0) {
-    mapper_data.size = hsize;
-    threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_part, s->parts,
-                   s->nr_parts, sizeof(struct part), space_splitsize,
-                   &mapper_data);
-  }
-
-  double gsize = (double)sizeof(struct gpart);
+  double gsize = 0.0;
+  double *gcounts = NULL;
+  double hsize = 0.0;
+  double ssize = 0.0;
+
   if (s->nr_gparts > 0) {
+    /* Self-gravity gets more efficient with density (see gitlab issue #640),
+     * so we end up with too much weight in cells with larger numbers of
+     * gparts. To suppress this we fix an upper weight limit based on a
+     * percentile clip on the number of cells. Should be relatively harmless
+     * when not really needed. */
+    if ((gcounts = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL)
+      error("Failed to allocate gcounts buffer.");
+    bzero(gcounts, sizeof(double) * s->nr_cells);
+    gsize = (double)sizeof(struct gpart);
+
+    mapper_data.counts = gcounts;
     mapper_data.size = gsize;
     threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_gpart, s->gparts,
                    s->nr_gparts, sizeof(struct gpart), space_splitsize,
                    &mapper_data);
+
+    /* Get all the counts from all the nodes. */
+    if (MPI_Allreduce(MPI_IN_PLACE, gcounts, s->nr_cells, MPI_DOUBLE, MPI_SUM,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to allreduce particle cell gpart weights.");
+
+    /* Now we need to sort... */
+    double **ptrs = NULL;
+    if ((ptrs = (double **)malloc(sizeof(double *) * s->nr_cells)) == NULL)
+      error("Failed to allocate pointers buffer.");
+    for (int k = 0; k < s->nr_cells; k++) {
+      ptrs[k] = &gcounts[k];
+    }
+
+    /* Sort pointers, not counts... */
+    qsort(ptrs, s->nr_cells, sizeof(double *), ptrcmp);
+
+    /* Percentile cut keeps 99.8% of cells and clips above. */
+    int cut = ceil(s->nr_cells * 0.998);
+
+    /* And clip. */
+    int nadj = 0;
+    double clip = *ptrs[cut];
+    for (int k = cut + 1; k < s->nr_cells; k++) {
+      *ptrs[k] = clip;
+      nadj++;
+    }
+    if (verbose) message("clipped gravity part counts of %d cells", nadj);
+    free(ptrs);
+  }
+
+  /* Other particle types are assumed to correlate with processing time. */
+  if (s->nr_parts > 0) {
+    mapper_data.counts = counts;
+    hsize = (double)sizeof(struct part);
+    mapper_data.size = hsize;
+    threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_part, s->parts,
+                   s->nr_parts, sizeof(struct part), space_splitsize,
+                   &mapper_data);
   }

-  double ssize = (double)sizeof(struct spart);
   if (s->nr_sparts > 0) {
+    ssize = (double)sizeof(struct spart);
     mapper_data.size = ssize;
     threadpool_map(&s->e->threadpool, accumulate_sizes_mapper_spart, s->sparts,
                    s->nr_sparts, sizeof(struct spart), space_splitsize,
                    &mapper_data);
   }

+  /* Merge the counts arrays across all nodes, if needed. Doesn't include any
+   * gparts. */
+  if (s->nr_parts > 0 || s->nr_sparts > 0) {
+    if (MPI_Allreduce(MPI_IN_PLACE, counts, s->nr_cells, MPI_DOUBLE, MPI_SUM,
+                      MPI_COMM_WORLD) != MPI_SUCCESS)
+      error("Failed to allreduce particle cell weights.");
+  }
+
+  /* And merge with gravity counts. */
+  double sum = 0.0;
+  if (s->nr_gparts > 0) {
+    for (int k = 0; k < s->nr_cells; k++) {
+      counts[k] += gcounts[k];
+      sum += counts[k];
+    }
+    free(gcounts);
+  } else {
+    for (int k = 0; k < s->nr_cells; k++) {
+      sum += counts[k];
+    }
+  }
+
   /* Keep the sum of particles across all ranks in the range of IDX_MAX. */
-  if ((s->e->total_nr_parts * hsize + s->e->total_nr_gparts * gsize +
-       s->e->total_nr_sparts * ssize) > (double)IDX_MAX) {
-    double vscale =
-        (double)(IDX_MAX - 1000) /
-        (double)(s->e->total_nr_parts * hsize +
-                 s->e->total_nr_gparts * gsize +
-                 s->e->total_nr_sparts * ssize);
+  if (sum > (double)(IDX_MAX - 10000)) {
+    double vscale = (double)(IDX_MAX - 10000) / sum;
     for (int k = 0; k < s->nr_cells; k++) counts[k] *= vscale;
   }
 }

+/**
+ * @brief Make edge weights from the accumulated particle sizes per cell.
+ *
+ * @param s the space containing the cells.
+ * @param counts the number of bytes in particles per cell.
+ * @param edges weights for the edges of these regions. Should be 26 * counts.
+ */
+static void sizes_to_edges(struct space *s, double *counts, double *edges) {
+
+  bzero(edges, sizeof(double) * s->nr_cells * 26);
+
+  for (int l = 0; l < s->nr_cells; l++) {
+    int p = 0;
+    for (int i = -1; i <= 1; i++) {
+      int isid = ((i < 0) ? 0 : ((i > 0) ? 2 : 1));
+      for (int j = -1; j <= 1; j++) {
+        int jsid = isid * 3 + ((j < 0) ? 0 : ((j > 0) ? 2 : 1));
+        for (int k = -1; k <= 1; k++) {
+          int ksid = jsid * 3 + ((k < 0) ? 0 : ((k > 0) ? 2 : 1));
+
+          /* If not self, we work out the sort indices to get the expected
+           * fractional weight and add that. Scale to keep sum less than
+           * counts and a bit of tuning... */
+          if (i || j || k) {
+            edges[l * 26 + p] = counts[l] * sid_scale[sortlistID[ksid]] / 26.0;
+            p++;
+          }
+        }
+      }
+    }
+  }
+}
 #endif

 #if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
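The pointer sort in accumulate_sizes clips the heaviest cells in place while
leaving gcounts in cell order. A minimal self-contained sketch of the same
trick (demo code, not from the commit; the comparator here uses a three-way
compare, whereas ptrcmp above truncates the double difference to int):

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/* Three-way compare of the pointed-at doubles. */
static int ptrcmp_demo(const void *p1, const void *p2) {
  const double v1 = **(const double **)p1;
  const double v2 = **(const double **)p2;
  return (v1 > v2) - (v1 < v2);
}

int main(void) {
  double counts[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1000};
  const int n = 10;

  /* Sort pointers, not values, so the array keeps its cell ordering. */
  double *ptrs[10];
  for (int k = 0; k < n; k++) ptrs[k] = &counts[k];
  qsort(ptrs, n, sizeof(double *), ptrcmp_demo);

  /* Keep the lowest 80% here (99.8% in SWIFT) and clip everything above. */
  const int cut = (int)ceil(n * 0.8);
  const double clip = *ptrs[cut];
  for (int k = cut + 1; k < n; k++) *ptrs[k] = clip;

  /* Prints: 1 2 3 4 5 6 7 8 9 9 -- order preserved, outlier clipped. */
  for (int k = 0; k < n; k++) printf("%g ", counts[k]);
  printf("\n");
  return 0;
}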
@@ -464,7 +568,7 @@ static void split_metis(struct space *s, int nregions, int *celllist) {

   /* To check or visualise the partition dump all the cells. */
   /*if (engine_rank == 0) dumpCellRanks("metis_partition", s->cells_top,
-   *                                    s->nr_cells);*/
+                                        s->nr_cells);*/
 }
 #endif
@@ -780,7 +884,7 @@ static void pick_parmetis(int nodeID, struct space *s, int nregions,

   /* Dump graphs to disk files for testing. */
   /*dumpMETISGraph("parmetis_graph", ncells, 1, std_xadj, full_adjncy,
-    full_weights_v, NULL, full_weights_e);*/
+    full_weights_v, NULL, full_weights_e);
+  */

   /* xadj is set for each rank, different to serial version in that each
    * rank starts with 0, so we need to re-offset. */
@@ -1213,7 +1317,7 @@ static void pick_metis(int nodeID, struct space *s, int nregions,

   /* Dump graph in METIS format */
   /*dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, weights_v,
                    NULL, weights_e);*/

   if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, NULL,
                           weights_e, &idx_nregions, NULL, NULL, options,
                           ...
@@ -1536,22 +1640,22 @@ static void repart_edge_metis(int vweights, int eweights, int timebins,

   if (vweights && eweights) {
     if (vsum > esum) {
       if (vsum > (double)IDX_MAX) {
-        vscale = (double)(IDX_MAX - 1000) / vsum;
+        vscale = (double)(IDX_MAX - 10000) / vsum;
         escale = vscale;
       }
     } else {
       if (esum > (double)IDX_MAX) {
-        escale = (double)(IDX_MAX - 1000) / esum;
+        escale = (double)(IDX_MAX - 10000) / esum;
         vscale = escale;
       }
     }
   } else if (vweights) {
     if (vsum > (double)IDX_MAX) {
-      vscale = (double)(IDX_MAX - 1000) / vsum;
+      vscale = (double)(IDX_MAX - 10000) / vsum;
     }
   } else if (eweights) {
     if (esum > (double)IDX_MAX) {
-      escale = (double)(IDX_MAX - 1000) / esum;
+      escale = (double)(IDX_MAX - 10000) / esum;
     }
   }
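Note that when both weight sets are present, one common scale is applied to
vertex and edge weights so their relative magnitudes survive the rescaling;
the only change in this hunk is the headroom constant (1000 to 10000). A
sketch of the underlying invariant, with illustrative stand-ins for the METIS
types (scale_to_idx_cap is not a SWIFT function):

#include <stdint.h>

/* Illustrative stand-ins: METIS builds with either 32- or 64-bit idx_t. */
typedef int32_t idx_t;
#define IDX_MAX INT32_MAX

/* Scale positive double weights so their total stays below the idx_t maximum
 * with some headroom, mirroring the vscale/escale logic in the hunk above. */
static void scale_to_idx_cap(double *w, int n, double sum) {
  if (sum > (double)IDX_MAX) {
    const double scale = (double)(IDX_MAX - 10000) / sum;
    for (int k = 0; k < n; k++) w[k] *= scale;
  }
}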
@@ -1655,17 +1759,11 @@ static void repart_memory_metis(struct repartition *repartition, int nodeID,

   double *weights = NULL;
   if ((weights = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL)
     error("Failed to allocate cell weights buffer.");
-  bzero(weights, sizeof(double) * s->nr_cells);

   /* Check each particle and accumulate the sizes per cell. */
-  accumulate_sizes(s, weights);
-
-  /* Get all the counts from all the nodes. */
-  if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE, MPI_SUM,
-                    MPI_COMM_WORLD) != MPI_SUCCESS)
-    error("Failed to allreduce particle cell weights.");
+  accumulate_sizes(s, s->e->verbose, weights);

   /* Allocate cell list for the partition. If not already done. */
 #ifdef HAVE_PARMETIS
   int refine = 1;
 #endif
@@ -1839,27 +1937,38 @@ void partition_initial_partition(struct partition *initial_partition,

     }
-  } else if (initial_partition->type == INITPART_METIS_WEIGHT ||
-             initial_partition->type == INITPART_METIS_NOWEIGHT) {
+  } else if (initial_partition->type == INITPART_METIS_WEIGHT ||
+             initial_partition->type == INITPART_METIS_WEIGHT_EDGE ||
+             initial_partition->type == INITPART_METIS_NOWEIGHT) {
 #if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
     /* Simple k-way partition selected by METIS using cell particle
      * counts as weights or not. Should be best when starting with an
      * inhomogeneous dist.
      */
-    double *weights = NULL;
+
+    /* Space for particle sizes per cell, which will be used as weights. */
+    double *weights_v = NULL;
+    double *weights_e = NULL;
     if (initial_partition->type == INITPART_METIS_WEIGHT) {
-      /* Particle sizes per cell, which will be used as weights. */
-      if ((weights = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL)
-        error("Failed to allocate weights buffer.");
-      bzero(weights, sizeof(double) * s->nr_cells);
+      if ((weights_v = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL)
+        error("Failed to allocate weights_v buffer.");

       /* Check each particle and accumulate the sizes per cell. */
-      accumulate_sizes(s, weights);
-
-      /* Get all the counts from all the nodes. */
-      if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE,
-                        MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
-        error("Failed to allreduce particle cell weights.");
+      accumulate_sizes(s, s->e->verbose, weights_v);
+
+    } else if (initial_partition->type == INITPART_METIS_WEIGHT_EDGE) {
+      /* Particle sizes also counted towards the edges. */
+      if ((weights_v = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL)
+        error("Failed to allocate weights_v buffer.");
+      if ((weights_e = (double *)malloc(sizeof(double) * s->nr_cells * 26)) ==
+          NULL)
+        error("Failed to allocate weights_e buffer.");
+
+      /* Check each particle and accumulate the sizes per cell. */
+      accumulate_sizes(s, s->e->verbose, weights_v);
+
+      /* Spread these into edge weights. */
+      sizes_to_edges(s, weights_v, weights_e);
     }

     /* Do the calculation. */
@@ -1868,12 +1977,13 @@ void partition_initial_partition(struct partition *initial_partition,

       error("Failed to allocate celllist");
 #ifdef HAVE_PARMETIS
     if (initial_partition->usemetis) {
-      pick_metis(nodeID, s, nr_nodes, weights, NULL, celllist);
+      pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, celllist);
     } else {
-      pick_parmetis(nodeID, s, nr_nodes, weights, NULL, 0, 0, 0.0f, celllist);
+      pick_parmetis(nodeID, s, nr_nodes, weights_v, weights_e, 0, 0, 0.0f,
+                    celllist);
     }
 #else
-    pick_metis(nodeID, s, nr_nodes, weights, NULL, celllist);
+    pick_metis(nodeID, s, nr_nodes, weights_v, weights_e, celllist);
 #endif

     /* And apply to our cells */
@@ -1888,7 +1998,8 @@ void partition_initial_partition(struct partition *initial_partition,

       partition_initial_partition(initial_partition, nodeID, nr_nodes, s);
     }

-    if (weights != NULL) free(weights);
+    if (weights_v != NULL) free(weights_v);
+    if (weights_e != NULL) free(weights_e);
     free(celllist);
 #else