Commit b75463d2 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'mpi-fixes-local' into repart-time

parents 38fbdf9c 205a2066
#!/usr/bin/env python
"""
Usage:
analyse_dump_cells.py nx ny nz cell<1>.dat cell<2>.dat ...
Analyses a number of output files created by calls to the dumpCells() debug
function (presumably called in engine_step()) to output a list of active
top-level cells, identifying those that are on the edges of the volumes being
processed by the various nodes. The point is that these should be minimised to
reduce the MPI communications.
The nx, ny and nz arguments are the number of cells in the complete space,
we need these so that we can wrap around the edge of space correctly.
This file is part of SWIFT.
Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import pylab as pl
import numpy as np
import sys
import pandas
# Columns of the dumpCells() output that this script reads.
xcol = 0
ycol = 1
zcol = 2
xwcol = 3
ywcol = 4
zwcol = 5
localcol = 18
supercol = 15
activecol = 16


def _wrapped_neighbours(index, size):
    """Return index-1, index and index+1 wrapped periodically into [0, size)."""
    return [(index + offset) % size for offset in (-1, 0, 1)]


def analyse_cells(data, nx, ny, nz):
    """Find the active local super cells of one dump and flag edge cells.

    Args:
        data: 2-D array of dumpCells() rows (one row per cell).
        nx, ny, nz: number of cells along each dimension of the whole space,
            used to wrap neighbour look-ups around the edge of space.

    Returns:
        A tuple ``(ntop, cells)``. ``ntop`` is the number of rows that are
        both local and super cells. ``cells`` has one ``(x, y, z, onedge)``
        tuple per such row whose active flag is 1, in input order; ``onedge``
        is 1 when fewer than 27 of the cell and its wrapped neighbours are
        present in this dump, else 0.
    """
    data = np.atleast_2d(np.asarray(data))
    if data.size == 0:
        return 0, []

    # Select cells that are on the current rank and are super cells.
    rdata = data[data[:, localcol] == 1]
    tdata = rdata[rdata[:, supercol] == 1]
    if len(tdata) == 0:
        # Nothing local in this dump; the widths below cannot be read.
        return 0, []

    # Separation of the cells is in data.
    xwidth = tdata[0, xwcol]
    ywidth = tdata[0, ywcol]
    zwidth = tdata[0, zwcol]

    # Fill space nx, ny, nz with all top-level cells and flag their active
    # state: 0 means absent, 1 present, 2 present and active.
    space = np.zeros((nx, ny, nz))
    actives = []
    for line in tdata:
        ix = int(np.rint(line[xcol] / xwidth))
        iy = int(np.rint(line[ycol] / ywidth))
        iz = int(np.rint(line[zcol] / zwidth))
        active = int(line[activecol])
        space[ix, iy, iz] = 1 + active
        if active == 1:
            actives.append((ix, iy, iz, line))

    # Flag any active cell without 26 neighbours. These are on the edge of
    # the partition volume and will have foreign neighbour cells.
    cells = []
    for ix, iy, iz, line in actives:
        block = space[np.ix_(_wrapped_neighbours(ix, nx),
                             _wrapped_neighbours(iy, ny),
                             _wrapped_neighbours(iz, nz))]
        # The count includes the cell itself, hence 27 for an interior cell.
        onedge = 1 if np.count_nonzero(block) < 27 else 0
        cells.append((line[xcol], line[ycol], line[zcol], onedge))

    return len(tdata), cells


def main(argv):
    """Analyse the dump files named in argv and print the report.

    Returns the process exit status: 1 after printing the usage message when
    too few arguments are given, otherwise 0.
    """
    # Command-line arguments.
    if len(argv) < 5:
        print("usage:  %s  nx ny nz cell1.dat cell2.dat ..." % argv[0])
        return 1
    nx = int(argv[1])
    ny = int(argv[2])
    nz = int(argv[3])

    print("# x y z onedge")

    tcount = 0
    nactive = 0
    onedge = 0
    for filename in argv[4:]:
        # ndmin=2 keeps a one-row dump two-dimensional.
        data = np.loadtxt(filename, ndmin=2)
        ntop, cells = analyse_cells(data, nx, ny, nz)
        tcount += ntop
        nactive += len(cells)
        for x, y, z, edge in cells:
            print("%s %s %s %d" % (x, y, z, edge))
            onedge += edge

    # Token layout is kept as before: process_cells plots columns 6 and 9
    # of this summary line.
    print("# top cells:  %d  active:  %d  on edge:  %d"
          % (tcount, nactive, onedge))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
#!/bin/bash
#
# Usage:
# process_cells nprocess
# process_cells nx ny nz nprocess
#
# Description:
# Process all the cell dumps in the current directory
# Process all the cell dumps in the current directory.
# Outputs file per rank with the active cells identified and marked as to
# whether they are near an edge or not. Note requires the numbers of cells
# per dimension of the space.
#
# Outputs file per rank with the active cells identified and marked
# as to whether they are near an edge or not.
# Also outputs a graphic showing the fraction of active cells on edges
# for each step.
# Handle command-line
if test "$1" = ""; then
echo "Usage: $0 nprocess"
if test "$4" = ""; then
echo "Usage: $0 nx ny nz nprocess"
exit 1
fi
NPROCS=$1
NX=$1
NY=$2
NZ=$3
NPROCS=$4
# Locate script.
SCRIPTHOME=$(dirname "$0")
......@@ -33,19 +40,25 @@ echo "Number of ranks = $ranks"
# Now construct a list of files ordered by rank, not step.
files=$(ls cells_*.dat | sort -t "_" -k 3,3 -n | xargs -n 4)
# Need number of steps.
nfiles=$(echo $files| wc -w)
echo "Number of files = $nfiles"
steps=$(( $nfiles / $ranks + 1 ))
echo "Number of steps = $steps"
# And process them, handing the helper the space dimensions given on the
# command line (NX NY NZ) rather than a fixed 20x20x20 grid.
echo "Processing cell dumps files..."
echo $files | xargs -P $NPROCS -n 4 /bin/bash -c "${SCRIPTHOME}/process_cells_helper $NX $NY $NZ \$0 \$1 \$2 \$3"
# Create summary.
grep "top cells" step*-active-cells.dat | sort -h > active_cells.log
# And plot of active cells to edge cells.
stilts plot2plane ifmt=ascii in=active_cells.log xmin=-0.1 xmax=1.1 ymin=-100 ymax=2200 grid=1 \
stilts plot2plane ifmt=ascii in=active_cells.log xmin=-0.1 xmax=1.1 ymin=0 ymax=$steps grid=1 \
legend=false xpix=600 ypix=500 xlabel="Edge cells/Active cells" ylabel="Step" \
layer1=mark x1="col9/1.0/col6" y1="index*7" size1=3 shading1=aux auxmap=rainbow \
layer1=mark x1="col9/1.0/col6" y1="index" size1=3 shading1=aux auxmap=rainbow \
aux=col6 auxfunc=log auxlabel="Active cells" layer2=histogram x2="col9/1.0/col6" \
color2=grey binsize2=0.01 phase2=0.5 barform2=semi_steps weight2=30 thick2=1 \
color2=grey binsize2=0.01 phase2=0.5 barform2=semi_steps thick2=1 \
out=active_cells.png
exit
......@@ -6,6 +6,7 @@
SCRIPTHOME=$(dirname "$0")
step=$(echo $4|sed 's,cells_\(.*\)_\(.*\).dat,\2,')
echo "${SCRIPTHOME}/analyse_dump_cells.py $* > step${step}-active-cells.dat"
${SCRIPTHOME}/analyse_dump_cells.py $* > step${step}-active-cells.dat
exit
......@@ -276,11 +276,13 @@ int checkCellhdxmax(const struct cell *c, int *depth) {
* only.
*/
static void dumpCells_map(struct cell *c, void *data) {
uintptr_t *ldata = (uintptr_t *)data;
size_t *ldata = (size_t *)data;
FILE *file = (FILE *)ldata[0];
struct engine *e = (struct engine *)ldata[1];
float ntasks = c->nr_tasks;
int pactive = (int)ldata[2];
int active = (int)ldata[2];
int mpiactive = (int)ldata[3];
int pactive = (int)ldata[4];
#if SWIFT_DEBUG_CHECKS
/* The c->nr_tasks field does not include all the tasks. So let's check this
......@@ -301,30 +303,44 @@ static void dumpCells_map(struct cell *c, void *data) {
}
#endif
/* Only locally active cells are dumped. */
/* Only cells with particles are dumped. */
if (c->count > 0 || c->gcount > 0 || c->scount > 0) {
/* If requested we work out how many particles are active in this cell. */
int pactcount = 0;
if (pactive) {
const struct part *parts = c->parts;
for (int k = 0; k < c->count; k++)
if (part_is_active(&parts[k], e)) pactcount++;
struct gpart *gparts = c->gparts;
for (int k = 0; k < c->gcount; k++)
if (gpart_is_active(&gparts[k], e)) pactcount++;
struct spart *sparts = c->sparts;
for (int k = 0; k < c->scount; k++)
if (spart_is_active(&sparts[k], e)) pactcount++;
}
/* In MPI mode we may only output cells with foreign partners.
* These define the edges of the partitions. */
#if WITH_MPI
int sendto = (c->send_xv != NULL);
if (mpiactive)
mpiactive = (c->send_xv != NULL);
else
mpiactive = 1;
#else
int sendto = 0;
mpiactive = 1;
#endif
/* Local cells that are active and are super cells and have MPI tasks. */
if (c->nodeID == e->nodeID && cell_is_active(c, e) && (c->super == c) && sendto)
/* Active cells, otherwise all. */
if (active)
active = cell_is_active(c, e);
else
active = 1;
/* So output local super cells that are active and have MPI tasks as
* requested. */
if (c->nodeID == e->nodeID && (c->super == c) && active && mpiactive) {
/* If requested we work out how many particles are active in this cell. */
int pactcount = 0;
if (pactive) {
const struct part *parts = c->parts;
for (int k = 0; k < c->count; k++)
if (part_is_active(&parts[k], e)) pactcount++;
struct gpart *gparts = c->gparts;
for (int k = 0; k < c->gcount; k++)
if (gpart_is_active(&gparts[k], e)) pactcount++;
struct spart *sparts = c->sparts;
for (int k = 0; k < c->scount; k++)
if (spart_is_active(&sparts[k], e)) pactcount++;
}
fprintf(file,
" %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d %6d %6d %6d %6d %6d "
"%6.1f %20lld %6d %6d %6d %6d %6d\n",
......@@ -333,7 +349,7 @@ static void dumpCells_map(struct cell *c, void *data) {
c->depth, ntasks, c->ti_end_min, get_time_bin(c->ti_end_min),
(c->super == c), cell_is_active(c, e), c->nodeID,
c->nodeID == e->nodeID);
}
}
}
......@@ -344,19 +360,21 @@ static void dumpCells_map(struct cell *c, void *data) {
*
* @param prefix base output filename, result is written to
* %prefix%_%rank%_%step%.dat
* @param active just output active cells.
* @param mpiactive just output MPI active cells, i.e. those with foreign cells.
* @param pactive also output a count of active particles.
* @param s the space holding the cells to dump.
* @param rank node ID of MPI rank, or 0 if not relevant.
* @param step the current engine step, or some unique integer.
*/
void dumpCells(const char *prefix, int pactive, struct space *s, int rank,
int step) {
void dumpCells(const char *prefix, int active, int mpiactive, int pactive,
struct space *s, int rank, int step) {
FILE *file = NULL;
/* Name of output file. */
char fname[200];
sprintf(fname, "%s_%03d.dat", prefix, step);
sprintf(fname, "%s_%03d_%03d.dat", prefix, rank, step);
file = fopen(fname, "w");
/* Header. */
......@@ -364,13 +382,15 @@ void dumpCells(const char *prefix, int pactive, struct space *s, int rank,
"# %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s "
"%20s %6s %6s %6s %6s %6s\n",
"x", "y", "z", "xw", "yw", "zw", "step", "count", "gcount", "scount",
"actcount", "depth", "tasks", "ti_end_min", "timebin",
"issuper", "active", "rank", "local");
"actcount", "depth", "tasks", "ti_end_min", "timebin", "issuper",
"active", "rank", "local");
uintptr_t data[3];
size_t data[5];
data[0] = (size_t)file;
data[1] = (size_t)s->e;
data[2] = (size_t)pactive;
data[2] = (size_t)active;
data[3] = (size_t)mpiactive;
data[4] = (size_t)pactive;
space_map_cells_pre(s, 1, dumpCells_map, &data);
fclose(file);
}
......
......@@ -36,8 +36,8 @@ void printgParticle_single(struct gpart *gp);
int checkSpacehmax(struct space *s);
int checkCellhdxmax(const struct cell *c, int *depth);
void dumpCells(const char *prefix, int pactive, struct space *s, int rank,
int step);
void dumpCells(const char *prefix, int active, int mpiactive, int pactive,
struct space *s, int rank, int step);
#ifdef HAVE_METIS
#include "metis.h"
......
......@@ -3750,6 +3750,9 @@ void engine_step(struct engine *e) {
/* Print the number of active tasks ? */
if (e->verbose) engine_print_task_counts(e);
/* Dump local cells and active particle counts. */
/* dumpCells("cells", 0, 0, 1, e->s, e->nodeID, e->step); */
#ifdef SWIFT_DEBUG_CHECKS
/* Check that we have the correct total mass in the top-level multipoles */
size_t num_gpart_mpole = 0;
......
......@@ -528,7 +528,7 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
if (t->cost == 0) continue;
/* Get the task weight based on costs. */
double w = (double) t->cost;
double w = (double)t->cost;
/* Get the top-level cells involved. */
struct cell *ci, *cj;
......@@ -590,15 +590,17 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
* overflow int, so take care. */
int dti = num_time_bins - get_time_bin(ci->ti_end_min);
int dtj = num_time_bins - get_time_bin(cj->ti_end_min);
double dt = (double)(1<<dti) + (double)(1<<dtj);
double dt = (double)(1 << dti) + (double)(1 << dtj);
/* ci */
int kk;
for (kk = 26 * cid; inds[kk] != cjd; kk++);
for (kk = 26 * cid; inds[kk] != cjd; kk++)
;
weights_e[kk] += dt;
/* cj */
for (kk = 26 * cjd; inds[kk] != cid; kk++);
for (kk = 26 * cjd; inds[kk] != cid; kk++)
;
weights_e[kk] += dt;
} else {
......@@ -606,11 +608,13 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
/* ci */
int kk;
for (kk = 26 * cid; inds[kk] != cjd; kk++);
for (kk = 26 * cid; inds[kk] != cjd; kk++)
;
weights_e[kk] += w;
/* cj */
for (kk = 26 * cjd; inds[kk] != cid; kk++);
for (kk = 26 * cjd; inds[kk] != cid; kk++)
;
weights_e[kk] += w;
}
}
......@@ -624,14 +628,14 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
int res;
if (bothweights) {
if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v,
nr_cells, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD))
!= MPI_SUCCESS)
nr_cells, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD)) !=
MPI_SUCCESS)
mpi_error(res, "Failed to allreduce vertex weights.");
}
if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e,
26 * nr_cells, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD))
!= MPI_SUCCESS)
26 * nr_cells, MPI_DOUBLE, MPI_SUM, 0,
MPI_COMM_WORLD)) != MPI_SUCCESS)
mpi_error(res, "Failed to allreduce edge weights.");
/* Allocate cell list for the partition. */
......@@ -642,8 +646,8 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
if (nodeID == 0) {
/* We need to rescale the weights into the range of an integer for METIS
* really range of idx_t). Also we would like the range of vertex and
* edges weights to be simila r so they balance. */
* (really range of idx_t). Also we would like the range of vertex and
* edges weights to be similar so they balance. */
double wminv = 0.0;
double wmaxv = 0.0;
if (bothweights) {
......@@ -680,7 +684,6 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins,
for (int k = 0; k < nr_cells; k++) {
weights_v[k] = (weights_v[k] - wminv) * wscalev + 1.0;
}
}
/* Scale to the METIS range. */
......@@ -905,8 +908,8 @@ void partition_initial_partition(struct partition *initial_partition,
accumulate_counts(s, weights);
/* Get all the counts from all the nodes. */
if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE,
MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE, MPI_SUM,
MPI_COMM_WORLD) != MPI_SUCCESS)
error("Failed to allreduce particle cell weights.");
}
......@@ -1040,41 +1043,40 @@ void partition_init(struct partition *partition,
parser_get_opt_param_string(params, "DomainDecomposition:repartition_type",
part_type, default_repart);
if (strcmp("none/none", part_type) == 0) {
repartition->type = REPART_NONE;
repartition->type = REPART_NONE;
#ifdef HAVE_METIS
} else if (strcmp("costs/costs", part_type) == 0) {
repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_COSTS;
repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_COSTS;
} else if (strcmp("counts/none", part_type) == 0) {
repartition->type = REPART_METIS_VERTEX_COUNTS;
repartition->type = REPART_METIS_VERTEX_COUNTS;
} else if (strcmp("none/costs", part_type) == 0) {
repartition->type = REPART_METIS_EDGE_COSTS;
repartition->type = REPART_METIS_EDGE_COSTS;
} else if (strcmp("counts/costs", part_type) == 0) {
repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_COSTS;
repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_COSTS;
} else if (strcmp("costs/time", part_type) == 0) {
repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS;
repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS;
} else if (strcmp("counts/time", part_type) == 0) {
repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS;
repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS;
} else if (strcmp("none/time", part_type) == 0) {
repartition->type = REPART_METIS_EDGE_TIMEBINS;
repartition->type = REPART_METIS_EDGE_TIMEBINS;
} else {
message("Invalid choice of re-partition type '%s'.", part_type);
error(
"Permitted values are: 'none/none', 'costs/costs',"
"'counts/none', 'none/costs', 'counts/costs', "
"'costs/time', 'counts/time' or 'none/time'");
message("Invalid choice of re-partition type '%s'.", part_type);
error(
"Permitted values are: 'none/none', 'costs/costs',"
"'counts/none', 'none/costs', 'counts/costs', "
"'costs/time', 'counts/time' or 'none/time'");
#else
} else {
message("Invalid choice of re-partition type '%s'.", part_type);
error("Permitted values are: 'none/none' when compiled without METIS.");
message("Invalid choice of re-partition type '%s'.", part_type);
error("Permitted values are: 'none/none' when compiled without METIS.");
#endif
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment