Commit 11906d34 authored by Matthieu Schaller

Merge branch 'repart-time' into 'master'

Add time based edge repartitioning

See merge request !458
parents 3a9792ab b75463d2
#!/usr/bin/env python
"""
Usage:
analysedumpcells.py nx ny nz cell<1>.dat cell<2>.dat ...
Analyses a number of output files created by calls to the dumpCells() debug
function (presumably called in engine_step()) to output a list of active
top-level cells, identifying those that are on the edges of the volumes being
processed by the various nodes. The number of such edge cells should be
minimised to reduce the MPI communications.
The nx, ny and nz arguments are the numbers of cells in the complete space;
we need these so that we can wrap around the edges of the space correctly.
This file is part of SWIFT.
Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import pylab as pl
import numpy as np
import sys
import pandas
xcol = 0
ycol = 1
zcol = 2
xwcol = 3
ywcol = 4
zwcol = 5
localcol = 18
supercol = 15
activecol = 16
# Command-line arguments.
if len(sys.argv) < 5:
    print "usage: ", sys.argv[0], " nx ny nz cell1.dat cell2.dat ..."
    sys.exit(1)
nx = int(sys.argv[1])
ny = int(sys.argv[2])
nz = int(sys.argv[3])
print "# x y z onedge"
allactives = []
onedge = 0
tcount = 0
for i in range(4, len(sys.argv)):

    # Read the file.
    data = pl.loadtxt(sys.argv[i])
    #print data

    # Select cells that are on the current rank and are super cells.
    rdata = data[data[:,localcol] == 1]
    tdata = rdata[rdata[:,supercol] == 1]

    # Separation of the cells is in data.
    xwidth = tdata[0,xwcol]
    ywidth = tdata[0,ywcol]
    zwidth = tdata[0,zwcol]

    # Fill space nx, ny, nz with all top-level cells and flag their active
    # state.
    space = np.zeros((nx,ny,nz))
    actives = []
    for line in tdata:
        ix = int(np.rint(line[xcol] / xwidth))
        iy = int(np.rint(line[ycol] / ywidth))
        iz = int(np.rint(line[zcol] / zwidth))
        active = int(line[activecol])
        space[ix,iy,iz] = 1 + active
        tcount = tcount + 1
        if active == 1:
            actives.append([ix, iy, iz, line])

    # Report all active cells and flag any without 26 neighbours. These are
    # on the edge of the partition volume and will have foreign neighbour
    # cells.
    for active in actives:
        count = 0
        for ii in [-1, 0, 1]:
            i = active[0] + ii
            if i < 0:
                i = i + nx
            elif i >= nx:
                i = i - nx
            for jj in [-1, 0, 1]:
                j = active[1] + jj
                if j < 0:
                    j = j + ny
                elif j >= ny:
                    j = j - ny
                for kk in [-1, 0, 1]:
                    k = active[2] + kk
                    if k < 0:
                        k = k + nz
                    elif k >= nz:
                        k = k - nz
                    if space[i, j, k] > 0:
                        count = count + 1
        if count < 27:
            onedge = onedge + 1
            print active[3][0], active[3][1], active[3][2], 1
        else:
            print active[3][0], active[3][1], active[3][2], 0

    allactives.extend(actives)

print "# top cells: ", tcount, " active: ", len(allactives), " on edge: ", onedge

sys.exit(0)
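
The neighbour test above wraps indices periodically, so cells on the faces of the box are compared against cells on the opposite faces. A minimal standalone sketch of the same 26-neighbour check, using a modulo wrap instead of the explicit +nx/-nx adjustments (the function name and the toy occupancy grid are illustrative only, not part of the script):

import numpy as np

def on_partition_edge(space, ix, iy, iz):
    # `space` is an (nx, ny, nz) occupancy grid that is non-zero for cells
    # held by this rank, like the array filled in the loop above.
    nx, ny, nz = space.shape
    for ii in (-1, 0, 1):
        for jj in (-1, 0, 1):
            for kk in (-1, 0, 1):
                # Periodic wrap, equivalent to the explicit +/- nx handling.
                i = (ix + ii) % nx
                j = (iy + jj) % ny
                k = (iz + kk) % nz
                if space[i, j, k] == 0:
                    return True
    return False

# Toy example: a 2x2x2 block of local cells in an 8^3 space sits on an edge.
space = np.zeros((8, 8, 8))
space[0:2, 0:2, 0:2] = 1
print(on_partition_edge(space, 0, 0, 0))  # True
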
@@ -80,8 +80,13 @@ DomainDecomposition:
   initial_grid_x: 10   # (Optional) Grid size if the "grid" strategy is chosen.
   initial_grid_y: 10   # ""
   initial_grid_z: 10   # ""
-  repartition_type: task_weights # (Optional) The re-decomposition strategy: "none", "task_weights", "particle_weights",
-                                 # "edge_task_weights" or "hybrid_weights".
+  repartition_type: costs/costs  # (Optional) The re-decomposition strategy, one of:
+                                 # "none/none", "costs/costs", "counts/none", "none/costs", "counts/costs",
+                                 # "costs/time" or "none/time".
+                                 # These are vertex/edge weights with "costs" as task timing, "counts" as
+                                 # sum of particles and "time" as the expected time of the next updates.
   trigger: 0.05        # (Optional) Fractional (<1) CPU time difference between MPI ranks required to trigger a
                        # new decomposition, or number of steps (>1) between decompositions
   minfrac: 0.9         # (Optional) Fractional of all particles that should be updated in previous step when
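
As the comment notes, trigger is interpreted in two ways depending on its value: below 1 it is the fractional CPU-time imbalance between ranks that forces a new decomposition, above 1 it is a fixed number of steps between decompositions. A small sketch of that dual interpretation (the function and variable names here are hypothetical, not SWIFT's):

def repartition_due(trigger, step, cpu_times):
    # trigger > 1: repartition every `trigger` steps.
    if trigger > 1:
        return step % int(trigger) == 0
    # trigger < 1: repartition when the CPU-time spread between the slowest
    # and fastest rank exceeds the given fraction.
    imbalance = (max(cpu_times) - min(cpu_times)) / max(cpu_times)
    return imbalance > trigger

# Ranks spending 9.0s and 10.0s per step differ by 10%, so the default
# trigger of 0.05 would request a new decomposition.
print(repartition_due(0.05, 42, [9.0, 10.0]))  # True
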
@@ -121,7 +126,7 @@ SineWavePotential:
  amplitude: 10.       # Amplitude of the sine wave (internal units)
  timestep_limit: 1.   # Time-step dimensionless pre-factor.
  growth_time: 0.      # (Optional) Time for the potential to grow to its final size.
# Parameters related to cooling function ----------------------------------------------
# Constant du/dt cooling function
#!/bin/bash
#
# Usage:
# process_cells nx ny nz nprocess
#
# Description:
# Process all the cell dumps in the current directory.
# Outputs a file per step with the active cells identified and marked as to
# whether they are near an edge or not. Note that this requires the number of
# cells per dimension of the space.
#
# Also outputs a graphic showing the fraction of active cells on edges
# for each step.
# Handle command-line
if test "$4" = ""; then
echo "Usage: $0 nx ny nz nprocess"
exit 1
fi
NX=$1
NY=$2
NZ=$3
NPROCS=$4
# Locate script.
SCRIPTHOME=$(dirname "$0")
# Find all files. Use version sort to get into correct order.
files=$(ls -v cells_*.dat)
if test $? != 0; then
    echo "Failed to find any cell dump files"
    exit 1
fi
# From the list of file names, work out the number of ranks.
ranks=$(ls -v cells_*.dat | sed 's,cells_\(.*\)_.*.dat,\1,' | sort | uniq | wc -l)
echo "Number of ranks = $ranks"
# Now construct a list of files ordered by step, so that all ranks of a given step are grouped together.
files=$(ls cells_*.dat | sort -t "_" -k 3,3 -n | xargs -n 4)
# Need number of steps.
nfiles=$(echo $files| wc -w)
echo "Number of files = $nfiles"
steps=$(( $nfiles / $ranks + 1 ))
echo "Number of steps = $steps"
# And process them.
echo "Processing cell dump files..."
echo $files | xargs -P $NPROCS -n 4 /bin/bash -c "${SCRIPTHOME}/process_cells_helper $NX $NY $NZ \$0 \$1 \$2 \$3"
# Create summary.
grep "top cells" step*-active-cells.dat | sort -h > active_cells.log
# And plot the fraction of active cells that are edge cells, per step.
stilts plot2plane ifmt=ascii in=active_cells.log xmin=-0.1 xmax=1.1 ymin=0 ymax=$steps grid=1 \
legend=false xpix=600 ypix=500 xlabel="Edge cells/Active cells" ylabel="Step" \
layer1=mark x1="col9/1.0/col6" y1="index" size1=3 shading1=aux auxmap=rainbow \
aux=col6 auxfunc=log auxlabel="Active cells" layer2=histogram x2="col9/1.0/col6" \
color2=grey binsize2=0.01 phase2=0.5 barform2=semi_steps thick2=1 \
out=active_cells.png
exit
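
The stilts call plots, for every step, the fraction of active cells that lie on a partition edge, taken from the summary line that analysedumpcells.py prints ("# top cells: ... active: ... on edge: ..."). If stilts is not available, roughly the same figure can be produced with a short Python sketch like the one below (matplotlib used as a stand-in; it omits the colouring by active-cell count of the original plot):

import glob
import re

import matplotlib.pyplot as plt

steps = []
fractions = []
for fname in sorted(glob.glob("step*-active-cells.dat")):
    with open(fname) as f:
        for line in f:
            # Summary line written by analysedumpcells.py.
            m = re.match(r"#\s*top cells:\s*(\d+)\s*active:\s*(\d+)\s*on edge:\s*(\d+)", line)
            if m:
                active = int(m.group(2))
                onedge = int(m.group(3))
                steps.append(int(re.search(r"step(\d+)", fname).group(1)))
                fractions.append(onedge / float(active) if active > 0 else 0.0)

plt.scatter(fractions, steps, s=9)
plt.xlabel("Edge cells/Active cells")
plt.ylabel("Step")
plt.xlim(-0.1, 1.1)
plt.savefig("active_cells.png")
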
#!/bin/bash
# Helper for process_cells.
# Locate script.
SCRIPTHOME=$(dirname "$0")
step=$(echo $4|sed 's,cells_\(.*\)_\(.*\).dat,\2,')
echo "${SCRIPTHOME}/analyse_dump_cells.py $* > step${step}-active-cells.dat"
${SCRIPTHOME}/analyse_dump_cells.py $* > step${step}-active-cells.dat
exit
@@ -276,10 +276,13 @@ int checkCellhdxmax(const struct cell *c, int *depth) {
  * only.
  */
 static void dumpCells_map(struct cell *c, void *data) {
-  uintptr_t *ldata = (uintptr_t *)data;
+  size_t *ldata = (size_t *)data;
   FILE *file = (FILE *)ldata[0];
   struct engine *e = (struct engine *)ldata[1];
   float ntasks = c->nr_tasks;
+  int active = (int)ldata[2];
+  int mpiactive = (int)ldata[3];
+  int pactive = (int)ldata[4];
 #if SWIFT_DEBUG_CHECKS
   /* The c->nr_tasks field does not include all the tasks. So let's check this
@@ -300,46 +300,94 @@ static void dumpCells_map(struct cell *c, void *data) {
   }
 #endif
-  /* Only locally active cells are dumped. */
-  if (c->count > 0 || c->gcount > 0 || c->scount > 0)
-    fprintf(file,
-            " %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d %6d %6d %6d "
-            "%6.1f %20lld %6d %6d %6d %6d\n",
-            c->loc[0], c->loc[1], c->loc[2], c->width[0], c->width[1],
-            c->width[2], c->count, c->gcount, c->scount, c->depth, ntasks,
-            c->ti_end_min, get_time_bin(c->ti_end_min), (c->super == c),
-            cell_is_active(c, e), c->nodeID);
+  /* Only cells with particles are dumped. */
+  if (c->count > 0 || c->gcount > 0 || c->scount > 0) {
+    /* In MPI mode we may only output cells with foreign partners.
+     * These define the edges of the partitions. */
+#if WITH_MPI
+    if (mpiactive)
+      mpiactive = (c->send_xv != NULL);
+    else
+      mpiactive = 1;
+#else
+    mpiactive = 1;
+#endif
+    /* Active cells, otherwise all. */
+    if (active)
+      active = cell_is_active(c, e);
+    else
+      active = 1;
+    /* So output local super cells that are active and have MPI tasks as
+     * requested. */
+    if (c->nodeID == e->nodeID && (c->super == c) && active && mpiactive) {
+      /* If requested we work out how many particles are active in this cell. */
+      int pactcount = 0;
+      if (pactive) {
+        const struct part *parts = c->parts;
+        for (int k = 0; k < c->count; k++)
+          if (part_is_active(&parts[k], e)) pactcount++;
+        struct gpart *gparts = c->gparts;
+        for (int k = 0; k < c->gcount; k++)
+          if (gpart_is_active(&gparts[k], e)) pactcount++;
+        struct spart *sparts = c->sparts;
+        for (int k = 0; k < c->scount; k++)
+          if (spart_is_active(&sparts[k], e)) pactcount++;
+      }
+      fprintf(file,
+              " %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d %6d %6d %6d %6d %6d "
+              "%6.1f %20lld %6d %6d %6d %6d %6d\n",
+              c->loc[0], c->loc[1], c->loc[2], c->width[0], c->width[1],
+              c->width[2], e->step, c->count, c->gcount, c->scount, pactcount,
+              c->depth, ntasks, c->ti_end_min, get_time_bin(c->ti_end_min),
+              (c->super == c), cell_is_active(c, e), c->nodeID,
+              c->nodeID == e->nodeID);
+    }
+  }
 }
 /**
  * @brief Dump the location, depth, task counts and timebins and active state,
- * for all cells to a simple text file.
+ * for all cells to a simple text file. A more costly count of the active
+ * particles in a cell can also be output.
  *
- * @param prefix base output filename
+ * @param prefix base output filename, result is written to
+ *        %prefix%_%rank%_%step%.dat
+ * @param active just output active cells.
+ * @param mpiactive just output MPI active cells, i.e. those with foreign cells.
+ * @param pactive also output a count of active particles.
  * @param s the space holding the cells to dump.
+ * @param rank node ID of MPI rank, or 0 if not relevant.
+ * @param step the current engine step, or some unique integer.
  */
-void dumpCells(const char *prefix, struct space *s) {
+void dumpCells(const char *prefix, int active, int mpiactive, int pactive,
+               struct space *s, int rank, int step) {
   FILE *file = NULL;
   /* Name of output file. */
-  static int nseq = 0;
   char fname[200];
-  int uniq = atomic_inc(&nseq);
-  sprintf(fname, "%s_%03d.dat", prefix, uniq);
+  sprintf(fname, "%s_%03d_%03d.dat", prefix, rank, step);
   file = fopen(fname, "w");
   /* Header. */
   fprintf(file,
-          "# %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s "
-          "%20s %6s %6s %6s\n",
-          "x", "y", "z", "xw", "yw", "zw", "count", "gcount", "scount", "depth",
-          "tasks", "ti_end_min", "timebin", "issuper", "active", "rank");
+          "# %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s "
+          "%20s %6s %6s %6s %6s %6s\n",
+          "x", "y", "z", "xw", "yw", "zw", "step", "count", "gcount", "scount",
+          "actcount", "depth", "tasks", "ti_end_min", "timebin", "issuper",
+          "active", "rank", "local");
-  uintptr_t data[2];
+  size_t data[5];
   data[0] = (size_t)file;
   data[1] = (size_t)s->e;
+  data[2] = (size_t)active;
+  data[3] = (size_t)mpiactive;
+  data[4] = (size_t)pactive;
   space_map_cells_pre(s, 1, dumpCells_map, &data);
   fclose(file);
 }
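
Each row of the resulting cells_<rank>_<step>.dat file now carries the step number, per-type particle counts, the optional active-particle count and the local/super/active flags, in the column order given by the header above. A short sketch of reading one such dump back with numpy (the file name is just an example of the %s_%03d_%03d.dat pattern):

import numpy as np

# Column indices following the header written by dumpCells():
# x y z xw yw zw step count gcount scount actcount depth tasks
# ti_end_min timebin issuper active rank local
ISSUPER, ACTIVE, LOCAL = 15, 16, 18

data = np.loadtxt("cells_000_001.dat", ndmin=2)

# Keep the local super cells, i.e. the rows analysedumpcells.py works with.
local_super = data[(data[:, LOCAL] == 1) & (data[:, ISSUPER] == 1)]

# Count how many of those are active this step.
nactive = int(np.sum(local_super[:, ACTIVE] == 1))
print("local super cells:", len(local_super), "active:", nactive)
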
@@ -36,7 +36,8 @@ void printgParticle_single(struct gpart *gp);
 int checkSpacehmax(struct space *s);
 int checkCellhdxmax(const struct cell *c, int *depth);
-void dumpCells(const char *prefix, struct space *s);
+void dumpCells(const char *prefix, int active, int mpiactive, int pactive,
+               struct space *s, int rank, int step);
 #ifdef HAVE_METIS
 #include "metis.h"
@@ -3752,6 +3752,9 @@ void engine_step(struct engine *e) {
   /* Print the number of active tasks ? */
   if (e->verbose) engine_print_task_counts(e);
+  /* Dump local cells and active particle counts. */
+  /* dumpCells("cells", 0, 0, 1, e->s, e->nodeID, e->step); */
 #ifdef SWIFT_DEBUG_CHECKS
   /* Check that we have the correct total mass in the top-level multipoles */
   size_t num_gpart_mpole = 0;
@@ -43,10 +43,13 @@ struct partition {
 /* Repartition type to use. */
 enum repartition_type {
   REPART_NONE = 0,
-  REPART_METIS_BOTH,
-  REPART_METIS_VERTEX,
-  REPART_METIS_EDGE,
-  REPART_METIS_VERTEX_EDGE
+  REPART_METIS_VERTEX_COSTS_EDGE_COSTS,
+  REPART_METIS_VERTEX_COUNTS,
+  REPART_METIS_EDGE_COSTS,
+  REPART_METIS_VERTEX_COUNTS_EDGE_COSTS,
+  REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS,
+  REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS,
+  REPART_METIS_EDGE_TIMEBINS
 };
 /* Repartition preferences. */
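
Read together with the parameter-file comment earlier, the vertex/edge strings appear to correspond to these enum values roughly as follows. The mapping is inferred from the names, not taken from the parser in partition.c, and the enum also carries a counts/time combination that the example parameter file does not list:

# Apparent correspondence between the documented repartition_type strings
# (vertex weights / edge weights) and the enum values above (inferred).
REPARTITION_TYPES = {
    "none/none":    "REPART_NONE",
    "costs/costs":  "REPART_METIS_VERTEX_COSTS_EDGE_COSTS",
    "counts/none":  "REPART_METIS_VERTEX_COUNTS",
    "none/costs":   "REPART_METIS_EDGE_COSTS",
    "counts/costs": "REPART_METIS_VERTEX_COUNTS_EDGE_COSTS",
    "costs/time":   "REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS",
    "counts/time":  "REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS",  # in the enum, not in the example file
    "none/time":    "REPART_METIS_EDGE_TIMEBINS",
}
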