diff --git a/examples/analyse_dump_cells.py b/examples/analyse_dump_cells.py new file mode 100755 index 0000000000000000000000000000000000000000..3140e799566c75fe494a75895db0f4a8dcff4e57 --- /dev/null +++ b/examples/analyse_dump_cells.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python +""" +Usage: + analysedumpcells.py nx ny nx cell<1>.dat cell<2>.dat ... + +Analyses a number of output files created by calls to the dumpCells() debug +function (presumably called in engine_step()) to output a list of active +top-level cells, identifying those that are on the edges of the volumes being +processed on by various nodes. The point is that these should be minimised to +reduce the MPI communications. + +The nx, ny and nz arguments are the number of cells in the complete space, +we need these so that we can wrap around the edge of space correctly. + +This file is part of SWIFT. +Copyright (c) 2017 Peter W. Draper (p.w.draper@durham.ac.uk) + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +import pylab as pl +import numpy as np +import sys +import pandas + +xcol = 0 +ycol = 1 +zcol = 2 +xwcol = 3 +ywcol = 4 +zwcol = 5 +localcol = 18 +supercol = 15 +activecol = 16 + +# Command-line arguments. +if len(sys.argv) < 5: + print "usage: ", sys.argv[0], " nx ny nz cell1.dat cell2.dat ..." + sys.exit(1) +nx = int(sys.argv[1]) +ny = int(sys.argv[2]) +nz = int(sys.argv[3]) + +print "# x y z onedge" +allactives = [] +onedge = 0 +tcount = 0 +for i in range(4, len(sys.argv)): + + # Read the file. + data = pl.loadtxt(sys.argv[i]) + #print data + + # Select cells that are on the current rank and are super cells. + rdata = data[data[:,localcol] == 1] + tdata = rdata[rdata[:,supercol] == 1] + + # Separation of the cells is in data. + xwidth = tdata[0,xwcol] + ywidth = tdata[0,ywcol] + zwidth = tdata[0,zwcol] + + # Fill space nx, ny,n nz with all toplevel cells and flag their active + # state. + space = np.zeros((nx,ny,nz)) + actives = [] + for line in tdata: + ix = int(np.rint(line[xcol] / xwidth)) + iy = int(np.rint(line[ycol] / ywidth)) + iz = int(np.rint(line[zcol] / zwidth)) + active = int(line[activecol]) + space[ix,iy,iz] = 1 + active + tcount = tcount + 1 + if active == 1: + actives.append([ix, iy, iz, line]) + + # Report all active cells and flag any without 26 neighbours. These are + # on the edge of the partition volume and will have foreign neighbour + # cells. + for active in actives: + count = 0 + for ii in [-1, 0, 1]: + i = active[0] + ii + if i < 0: + i = i + nx + elif i >= nx: + i = i - nx + + for jj in [-1, 0, 1]: + j = active[1] + jj + if j < 0: + j = j + ny + elif j >= ny: + j = j - ny + + for kk in [-1, 0, 1]: + k = active[2] + kk + if k < 0: + k = k + nz + elif k >= nz: + k = k - nz + if space[i, j, k] > 0: + count = count + 1 + if count < 27: + onedge = onedge + 1 + print active[3][0], active[3][1], active[3][2], 1 + else: + print active[3][0], active[3][1], active[3][2], 0 + + allactives.extend(actives) + +print "# top cells: ", tcount, " active: ", len(allactives), " on edge: ", onedge + +sys.exit(0) + diff --git a/examples/process_cells b/examples/process_cells index a9c72e4a092b41e7f49036937c6f8c53fc0339a8..eead4572387f2732cf0b86265f14d20039f73ae1 100755 --- a/examples/process_cells +++ b/examples/process_cells @@ -1,20 +1,27 @@ #!/bin/bash # # Usage: -# process_cells nprocess +# process_cells nx ny nz nprocess # # Description: -# Process all the cell dumps in the current directory +# Process all the cell dumps in the current directory. + +# Outputs file per rank with the active cells identified and marked as to +# whether they are near an edge or not. Note requires the numbers of cells +# per dimension of the space. # -# Outputs file per rank with the active cells identified and marked -# as to whether they are near an edge or not. +# Also outputs a graphic showing the fraction of active cells on edges +# for each step. # Handle command-line -if test "$1" = ""; then - echo "Usage: $0 nprocess" +if test "$4" = ""; then + echo "Usage: $0 nx ny nz nprocess" exit 1 fi -NPROCS=$1 +NX=$1 +NY=$2 +NZ=$3 +NPROCS=$4 # Locate script. SCRIPTHOME=$(dirname "$0") @@ -33,19 +40,25 @@ echo "Number of ranks = $ranks" # Now construct a list of files ordered by rank, not step. files=$(ls cells_*.dat | sort -t "_" -k 3,3 -n | xargs -n 4) +# Need number of steps. +nfiles=$(echo $files| wc -w) +echo "Number of files = $nfiles" +steps=$(( $nfiles / $ranks + 1 )) +echo "Number of steps = $steps" + # And process them, echo "Processing cell dumps files..." -echo $files | xargs -P $NPROCS -n 4 /bin/bash -c "${SCRIPTHOME}/process_cells_helper 20 20 20 \$0 \$1 \$2 \$3" +#echo $files | xargs -P $NPROCS -n 4 /bin/bash -c "${SCRIPTHOME}/process_cells_helper $NX $NY $NZ \$0 \$1 \$2 \$3" # Create summary. grep "top cells" step*-active-cells.dat | sort -h > active_cells.log # And plot of active cells to edge cells. -stilts plot2plane ifmt=ascii in=active_cells.log xmin=-0.1 xmax=1.1 ymin=-100 ymax=2200 grid=1 \ +stilts plot2plane ifmt=ascii in=active_cells.log xmin=-0.1 xmax=1.1 ymin=0 ymax=$steps grid=1 \ legend=false xpix=600 ypix=500 xlabel="Edge cells/Active cells" ylabel="Step" \ - layer1=mark x1="col9/1.0/col6" y1="index*7" size1=3 shading1=aux auxmap=rainbow \ + layer1=mark x1="col9/1.0/col6" y1="index" size1=3 shading1=aux auxmap=rainbow \ aux=col6 auxfunc=log auxlabel="Active cells" layer2=histogram x2="col9/1.0/col6" \ - color2=grey binsize2=0.01 phase2=0.5 barform2=semi_steps weight2=30 thick2=1 \ + color2=grey binsize2=0.01 phase2=0.5 barform2=semi_steps thick2=1 \ out=active_cells.png exit diff --git a/examples/process_cells_helper b/examples/process_cells_helper index 7862e7def0cc2770566f935c90851881413404f7..a96bdba0de56e04c28ed6d8675c705241ce241d9 100755 --- a/examples/process_cells_helper +++ b/examples/process_cells_helper @@ -6,6 +6,7 @@ SCRIPTHOME=$(dirname "$0") step=$(echo $4|sed 's,cells_\(.*\)_\(.*\).dat,\2,') +echo "${SCRIPTHOME}/analyse_dump_cells.py $* > step${step}-active-cells.dat" ${SCRIPTHOME}/analyse_dump_cells.py $* > step${step}-active-cells.dat exit diff --git a/src/debug.c b/src/debug.c index 4e99108f509fa4c327e0d2a801920cd8fb374bc2..4c521397b12c555923f8cea8a4f0cdb4c4551b97 100644 --- a/src/debug.c +++ b/src/debug.c @@ -276,11 +276,13 @@ int checkCellhdxmax(const struct cell *c, int *depth) { * only. */ static void dumpCells_map(struct cell *c, void *data) { - uintptr_t *ldata = (uintptr_t *)data; + size_t *ldata = (size_t *)data; FILE *file = (FILE *)ldata[0]; struct engine *e = (struct engine *)ldata[1]; float ntasks = c->nr_tasks; - int pactive = (int)ldata[2]; + int active = (int)ldata[2]; + int mpiactive = (int)ldata[3]; + int pactive = (int)ldata[4]; #if SWIFT_DEBUG_CHECKS /* The c->nr_tasks field does not include all the tasks. So let's check this @@ -301,30 +303,44 @@ static void dumpCells_map(struct cell *c, void *data) { } #endif - /* Only locally active cells are dumped. */ + /* Only cells with particles are dumped. */ if (c->count > 0 || c->gcount > 0 || c->scount > 0) { - /* If requested we work out how many particles are active in this cell. */ - int pactcount = 0; - if (pactive) { - const struct part *parts = c->parts; - for (int k = 0; k < c->count; k++) - if (part_is_active(&parts[k], e)) pactcount++; - struct gpart *gparts = c->gparts; - for (int k = 0; k < c->gcount; k++) - if (gpart_is_active(&gparts[k], e)) pactcount++; - struct spart *sparts = c->sparts; - for (int k = 0; k < c->scount; k++) - if (spart_is_active(&sparts[k], e)) pactcount++; - } +/* In MPI mode we may only output cells with foreign partners. + * These define the edges of the partitions. */ #if WITH_MPI - int sendto = (c->send_xv != NULL); + if (mpiactive) + mpiactive = (c->send_xv != NULL); + else + mpiactive = 1; #else - int sendto = 0; + mpiactive = 1; #endif - /* Local cells that are active and are super cells and have MPI tasks. */ - if (c->nodeID == e->nodeID && cell_is_active(c, e) && (c->super == c) && sendto) + /* Active cells, otherwise all. */ + if (active) + active = cell_is_active(c, e); + else + active = 1; + + /* So output local super cells that are active and have MPI tasks as + * requested. */ + if (c->nodeID == e->nodeID && (c->super == c) && active && mpiactive) { + + /* If requested we work out how many particles are active in this cell. */ + int pactcount = 0; + if (pactive) { + const struct part *parts = c->parts; + for (int k = 0; k < c->count; k++) + if (part_is_active(&parts[k], e)) pactcount++; + struct gpart *gparts = c->gparts; + for (int k = 0; k < c->gcount; k++) + if (gpart_is_active(&gparts[k], e)) pactcount++; + struct spart *sparts = c->sparts; + for (int k = 0; k < c->scount; k++) + if (spart_is_active(&sparts[k], e)) pactcount++; + } + fprintf(file, " %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d %6d %6d %6d %6d %6d " "%6.1f %20lld %6d %6d %6d %6d %6d\n", @@ -333,7 +349,7 @@ static void dumpCells_map(struct cell *c, void *data) { c->depth, ntasks, c->ti_end_min, get_time_bin(c->ti_end_min), (c->super == c), cell_is_active(c, e), c->nodeID, c->nodeID == e->nodeID); - + } } } @@ -344,19 +360,21 @@ static void dumpCells_map(struct cell *c, void *data) { * * @param prefix base output filename, result is written to * %prefix%_%rank%_%step%.dat + * @param active just output active cells. + * @param mpiactive just output MPI active cells, i.e. those with foreign cells. * @param pactive also output a count of active particles. * @param s the space holding the cells to dump. * @param rank node ID of MPI rank, or 0 if not relevant. * @param step the current engine step, or some unique integer. */ -void dumpCells(const char *prefix, int pactive, struct space *s, int rank, - int step) { +void dumpCells(const char *prefix, int active, int mpiactive, int pactive, + struct space *s, int rank, int step) { FILE *file = NULL; /* Name of output file. */ char fname[200]; - sprintf(fname, "%s_%03d.dat", prefix, step); + sprintf(fname, "%s_%03d_%03d.dat", prefix, rank, step); file = fopen(fname, "w"); /* Header. */ @@ -364,13 +382,15 @@ void dumpCells(const char *prefix, int pactive, struct space *s, int rank, "# %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s " "%20s %6s %6s %6s %6s %6s\n", "x", "y", "z", "xw", "yw", "zw", "step", "count", "gcount", "scount", - "actcount", "depth", "tasks", "ti_end_min", "timebin", - "issuper", "active", "rank", "local"); + "actcount", "depth", "tasks", "ti_end_min", "timebin", "issuper", + "active", "rank", "local"); - uintptr_t data[3]; + size_t data[5]; data[0] = (size_t)file; data[1] = (size_t)s->e; - data[2] = (size_t)pactive; + data[2] = (size_t)active; + data[3] = (size_t)mpiactive; + data[4] = (size_t)pactive; space_map_cells_pre(s, 1, dumpCells_map, &data); fclose(file); } diff --git a/src/debug.h b/src/debug.h index 5e5fb4819e990349b57db7fa6c0bfe9dacba9d21..2cff37fa6f6f03185dc769bb770fe3a98a424ce1 100644 --- a/src/debug.h +++ b/src/debug.h @@ -36,8 +36,8 @@ void printgParticle_single(struct gpart *gp); int checkSpacehmax(struct space *s); int checkCellhdxmax(const struct cell *c, int *depth); -void dumpCells(const char *prefix, int pactive, struct space *s, int rank, - int step); +void dumpCells(const char *prefix, int active, int mpiactive, int pactive, + struct space *s, int rank, int step); #ifdef HAVE_METIS #include "metis.h" diff --git a/src/engine.c b/src/engine.c index 869432417d848e68290a44ba4e63cc9b3dc75b9c..fdcf80abd69450ad2eabbcc05d90d248ca5efa7f 100644 --- a/src/engine.c +++ b/src/engine.c @@ -3750,6 +3750,9 @@ void engine_step(struct engine *e) { /* Print the number of active tasks ? */ if (e->verbose) engine_print_task_counts(e); +/* Dump local cells and active particle counts. */ +/* dumpCells("cells", 0, 0, 1, e->s, e->nodeID, e->step); */ + #ifdef SWIFT_DEBUG_CHECKS /* Check that we have the correct total mass in the top-level multipoles */ size_t num_gpart_mpole = 0; diff --git a/src/partition.c b/src/partition.c index 3b916c8a906ea7bfe1ba546c5e3e7f65d27aa76b..567fcb38c030a0dec1f052329111b0cc6d82d8f7 100644 --- a/src/partition.c +++ b/src/partition.c @@ -528,7 +528,7 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins, if (t->cost == 0) continue; /* Get the task weight based on costs. */ - double w = (double) t->cost; + double w = (double)t->cost; /* Get the top-level cells involved. */ struct cell *ci, *cj; @@ -590,15 +590,17 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins, * overflow int, so take care. */ int dti = num_time_bins - get_time_bin(ci->ti_end_min); int dtj = num_time_bins - get_time_bin(cj->ti_end_min); - double dt = (double)(1<<dti) + (double)(1<<dtj); + double dt = (double)(1 << dti) + (double)(1 << dtj); /* ci */ int kk; - for (kk = 26 * cid; inds[kk] != cjd; kk++); + for (kk = 26 * cid; inds[kk] != cjd; kk++) + ; weights_e[kk] += dt; /* cj */ - for (kk = 26 * cjd; inds[kk] != cid; kk++); + for (kk = 26 * cjd; inds[kk] != cid; kk++) + ; weights_e[kk] += dt; } else { @@ -606,11 +608,13 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins, /* ci */ int kk; - for (kk = 26 * cid; inds[kk] != cjd; kk++); + for (kk = 26 * cid; inds[kk] != cjd; kk++) + ; weights_e[kk] += w; /* cj */ - for (kk = 26 * cjd; inds[kk] != cid; kk++); + for (kk = 26 * cjd; inds[kk] != cid; kk++) + ; weights_e[kk] += w; } } @@ -624,14 +628,14 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins, int res; if (bothweights) { if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v, - nr_cells, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD)) - != MPI_SUCCESS) + nr_cells, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD)) != + MPI_SUCCESS) mpi_error(res, "Failed to allreduce vertex weights."); } if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e, - 26 * nr_cells, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD)) - != MPI_SUCCESS) + 26 * nr_cells, MPI_DOUBLE, MPI_SUM, 0, + MPI_COMM_WORLD)) != MPI_SUCCESS) mpi_error(res, "Failed to allreduce edge weights."); /* Allocate cell list for the partition. */ @@ -642,8 +646,8 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins, if (nodeID == 0) { /* We need to rescale the weights into the range of an integer for METIS - * really range of idx_t). Also we would like the range of vertex and - * edges weights to be simila r so they balance. */ + * (really range of idx_t). Also we would like the range of vertex and + * edges weights to be similar so they balance. */ double wminv = 0.0; double wmaxv = 0.0; if (bothweights) { @@ -680,7 +684,6 @@ static void repart_edge_metis(int partweights, int bothweights, int timebins, for (int k = 0; k < nr_cells; k++) { weights_v[k] = (weights_v[k] - wminv) * wscalev + 1.0; } - } /* Scale to the METIS range. */ @@ -905,8 +908,8 @@ void partition_initial_partition(struct partition *initial_partition, accumulate_counts(s, weights); /* Get all the counts from all the nodes. */ - if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE, - MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) + if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE, MPI_SUM, + MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to allreduce particle cell weights."); } @@ -1040,41 +1043,40 @@ void partition_init(struct partition *partition, parser_get_opt_param_string(params, "DomainDecomposition:repartition_type", part_type, default_repart); - if (strcmp("none/none", part_type) == 0) { - repartition->type = REPART_NONE; + repartition->type = REPART_NONE; #ifdef HAVE_METIS } else if (strcmp("costs/costs", part_type) == 0) { - repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_COSTS; + repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_COSTS; } else if (strcmp("counts/none", part_type) == 0) { - repartition->type = REPART_METIS_VERTEX_COUNTS; + repartition->type = REPART_METIS_VERTEX_COUNTS; } else if (strcmp("none/costs", part_type) == 0) { - repartition->type = REPART_METIS_EDGE_COSTS; + repartition->type = REPART_METIS_EDGE_COSTS; } else if (strcmp("counts/costs", part_type) == 0) { - repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_COSTS; + repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_COSTS; } else if (strcmp("costs/time", part_type) == 0) { - repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS; + repartition->type = REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS; } else if (strcmp("counts/time", part_type) == 0) { - repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS; + repartition->type = REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS; } else if (strcmp("none/time", part_type) == 0) { - repartition->type = REPART_METIS_EDGE_TIMEBINS; + repartition->type = REPART_METIS_EDGE_TIMEBINS; } else { - message("Invalid choice of re-partition type '%s'.", part_type); - error( - "Permitted values are: 'none/none', 'costs/costs'," - "'counts/none', 'none/costs', 'counts/costs', " - "'costs/time', 'counts/time' or 'none/time'"); + message("Invalid choice of re-partition type '%s'.", part_type); + error( + "Permitted values are: 'none/none', 'costs/costs'," + "'counts/none', 'none/costs', 'counts/costs', " + "'costs/time', 'counts/time' or 'none/time'"); #else } else { - message("Invalid choice of re-partition type '%s'.", part_type); - error("Permitted values are: 'none/none' when compiled without METIS."); + message("Invalid choice of re-partition type '%s'.", part_type); + error("Permitted values are: 'none/none' when compiled without METIS."); #endif }