diff --git a/src/debug.c b/src/debug.c index f245e881d718c27c075a6ae13d62a49d43b5f7e9..7376e4558b48fa0414cf1df02d4a7c1de400d1d9 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1,7 +1,9 @@ /******************************************************************************* * This file is part of SWIFT. - * Copyright (c) 2013 Matthieu Schaller (matthieu.schaller@durham.ac.uk), - * Pedro Gonnet (pedro.gonnet@durham.ac.uk). + * Copyright (c) 2013- 2015: + * Matthieu Schaller (matthieu.schaller@durham.ac.uk), + * Pedro Gonnet (pedro.gonnet@durham.ac.uk), + * Peter W. Draper (p.w.draper@durham.ac.uk). * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -20,8 +22,10 @@ #include <stdio.h> +#include "config.h" #include "const.h" #include "part.h" +#include "debug.h" /** * @brief Looks for the particle with the given id and prints its information to @@ -98,3 +102,177 @@ void printParticle_single(struct part *p) { p->rho_dh, p->density.div_v, p->u, p->force.u_dt, p->force.balsara, p->force.POrho2, p->force.v_sig, p->dt); }
+
+#ifdef HAVE_METIS
+
+/**
+ * @brief Open one of the METIS graph dump files for writing.
+ *
+ * The file is called <prefix>_<kind>_<nseq>.dat, with the sequence number
+ * zero-padded to three digits.
+ *
+ * @param prefix base output filename
+ * @param kind flavour of the dump: "std", "simple" or "weights"
+ * @param nseq sequence number of the dump
+ *
+ * @return the stream opened for writing, or NULL (after reporting to stderr)
+ *         if the filename is too long or the file cannot be opened.
+ */
+static FILE *dumpMETISGraph_open(const char *prefix, const char *kind,
+                                 int nseq) {
+  char fname[200];
+  const int len =
+      snprintf(fname, sizeof(fname), "%s_%s_%03d.dat", prefix, kind, nseq);
+  if (len < 0 || (size_t)len >= sizeof(fname)) {
+    fprintf(stderr, "dumpMETISGraph: output filename for '%s' is too long\n",
+            prefix);
+    return NULL;
+  }
+
+  FILE *file = fopen(fname, "w");
+  if (file == NULL) perror(fname);
+  return file;
+}
+
+/**
+ * @brief Dump the METIS graph in standard format, simple format and weights
+ * only, to a file.
+ *
+ * @details The standard format output can be read into the METIS
+ * command-line tools. The simple format is just the cell connectivity (this
+ * should not change between calls). The weights format is the standard one,
+ * minus the cell connectivity.
+ *
+ * The output filenames are generated from the prefix and the sequence number
+ * of calls. So the first is called <prefix>_std_001.dat,
+ * <prefix>_simple_001.dat, <prefix>_weights_001.dat, etc.
+ *
+ * Weight or size arrays that are NULL, or that only contain 1s, are treated
+ * as absent. The weights file is only written when some weight or size is
+ * present. If any output file cannot be opened the problem is reported to
+ * stderr and the dump is abandoned.
+ *
+ * @param prefix base output filename
+ * @param nvertices the number of vertices
+ * @param nvertexweights the number of vertex weights
+ * @param cellconruns first part of cell connectivity info (CSR)
+ * @param cellcon second part of cell connectivity info (CSR)
+ * @param vertexweights weights of vertices
+ * @param vertexsizes size of vertices
+ * @param edgeweights weights of edges
+ */
+void dumpMETISGraph(const char *prefix, idx_t nvertices, idx_t nvertexweights,
+                    idx_t *cellconruns, idx_t *cellcon, idx_t *vertexweights,
+                    idx_t *vertexsizes, idx_t *edgeweights) {
+  FILE *stdfile = NULL;
+  FILE *simplefile = NULL;
+  FILE *weightfile = NULL;
+  idx_t i;
+  idx_t j;
+  int haveedgeweight = 0;
+  int havevertexsize = 0;
+  int havevertexweight = 0;
+  int haveweights = 0;
+  static int nseq = 0;
+  nseq++;
+
+  /* Arrays that are missing or hold only 1s carry no information. */
+  if (vertexweights != NULL) {
+    for (i = 0; i < nvertices * nvertexweights; i++) {
+      if (vertexweights[i] != 1) {
+        havevertexweight = 1;
+        break;
+      }
+    }
+  }
+
+  if (vertexsizes != NULL) {
+    for (i = 0; i < nvertices; i++) {
+      if (vertexsizes[i] != 1) {
+        havevertexsize = 1;
+        break;
+      }
+    }
+  }
+
+  if (edgeweights != NULL) {
+    for (i = 0; i < cellconruns[nvertices]; i++) {
+      if (edgeweights[i] != 1) {
+        haveedgeweight = 1;
+        break;
+      }
+    }
+  }
+  haveweights = havevertexweight || havevertexsize || haveedgeweight;
+
+  /* Open output files. This is a debugging aid, so a file that cannot be
+   * created abandons the dump instead of writing through a NULL stream. */
+  stdfile = dumpMETISGraph_open(prefix, "std", nseq);
+  simplefile = dumpMETISGraph_open(prefix, "simple", nseq);
+  if (haveweights) weightfile = dumpMETISGraph_open(prefix, "weights", nseq);
+  if (stdfile == NULL || simplefile == NULL ||
+      (haveweights && weightfile == NULL))
+    goto cleanup;
+
+  /* Write the header lines. */
+  fprintf(stdfile, "%" PRIDX " %" PRIDX, nvertices, cellconruns[nvertices] / 2);
+  fprintf(simplefile, "%" PRIDX " %" PRIDX, nvertices,
+          cellconruns[nvertices] / 2);
+  if (haveweights) {
+    fprintf(weightfile, "%" PRIDX " %" PRIDX, nvertices,
+            cellconruns[nvertices] / 2);
+
+    /* Format flags: vertex sizes, vertex weights, edge weights. */
+    fprintf(stdfile, " %d%d%d", havevertexsize, havevertexweight,
+            haveedgeweight);
+    fprintf(weightfile, " %d%d%d", havevertexsize, havevertexweight,
+            haveedgeweight);
+
+    if (havevertexweight) {
+      fprintf(stdfile, " %d", (int)nvertexweights);
+      fprintf(weightfile, " %d", (int)nvertexweights);
+    }
+  }
+
+  /* Write the rest of the graph. */
+  for (i = 0; i < nvertices; i++) {
+    fprintf(stdfile, "\n");
+    fprintf(simplefile, "\n");
+    if (weightfile != NULL) {
+      fprintf(weightfile, "\n");
+    }
+
+    if (havevertexsize) {
+      fprintf(stdfile, " %" PRIDX, vertexsizes[i]);
+      fprintf(weightfile, " %" PRIDX, vertexsizes[i]);
+    }
+
+    if (havevertexweight) {
+      for (j = 0; j < nvertexweights; j++) {
+        fprintf(stdfile, " %" PRIDX, vertexweights[i * nvertexweights + j]);
+        fprintf(weightfile, " %" PRIDX, vertexweights[i * nvertexweights + j]);
+      }
+    }
+
+    /* Neighbour indices are shifted by one on output. */
+    for (j = cellconruns[i]; j < cellconruns[i + 1]; j++) {
+      fprintf(stdfile, " %" PRIDX, cellcon[j] + 1);
+      fprintf(simplefile, " %" PRIDX, cellcon[j] + 1);
+      if (haveedgeweight) {
+        fprintf(stdfile, " %" PRIDX, edgeweights[j]);
+        fprintf(weightfile, " %" PRIDX, edgeweights[j]);
+      }
+    }
+  }
+  fprintf(stdfile, "\n");
+  fprintf(simplefile, "\n");
+  if (weightfile != NULL) {
+    fprintf(weightfile, "\n");
+  }
+
+cleanup:
+  if (stdfile != NULL) fclose(stdfile);
+  if (simplefile != NULL) fclose(simplefile);
+  if (weightfile != NULL) fclose(weightfile);
+}
+
+#endif
diff --git a/src/debug.h b/src/debug.h index 83461df45e3c0fb137557fba5fdf68cac9d4915a..27b2f94eff28c0d2fd0bc76f548d5d775414d2c2 100644 --- a/src/debug.h +++ b/src/debug.h @@ -27,4 +27,11 @@ void printParticle(struct part *parts, long long int i, int N); void printgParticle(struct gpart *parts, long long int i, int N); 
void printParticle_single(struct part *p); +#ifdef HAVE_METIS +#include "metis.h" +void dumpMETISGraph(const char *prefix, idx_t nvtxs, idx_t ncon, + idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *vsize, + idx_t *adjwgt); + +#endif #endif /* SWIFT_DEBUG_H */ diff --git a/src/engine.c b/src/engine.c index 70bd5e31c61c38028a4e8f773fe722f61840700e..354fa42af9c955f09b64ea2bb6af84e010f3537b 100644 --- a/src/engine.c +++ b/src/engine.c @@ -306,7 +306,8 @@ void engine_repartition(struct engine *e) { int nr_nodes = e->nr_nodes, nodeID = e->nodeID; float wscale = 1e-3, vscale = 1e-3, wscale_buff; idx_t wtot = 0; - const idx_t wmax = 1e9 / e->nr_nodes; + idx_t wmax = 1e9 / e->nr_nodes; + idx_t wmin; /* Clear the repartition flag. */ e->forcerepart = 0; @@ -489,6 +490,24 @@ void engine_repartition(struct engine *e) { /* As of here, only one node needs to compute the partition. */ if (nodeID == 0) { + /* Final rescale of all weights to avoid a large range. Large ranges have + * been seen to cause an incomplete graph. */ + wmin = wmax; + wmax = 0.0; + for (k = 0; k < 26 * nr_cells; k++) { + wmax = weights_e[k] > wmax ? weights_e[k] : wmax; + wmin = weights_e[k] < wmin ? weights_e[k] : wmin; + } + if ((wmax - wmin) > engine_maxmetisweight) { + wscale = engine_maxmetisweight / (wmax - wmin); + for (k = 0; k < 26 * nr_cells; k++) { + weights_e[k] = (weights_e[k] - wmin) * wscale + 1; + } + for (k = 0; k < nr_cells; k++) { + weights_v[k] = (weights_v[k] - wmin) * wscale + 1; + } + } + /* Check that the edge weights are fully symmetric. */ /* for ( cid = 0 ; cid < nr_cells ; cid++ ) for ( k = 0 ; k < 26 ; k++ ) { @@ -547,21 +566,47 @@ void engine_repartition(struct engine *e) { /* Call METIS. 
*/ idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes; idx_t objval; + + /* Dump graph in METIS format */ + /*dumpMETISGraph("metis_graph", idx_nr_cells, one, offsets, inds, + weights_v, NULL, weights_e);*/ + if (METIS_PartGraphRecursive(&idx_nr_cells, &one, offsets, inds, weights_v, NULL, weights_e, &idx_nr_nodes, NULL, NULL, options, &objval, nodeIDs) != METIS_OK) - error("Call to METIS_PartGraphKway failed."); + error("Call to METIS_PartGraphRecursive failed."); /* Dump the 3d array of cell IDs. */ /* printf( "engine_repartition: nodeIDs = reshape( [" ); for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ ) printf( "%i " , (int)nodeIDs[ i ] ); printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */ - + /* Check that the nodeIDs are ok. */ for (k = 0; k < nr_cells; k++) if (nodeIDs[k] < 0 || nodeIDs[k] >= nr_nodes) error("Got bad nodeID %"PRIDX" for cell %i.", nodeIDs[k], k); + + /* Check that the partition is complete and all nodes have some work. */ + int present[nr_nodes]; + int failed = 0; + for (i = 0; i < nr_nodes; i++) present[i] = 0; + for (i = 0; i < nr_cells; i++) present[nodeIDs[i]]++; + for (i = 0; i < nr_nodes; i++) { + if (! present[i]) { + failed = 1; + message("Node %d is not present after repartition", i); + } + } + + /* If partition failed continue with the current one, but make this + * clear. */ + if (failed) { + message("WARNING: METIS repartition has failed, continuing with " + "the current partition, load balance will not be optimal"); + for (k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID; + } + } /* Broadcast the result of the partition. */ diff --git a/src/engine.h b/src/engine.h index c450532909557f2374ec76af6b58050fd9483bb9..b2beeaa1b4a1b4aebe650209739fec91c74b1ba7 100644 --- a/src/engine.h +++ b/src/engine.h @@ -47,6 +47,8 @@ #define engine_maxproxies 64 #define engine_tasksreweight 10 +#define engine_maxmetisweight 10000.0f + /* The rank of the engine as a global variable (for messages). */ extern int engine_rank;