Skip to content
Snippets Groups Projects
Commit 0689a0cd authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

Merge branch 'metis-tests' into 'master'

 METIS fixes

Code to re-scale the METIS weights in an attempt to avoid large ranges
that fail to produce a complete partition. If that still happens then
handle that case by continuing with the current partition, noting this
problem with a warning.

Also adds a function to dump the METIS graphs, optionally as separate
weights-only files, for off-line analysis.

See merge request !61
parents bbabe60c 4cbe46eb
No related branches found
No related tags found
No related merge requests found
/******************************************************************************* /*******************************************************************************
* This file is part of SWIFT. * This file is part of SWIFT.
* Copyright (c) 2013 Matthieu Schaller (matthieu.schaller@durham.ac.uk), * Copyright (c) 2013- 2015:
* Pedro Gonnet (pedro.gonnet@durham.ac.uk). * Matthieu Schaller (matthieu.schaller@durham.ac.uk),
* Pedro Gonnet (pedro.gonnet@durham.ac.uk),
* Peter W. Draper (p.w.draper@durham.ac.uk).
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published * it under the terms of the GNU Lesser General Public License as published
...@@ -20,8 +22,10 @@ ...@@ -20,8 +22,10 @@
#include <stdio.h> #include <stdio.h>
#include "config.h"
#include "const.h" #include "const.h"
#include "part.h" #include "part.h"
#include "debug.h"
/** /**
* @brief Looks for the particle with the given id and prints its information to * @brief Looks for the particle with the given id and prints its information to
...@@ -98,3 +102,140 @@ void printParticle_single(struct part *p) { ...@@ -98,3 +102,140 @@ void printParticle_single(struct part *p) {
p->rho_dh, p->density.div_v, p->u, p->force.u_dt, p->force.balsara, p->rho_dh, p->density.div_v, p->u, p->force.u_dt, p->force.balsara,
p->force.POrho2, p->force.v_sig, p->dt); p->force.POrho2, p->force.v_sig, p->dt);
} }
#ifdef HAVE_METIS
/**
 * @brief Open one of the METIS dump files for writing.
 *
 * Builds the name "<prefix>_<kind>_<nseq>.dat" (sequence number zero-padded
 * to three digits) and opens it. Problems are reported on stderr.
 *
 * @param prefix base output filename
 * @param kind the dump flavour, inserted between prefix and sequence number
 * @param nseq the sequence number of this dump
 *
 * @return the opened stream, or NULL if the name does not fit into the
 *         local buffer or the file cannot be opened.
 */
static FILE *dumpMETISGraph_open(const char *prefix, const char *kind,
                                 int nseq) {
  char fname[200];

  /* Bounded formatting: the prefix is caller supplied and may be long. */
  const int len =
      snprintf(fname, sizeof(fname), "%s_%s_%03d.dat", prefix, kind, nseq);
  if (len < 0 || (size_t)len >= sizeof(fname)) {
    fprintf(stderr, "dumpMETISGraph: file name for prefix '%s' is too long.\n",
            prefix);
    return NULL;
  }

  FILE *file = fopen(fname, "w");
  if (file == NULL) {
    fprintf(stderr, "dumpMETISGraph: failed to open '%s' for writing.\n",
            fname);
  }
  return file;
}

/**
 * @brief Dump the METIS graph in standard format, simple format and weights
 *        only, to a file.
 *
 * The standard format output can be read into the METIS
 * command-line tools. The simple format is just the cell connectivity (this
 * should not change between calls). The weights format is the standard one,
 * minus the cell connectivity. The weights file is only written when at
 * least one vertex weight, vertex size or edge weight differs from 1.
 *
 * The output filenames are generated from the prefix and the sequence number
 * of calls. So the first is called <prefix>_std_001.dat,
 * <prefix>_simple_001.dat, <prefix>_weights_001.dat, etc.
 *
 * If any of the output files cannot be opened, a message is printed to
 * stderr and nothing is written for this call (the sequence number is still
 * consumed).
 *
 * @param prefix base output filename
 * @param nvertices the number of vertices
 * @param nvertexweights the number vertex weights
 * @param cellconruns first part of cell connectivity info (CSR)
 * @param cellcon second part of cell connectivity info (CSR)
 * @param vertexweights weights of vertices, may be NULL
 * @param vertexsizes size of vertices, may be NULL
 * @param edgeweights weights of edges, may be NULL
 */
void dumpMETISGraph(const char *prefix, idx_t nvertices, idx_t nvertexweights,
                    idx_t *cellconruns, idx_t *cellcon, idx_t *vertexweights,
                    idx_t *vertexsizes, idx_t *edgeweights) {
  FILE *stdfile = NULL;
  FILE *simplefile = NULL;
  FILE *weightfile = NULL;
  idx_t i;
  idx_t j;
  int haveedgeweight = 0;
  int havevertexsize = 0;
  int havevertexweight = 0;
  int haveweights = 0;

  /* Counts the calls, so that each dump gets its own set of files. */
  static int nseq = 0;
  nseq++;

  /* An array only counts as present if it holds a value other than 1. */
  if (vertexweights != NULL) {
    for (i = 0; i < nvertices * nvertexweights; i++) {
      if (vertexweights[i] != 1) {
        havevertexweight = 1;
        break;
      }
    }
  }

  if (vertexsizes != NULL) {
    for (i = 0; i < nvertices; i++) {
      if (vertexsizes[i] != 1) {
        havevertexsize = 1;
        break;
      }
    }
  }

  if (edgeweights != NULL) {
    for (i = 0; i < cellconruns[nvertices]; i++) {
      if (edgeweights[i] != 1) {
        haveedgeweight = 1;
        break;
      }
    }
  }
  haveweights = havevertexweight || havevertexsize || haveedgeweight;

  /* Open output files. */
  stdfile = dumpMETISGraph_open(prefix, "std", nseq);
  simplefile = dumpMETISGraph_open(prefix, "simple", nseq);
  if (haveweights) {
    weightfile = dumpMETISGraph_open(prefix, "weights", nseq);
  }

  /* Give up cleanly if any required file is missing, rather than writing
   * through a NULL stream. */
  if (stdfile == NULL || simplefile == NULL ||
      (haveweights && weightfile == NULL)) {
    if (stdfile != NULL) fclose(stdfile);
    if (simplefile != NULL) fclose(simplefile);
    if (weightfile != NULL) fclose(weightfile);
    return;
  }

  /* Write the header lines. Each edge appears twice in the CSR arrays, hence
   * the division by two. */
  fprintf(stdfile, "%" PRIDX " %" PRIDX, nvertices,
          cellconruns[nvertices] / 2);
  fprintf(simplefile, "%" PRIDX " %" PRIDX, nvertices,
          cellconruns[nvertices] / 2);
  if (haveweights) {
    fprintf(weightfile, "%" PRIDX " %" PRIDX, nvertices,
            cellconruns[nvertices] / 2);

    /* Three-digit flag field: vertex sizes, vertex weights, edge weights. */
    fprintf(stdfile, " %d%d%d", havevertexsize, havevertexweight,
            haveedgeweight);
    fprintf(weightfile, " %d%d%d", havevertexsize, havevertexweight,
            haveedgeweight);
    if (havevertexweight) {
      fprintf(stdfile, " %d", (int)nvertexweights);
      fprintf(weightfile, " %d", (int)nvertexweights);
    }
  }

  /* Write the rest of the graph, one line per vertex. */
  for (i = 0; i < nvertices; i++) {
    fprintf(stdfile, "\n");
    fprintf(simplefile, "\n");
    if (weightfile != NULL) {
      fprintf(weightfile, "\n");
    }

    if (havevertexsize) {
      fprintf(stdfile, " %" PRIDX, vertexsizes[i]);
      fprintf(weightfile, " %" PRIDX, vertexsizes[i]);
    }

    if (havevertexweight) {
      for (j = 0; j < nvertexweights; j++) {
        fprintf(stdfile, " %" PRIDX, vertexweights[i * nvertexweights + j]);
        fprintf(weightfile, " %" PRIDX, vertexweights[i * nvertexweights + j]);
      }
    }

    /* Neighbours are written 1-based, as in the METIS file format. */
    for (j = cellconruns[i]; j < cellconruns[i + 1]; j++) {
      fprintf(stdfile, " %" PRIDX, cellcon[j] + 1);
      fprintf(simplefile, " %" PRIDX, cellcon[j] + 1);
      if (haveedgeweight) {
        fprintf(stdfile, " %" PRIDX, edgeweights[j]);
        fprintf(weightfile, " %" PRIDX, edgeweights[j]);
      }
    }
  }
  fprintf(stdfile, "\n");
  fprintf(simplefile, "\n");
  if (weightfile != NULL) {
    fprintf(weightfile, "\n");
  }

  fclose(stdfile);
  fclose(simplefile);
  if (weightfile != NULL) {
    fclose(weightfile);
  }
}
#endif
...@@ -27,4 +27,11 @@ void printParticle(struct part *parts, long long int i, int N); ...@@ -27,4 +27,11 @@ void printParticle(struct part *parts, long long int i, int N);
void printgParticle(struct gpart *parts, long long int i, int N); void printgParticle(struct gpart *parts, long long int i, int N);
void printParticle_single(struct part *p); void printParticle_single(struct part *p);
#ifdef HAVE_METIS
#include "metis.h"
/* Write the METIS graph described by the CSR arrays (xadj, adjncy) and the
 * optional vertex weights (vwgt), vertex sizes (vsize) and edge weights
 * (adjwgt) to sequence-numbered files whose names start with prefix.
 * Only declared when HAVE_METIS is defined. */
void dumpMETISGraph(const char *prefix, idx_t nvtxs, idx_t ncon,
idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *vsize,
idx_t *adjwgt);
#endif
#endif /* SWIFT_DEBUG_H */ #endif /* SWIFT_DEBUG_H */
...@@ -306,7 +306,8 @@ void engine_repartition(struct engine *e) { ...@@ -306,7 +306,8 @@ void engine_repartition(struct engine *e) {
int nr_nodes = e->nr_nodes, nodeID = e->nodeID; int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
float wscale = 1e-3, vscale = 1e-3, wscale_buff; float wscale = 1e-3, vscale = 1e-3, wscale_buff;
idx_t wtot = 0; idx_t wtot = 0;
const idx_t wmax = 1e9 / e->nr_nodes; idx_t wmax = 1e9 / e->nr_nodes;
idx_t wmin;
/* Clear the repartition flag. */ /* Clear the repartition flag. */
e->forcerepart = 0; e->forcerepart = 0;
...@@ -489,6 +490,24 @@ void engine_repartition(struct engine *e) { ...@@ -489,6 +490,24 @@ void engine_repartition(struct engine *e) {
/* As of here, only one node needs to compute the partition. */ /* As of here, only one node needs to compute the partition. */
if (nodeID == 0) { if (nodeID == 0) {
/* Final rescale of all weights to avoid a large range. Large ranges have
* been seen to cause an incomplete graph. */
wmin = wmax;
wmax = 0.0;
for (k = 0; k < 26 * nr_cells; k++) {
wmax = weights_e[k] > wmax ? weights_e[k] : wmax;
wmin = weights_e[k] < wmin ? weights_e[k] : wmin;
}
if ((wmax - wmin) > engine_maxmetisweight) {
wscale = engine_maxmetisweight / (wmax - wmin);
for (k = 0; k < 26 * nr_cells; k++) {
weights_e[k] = (weights_e[k] - wmin) * wscale + 1;
}
for (k = 0; k < nr_cells; k++) {
weights_v[k] = (weights_v[k] - wmin) * wscale + 1;
}
}
/* Check that the edge weights are fully symmetric. */ /* Check that the edge weights are fully symmetric. */
/* for ( cid = 0 ; cid < nr_cells ; cid++ ) /* for ( cid = 0 ; cid < nr_cells ; cid++ )
for ( k = 0 ; k < 26 ; k++ ) { for ( k = 0 ; k < 26 ; k++ ) {
...@@ -547,21 +566,47 @@ void engine_repartition(struct engine *e) { ...@@ -547,21 +566,47 @@ void engine_repartition(struct engine *e) {
/* Call METIS. */ /* Call METIS. */
idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes; idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes;
idx_t objval; idx_t objval;
/* Dump graph in METIS format */
/*dumpMETISGraph("metis_graph", idx_nr_cells, one, offsets, inds,
weights_v, NULL, weights_e);*/
if (METIS_PartGraphRecursive(&idx_nr_cells, &one, offsets, inds, weights_v, if (METIS_PartGraphRecursive(&idx_nr_cells, &one, offsets, inds, weights_v,
NULL, weights_e, &idx_nr_nodes, NULL, NULL, NULL, weights_e, &idx_nr_nodes, NULL, NULL,
options, &objval, nodeIDs) != METIS_OK) options, &objval, nodeIDs) != METIS_OK)
error("Call to METIS_PartGraphKway failed."); error("Call to METIS_PartGraphRecursive failed.");
/* Dump the 3d array of cell IDs. */ /* Dump the 3d array of cell IDs. */
/* printf( "engine_repartition: nodeIDs = reshape( [" ); /* printf( "engine_repartition: nodeIDs = reshape( [" );
for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ ) for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ )
printf( "%i " , (int)nodeIDs[ i ] ); printf( "%i " , (int)nodeIDs[ i ] );
printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */ printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */
/* Check that the nodeIDs are ok. */ /* Check that the nodeIDs are ok. */
for (k = 0; k < nr_cells; k++) for (k = 0; k < nr_cells; k++)
if (nodeIDs[k] < 0 || nodeIDs[k] >= nr_nodes) if (nodeIDs[k] < 0 || nodeIDs[k] >= nr_nodes)
error("Got bad nodeID %"PRIDX" for cell %i.", nodeIDs[k], k); error("Got bad nodeID %"PRIDX" for cell %i.", nodeIDs[k], k);
/* Check that the partition is complete and all nodes have some work. */
int present[nr_nodes];
int failed = 0;
for (i = 0; i < nr_nodes; i++) present[i] = 0;
for (i = 0; i < nr_cells; i++) present[nodeIDs[i]]++;
for (i = 0; i < nr_nodes; i++) {
if (! present[i]) {
failed = 1;
message("Node %d is not present after repartition", i);
}
}
/* If partition failed continue with the current one, but make this
* clear. */
if (failed) {
message("WARNING: METIS repartition has failed, continuing with "
"the current partition, load balance will not be optimal");
for (k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID;
}
} }
/* Broadcast the result of the partition. */ /* Broadcast the result of the partition. */
......
...@@ -47,6 +47,8 @@ ...@@ -47,6 +47,8 @@
#define engine_maxproxies 64 #define engine_maxproxies 64
#define engine_tasksreweight 10 #define engine_tasksreweight 10
#define engine_maxmetisweight 10000.0f
/* The rank of the engine as a global variable (for messages). */ /* The rank of the engine as a global variable (for messages). */
extern int engine_rank; extern int engine_rank;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment