diff --git a/examples/main.c b/examples/main.c index ef54c35d46e674d3041fc7e434b409e71cb6ddbe..1d12ad5853de3951c8a6f39c1c1179ac71be3265 100644 --- a/examples/main.c +++ b/examples/main.c @@ -84,14 +84,20 @@ int main(int argc, char *argv[]) { int nr_nodes = 1, myrank = 0; FILE *file_thread; int with_outputs = 1; - struct pgrid pgrid; - - /* Default parition type is grid. */ - pgrid.type = GRID_GRID; - pgrid.grid[0] = 1; - pgrid.grid[1] = 1; - pgrid.grid[2] = 1; + struct initpart ipart; +#if defined(WITH_MPI) && defined(HAVE_METIS) + enum repart_type reparttype = REPART_METIS_BOTH; +#endif + /* Default initial partition type is grid for shared memory and when + * METIS is not available. */ + ipart.type = INITPART_GRID; + ipart.grid[0] = 1; + ipart.grid[1] = 1; + ipart.grid[2] = 1; +#if defined(WITH_MPI) && defined(HAVE_METIS) + ipart.type = INITPART_METIS_NOWEIGHT; +#endif /* Choke on FP-exceptions. */ // feenableexcept( FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW ); @@ -115,9 +121,9 @@ int main(int argc, char *argv[]) { fflush(stdout); /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. */ - factor(nr_nodes, &pgrid.grid[0], &pgrid.grid[1]); - factor(nr_nodes / pgrid.grid[1], &pgrid.grid[0], &pgrid.grid[2]); - factor(pgrid.grid[0] * pgrid.grid[1], &pgrid.grid[1], &pgrid.grid[0]); + factor(nr_nodes, &ipart.grid[0], &ipart.grid[1]); + factor(nr_nodes / ipart.grid[1], &ipart.grid[0], &ipart.grid[2]); + factor(ipart.grid[0] * ipart.grid[1], &ipart.grid[1], &ipart.grid[0]); #endif /* Greeting message */ @@ -127,7 +133,7 @@ int main(int argc, char *argv[]) { bzero(&s, sizeof(struct space)); /* Parse the options */ - while ((c = getopt(argc, argv, "a:c:d:f:g:m:q:r:s:t:w:yz:")) != -1) + while ((c = getopt(argc, argv, "a:c:d:e:f:m:p:q:r:s:t:w:yz:")) != -1) switch (c) { case 'a': if (sscanf(optarg, "%lf", &scaling) != 1) @@ -146,32 +152,48 @@ int main(int argc, char *argv[]) { if (myrank == 0) message("dt set to %e.", dt_max); fflush(stdout); break; + case 'e': + /* REpartition type "b", "v", "e". */ +#if defined(WITH_MPI) && defined(HAVE_METIS) + switch (optarg[0]) { + case 'b': + reparttype = REPART_METIS_BOTH; + break; + case 'v': + reparttype = REPART_METIS_VERTEX; + break; + case 'e': + reparttype = REPART_METIS_EDGE; + break; + } +#endif + break; case 'f': if (!strcpy(ICfileName, optarg)) error("Error parsing IC file name."); break; - case 'g': - /* Grid is one of "g", "r", "m", "w", or "v". g can be followed by three - * numbers. */ + case 'p': + /* Parition type is one of "g", "r", "m", "w", or "v"; "g" can be + * followed by three numbers defining the grid. */ switch (optarg[0]) { case 'g': - pgrid.type = GRID_GRID; + ipart.type = INITPART_GRID; if (strlen(optarg) > 2) { - if (sscanf(optarg, "g %i %i %i", &pgrid.grid[0], &pgrid.grid[1], - &pgrid.grid[2]) != 3) + if (sscanf(optarg, "g %i %i %i", &ipart.grid[0], &ipart.grid[1], + &ipart.grid[2]) != 3) error("Error parsing grid."); } break; case 'r': - pgrid.type = GRID_RANDOM; + ipart.type = INITPART_RANDOM; break; case 'm': - pgrid.type = GRID_METIS_NOWEIGHT; + ipart.type = INITPART_METIS_NOWEIGHT; break; case 'w': - pgrid.type = GRID_METIS_WEIGHT; + ipart.type = INITPART_METIS_WEIGHT; break; case 'v': - pgrid.type = GRID_VECTORIZE; + ipart.type = INITPART_VECTORIZE; break; } break; @@ -223,7 +245,8 @@ int main(int argc, char *argv[]) { #if defined(WITH_MPI) if (myrank == 0) { message("Running with %i thread(s) per node.", nr_threads); - message("grid set to [ %i %i %i ].", pgrid.grid[0], pgrid.grid[1], pgrid.grid[2]); + + message("grid set to [ %i %i %i ].", ipart.grid[0], ipart.grid[1], ipart.grid[2]); if (nr_nodes == 1) { message("WARNING: you are running with one MPI rank."); @@ -356,7 +379,7 @@ int main(int argc, char *argv[]) { #ifdef WITH_MPI /* Split the space. */ - engine_split(&e, &pgrid); + engine_split(&e, &ipart); engine_redistribute(&e); #endif @@ -410,8 +433,8 @@ int main(int argc, char *argv[]) { /* Repartition the space amongst the nodes? */ #if defined(WITH_MPI) && defined(HAVE_METIS) - //if (j % 100 == 2) e.forcerepart = 1; - if (j % 10 == 9) e.forcerepart = 1; + //if (j % 100 == 2) e.forcerepart = reparttype; + if (j % 10 == 9) e.forcerepart = reparttype; #endif timers_reset(timers_mask_all); diff --git a/src/engine.c b/src/engine.c index 7ca83cd33fdb849e586e6c1a4172a5e098f8f110..4c58d879e5a02974910cb711b8bc1b104f5c1ac2 100644 --- a/src/engine.c +++ b/src/engine.c @@ -296,342 +296,321 @@ void engine_repartition(struct engine *e) { #if defined(WITH_MPI) && defined(HAVE_METIS) - int i, j, k, l, cid, cjd, ii, jj, kk, res; - idx_t *inds, *nodeIDs; - idx_t *weights_v = NULL, *weights_e = NULL; + int nr_nodes = e->nr_nodes; + int nodeID = e->nodeID; struct space *s = e->s; - int nr_cells = s->nr_cells, my_cells = 0; - struct cell *cells = s->cells; - int ind[3], *cdim = s->cdim; - struct task *t, *tasks = e->sched.tasks; - struct cell *ci, *cj; - int nr_nodes = e->nr_nodes, nodeID = e->nodeID; - float wscale = 1e-3, vscale = 1e-3, wscale_buff; - idx_t wtot = 0; - idx_t wmax = 1e9 / e->nr_nodes; - idx_t wmin; /* Clear the repartition flag. */ - e->forcerepart = 0; + enum repart_type reparttype = e->forcerepart; + e->forcerepart = REPART_NONE; /* Nothing to do if only using a single node. Also avoids METIS * bug that doesn't handle this case well. */ - if ( nr_nodes == 1 ) return; - - /* Allocate the inds and weights. */ - if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 *nr_cells)) == NULL || - (weights_v = (idx_t *)malloc(sizeof(idx_t) *nr_cells)) == NULL || - (weights_e = (idx_t *)malloc(sizeof(idx_t) * 26 *nr_cells)) == NULL || - (nodeIDs = (idx_t *)malloc(sizeof(idx_t) * nr_cells)) == NULL) - error("Failed to allocate inds and weights arrays."); - - /* Fill the inds array. */ - for (cid = 0; cid < nr_cells; cid++) { - ind[0] = cells[cid].loc[0] / s->cells[cid].h[0] + 0.5; - ind[1] = cells[cid].loc[1] / s->cells[cid].h[1] + 0.5; - ind[2] = cells[cid].loc[2] / s->cells[cid].h[2] + 0.5; - l = 0; - for (i = -1; i <= 1; i++) { - ii = ind[0] + i; - if (ii < 0) - ii += cdim[0]; - else if (ii >= cdim[0]) - ii -= cdim[0]; - for (j = -1; j <= 1; j++) { - jj = ind[1] + j; - if (jj < 0) - jj += cdim[1]; - else if (jj >= cdim[1]) - jj -= cdim[1]; - for (k = -1; k <= 1; k++) { - kk = ind[2] + k; - if (kk < 0) - kk += cdim[2]; - else if (kk >= cdim[2]) - kk -= cdim[2]; - if (i || j || k) { - inds[cid * 26 + l] = cell_getid(cdim, ii, jj, kk); - l += 1; + if (nr_nodes == 1) return; + + if (reparttype == REPART_METIS_BOTH || reparttype == REPART_METIS_EDGE) { + + /* Use task ticks as vertex and edge weights. */ + int nr_cells = s->nr_cells; + struct cell *cells = s->cells; + int ind[3], *cdim = s->cdim; + struct task *t, *tasks = e->sched.tasks; + float wscale = 1e-3, vscale = 1e-3, wscale_buff; + idx_t wtot = 0; + idx_t wmax = 1e9 / e->nr_nodes; + idx_t wmin; + + /* Allocate and fill the adjcny indexing array. */ + int *inds; + if ((inds = (int *)malloc(sizeof(int) * 26 *nr_cells)) == NULL) + error("Failed to allocate the inds array"); + + for (int cid = 0; cid < nr_cells; cid++) { + ind[0] = cells[cid].loc[0] / s->cells[cid].h[0] + 0.5; + ind[1] = cells[cid].loc[1] / s->cells[cid].h[1] + 0.5; + ind[2] = cells[cid].loc[2] / s->cells[cid].h[2] + 0.5; + int l = 0; + for (int i = -1; i <= 1; i++) { + int ii = ind[0] + i; + if (ii < 0) + ii += cdim[0]; + else if (ii >= cdim[0]) + ii -= cdim[0]; + for (int j = -1; j <= 1; j++) { + int jj = ind[1] + j; + if (jj < 0) + jj += cdim[1]; + else if (jj >= cdim[1]) + jj -= cdim[1]; + for (int k = -1; k <= 1; k++) { + int kk = ind[2] + k; + if (kk < 0) + kk += cdim[2]; + else if (kk >= cdim[2]) + kk -= cdim[2]; + if (i || j || k) { + inds[cid * 26 + l] = cell_getid(cdim, ii, jj, kk); + l += 1; + } } } } } - } - /* Init the weights arrays. */ - bzero(weights_e, sizeof(idx_t) * 26 * nr_cells); - bzero(weights_v, sizeof(idx_t) * nr_cells); - - /* Loop over the tasks... */ - for (j = 0; j < e->sched.nr_tasks; j++) { - - /* Get a pointer to the kth task. */ - t = &tasks[j]; - - /* Skip un-interesting tasks. */ - if (t->type != task_type_self && t->type != task_type_pair && - t->type != task_type_sub && t->type != task_type_ghost && - t->type != task_type_kick1 && t->type != task_type_kick2) - continue; - - /* Get the task weight. */ - idx_t w = (t->toc - t->tic) * wscale; - if (w < 0) error("Bad task weight (%" SCIDX ").", w); - - /* Do we need to re-scale? */ - wtot += w; - while (wtot > wmax) { - wscale /= 2; - wtot /= 2; - w /= 2; - for (k = 0; k < 26 * nr_cells; k++) weights_e[k] *= 0.5; - for (k = 0; k < nr_cells; k++) weights_v[k] *= 0.5; - } + /* Allocate and init weights. */ + int *weights_v = NULL; + int *weights_e = NULL; + if ((weights_v = (int *)malloc(sizeof(int) * nr_cells)) == NULL || + (weights_e = (int *)malloc(sizeof(int) * 26 * nr_cells)) == NULL) + error("Failed to allocate weights arrays."); + bzero(weights_v, sizeof(int) * nr_cells); + bzero(weights_e, sizeof(int) * 26 * nr_cells); + + /* Loop over the tasks... */ + for (int j = 0; j < e->sched.nr_tasks; j++) { + + /* Get a pointer to the kth task. */ + t = &tasks[j]; + + /* Skip un-interesting tasks. */ + if (t->type != task_type_self && t->type != task_type_pair && + t->type != task_type_sub && t->type != task_type_ghost && + t->type != task_type_kick1 && t->type != task_type_kick2) + continue; + + /* Get the task weight. */ + idx_t w = (t->toc - t->tic) * wscale; + if (w < 0) error("Bad task weight (%" SCIDX ").", w); + + /* Do we need to re-scale? */ + wtot += w; + while (wtot > wmax) { + wscale /= 2; + wtot /= 2; + w /= 2; + for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= 0.5; + for (int k = 0; k < nr_cells; k++) weights_v[k] *= 0.5; + } - /* Get the top-level cells involved. */ - for (ci = t->ci; ci->parent != NULL; ci = ci->parent) - ; - if (t->cj != NULL) - for (cj = t->cj; cj->parent != NULL; cj = cj->parent) - ; - else - cj = NULL; + /* Get the top-level cells involved. */ + struct cell *ci, *cj; + for (ci = t->ci; ci->parent != NULL; ci = ci->parent); + if (t->cj != NULL) + for (cj = t->cj; cj->parent != NULL; cj = cj->parent); + else + cj = NULL; - /* Get the cell IDs. */ - cid = ci - cells; + /* Get the cell IDs. */ + int cid = ci - cells; - /* Different weights for different tasks. */ - if (t->type == task_type_ghost || t->type == task_type_kick1 || - t->type == task_type_kick2) { + /* Different weights for different tasks. */ + if (t->type == task_type_ghost || t->type == task_type_kick1 || + t->type == task_type_kick2) { - /* Particle updates add only to vertex weight. */ - weights_v[cid] += w; + /* Particle updates add only to vertex weight. */ + weights_v[cid] += w; - } + } - /* Self interaction? */ - else if ((t->type == task_type_self && ci->nodeID == nodeID) || - (t->type == task_type_sub && cj == NULL && ci->nodeID == nodeID)) { + /* Self interaction? */ + else if ((t->type == task_type_self && ci->nodeID == nodeID) || + (t->type == task_type_sub && cj == NULL && + ci->nodeID == nodeID)) { - /* Self interactions add only to vertex weight. */ - weights_v[cid] += w; + /* Self interactions add only to vertex weight. */ + weights_v[cid] += w; - } + } - /* Pair? */ - else if (t->type == task_type_pair || - (t->type == task_type_sub && cj != NULL)) { + /* Pair? */ + else if (t->type == task_type_pair || + (t->type == task_type_sub && cj != NULL)) { - /* In-cell pair? */ - if (ci == cj) { + /* In-cell pair? */ + if (ci == cj) { - /* Add weight to vertex for ci. */ - weights_v[cid] += w; + /* Add weight to vertex for ci. */ + weights_v[cid] += w; - } + } - /* Distinct cells with local ci? */ - else if (ci->nodeID == nodeID) { + /* Distinct cells with local ci? */ + else if (ci->nodeID == nodeID) { - /* Index of the jth cell. */ - cjd = cj - cells; + /* Index of the jth cell. */ + int cjd = cj - cells; - /* Add half of weight to each cell. */ - if (ci->nodeID == nodeID) weights_v[cid] += 0.5 * w; - if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w; + /* Add half of weight to each cell. */ + if (ci->nodeID == nodeID) weights_v[cid] += 0.5 * w; + if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w; - /* Add Weight to edge. */ - for (k = 26 * cid; inds[k] != cjd; k++) - ; - weights_e[k] += w; - for (k = 26 * cjd; inds[k] != cid; k++) - ; - weights_e[k] += w; + /* Add weights to edge. */ + int kk; + for (kk = 26 * cid; inds[kk] != cjd; kk++); + weights_e[kk] += w; + for (kk = 26 * cjd; inds[kk] != cid; kk++); + weights_e[kk] += w; + } } } - } - /* Get the minimum scaling and re-scale if necessary. */ - if ((res = MPI_Allreduce(&wscale, &wscale_buff, 1, MPI_FLOAT, MPI_MIN, - MPI_COMM_WORLD)) != MPI_SUCCESS) { - char buff[MPI_MAX_ERROR_STRING]; - MPI_Error_string(res, buff, &i); - error("Failed to allreduce the weight scales (%s).", buff); - } - if (wscale_buff != wscale) { - float scale = wscale_buff / wscale; - for (k = 0; k < 26 * nr_cells; k++) weights_e[k] *= scale; - for (k = 0; k < nr_cells; k++) weights_v[k] *= scale; - } + /* Get the minimum scaling and re-scale if necessary. */ + int res; + if ((res = MPI_Allreduce(&wscale, &wscale_buff, 1, MPI_FLOAT, MPI_MIN, + MPI_COMM_WORLD)) != MPI_SUCCESS) + mpi_error(res,"Failed to allreduce the weight scales."); -/* Merge the weights arrays across all nodes. */ -#if IDXTYPEWIDTH == 32 - if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v, - nr_cells, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) != - MPI_SUCCESS) { -#else - if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v, - nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0, - MPI_COMM_WORLD)) != MPI_SUCCESS) { -#endif - char buff[MPI_MAX_ERROR_STRING]; - MPI_Error_string(res, buff, &i); - error("Failed to allreduce vertex weights (%s).", buff); - } -#if IDXTYPEWIDTH == 32 - if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e, - 26 * nr_cells, MPI_INT, MPI_SUM, 0, - MPI_COMM_WORLD) != MPI_SUCCESS) -#else - if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e, - 26 * nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0, - MPI_COMM_WORLD) != MPI_SUCCESS) -#endif - error("Failed to allreduce edge weights."); - - /* As of here, only one node needs to compute the partition. */ - if (nodeID == 0) { - - /* Final rescale of all weights to avoid a large range. Large ranges have - * been seen to cause an incomplete graph. */ - wmin = wmax; - wmax = 0.0; - for (k = 0; k < 26 * nr_cells; k++) { - wmax = weights_e[k] > wmax ? weights_e[k] : wmax; - wmin = weights_e[k] < wmin ? weights_e[k] : wmin; + if (wscale_buff != wscale) { + float scale = wscale_buff / wscale; + for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= scale; + for (int k = 0; k < nr_cells; k++) weights_v[k] *= scale; } - if ((wmax - wmin) > engine_maxmetisweight) { - wscale = engine_maxmetisweight / (wmax - wmin); - for (k = 0; k < 26 * nr_cells; k++) { - weights_e[k] = (weights_e[k] - wmin) * wscale + 1; + + /* Merge the weights arrays across all nodes. */ + if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v, + nr_cells, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) + != MPI_SUCCESS) + mpi_error(res,"Failed to allreduce vertex weights."); + + if ((res=MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e, + 26 * nr_cells, MPI_INT, MPI_SUM, 0, + MPI_COMM_WORLD)) != MPI_SUCCESS) + mpi_error(res,"Failed to allreduce edge weights."); + + /* Allocate cell list for the partition. */ + int *celllist = malloc(sizeof(int) * s->nr_cells); + if (celllist == NULL) + error("Failed to allocate celllist"); + + /* As of here, only one node needs to compute the partition. */ + if (nodeID == 0) { + + /* Final rescale of all weights to avoid a large range. Large ranges + * have been seen to cause an incomplete graph. */ + wmin = wmax; + wmax = 0.0; + for (int k = 0; k < 26 * nr_cells; k++) { + wmax = weights_e[k] > wmax ? weights_e[k] : wmax; + wmin = weights_e[k] < wmin ? weights_e[k] : wmin; } - for (k = 0; k < nr_cells; k++) { - weights_v[k] = (weights_v[k] - wmin) * wscale + 1; + if ((wmax - wmin) > engine_maxmetisweight) { + wscale = engine_maxmetisweight / (wmax - wmin); + for (int k = 0; k < 26 * nr_cells; k++) { + weights_e[k] = (weights_e[k] - wmin) * wscale + 1; + } + for (int k = 0; k < nr_cells; k++) { + weights_v[k] = (weights_v[k] - wmin) * wscale + 1; + } } - } - /* Check that the edge weights are fully symmetric. */ - /* for ( cid = 0 ; cid < nr_cells ; cid++ ) - for ( k = 0 ; k < 26 ; k++ ) { - cjd = inds[ cid*26 + k ]; - for ( j = 26*cjd ; inds[j] != cid ; j++ ); - if ( weights_e[ cid*26+k ] != weights_e[ j ] ) - error( "Unsymmetric edge weights detected (%i vs %i)." , - weights_e[ cid*26+k ] , weights_e[ j ] ); - } */ - /* int w_min = weights_e[0], w_max = weights_e[0], w_tot = weights_e[0]; - for ( k = 1 ; k < 26*nr_cells ; k++ ) { - w_tot += weights_e[k]; - if ( weights_e[k] < w_min ) - w_min = weights_e[k]; - else if ( weights_e[k] > w_max ) - w_max = weights_e[k]; - } - message( "edge weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot ); - w_min = weights_e[0], w_max = weights_e[0]; w_tot = weights_v[0]; - for ( k = 1 ; k < nr_cells ; k++ ) { - w_tot += weights_v[k]; - if ( weights_v[k] < w_min ) - w_min = weights_v[k]; - else if ( weights_v[k] > w_max ) - w_max = weights_v[k]; + /* Make sure there are no zero weights. */ + for (int k = 0; k < 26 * nr_cells; k++) + if (weights_e[k] == 0) weights_e[k] = 1; + for (int k = 0; k < nr_cells; k++) + if ((weights_v[k] *= vscale) == 0) weights_v[k] = 1; + + /* And partition, use both weights or not as requested. */ + if (reparttype == REPART_METIS_BOTH) + part_pick_metis(s, e->nr_nodes, weights_v, weights_e, celllist); + else + part_pick_metis(s, e->nr_nodes, NULL, weights_e, celllist); + + /* Check that all cells have good values. */ + for (int k = 0; k < nr_cells; k++) + if (celllist[k] < 0 || celllist[k] >= nr_nodes) + error("Got bad nodeID %"PRIDX" for cell %i.", celllist[k], k); + + /* Check that the partition is complete and all nodes have some work. */ + int present[nr_nodes]; + int failed = 0; + for (int i = 0; i < nr_nodes; i++) present[i] = 0; + for (int i = 0; i < nr_cells; i++) present[celllist[i]]++; + for (int i = 0; i < nr_nodes; i++) { + if (! present[i]) { + failed = 1; + message("Node %d is not present after repartition", i); } - message( "vertex weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot ); - */ - - /* Make sure there are no zero weights. */ - for (k = 0; k < 26 * nr_cells; k++) - if (weights_e[k] == 0) weights_e[k] = 1; - for (k = 0; k < nr_cells; k++) - if ((weights_v[k] *= vscale) == 0) weights_v[k] = 1; - - /* Allocate and fill the connection array. */ - idx_t *offsets; - if ((offsets = (idx_t *)malloc(sizeof(idx_t) * (nr_cells + 1))) == NULL) - error("Failed to allocate offsets buffer."); - offsets[0] = 0; - for (k = 0; k < nr_cells; k++) offsets[k + 1] = offsets[k] + 26; - - /* Set the METIS options. +1 to keep the GCC sanitizer happy. */ - idx_t options[METIS_NOPTIONS + 1]; - METIS_SetDefaultOptions(options); - options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT; - options[METIS_OPTION_NUMBERING] = 0; - options[METIS_OPTION_CONTIG] = 1; - options[METIS_OPTION_NCUTS] = 10; - options[METIS_OPTION_NITER] = 20; - // options[ METIS_OPTION_UFACTOR ] = 1; - - /* Set the initial partition, although this is probably ignored. */ - for (k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID; - - /* Call METIS. */ - idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes; - idx_t objval; - - /* Dump graph in METIS format */ - /*dumpMETISGraph("metis_graph", idx_nr_cells, one, offsets, inds, - weights_v, NULL, weights_e);*/ - - if (METIS_PartGraphRecursive(&idx_nr_cells, &one, offsets, inds, weights_v, - NULL, weights_e, &idx_nr_nodes, NULL, NULL, - options, &objval, nodeIDs) != METIS_OK) - error("Call to METIS_PartGraphRecursive failed."); - - /* Dump the 3d array of cell IDs. */ - /* printf( "engine_repartition: nodeIDs = reshape( [" ); - for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ ) - printf( "%i " , (int)nodeIDs[ i ] ); - printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */ - - /* Check that the nodeIDs are ok. */ - for (k = 0; k < nr_cells; k++) - if (nodeIDs[k] < 0 || nodeIDs[k] >= nr_nodes) - error("Got bad nodeID %"PRIDX" for cell %i.", nodeIDs[k], k); - - /* Check that the partition is complete and all nodes have some work. */ - int present[nr_nodes]; - int failed = 0; - for (i = 0; i < nr_nodes; i++) present[i] = 0; - for (i = 0; i < nr_cells; i++) present[nodeIDs[i]]++; - for (i = 0; i < nr_nodes; i++) { - if (! present[i]) { - failed = 1; - message("Node %d is not present after repartition", i); } + + /* If partition failed continue with the current one, but make this + * clear. */ + if (failed) { + message("WARNING: METIS repartition has failed, continuing with " + "the current partition, load balance will not be optimal"); + for (int k = 0; k < nr_cells; k++) celllist[k] = cells[k].nodeID; + } + } - /* If partition failed continue with the current one, but make this - * clear. */ - if (failed) { - message("WARNING: METIS repartition has failed, continuing with " - "the current partition, load balance will not be optimal"); - for (k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID; + /* Distribute the celllist partition and apply. */ + if ((res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD)) + != MPI_SUCCESS) + mpi_error(res,"Failed to bcast the cell list"); + + /* And apply to our cells */ + part_split_metis(s, e->nr_nodes, celllist); + + /* Clean up. */ + free(weights_v); + free(weights_e); + free(celllist); + + } else if (reparttype == REPART_METIS_VERTEX) { + + /* Use particle counts as vertex weights. */ + /* Space for particles per cell counts, which will be used as weights. */ + int *weights = NULL; + if ((weights = malloc(sizeof(int) * s->nr_cells)) == NULL) + error("Failed to allocate weights buffer."); + bzero(weights, sizeof(int) * s->nr_cells); + + /* Check each particle and accumilate the counts per cell. */ + struct part *parts = s->parts; + int *cdim = s->cdim; + double ih[3], dim[3]; + ih[0] = s->ih[0]; + ih[1] = s->ih[1]; + ih[2] = s->ih[2]; + dim[0] = s->dim[0]; + dim[1] = s->dim[1]; + dim[2] = s->dim[2]; + for (int k = 0; k < s->nr_parts; k++) { + for (int j = 0; j < 3; j++) { + if (parts[k].x[j] < 0.0) + parts[k].x[j] += dim[j]; + else if (parts[k].x[j] >= dim[j]) + parts[k].x[j] -= dim[j]; + } + const int cid = cell_getid(cdim, parts[k].x[0] * ih[0], + parts[k].x[1] * ih[1], parts[k].x[2] * ih[2]); + weights[cid]++; } - } + /* Get all the counts from all the nodes. */ + int res; + if ((res=MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_INT, + MPI_SUM, MPI_COMM_WORLD)) != MPI_SUCCESS) + mpi_error(res,"Failed to allreduce particle cell weights."); -/* Broadcast the result of the partition. */ -#if IDXTYPEWIDTH == 32 - if (MPI_Bcast(nodeIDs, nr_cells, MPI_INT, 0, MPI_COMM_WORLD) != MPI_SUCCESS) - error("Failed to bcast the node IDs."); -#else - if (MPI_Bcast(nodeIDs, nr_cells, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD) != - MPI_SUCCESS) - error("Failed to bcast the node IDs."); -#endif + /* Main node does the partition calculation. */ + int *celllist = malloc(sizeof(int) * s->nr_cells); + if (celllist == NULL) + error("Failed to allocate celllist"); - /* Set the cell nodeIDs and clear any non-local parts. */ - for (k = 0; k < nr_cells; k++) { - cells[k].nodeID = nodeIDs[k]; - if (nodeIDs[k] == nodeID) my_cells += 1; - } + if (e->nodeID == 0) + part_pick_metis(s, e->nr_nodes, weights, NULL, celllist); + + /* Distribute the celllist partition and apply. */ + if ((res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD)) + != MPI_SUCCESS) + mpi_error(res,"Failed to bcast the cell list"); - /* Clean up. */ - free(inds); - free(weights_v); - free(weights_e); - free(nodeIDs); + /* And apply to our cells */ + part_split_metis(s, e->nr_nodes, celllist); + + if (weights != NULL) free(weights); + free(celllist); + } /* Now comes the tricky part: Exchange particles between all nodes. This is done in two steps, first allreducing a matrix of @@ -990,10 +969,7 @@ int engine_exchange_strays(struct engine *e, int offset, int *ind, int N) { int err; if ((err = MPI_Waitany(2 * e->nr_proxies, reqs_in, &pid, &status)) != MPI_SUCCESS) { - char buff[MPI_MAX_ERROR_STRING]; - int res; - MPI_Error_string(err, buff, &res); - error("MPI_Waitany failed (%s).", buff); + mpi_error(err,"MPI_Waitany failed."); } if (pid == MPI_UNDEFINED) break; // message( "request from proxy %i has arrived." , pid ); @@ -1822,7 +1798,7 @@ void engine_step(struct engine *e) { // printParticle( e->s->parts , 3392063069037 , e->s->nr_parts ); /* Re-distribute the particles amongst the nodes? */ - if (e->forcerepart) engine_repartition(e); + if (e->forcerepart != REPART_NONE) engine_repartition(e); /* Prepare the space. */ engine_prepare(e); @@ -2065,39 +2041,41 @@ void engine_makeproxies(struct engine *e) { } /** - * @brief Split the underlying space according to the given grid. + * @brief Split the underlying space. * * Only used with MPI the partitions produced associate cells with nodes. + * The partitioning can be performed in various ways, see the initpart + * struct. * * @param e The #engine. * @param grid The grid. */ -void engine_split(struct engine *e, struct pgrid *grid) { +void engine_split(struct engine *e, struct initpart *ipart) { #ifdef WITH_MPI struct space *s = e->s; - if (grid->type == GRID_GRID) { + if (ipart->type == INITPART_GRID) { int j, k; int ind[3]; struct cell *c; /* If we've got the wrong number of nodes, fail. */ - if (e->nr_nodes != grid->grid[0] * grid->grid[1] * grid->grid[2]) + if (e->nr_nodes != ipart->grid[0] * ipart->grid[1] * ipart->grid[2]) error("Grid size does not match number of nodes."); /* Run through the cells and set their nodeID. */ // message("s->dim = [%e,%e,%e]", s->dim[0], s->dim[1], s->dim[2]); for (k = 0; k < s->nr_cells; k++) { c = &s->cells[k]; - for (j = 0; j < 3; j++) ind[j] = c->loc[j] / s->dim[j] * grid->grid[j]; - c->nodeID = ind[0] + grid->grid[0] * (ind[1] + grid->grid[1] * ind[2]); + for (j = 0; j < 3; j++) ind[j] = c->loc[j] / s->dim[j] * ipart->grid[j]; + c->nodeID = ind[0] + ipart->grid[0] * (ind[1] + ipart->grid[1] * ind[2]); // message("cell at [%e,%e,%e]: ind = [%i,%i,%i], nodeID = %i", c->loc[0], // c->loc[1], c->loc[2], ind[0], ind[1], ind[2], c->nodeID); } } - else if (grid->type == GRID_RANDOM) { + else if (ipart->type == INITPART_RANDOM) { /* Random can only be done by one node, each node requires a list of * the sampling positions. */ @@ -2120,14 +2098,14 @@ void engine_split(struct engine *e, struct pgrid *grid) { part_split_random(s, e->nr_nodes, samplelist); free(samplelist); } - else if (grid->type == GRID_METIS_WEIGHT || grid->type == GRID_METIS_NOWEIGHT) { + else if (ipart->type == INITPART_METIS_WEIGHT || ipart->type == INITPART_METIS_NOWEIGHT) { /* Simple k-way partition selected by METIS using cell particle counts as * weights or not. Should be best when starting with a inhomogeneous dist. */ /* Space for particles per cell counts, which will be used as weights or not. */ int *weights = NULL; - if (grid->type == GRID_METIS_WEIGHT) { + if (ipart->type == INITPART_METIS_WEIGHT) { if ((weights = malloc(sizeof(int) * s->nr_cells)) == NULL) error("Failed to allocate weights buffer."); bzero(weights, sizeof(int) * s->nr_cells); @@ -2153,7 +2131,7 @@ void engine_split(struct engine *e, struct pgrid *grid) { parts[k].x[1] * ih[1], parts[k].x[2] * ih[2]); weights[cid]++; } - + /* Get all the counts from all the nodes. */ if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_INT, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) @@ -2167,7 +2145,7 @@ void engine_split(struct engine *e, struct pgrid *grid) { error("Failed to allocate celllist"); if (e->nodeID == 0) - part_pick_metis(s, e->nr_nodes, weights, celllist); + part_pick_metis(s, e->nr_nodes, weights, NULL, celllist); /* Distribute the celllist partition and apply. */ int res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD); @@ -2180,7 +2158,7 @@ void engine_split(struct engine *e, struct pgrid *grid) { if (weights != NULL) free(weights); free(celllist); } - else if (grid->type == GRID_VECTORIZE) { + else if (ipart->type == INITPART_VECTORIZE) { /* Vectorised selection, guaranteed to work, but not very clumpy in the * selection of regions. */ @@ -2289,7 +2267,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, e->proxy_ind = NULL; e->nr_proxies = 0; e->forcerebuild = 1; - e->forcerepart = 0; + e->forcerepart = REPART_NONE; e->links = NULL; e->nr_links = 0; engine_rank = nodeID; diff --git a/src/engine.h b/src/engine.h index 34c74ecc3b6fca75fa029b93b0e9d7ee3439e4d2..7ba40ae6e5df1fbb823a9dbdec012463865221ac 100644 --- a/src/engine.h +++ b/src/engine.h @@ -29,7 +29,7 @@ #include "scheduler.h" #include "space.h" #include "task.h" -#include "param.h" +#include "partition.h" /* Some constants. */ #define engine_policy_none 0 @@ -118,7 +118,8 @@ struct engine { ticks tic_step; /* Force the engine to rebuild? */ - int forcerebuild, forcerepart; + int forcerebuild; + enum repart_type forcerepart; /* How many steps have we done with the same set of tasks? */ int tasks_age; @@ -136,7 +137,7 @@ void engine_launch(struct engine *e, int nr_runners, unsigned int mask); void engine_prepare(struct engine *e); void engine_step(struct engine *e); void engine_maketasks(struct engine *e); -void engine_split(struct engine *e, struct pgrid *grid); +void engine_split(struct engine *e, struct initpart *ipart); int engine_exchange_strays(struct engine *e, int offset, int *ind, int N); void engine_rebuild(struct engine *e); void engine_repartition(struct engine *e); diff --git a/src/param.h b/src/param.h deleted file mode 100644 index c8390cbaf4a9cfaf47a08e0be6ce4892bfcc3010..0000000000000000000000000000000000000000 --- a/src/param.h +++ /dev/null @@ -1,39 +0,0 @@ -/******************************************************************************* - * This file is part of SWIFT. - * Copyright (C) 2015 Peter W. Draper (p.w.draper@durham.ac.uk) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ -#ifndef SWIFT_PARAM_H -#define SWIFT_PARAM_H - - -/* Initial partition grid struct. Defines type of partitioning to use and any - * related parameters. */ -enum grid_types { - GRID_GRID = 0, - GRID_RANDOM, - GRID_VECTORIZE, - GRID_METIS_WEIGHT, - GRID_METIS_NOWEIGHT -}; - -struct pgrid { - enum grid_types type; - int grid[3]; -}; - - -#endif /* SWIFT_PARAM_H */ diff --git a/src/partition.c b/src/partition.c index 3f331869c14592bc7192fee39cbf0bd557a0109f..299cd9d97beac3d22238920314956da08b162991 100644 --- a/src/partition.c +++ b/src/partition.c @@ -47,15 +47,29 @@ #include "partition.h" #include "const.h" #include "error.h" -#include "param.h" #include "debug.h" - /* Useful defines. */ #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MIN(a, b) ((a) > (b) ? (b) : (a)) #define CHUNK 512 +/* Simple descriptions of initial partition types for reports. */ +const char *initpart_name[] = { + "gridded cells", + "random point associated cells", + "vectorized point associated cells", + "METIS particle weighted cells", + "METIS unweighted cells" +}; + +/* Simple descriptions of repartition types for reports. */ +const char *repart_name[] = { + "METIS edge and vertex weighted cells", + "METIS vertex weighted cells", + "METIS edge weights" +}; + /* Vectorisation support */ /* ===================== */ @@ -744,12 +758,16 @@ int main(int argc, char *argv[]) { * * @param s the space of cells to partition. * @param nregions the number of regions required in the partition. -* @param weights for the cells, sizeof number of cells if used, NULL - * for unit weights. - * @param celllist on exit this contains the ids of the select region, + * @param vertexw weights for the cells, sizeof number of cells if used, + * NULL for unit weights. + * @param edgew weights for the graph edges between all cells, sizeof number + * of cells * 26 if used, NULL for unit weights. Need to be packed + * in CSR format, so same as adjcny array. + * @param celllist on exit this contains the ids of the selected regions, * sizeof number of cells. */ -void part_pick_metis(struct space *s, int nregions, int *weights, int *celllist) { +void part_pick_metis(struct space *s, int nregions, int *vertexw, + int *edgew, int *celllist) { #if defined(HAVE_METIS) @@ -770,9 +788,14 @@ void part_pick_metis(struct space *s, int nregions, int *weights, int *celllist) idx_t *adjncy; if ((adjncy = (idx_t *)malloc(sizeof(idx_t) * 26 * ncells)) == NULL) error("Failed to allocate adjncy array."); - idx_t *weights_v; - if ((weights_v = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) - error("Failed to allocate weights array"); + idx_t *weights_v = NULL; + if (vertexw != NULL) + if ((weights_v = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate vertex weights array"); + idx_t *weights_e = NULL; + if (edgew != NULL) + if ((weights_e = (idx_t *)malloc( 26 *sizeof(idx_t) * ncells)) == NULL) + error("Failed to allocate edge weights array"); idx_t *regionid; if ((regionid = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL) error("Failed to allocate regionid array"); @@ -823,17 +846,25 @@ void part_pick_metis(struct space *s, int nregions, int *weights, int *celllist) for (int k = 0; k < ncells; k++) xadj[k + 1] = xadj[k] + 26; /* Init the vertex weights array. */ - if ( weights != NULL) { + if (vertexw != NULL) { for (int k = 0; k < ncells; k++) { - if ( weights[k] > 0 ) { - weights_v[k] = weights[k]; + if ( vertexw[k] > 0 ) { + weights_v[k] = vertexw[k]; } else { weights_v[k] = 1; } } - } else { - for (int k = 0; k < ncells; k++) - weights_v[k] = 1; + } + + /* Init the edges weights array. */ + if (edgew != NULL) { + for (int k = 0; k < 26 * ncells; k++) { + if (edgew[k] > 0 ) { + weights_e[k] = edgew[k]; + } else { + weights_e[k] = 1; + } + } } /* Set the METIS options. */ @@ -853,10 +884,10 @@ void part_pick_metis(struct space *s, int nregions, int *weights, int *celllist) /* Dump graph in METIS format */ dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, - weights_v, NULL, NULL); + weights_v, weights_e, NULL); if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, - NULL, NULL, &idx_nregions, NULL, NULL, + weights_e, NULL, &idx_nregions, NULL, NULL, options, &objval, regionid) != METIS_OK) error("Call to METIS_PartGraphKway failed."); @@ -894,7 +925,7 @@ void part_pick_metis(struct space *s, int nregions, int *weights, int *celllist) for (int l = 0, k = 0; l < s->cdim[0]; l++) { for (int m = 0; m < s->cdim[1]; m++) { for (int n = 0; n < s->cdim[2]; n++) { - message("node %d %d %d -> %d %d", l, m, n, regionid[k], weights_v[k]); + message("node %d %d %d -> %d", l, m, n, regionid[k]); k++; } } diff --git a/src/partition.h b/src/partition.h index 70657425d82c2e9681e98263e4daf7a98cdfe3b3..d019dc0ad4171ae1f55b212a6ace6a64a1adae6f 100644 --- a/src/partition.h +++ b/src/partition.h @@ -22,13 +22,43 @@ #include "space.h" #include "cell.h" +/* Initial partitioning types. */ +enum initpart_type { + INITPART_GRID = 0, + INITPART_RANDOM, + INITPART_VECTORIZE, + INITPART_METIS_WEIGHT, + INITPART_METIS_NOWEIGHT +}; + +/* Simple descriptions of types for reports. */ +extern const char *initpart_name[]; + +/* The selected initial partition type and any related metadata. */ +struct initpart { + enum initpart_type type; + int grid[3]; +}; + +/* Repartition type to use. */ +enum repart_type { + REPART_NONE = 0, + REPART_METIS_BOTH, + REPART_METIS_VERTEX, + REPART_METIS_EDGE +}; + +/* Simple descriptions of types for reports. */ +extern const char *repart_name[]; + int part_pick_random(struct space *s, int nregions, float *samplelist); void part_split_random(struct space *s, int nregions, float *samplelist); void part_pick_vector(struct space *s, int nregions, int *samplecells); void part_split_vector(struct space *s, int nregions, int *samplecells); -void part_pick_metis(struct space *s, int nregions, int *weight, int *celllist); +void part_pick_metis(struct space *s, int nregions, int *vertexw, + int *edgew, int *celllist); void part_split_metis(struct space *s, int nregions, int *celllist); -#endif /* SWIFT_POISSON_DISC_H */ +#endif /* SWIFT_PARTITION_H */ diff --git a/src/swift.h b/src/swift.h index 44c17d3121b231acdc2f04e7d2a1c5cc64869b51..943d1dea9dc9d5bae7d1ceae296b2ec2e7a106df 100644 --- a/src/swift.h +++ b/src/swift.h @@ -46,7 +46,6 @@ #include "timers.h" #include "units.h" #include "tools.h" -#include "param.h" #include "partition.h" #include "version.h"