Commit aee41060 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

Various fixes to the merge, making sure everything compiles.

parent 5f3bd99c
......@@ -58,7 +58,7 @@ void printParticle(struct part *parts, struct xpart *xparts, long long int id,
/* Look for the particle. */
for (size_t i = 0; i < N; i++)
if (parts[i].id == id) {
printf("## Particle[%d]:\n id=%lld", i, parts[i].id);
printf("## Particle[%zd]:\n id=%lld", i, parts[i].id);
hydro_debug_particle(&parts[i], &xparts[i]);
found = 1;
break;
......@@ -75,7 +75,7 @@ void printgParticle(struct gpart *parts, long long int id, int N) {
for (size_t i = 0; i < N; i++)
if (parts[i].id == -id || (parts[i].id > 0 && parts[i].part->id == id)) {
printf(
"## gParticle[%d]: id=%lld, x=[%.16e,%.16e,%.16e], "
"## gParticle[%zd]: id=%lld, x=[%.16e,%.16e,%.16e], "
"v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], m=%.3e, t_begin=%d, "
"t_end=%d\n",
i, parts[i].part->id, parts[i].x[0], parts[i].x[1], parts[i].x[2],
......
......@@ -295,344 +295,17 @@ void engine_repartition(struct engine *e) {
#if defined(WITH_MPI) && defined(HAVE_METIS)
int res;
idx_t *inds, *nodeIDs;
idx_t *weights_v = NULL, *weights_e = NULL;
struct space *s = e->s;
int nr_cells = s->nr_cells, my_cells = 0;
struct cell *cells = s->cells;
int ind[3], *cdim = s->cdim;
struct task *tasks = e->sched.tasks;
int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
float wscale = 1e-3, vscale = 1e-3, wscale_buff;
idx_t wtot = 0;
idx_t wmax = 1e9 / e->nr_nodes;
idx_t wmin;
/* Clear the repartition flag. */
enum repartition_type reparttype = e->forcerepart;
e->forcerepart = REPART_NONE;
/* Nothing to do if only using a single node. Also avoids METIS
* bug that doesn't handle this case well. */
if (nr_nodes == 1) return;
/* Allocate the inds and weights. */
if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 *nr_cells)) == NULL ||
(weights_v = (idx_t *)malloc(sizeof(idx_t) *nr_cells)) == NULL ||
(weights_e = (idx_t *)malloc(sizeof(idx_t) * 26 *nr_cells)) == NULL ||
(nodeIDs = (idx_t *)malloc(sizeof(idx_t) * nr_cells)) == NULL)
error("Failed to allocate inds and weights arrays.");
/* Fill the inds array. */
for (int cid = 0; cid < nr_cells; cid++) {
ind[0] = cells[cid].loc[0] / s->cells[cid].h[0] + 0.5;
ind[1] = cells[cid].loc[1] / s->cells[cid].h[1] + 0.5;
ind[2] = cells[cid].loc[2] / s->cells[cid].h[2] + 0.5;
int l = 0;
for (int i = -1; i <= 1; i++) {
int ii = ind[0] + i;
if (ii < 0)
ii += cdim[0];
else if (ii >= cdim[0])
ii -= cdim[0];
for (int j = -1; j <= 1; j++) {
int jj = ind[1] + j;
if (jj < 0)
jj += cdim[1];
else if (jj >= cdim[1])
jj -= cdim[1];
for (int k = -1; k <= 1; k++) {
int kk = ind[2] + k;
if (kk < 0)
kk += cdim[2];
else if (kk >= cdim[2])
kk -= cdim[2];
if (i || j || k) {
inds[cid * 26 + l] = cell_getid(cdim, ii, jj, kk);
l += 1;
}
}
}
}
}
/* Init the weights arrays. */
bzero(weights_e, sizeof(idx_t) * 26 * nr_cells);
bzero(weights_v, sizeof(idx_t) * nr_cells);
/* Loop over the tasks... */
for (int j = 0; j < e->sched.nr_tasks; j++) {
/* Get a pointer to the kth task. */
const struct task *t = &tasks[j];
/* Skip un-interesting tasks. */
if (t->type != task_type_self && t->type != task_type_pair &&
t->type != task_type_sub && t->type != task_type_ghost &&
t->type != task_type_kick1 && t->type != task_type_kick2)
continue;
/* Get the task weight. */
idx_t w = (t->toc - t->tic) * wscale;
if (w < 0) error("Bad task weight (%" SCIDX ").", w);
/* Do we need to re-scale? */
wtot += w;
while (wtot > wmax) {
wscale /= 2;
wtot /= 2;
w /= 2;
for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= 0.5;
for (int k = 0; k < nr_cells; k++) weights_v[k] *= 0.5;
}
/* Get the top-level cells involved. */
const struct cell *ci, *cj = NULL;
for (ci = t->ci; ci->parent != NULL; ci = ci->parent)
;
if (t->cj != NULL)
for (cj = t->cj; cj->parent != NULL; cj = cj->parent)
;
/* Get the cell IDs. */
const int cid = ci - cells;
/* Different weights for different tasks. */
if (t->type == task_type_ghost || t->type == task_type_kick1 ||
t->type == task_type_kick2) {
/* Particle updates add only to vertex weight. */
weights_v[cid] += w;
}
/* Self interaction? */
else if ((t->type == task_type_self && ci->nodeID == nodeID) ||
(t->type == task_type_sub && cj == NULL && ci->nodeID == nodeID)) {
/* Self interactions add only to vertex weight. */
weights_v[cid] += w;
}
/* Pair? */
else if (t->type == task_type_pair ||
(t->type == task_type_sub && cj != NULL)) {
/* In-cell pair? */
if (ci == cj) {
/* Add weight to vertex for ci. */
weights_v[cid] += w;
}
/* Distinct cells with local ci? */
else if (ci->nodeID == nodeID) {
/* Index of the jth cell. */
const int cjd = cj - cells;
/* Add half of weight to each cell. */
if (ci->nodeID == nodeID) weights_v[cid] += 0.5 * w;
if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w;
/* Add Weight to edge. */
int k = 26 * cid;
while (inds[k] != cjd) k++;
weights_e[k] += w;
k = 26 * cjd;
while (inds[k] != cid) k++;
weights_e[k] += w;
}
}
}
if (e->nr_nodes == 1) return;
/* Get the minimum scaling and re-scale if necessary. */
if ((res = MPI_Allreduce(&wscale, &wscale_buff, 1, MPI_FLOAT, MPI_MIN,
MPI_COMM_WORLD)) != MPI_SUCCESS) {
char buff[MPI_MAX_ERROR_STRING];
int len;
MPI_Error_string(res, buff, &len);
error("Failed to allreduce the weight scale (%s).", buff);
}
if (wscale_buff != wscale) {
float scale = wscale_buff / wscale;
for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= scale;
for (int k = 0; k < nr_cells; k++) weights_v[k] *= scale;
}
/* Merge the weights arrays across all nodes. */
#if IDXTYPEWIDTH == 32
if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v,
nr_cells, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) !=
MPI_SUCCESS) {
#else
if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v,
nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0,
MPI_COMM_WORLD)) != MPI_SUCCESS) {
#endif
char buff[MPI_MAX_ERROR_STRING];
int len;
MPI_Error_string(res, buff, &len);
error("Failed to allreduce vertex weights (%s).", buff);
}
#if IDXTYPEWIDTH == 32
if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e,
26 * nr_cells, MPI_INT, MPI_SUM, 0,
MPI_COMM_WORLD) != MPI_SUCCESS)
#else
if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e,
26 * nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0,
MPI_COMM_WORLD) != MPI_SUCCESS)
#endif
error("Failed to allreduce edge weights.");
/* As of here, only one node needs to compute the partition. */
if (nodeID == 0) {
/* Final rescale of all weights to avoid a large range. Large ranges have
* been seen to cause an incomplete graph. */
wmin = wmax;
wmax = 0.0;
for (int k = 0; k < 26 * nr_cells; k++) {
wmax = weights_e[k] > wmax ? weights_e[k] : wmax;
wmin = weights_e[k] < wmin ? weights_e[k] : wmin;
}
if ((wmax - wmin) > engine_maxmetisweight) {
wscale = engine_maxmetisweight / (wmax - wmin);
for (int k = 0; k < 26 * nr_cells; k++) {
weights_e[k] = (weights_e[k] - wmin) * wscale + 1;
}
for (int k = 0; k < nr_cells; k++) {
weights_v[k] = (weights_v[k] - wmin) * wscale + 1;
}
}
/* Check that the edge weights are fully symmetric. */
/* for ( cid = 0 ; cid < nr_cells ; cid++ )
for ( k = 0 ; k < 26 ; k++ ) {
cjd = inds[ cid*26 + k ];
for ( j = 26*cjd ; inds[j] != cid ; j++ );
if ( weights_e[ cid*26+k ] != weights_e[ j ] )
error( "Unsymmetric edge weights detected (%i vs %i)." ,
weights_e[ cid*26+k ] , weights_e[ j ] );
} */
/* int w_min = weights_e[0], w_max = weights_e[0], w_tot = weights_e[0];
for ( k = 1 ; k < 26*nr_cells ; k++ ) {
w_tot += weights_e[k];
if ( weights_e[k] < w_min )
w_min = weights_e[k];
else if ( weights_e[k] > w_max )
w_max = weights_e[k];
}
message( "edge weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot );
w_min = weights_e[0], w_max = weights_e[0]; w_tot = weights_v[0];
for ( k = 1 ; k < nr_cells ; k++ ) {
w_tot += weights_v[k];
if ( weights_v[k] < w_min )
w_min = weights_v[k];
else if ( weights_v[k] > w_max )
w_max = weights_v[k];
}
message( "vertex weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot );
*/
/* Make sure there are no zero weights. */
for (int k = 0; k < 26 * nr_cells; k++)
if (weights_e[k] == 0) weights_e[k] = 1;
for (int k = 0; k < nr_cells; k++)
if ((weights_v[k] *= vscale) == 0) weights_v[k] = 1;
/* Allocate and fill the connection array. */
idx_t *offsets;
if ((offsets = (idx_t *)malloc(sizeof(idx_t) * (nr_cells + 1))) == NULL)
error("Failed to allocate offsets buffer.");
offsets[0] = 0;
for (int k = 0; k < nr_cells; k++) offsets[k + 1] = offsets[k] + 26;
/* Set the METIS options. +1 to keep the GCC sanitizer happy. */
idx_t options[METIS_NOPTIONS + 1];
METIS_SetDefaultOptions(options);
options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT;
options[METIS_OPTION_NUMBERING] = 0;
options[METIS_OPTION_CONTIG] = 1;
options[METIS_OPTION_NCUTS] = 10;
options[METIS_OPTION_NITER] = 20;
// options[ METIS_OPTION_UFACTOR ] = 1;
/* Set the initial partition, although this is probably ignored. */
for (int k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID;
/* Call METIS. */
idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes;
idx_t objval;
/* Dump graph in METIS format */
/*dumpMETISGraph("metis_graph", idx_nr_cells, one, offsets, inds,
weights_v, NULL, weights_e);*/
if (METIS_PartGraphRecursive(&idx_nr_cells, &one, offsets, inds, weights_v,
NULL, weights_e, &idx_nr_nodes, NULL, NULL,
options, &objval, nodeIDs) != METIS_OK)
error("Call to METIS_PartGraphRecursive failed.");
/* Dump the 3d array of cell IDs. */
/* printf( "engine_repartition: nodeIDs = reshape( [" );
for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ )
printf( "%i " , (int)nodeIDs[ i ] );
printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */
/* Check that the nodeIDs are ok. */
for (int k = 0; k < nr_cells; k++) {
if (nodeIDs[k] < 0 || nodeIDs[k] >= nr_nodes)
error("Got bad nodeID %" PRIDX " for cell %i.", nodeIDs[k], k);
}
/* Check that the partition is complete and all nodes have some work. */
int present[nr_nodes];
int failed = 0;
for (int i = 0; i < nr_nodes; i++) present[i] = 0;
for (int i = 0; i < nr_cells; i++) present[nodeIDs[i]]++;
for (int i = 0; i < nr_nodes; i++) {
if (!present[i]) {
failed = 1;
message("Node %d is not present after repartition", i);
}
}
/* If partition failed continue with the current one, but make this
* clear. */
if (failed) {
message(
"WARNING: METIS repartition has failed, continuing with "
"the current partition, load balance will not be optimal");
for (int k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID;
}
}
/* Broadcast the result of the partition. */
#if IDXTYPEWIDTH == 32
if (MPI_Bcast(nodeIDs, nr_cells, MPI_INT, 0, MPI_COMM_WORLD) != MPI_SUCCESS)
error("Failed to bcast the node IDs.");
#else
if (MPI_Bcast(nodeIDs, nr_cells, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD) !=
MPI_SUCCESS)
error("Failed to bcast the node IDs.");
#endif
/* Set the cell nodeIDs and clear any non-local parts. */
for (int k = 0; k < nr_cells; k++) {
cells[k].nodeID = nodeIDs[k];
if (nodeIDs[k] == nodeID) my_cells += 1;
}
/* Clean up. */
free(inds);
free(weights_v);
free(weights_e);
free(nodeIDs);
/* Do the repartitioning. */
partition_repartition(reparttype, e->nodeID, e->nr_nodes, e->s,
e->sched.tasks, e->sched.nr_tasks);
/* Now comes the tricky part: Exchange particles between all nodes.
This is done in two steps, first allreducing a matrix of
......@@ -687,6 +360,7 @@ void engine_addtasks_grav(struct engine *e, struct cell *c, struct task *up,
void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj) {
#ifdef WITH_MPI
struct link *l = NULL;
struct scheduler *s = &e->sched;
......@@ -740,6 +414,7 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj) {
void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
struct task *t_rho) {
#ifdef WITH_MPI
struct scheduler *s = &e->sched;
/* Do we need to construct a recv task? */
......@@ -1321,7 +996,7 @@ int engine_marktasks(struct engine *e) {
struct scheduler *s = &e->sched;
const int nr_tasks = s->nr_tasks, *ind = s->tasks_ind;
struct task *tasks = s->tasks;
const float dt_step = e->dt_step;
float ti_end = e->ti_current;
/* Much less to do here if we're on a fixed time-step. */
if ((e->policy & engine_policy_fixdt) == engine_policy_fixdt) {
......@@ -1456,8 +1131,8 @@ void engine_print(struct engine *e) {
/* Count and print the number of each task type. */
int counts[task_type_count + 1];
for (k = 0; k <= task_type_count; k++) counts[k] = 0;
for (k = 0; k < sched->nr_tasks; k++)
for (int k = 0; k <= task_type_count; k++) counts[k] = 0;
for (int k = 0; k < sched->nr_tasks; k++)
if (!sched->tasks[k].skip)
counts[(int)sched->tasks[k].type] += 1;
else
......@@ -1468,7 +1143,7 @@ void engine_print(struct engine *e) {
#else
printf("engine_print: task counts are [ %s=%i", taskID_names[0], counts[0]);
#endif
for (k = 1; k < task_type_count; k++)
for (int k = 1; k < task_type_count; k++)
printf(" %s=%i", taskID_names[k], counts[k]);
printf(" skipped=%i ]\n", counts[task_type_count]);
fflush(stdout);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment