Skip to content
Snippets Groups Projects
Commit aee41060 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

various fixes to the merge, making sure things compile.

parent 5f3bd99c
No related branches found
No related tags found
2 merge requests: !136 Master, !113 Local variable cleanup
......@@ -58,7 +58,7 @@ void printParticle(struct part *parts, struct xpart *xparts, long long int id,
/* Look for the particle. */
for (size_t i = 0; i < N; i++)
if (parts[i].id == id) {
printf("## Particle[%d]:\n id=%lld", i, parts[i].id);
printf("## Particle[%zd]:\n id=%lld", i, parts[i].id);
hydro_debug_particle(&parts[i], &xparts[i]);
found = 1;
break;
......@@ -75,7 +75,7 @@ void printgParticle(struct gpart *parts, long long int id, int N) {
for (size_t i = 0; i < N; i++)
if (parts[i].id == -id || (parts[i].id > 0 && parts[i].part->id == id)) {
printf(
"## gParticle[%d]: id=%lld, x=[%.16e,%.16e,%.16e], "
"## gParticle[%zd]: id=%lld, x=[%.16e,%.16e,%.16e], "
"v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], m=%.3e, t_begin=%d, "
"t_end=%d\n",
i, parts[i].part->id, parts[i].x[0], parts[i].x[1], parts[i].x[2],
......
......@@ -295,344 +295,17 @@ void engine_repartition(struct engine *e) {
#if defined(WITH_MPI) && defined(HAVE_METIS)
int res;
idx_t *inds, *nodeIDs;
idx_t *weights_v = NULL, *weights_e = NULL;
struct space *s = e->s;
int nr_cells = s->nr_cells, my_cells = 0;
struct cell *cells = s->cells;
int ind[3], *cdim = s->cdim;
struct task *tasks = e->sched.tasks;
int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
float wscale = 1e-3, vscale = 1e-3, wscale_buff;
idx_t wtot = 0;
idx_t wmax = 1e9 / e->nr_nodes;
idx_t wmin;
/* Clear the repartition flag. */
enum repartition_type reparttype = e->forcerepart;
e->forcerepart = REPART_NONE;
/* Nothing to do if only using a single node. Also avoids METIS
* bug that doesn't handle this case well. */
if (nr_nodes == 1) return;
/* Allocate the inds and weights. */
if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 *nr_cells)) == NULL ||
(weights_v = (idx_t *)malloc(sizeof(idx_t) *nr_cells)) == NULL ||
(weights_e = (idx_t *)malloc(sizeof(idx_t) * 26 *nr_cells)) == NULL ||
(nodeIDs = (idx_t *)malloc(sizeof(idx_t) * nr_cells)) == NULL)
error("Failed to allocate inds and weights arrays.");
/* Fill the inds array. */
for (int cid = 0; cid < nr_cells; cid++) {
ind[0] = cells[cid].loc[0] / s->cells[cid].h[0] + 0.5;
ind[1] = cells[cid].loc[1] / s->cells[cid].h[1] + 0.5;
ind[2] = cells[cid].loc[2] / s->cells[cid].h[2] + 0.5;
int l = 0;
for (int i = -1; i <= 1; i++) {
int ii = ind[0] + i;
if (ii < 0)
ii += cdim[0];
else if (ii >= cdim[0])
ii -= cdim[0];
for (int j = -1; j <= 1; j++) {
int jj = ind[1] + j;
if (jj < 0)
jj += cdim[1];
else if (jj >= cdim[1])
jj -= cdim[1];
for (int k = -1; k <= 1; k++) {
int kk = ind[2] + k;
if (kk < 0)
kk += cdim[2];
else if (kk >= cdim[2])
kk -= cdim[2];
if (i || j || k) {
inds[cid * 26 + l] = cell_getid(cdim, ii, jj, kk);
l += 1;
}
}
}
}
}
/* Init the weights arrays. */
bzero(weights_e, sizeof(idx_t) * 26 * nr_cells);
bzero(weights_v, sizeof(idx_t) * nr_cells);
/* Loop over the tasks... */
for (int j = 0; j < e->sched.nr_tasks; j++) {
/* Get a pointer to the kth task. */
const struct task *t = &tasks[j];
/* Skip un-interesting tasks. */
if (t->type != task_type_self && t->type != task_type_pair &&
t->type != task_type_sub && t->type != task_type_ghost &&
t->type != task_type_kick1 && t->type != task_type_kick2)
continue;
/* Get the task weight. */
idx_t w = (t->toc - t->tic) * wscale;
if (w < 0) error("Bad task weight (%" SCIDX ").", w);
/* Do we need to re-scale? */
wtot += w;
while (wtot > wmax) {
wscale /= 2;
wtot /= 2;
w /= 2;
for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= 0.5;
for (int k = 0; k < nr_cells; k++) weights_v[k] *= 0.5;
}
/* Get the top-level cells involved. */
const struct cell *ci, *cj = NULL;
for (ci = t->ci; ci->parent != NULL; ci = ci->parent)
;
if (t->cj != NULL)
for (cj = t->cj; cj->parent != NULL; cj = cj->parent)
;
/* Get the cell IDs. */
const int cid = ci - cells;
/* Different weights for different tasks. */
if (t->type == task_type_ghost || t->type == task_type_kick1 ||
t->type == task_type_kick2) {
/* Particle updates add only to vertex weight. */
weights_v[cid] += w;
}
/* Self interaction? */
else if ((t->type == task_type_self && ci->nodeID == nodeID) ||
(t->type == task_type_sub && cj == NULL && ci->nodeID == nodeID)) {
/* Self interactions add only to vertex weight. */
weights_v[cid] += w;
}
/* Pair? */
else if (t->type == task_type_pair ||
(t->type == task_type_sub && cj != NULL)) {
/* In-cell pair? */
if (ci == cj) {
/* Add weight to vertex for ci. */
weights_v[cid] += w;
}
/* Distinct cells with local ci? */
else if (ci->nodeID == nodeID) {
/* Index of the jth cell. */
const int cjd = cj - cells;
/* Add half of weight to each cell. */
if (ci->nodeID == nodeID) weights_v[cid] += 0.5 * w;
if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w;
/* Add Weight to edge. */
int k = 26 * cid;
while (inds[k] != cjd) k++;
weights_e[k] += w;
k = 26 * cjd;
while (inds[k] != cid) k++;
weights_e[k] += w;
}
}
}
if (e->nr_nodes == 1) return;
/* Get the minimum scaling and re-scale if necessary. */
if ((res = MPI_Allreduce(&wscale, &wscale_buff, 1, MPI_FLOAT, MPI_MIN,
MPI_COMM_WORLD)) != MPI_SUCCESS) {
char buff[MPI_MAX_ERROR_STRING];
int len;
MPI_Error_string(res, buff, &len);
error("Failed to allreduce the weight scale (%s).", buff);
}
if (wscale_buff != wscale) {
float scale = wscale_buff / wscale;
for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= scale;
for (int k = 0; k < nr_cells; k++) weights_v[k] *= scale;
}
/* Merge the weights arrays across all nodes. */
#if IDXTYPEWIDTH == 32
if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v,
nr_cells, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) !=
MPI_SUCCESS) {
#else
if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v,
nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0,
MPI_COMM_WORLD)) != MPI_SUCCESS) {
#endif
char buff[MPI_MAX_ERROR_STRING];
int len;
MPI_Error_string(res, buff, &len);
error("Failed to allreduce vertex weights (%s).", buff);
}
#if IDXTYPEWIDTH == 32
if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e,
26 * nr_cells, MPI_INT, MPI_SUM, 0,
MPI_COMM_WORLD) != MPI_SUCCESS)
#else
if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e,
26 * nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0,
MPI_COMM_WORLD) != MPI_SUCCESS)
#endif
error("Failed to allreduce edge weights.");
/* As of here, only one node needs to compute the partition. */
if (nodeID == 0) {
/* Final rescale of all weights to avoid a large range. Large ranges have
* been seen to cause an incomplete graph. */
wmin = wmax;
wmax = 0.0;
for (int k = 0; k < 26 * nr_cells; k++) {
wmax = weights_e[k] > wmax ? weights_e[k] : wmax;
wmin = weights_e[k] < wmin ? weights_e[k] : wmin;
}
if ((wmax - wmin) > engine_maxmetisweight) {
wscale = engine_maxmetisweight / (wmax - wmin);
for (int k = 0; k < 26 * nr_cells; k++) {
weights_e[k] = (weights_e[k] - wmin) * wscale + 1;
}
for (int k = 0; k < nr_cells; k++) {
weights_v[k] = (weights_v[k] - wmin) * wscale + 1;
}
}
/* Check that the edge weights are fully symmetric. */
/* for ( cid = 0 ; cid < nr_cells ; cid++ )
for ( k = 0 ; k < 26 ; k++ ) {
cjd = inds[ cid*26 + k ];
for ( j = 26*cjd ; inds[j] != cid ; j++ );
if ( weights_e[ cid*26+k ] != weights_e[ j ] )
error( "Unsymmetric edge weights detected (%i vs %i)." ,
weights_e[ cid*26+k ] , weights_e[ j ] );
} */
/* int w_min = weights_e[0], w_max = weights_e[0], w_tot = weights_e[0];
for ( k = 1 ; k < 26*nr_cells ; k++ ) {
w_tot += weights_e[k];
if ( weights_e[k] < w_min )
w_min = weights_e[k];
else if ( weights_e[k] > w_max )
w_max = weights_e[k];
}
message( "edge weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot );
w_min = weights_e[0], w_max = weights_e[0]; w_tot = weights_v[0];
for ( k = 1 ; k < nr_cells ; k++ ) {
w_tot += weights_v[k];
if ( weights_v[k] < w_min )
w_min = weights_v[k];
else if ( weights_v[k] > w_max )
w_max = weights_v[k];
}
message( "vertex weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot );
*/
/* Make sure there are no zero weights. */
for (int k = 0; k < 26 * nr_cells; k++)
if (weights_e[k] == 0) weights_e[k] = 1;
for (int k = 0; k < nr_cells; k++)
if ((weights_v[k] *= vscale) == 0) weights_v[k] = 1;
/* Allocate and fill the connection array. */
idx_t *offsets;
if ((offsets = (idx_t *)malloc(sizeof(idx_t) * (nr_cells + 1))) == NULL)
error("Failed to allocate offsets buffer.");
offsets[0] = 0;
for (int k = 0; k < nr_cells; k++) offsets[k + 1] = offsets[k] + 26;
/* Set the METIS options. +1 to keep the GCC sanitizer happy. */
idx_t options[METIS_NOPTIONS + 1];
METIS_SetDefaultOptions(options);
options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT;
options[METIS_OPTION_NUMBERING] = 0;
options[METIS_OPTION_CONTIG] = 1;
options[METIS_OPTION_NCUTS] = 10;
options[METIS_OPTION_NITER] = 20;
// options[ METIS_OPTION_UFACTOR ] = 1;
/* Set the initial partition, although this is probably ignored. */
for (int k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID;
/* Call METIS. */
idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes;
idx_t objval;
/* Dump graph in METIS format */
/*dumpMETISGraph("metis_graph", idx_nr_cells, one, offsets, inds,
weights_v, NULL, weights_e);*/
if (METIS_PartGraphRecursive(&idx_nr_cells, &one, offsets, inds, weights_v,
NULL, weights_e, &idx_nr_nodes, NULL, NULL,
options, &objval, nodeIDs) != METIS_OK)
error("Call to METIS_PartGraphRecursive failed.");
/* Dump the 3d array of cell IDs. */
/* printf( "engine_repartition: nodeIDs = reshape( [" );
for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ )
printf( "%i " , (int)nodeIDs[ i ] );
printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */
/* Check that the nodeIDs are ok. */
for (int k = 0; k < nr_cells; k++) {
if (nodeIDs[k] < 0 || nodeIDs[k] >= nr_nodes)
error("Got bad nodeID %" PRIDX " for cell %i.", nodeIDs[k], k);
}
/* Check that the partition is complete and all nodes have some work. */
int present[nr_nodes];
int failed = 0;
for (int i = 0; i < nr_nodes; i++) present[i] = 0;
for (int i = 0; i < nr_cells; i++) present[nodeIDs[i]]++;
for (int i = 0; i < nr_nodes; i++) {
if (!present[i]) {
failed = 1;
message("Node %d is not present after repartition", i);
}
}
/* If partition failed continue with the current one, but make this
* clear. */
if (failed) {
message(
"WARNING: METIS repartition has failed, continuing with "
"the current partition, load balance will not be optimal");
for (int k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID;
}
}
/* Broadcast the result of the partition. */
#if IDXTYPEWIDTH == 32
if (MPI_Bcast(nodeIDs, nr_cells, MPI_INT, 0, MPI_COMM_WORLD) != MPI_SUCCESS)
error("Failed to bcast the node IDs.");
#else
if (MPI_Bcast(nodeIDs, nr_cells, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD) !=
MPI_SUCCESS)
error("Failed to bcast the node IDs.");
#endif
/* Set the cell nodeIDs and clear any non-local parts. */
for (int k = 0; k < nr_cells; k++) {
cells[k].nodeID = nodeIDs[k];
if (nodeIDs[k] == nodeID) my_cells += 1;
}
/* Clean up. */
free(inds);
free(weights_v);
free(weights_e);
free(nodeIDs);
/* Do the repartitioning. */
partition_repartition(reparttype, e->nodeID, e->nr_nodes, e->s,
e->sched.tasks, e->sched.nr_tasks);
/* Now comes the tricky part: Exchange particles between all nodes.
This is done in two steps, first allreducing a matrix of
......@@ -687,6 +360,7 @@ void engine_addtasks_grav(struct engine *e, struct cell *c, struct task *up,
void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj) {
#ifdef WITH_MPI
struct link *l = NULL;
struct scheduler *s = &e->sched;
......@@ -740,6 +414,7 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj) {
void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
struct task *t_rho) {
#ifdef WITH_MPI
struct scheduler *s = &e->sched;
/* Do we need to construct a recv task? */
......@@ -1321,7 +996,7 @@ int engine_marktasks(struct engine *e) {
struct scheduler *s = &e->sched;
const int nr_tasks = s->nr_tasks, *ind = s->tasks_ind;
struct task *tasks = s->tasks;
const float dt_step = e->dt_step;
float ti_end = e->ti_current;
/* Much less to do here if we're on a fixed time-step. */
if ((e->policy & engine_policy_fixdt) == engine_policy_fixdt) {
......@@ -1456,8 +1131,8 @@ void engine_print(struct engine *e) {
/* Count and print the number of each task type. */
int counts[task_type_count + 1];
for (k = 0; k <= task_type_count; k++) counts[k] = 0;
for (k = 0; k < sched->nr_tasks; k++)
for (int k = 0; k <= task_type_count; k++) counts[k] = 0;
for (int k = 0; k < sched->nr_tasks; k++)
if (!sched->tasks[k].skip)
counts[(int)sched->tasks[k].type] += 1;
else
......@@ -1468,7 +1143,7 @@ void engine_print(struct engine *e) {
#else
printf("engine_print: task counts are [ %s=%i", taskID_names[0], counts[0]);
#endif
for (k = 1; k < task_type_count; k++)
for (int k = 1; k < task_type_count; k++)
printf(" %s=%i", taskID_names[k], counts[k]);
printf(" skipped=%i ]\n", counts[task_type_count]);
fflush(stdout);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment