diff --git a/src/debug.c b/src/debug.c
index 93f0136f0d1ffa60dfd69e6e3ddf9ef483b21958..716fa66934f988196b9ebfa8f037c4c8c28d92d2 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -58,7 +58,7 @@ void printParticle(struct part *parts, struct xpart *xparts, long long int id,
   /* Look for the particle. */
   for (size_t i = 0; i < N; i++)
     if (parts[i].id == id) {
-      printf("## Particle[%d]:\n id=%lld", i, parts[i].id);
+      printf("## Particle[%zu]:\n id=%lld", i, parts[i].id);
       hydro_debug_particle(&parts[i], &xparts[i]);
       found = 1;
       break;
@@ -75,7 +75,7 @@ void printgParticle(struct gpart *parts, long long int id, int N) {
   for (size_t i = 0; i < N; i++)
     if (parts[i].id == -id || (parts[i].id > 0 && parts[i].part->id == id)) {
       printf(
-          "## gParticle[%d]: id=%lld, x=[%.16e,%.16e,%.16e], "
+          "## gParticle[%zu]: id=%lld, x=[%.16e,%.16e,%.16e], "
          "v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], m=%.3e, t_begin=%d, "
          "t_end=%d\n",
          i, parts[i].part->id, parts[i].x[0], parts[i].x[1], parts[i].x[2],
diff --git a/src/engine.c b/src/engine.c
index cc69711bf1021263f1f2b0dffb42a631ba6db787..d75cef107c8effe8a24df01e94c660863b6be8ce 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -295,344 +295,17 @@ void engine_repartition(struct engine *e) {

 #if defined(WITH_MPI) && defined(HAVE_METIS)

-  int res;
-  idx_t *inds, *nodeIDs;
-  idx_t *weights_v = NULL, *weights_e = NULL;
-  struct space *s = e->s;
-  int nr_cells = s->nr_cells, my_cells = 0;
-  struct cell *cells = s->cells;
-  int ind[3], *cdim = s->cdim;
-  struct task *tasks = e->sched.tasks;
-  int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
-  float wscale = 1e-3, vscale = 1e-3, wscale_buff;
-  idx_t wtot = 0;
-  idx_t wmax = 1e9 / e->nr_nodes;
-  idx_t wmin;
-
   /* Clear the repartition flag. */
   enum repartition_type reparttype = e->forcerepart;
   e->forcerepart = REPART_NONE;

   /* Nothing to do if only using a single node. Also avoids METIS
    * bug that doesn't handle this case well. */
-  if (nr_nodes == 1) return;
-
-  /* Allocate the inds and weights. */
-  if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 * nr_cells)) == NULL ||
-      (weights_v = (idx_t *)malloc(sizeof(idx_t) * nr_cells)) == NULL ||
-      (weights_e = (idx_t *)malloc(sizeof(idx_t) * 26 * nr_cells)) == NULL ||
-      (nodeIDs = (idx_t *)malloc(sizeof(idx_t) * nr_cells)) == NULL)
-    error("Failed to allocate inds and weights arrays.");
-
-  /* Fill the inds array. */
-  for (int cid = 0; cid < nr_cells; cid++) {
-    ind[0] = cells[cid].loc[0] / s->cells[cid].h[0] + 0.5;
-    ind[1] = cells[cid].loc[1] / s->cells[cid].h[1] + 0.5;
-    ind[2] = cells[cid].loc[2] / s->cells[cid].h[2] + 0.5;
-    int l = 0;
-    for (int i = -1; i <= 1; i++) {
-      int ii = ind[0] + i;
-      if (ii < 0)
-        ii += cdim[0];
-      else if (ii >= cdim[0])
-        ii -= cdim[0];
-      for (int j = -1; j <= 1; j++) {
-        int jj = ind[1] + j;
-        if (jj < 0)
-          jj += cdim[1];
-        else if (jj >= cdim[1])
-          jj -= cdim[1];
-        for (int k = -1; k <= 1; k++) {
-          int kk = ind[2] + k;
-          if (kk < 0)
-            kk += cdim[2];
-          else if (kk >= cdim[2])
-            kk -= cdim[2];
-          if (i || j || k) {
-            inds[cid * 26 + l] = cell_getid(cdim, ii, jj, kk);
-            l += 1;
-          }
-        }
-      }
-    }
-  }
-
-  /* Init the weights arrays. */
-  bzero(weights_e, sizeof(idx_t) * 26 * nr_cells);
-  bzero(weights_v, sizeof(idx_t) * nr_cells);
-
-  /* Loop over the tasks... */
-  for (int j = 0; j < e->sched.nr_tasks; j++) {
-
-    /* Get a pointer to the kth task. */
-    const struct task *t = &tasks[j];
-
-    /* Skip un-interesting tasks. */
-    if (t->type != task_type_self && t->type != task_type_pair &&
-        t->type != task_type_sub && t->type != task_type_ghost &&
-        t->type != task_type_kick1 && t->type != task_type_kick2)
-      continue;
-
-    /* Get the task weight. */
-    idx_t w = (t->toc - t->tic) * wscale;
-    if (w < 0) error("Bad task weight (%" SCIDX ").", w);
-
-    /* Do we need to re-scale? */
-    wtot += w;
-    while (wtot > wmax) {
-      wscale /= 2;
-      wtot /= 2;
-      w /= 2;
-      for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= 0.5;
-      for (int k = 0; k < nr_cells; k++) weights_v[k] *= 0.5;
-    }
-
-    /* Get the top-level cells involved. */
-    const struct cell *ci, *cj = NULL;
-    for (ci = t->ci; ci->parent != NULL; ci = ci->parent)
-      ;
-    if (t->cj != NULL)
-      for (cj = t->cj; cj->parent != NULL; cj = cj->parent)
-        ;
-
-    /* Get the cell IDs. */
-    const int cid = ci - cells;
-
-    /* Different weights for different tasks. */
-    if (t->type == task_type_ghost || t->type == task_type_kick1 ||
-        t->type == task_type_kick2) {
-
-      /* Particle updates add only to vertex weight. */
-      weights_v[cid] += w;
-
-    }
-
-    /* Self interaction? */
-    else if ((t->type == task_type_self && ci->nodeID == nodeID) ||
-             (t->type == task_type_sub && cj == NULL && ci->nodeID == nodeID)) {
-
-      /* Self interactions add only to vertex weight. */
-      weights_v[cid] += w;
-
-    }
-
-    /* Pair? */
-    else if (t->type == task_type_pair ||
-             (t->type == task_type_sub && cj != NULL)) {
-
-      /* In-cell pair? */
-      if (ci == cj) {
-
-        /* Add weight to vertex for ci. */
-        weights_v[cid] += w;
-
-      }
-
-      /* Distinct cells with local ci? */
-      else if (ci->nodeID == nodeID) {
-
-        /* Index of the jth cell. */
-        const int cjd = cj - cells;
-
-        /* Add half of weight to each cell. */
-        if (ci->nodeID == nodeID) weights_v[cid] += 0.5 * w;
-        if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w;
-
-        /* Add Weight to edge. */
-        int k = 26 * cid;
-        while (inds[k] != cjd) k++;
-        weights_e[k] += w;
-        k = 26 * cjd;
-        while (inds[k] != cid) k++;
-        weights_e[k] += w;
-      }
-    }
-  }
+  if (e->nr_nodes == 1) return;

-  /* Get the minimum scaling and re-scale if necessary. */
-  if ((res = MPI_Allreduce(&wscale, &wscale_buff, 1, MPI_FLOAT, MPI_MIN,
-                           MPI_COMM_WORLD)) != MPI_SUCCESS) {
-    char buff[MPI_MAX_ERROR_STRING];
-    int len;
-    MPI_Error_string(res, buff, &len);
-    error("Failed to allreduce the weight scale (%s).", buff);
-  }
-  if (wscale_buff != wscale) {
-    float scale = wscale_buff / wscale;
-    for (int k = 0; k < 26 * nr_cells; k++) weights_e[k] *= scale;
-    for (int k = 0; k < nr_cells; k++) weights_v[k] *= scale;
-  }
-
-/* Merge the weights arrays across all nodes. */
-#if IDXTYPEWIDTH == 32
-  if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v,
-                        nr_cells, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) !=
-      MPI_SUCCESS) {
-#else
-  if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v,
-                        nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0,
-                        MPI_COMM_WORLD)) != MPI_SUCCESS) {
-#endif
-    char buff[MPI_MAX_ERROR_STRING];
-    int len;
-    MPI_Error_string(res, buff, &len);
-    error("Failed to allreduce vertex weights (%s).", buff);
-  }
-#if IDXTYPEWIDTH == 32
-  if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e,
-                 26 * nr_cells, MPI_INT, MPI_SUM, 0,
-                 MPI_COMM_WORLD) != MPI_SUCCESS)
-#else
-  if (MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e,
-                 26 * nr_cells, MPI_LONG_LONG_INT, MPI_SUM, 0,
-                 MPI_COMM_WORLD) != MPI_SUCCESS)
-#endif
-    error("Failed to allreduce edge weights.");
-
-  /* As of here, only one node needs to compute the partition. */
-  if (nodeID == 0) {
-
-    /* Final rescale of all weights to avoid a large range. Large ranges have
-     * been seen to cause an incomplete graph. */
-    wmin = wmax;
-    wmax = 0.0;
-    for (int k = 0; k < 26 * nr_cells; k++) {
-      wmax = weights_e[k] > wmax ? weights_e[k] : wmax;
-      wmin = weights_e[k] < wmin ? weights_e[k] : wmin;
-    }
-    if ((wmax - wmin) > engine_maxmetisweight) {
-      wscale = engine_maxmetisweight / (wmax - wmin);
-      for (int k = 0; k < 26 * nr_cells; k++) {
-        weights_e[k] = (weights_e[k] - wmin) * wscale + 1;
-      }
-      for (int k = 0; k < nr_cells; k++) {
-        weights_v[k] = (weights_v[k] - wmin) * wscale + 1;
-      }
-    }
-
-    /* Check that the edge weights are fully symmetric. */
-    /* for ( cid = 0 ; cid < nr_cells ; cid++ )
-         for ( k = 0 ; k < 26 ; k++ ) {
-           cjd = inds[ cid*26 + k ];
-           for ( j = 26*cjd ; inds[j] != cid ; j++ );
-           if ( weights_e[ cid*26+k ] != weights_e[ j ] )
-             error( "Unsymmetric edge weights detected (%i vs %i)." ,
-               weights_e[ cid*26+k ] , weights_e[ j ] );
-         } */
-    /* int w_min = weights_e[0], w_max = weights_e[0], w_tot = weights_e[0];
-       for ( k = 1 ; k < 26*nr_cells ; k++ ) {
-         w_tot += weights_e[k];
-         if ( weights_e[k] < w_min )
-           w_min = weights_e[k];
-         else if ( weights_e[k] > w_max )
-           w_max = weights_e[k];
-       }
-       message( "edge weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot );
-       w_min = weights_e[0], w_max = weights_e[0]; w_tot = weights_v[0];
-       for ( k = 1 ; k < nr_cells ; k++ ) {
-         w_tot += weights_v[k];
-         if ( weights_v[k] < w_min )
-           w_min = weights_v[k];
-         else if ( weights_v[k] > w_max )
-           w_max = weights_v[k];
-       }
-       message( "vertex weights in [ %i , %i ], tot=%i." , w_min , w_max , w_tot );
-     */
-
-    /* Make sure there are no zero weights. */
-    for (int k = 0; k < 26 * nr_cells; k++)
-      if (weights_e[k] == 0) weights_e[k] = 1;
-    for (int k = 0; k < nr_cells; k++)
-      if ((weights_v[k] *= vscale) == 0) weights_v[k] = 1;
-
-    /* Allocate and fill the connection array. */
-    idx_t *offsets;
-    if ((offsets = (idx_t *)malloc(sizeof(idx_t) * (nr_cells + 1))) == NULL)
-      error("Failed to allocate offsets buffer.");
-    offsets[0] = 0;
-    for (int k = 0; k < nr_cells; k++) offsets[k + 1] = offsets[k] + 26;
-
-    /* Set the METIS options. +1 to keep the GCC sanitizer happy. */
-    idx_t options[METIS_NOPTIONS + 1];
-    METIS_SetDefaultOptions(options);
-    options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT;
-    options[METIS_OPTION_NUMBERING] = 0;
-    options[METIS_OPTION_CONTIG] = 1;
-    options[METIS_OPTION_NCUTS] = 10;
-    options[METIS_OPTION_NITER] = 20;
-    // options[ METIS_OPTION_UFACTOR ] = 1;
-
-    /* Set the initial partition, although this is probably ignored. */
-    for (int k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID;
-
-    /* Call METIS. */
-    idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes;
-    idx_t objval;
-
-    /* Dump graph in METIS format */
-    /* dumpMETISGraph("metis_graph", idx_nr_cells, one, offsets, inds,
-                      weights_v, NULL, weights_e); */
-
-    if (METIS_PartGraphRecursive(&idx_nr_cells, &one, offsets, inds, weights_v,
-                                 NULL, weights_e, &idx_nr_nodes, NULL, NULL,
-                                 options, &objval, nodeIDs) != METIS_OK)
-      error("Call to METIS_PartGraphRecursive failed.");
-
-    /* Dump the 3d array of cell IDs. */
-    /* printf( "engine_repartition: nodeIDs = reshape( [" );
-       for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ )
-         printf( "%i " , (int)nodeIDs[ i ] );
-       printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]); */
-
-    /* Check that the nodeIDs are ok. */
-    for (int k = 0; k < nr_cells; k++) {
-      if (nodeIDs[k] < 0 || nodeIDs[k] >= nr_nodes)
-        error("Got bad nodeID %" PRIDX " for cell %i.", nodeIDs[k], k);
-    }
-
-    /* Check that the partition is complete and all nodes have some work. */
-    int present[nr_nodes];
-    int failed = 0;
-    for (int i = 0; i < nr_nodes; i++) present[i] = 0;
-    for (int i = 0; i < nr_cells; i++) present[nodeIDs[i]]++;
-    for (int i = 0; i < nr_nodes; i++) {
-      if (!present[i]) {
-        failed = 1;
-        message("Node %d is not present after repartition", i);
-      }
-    }
-
-    /* If partition failed continue with the current one, but make this
-     * clear. */
-    if (failed) {
-      message(
-          "WARNING: METIS repartition has failed, continuing with "
-          "the current partition, load balance will not be optimal");
-      for (int k = 0; k < nr_cells; k++) nodeIDs[k] = cells[k].nodeID;
-    }
-  }
-
-/* Broadcast the result of the partition. */
-#if IDXTYPEWIDTH == 32
-  if (MPI_Bcast(nodeIDs, nr_cells, MPI_INT, 0, MPI_COMM_WORLD) != MPI_SUCCESS)
-    error("Failed to bcast the node IDs.");
-#else
-  if (MPI_Bcast(nodeIDs, nr_cells, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD) !=
-      MPI_SUCCESS)
-    error("Failed to bcast the node IDs.");
-#endif
-
-  /* Set the cell nodeIDs and clear any non-local parts. */
-  for (int k = 0; k < nr_cells; k++) {
-    cells[k].nodeID = nodeIDs[k];
-    if (nodeIDs[k] == nodeID) my_cells += 1;
-  }
-
-  /* Clean up. */
-  free(inds);
-  free(weights_v);
-  free(weights_e);
-  free(nodeIDs);
+  /* Do the repartitioning. */
+  partition_repartition(reparttype, e->nodeID, e->nr_nodes, e->s,
+                        e->sched.tasks, e->sched.nr_tasks);

   /* Now comes the tricky part: Exchange particles between all nodes.
      This is done in two steps, first allreducing a matrix of
@@ -687,6 +360,7 @@ void engine_addtasks_grav(struct engine *e, struct cell *c, struct task *up,

 void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj) {

+#ifdef WITH_MPI
   struct link *l = NULL;
   struct scheduler *s = &e->sched;

@@ -740,6 +414,7 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj) {
 void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv,
                           struct task *t_rho) {

+#ifdef WITH_MPI
   struct scheduler *s = &e->sched;

   /* Do we need to construct a recv task? */
@@ -1321,7 +996,7 @@ int engine_marktasks(struct engine *e) {
   struct scheduler *s = &e->sched;
   const int nr_tasks = s->nr_tasks, *ind = s->tasks_ind;
   struct task *tasks = s->tasks;
-  const float dt_step = e->dt_step;
+  const int ti_end = e->ti_current;

   /* Much less to do here if we're on a fixed time-step. */
   if ((e->policy & engine_policy_fixdt) == engine_policy_fixdt) {
@@ -1456,8 +1131,8 @@ void engine_print(struct engine *e) {

   /* Count and print the number of each task type. */
   int counts[task_type_count + 1];
-  for (k = 0; k <= task_type_count; k++) counts[k] = 0;
-  for (k = 0; k < sched->nr_tasks; k++)
+  for (int k = 0; k <= task_type_count; k++) counts[k] = 0;
+  for (int k = 0; k < sched->nr_tasks; k++)
     if (!sched->tasks[k].skip)
       counts[(int)sched->tasks[k].type] += 1;
     else
@@ -1468,7 +1143,7 @@ void engine_print(struct engine *e) {
 #else
   printf("engine_print: task counts are [ %s=%i", taskID_names[0], counts[0]);
 #endif
-  for (k = 1; k < task_type_count; k++)
+  for (int k = 1; k < task_type_count; k++)
     printf(" %s=%i", taskID_names[k], counts[k]);
   printf(" skipped=%i ]\n", counts[task_type_count]);
   fflush(stdout);
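
Note on the debug.c hunks: the loop index i is a size_t, and %zu is the C99 conversion
specifier for that type. The old %d expects an int and is undefined behaviour on LP64
platforms where size_t is 64-bit. A minimal, self-contained illustration of the rule
(an aside for review, not part of the patch):

    #include <stdio.h>

    int main(void) {
      /* size_t indices print with %zu; the signed types ssize_t and
       * ptrdiff_t take %zd and %td respectively. */
      for (size_t i = 0; i < 3; i++)
        printf("## Particle[%zu]: id=%lld\n", i, (long long)(1000 + i));
      return 0;
    }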