diff --git a/configure.ac b/configure.ac
index 5c46efc1a1a47837d9d75e4dcfc0632ea9d98b49..bf00acd183a035ba7e010df7568a38e5d16667b9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -196,17 +196,8 @@ if test "x$enable_debug" = "xyes"; then
    fi
 fi
 
-# Check if task debugging is on.
-AC_ARG_ENABLE([task-debugging],
-   [AS_HELP_STRING([--enable-task-debugging],
-     [Store task timing information and generate task dump files @<:@yes/no@:>@]
-   )],
-   [enable_task_debugging="$enableval"],
-   [enable_task_debugging="no"]
-)
-if test "$enable_task_debugging" = "yes"; then
-   AC_DEFINE([SWIFT_DEBUG_TASKS],1,[Enable task debugging])
-fi
+# Task timing is now always enabled, as the repartitioning code requires it.
+AC_DEFINE([SWIFT_DEBUG_TASKS],1,[Enable task timing])
 
 # Check if threadpool debugging is on.
 AC_ARG_ENABLE([threadpool-debugging],
@@ -335,7 +326,7 @@ AC_ARG_ENABLE([vec],
    [enable_vec="yes"]
 )
 
-# Disable hand written vectorisation. Slightly odd implementation as want 
+# Disable hand written vectorisation. Slightly odd implementation as want
 # to describe as --disable-hand-vec, but macro is enable (there is no enable action).
 AC_ARG_ENABLE([hand-vec],
    [AS_HELP_STRING([--disable-hand-vec],
diff --git a/src/engine.c b/src/engine.c
index fa095f4d377b483289166cf86fa50e757ec1a82b..7386f18a8b13e82635561b070b032f4098f664e9 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -1046,65 +1046,56 @@ void engine_repartition_trigger(struct engine *e) {
 
   const ticks tic = getticks();
 
-  /* Do nothing if there have not been enough steps since the last
-   * repartition, don't want to repeat this too often or immediately after
-   * a repartition step. Also nothing to do when requested. */
+  /* Do nothing if there have not been enough steps since the last repartition
+   * as we don't want to repeat this too often or immediately after a
+   * repartition step. Also nothing to do when no repartition is requested. */
  if (e->step - e->last_repartition >= 2 &&
      e->reparttype->type != REPART_NONE) {
 
-    /* Old style if trigger is >1 or this is the second step (want an early
-     * repartition following the initial repartition). */
-    if (e->reparttype->trigger > 1 || e->step == 2) {
-      if (e->reparttype->trigger > 1) {
-        if ((e->step % (int)e->reparttype->trigger) == 0) e->forcerepart = 1;
-      } else {
-        e->forcerepart = 1;
-      }
-
-    } else {
-
-      /* Use cputimes from ranks to estimate the imbalance. */
-      /* First check if we are going to skip this stage anyway, if so do that
-       * now. If is only worth checking the CPU loads when we have processed a
-       * significant number of all particles. */
-      if ((e->updates > 1 &&
-           e->updates >= e->total_nr_parts * e->reparttype->minfrac) ||
-          (e->g_updates > 1 &&
-           e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) {
-
-        /* Get CPU time used since the last call to this function. */
-        double elapsed_cputime =
-            clocks_get_cputime_used() - e->cputime_last_step;
-
-        /* Gather the elapsed CPU times from all ranks for the last step. */
-        double elapsed_cputimes[e->nr_nodes];
-        MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1,
-                   MPI_DOUBLE, 0, MPI_COMM_WORLD);
-        if (e->nodeID == 0) {
-
-          /* Get the range and mean of cputimes. */
-          double mintime = elapsed_cputimes[0];
-          double maxtime = elapsed_cputimes[0];
-          double sum = elapsed_cputimes[0];
-          for (int k = 1; k < e->nr_nodes; k++) {
-            if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k];
-            if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k];
-            sum += elapsed_cputimes[k];
-          }
-          double mean = sum / (double)e->nr_nodes;
-
-          /* Are we out of balance? */
-          if (((maxtime - mintime) / mean) > e->reparttype->trigger) {
-            if (e->verbose)
-              message("trigger fraction %.3f exceeds %.3f will repartition",
-                      (maxtime - mintime) / mintime, e->reparttype->trigger);
-            e->forcerepart = 1;
-          }
+    /* It is only worth checking the CPU loads when we have processed a
+     * significant fraction of all particles, as we require all tasks to
+     * have run and hence have timings. */
+    if ((e->updates > 1 &&
+         e->updates >= e->total_nr_parts * e->reparttype->minfrac) ||
+        (e->g_updates > 1 &&
+         e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) {
+
+      /* Get CPU time used since the last call to this function. */
+      double elapsed_cputime = clocks_get_cputime_used() - e->cputime_last_step;
+
+      /* Gather the elapsed CPU times from all ranks for the last step. */
+      double elapsed_cputimes[e->nr_nodes];
+      MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1,
+                 MPI_DOUBLE, 0, MPI_COMM_WORLD);
+      if (e->nodeID == 0) {
+
+        /* Get the range and mean of cputimes. */
+        double mintime = elapsed_cputimes[0];
+        double maxtime = elapsed_cputimes[0];
+        double sum = elapsed_cputimes[0];
+        for (int k = 1; k < e->nr_nodes; k++) {
+          if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k];
+          if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k];
+          sum += elapsed_cputimes[k];
+        }
+        double mean = sum / (double)e->nr_nodes;
+
+        /* Are we out of balance? */
+        double abs_trigger = fabs(e->reparttype->trigger);
+        if (((maxtime - mintime) / mean) > abs_trigger) {
+          if (e->verbose)
+            message("trigger fraction %.3f > %.3f, will repartition",
+                    (maxtime - mintime) / mean, abs_trigger);
+          e->forcerepart = 1;
+        } else {
+          if (e->verbose)
+            message("trigger fraction %.3f <= %.3f, will not repartition",
+                    (maxtime - mintime) / mean, abs_trigger);
        }
-
-        /* All nodes do this together. */
-        MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD);
      }
+
+      /* All nodes do this together. */
+      MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }
 
    /* Remember we did this. */
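
For reference, the new trigger reduces to a single imbalance test: repartition
when (maxtime - mintime) / mean exceeds |trigger|. Below is a minimal
standalone sketch of that arithmetic, with the MPI gather replaced by a fixed
array of per-rank CPU times; the variable names mirror the patch, but nothing
here is SWIFT API.

    /* Standalone sketch of the imbalance trigger; not SWIFT code. */
    #include <math.h>
    #include <stdio.h>

    int main(void) {
      const double elapsed_cputimes[] = {10.0, 11.0, 14.0}; /* per-rank s */
      const int nr_nodes = 3;
      const double trigger = 0.1; /* repartition above 10% imbalance */

      /* Range and mean of the per-rank CPU times, as in the patch. */
      double mintime = elapsed_cputimes[0];
      double maxtime = elapsed_cputimes[0];
      double sum = elapsed_cputimes[0];
      for (int k = 1; k < nr_nodes; k++) {
        if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k];
        if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k];
        sum += elapsed_cputimes[k];
      }
      const double mean = sum / (double)nr_nodes;

      /* Imbalance fraction compared against |trigger|. */
      const double frac = (maxtime - mintime) / mean;
      printf("imbalance %.3f %s %.3f\n", frac,
             frac > fabs(trigger) ? ">" : "<=", fabs(trigger));
      return 0;
    }

With these numbers the mean is 11.667 s, so the imbalance fraction is
(14 - 10) / 11.667 = 0.343 and a 10% trigger would fire.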
diff --git a/src/partition.c b/src/partition.c
index 9cc99a9c571057dda96f85a7ec6b14835b1f3b61..41e3556b09dcf788d88bac4093899672c12a864b 100644
--- a/src/partition.c
+++ b/src/partition.c
@@ -1105,11 +1105,9 @@ void partition_gather_weights(void *map_data, int num_elements,
  for (int i = 0; i < num_elements; i++) {
    struct task *t = &tasks[i];
 
-    /* Skip un-interesting tasks. */
-    if (t->cost == 0.f) continue;
-
    /* Get the task weight based on costs. */
-    double w = (double)t->cost;
+    double w = (double)t->toc - (double)t->tic;
+    if (w <= 0.0) continue;
 
    /* Get the top-level cells involved. */
    struct cell *ci, *cj;
@@ -1720,13 +1718,13 @@ void partition_init(struct partition *partition,
        " than 1");
 
  /* Fraction of particles that should be updated before a repartition
-   * based on CPU time is considered. */
+   * based on CPU time is considered; this needs to be high. */
  repartition->minfrac =
-      parser_get_opt_param_float(params, "DomainDecomposition:minfrac", 0.9f);
-  if (repartition->minfrac <= 0 || repartition->minfrac > 1)
+      parser_get_opt_param_float(params, "DomainDecomposition:minfrac", 0.95f);
+  if (repartition->minfrac <= 0.5 || repartition->minfrac > 1)
    error(
-        "Invalid DomainDecomposition:minfrac, must be greater than 0 and less "
-        "than equal to 1");
+        "Invalid DomainDecomposition:minfrac, must be greater than 0.5 "
+        "and less than or equal to 1");
 
  /* Use METIS or ParMETIS when ParMETIS is also available. */
  repartition->usemetis =
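
The partition weights now come straight from measured task durations
(toc - tic, in ticks) rather than the static cost model, which is why the
SWIFT_DEBUG_TASKS timing above is always compiled in. A cut-down sketch of
the new weighting, using a hypothetical toy_task struct rather than SWIFT's
real struct task:

    /* Sketch of timing-based task weights; toy_task is invented. */
    #include <stdio.h>

    struct toy_task {
      long long tic, toc; /* tick counters at task start and end */
    };

    int main(void) {
      struct toy_task tasks[] = {{100, 250}, {300, 300}, {400, 380}};
      for (int i = 0; i < 3; i++) {
        /* Tasks that never ran, or have inconsistent timers, contribute
         * nothing, matching the `if (w <= 0.0) continue;` guard. */
        double w = (double)tasks[i].toc - (double)tasks[i].tic;
        if (w <= 0.0) continue;
        printf("task %d: weight %.0f ticks\n", i, w);
      }
      return 0;
    }

Only the first task contributes here (weight 150); the zero-length and
backwards-timed tasks are skipped, just as un-run tasks are in the patch.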
diff --git a/src/scheduler.c b/src/scheduler.c
index 2ae8f6785434af021b52dd2d6586b4e2dc5d68bb..79e54927ad98054cdebf3c850e9b92db4feb21d2 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -1816,17 +1816,11 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
  /* Run through the tasks backwards and set their weights. */
  for (int k = nr_tasks - 1; k >= 0; k--) {
    struct task *t = &tasks[tid[k]];
+    float cost;
    t->weight = 0.f;
-#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
-    t->cost = 0.f;
-#endif
    for (int j = 0; j < t->nr_unlock_tasks; j++)
      if (t->unlock_tasks[j]->weight > t->weight)
        t->weight = t->unlock_tasks[j]->weight;
-    float cost = 0.f;
-#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
-    int partcost = 1;
-#endif
 
    const float count_i = (t->ci != NULL) ? t->ci->hydro.count : 0.f;
    const float count_j = (t->cj != NULL) ? t->cj->hydro.count : 0.f;
@@ -1949,18 +1943,12 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
        cost = wscale * count_i + wscale * gcount_i;
        break;
      case task_type_send:
-#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
-        partcost = 0;
-#endif
        if (count_i < 1e5)
          cost = 10.f * (wscale * count_i) * count_i;
        else
          cost = 2e9;
        break;
      case task_type_recv:
-#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
-        partcost = 0;
-#endif
        if (count_i < 1e5)
          cost = 5.f * (wscale * count_i) * count_i;
        else
@@ -1970,10 +1958,6 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
        cost = 0;
        break;
    }
-
-#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
-    if (partcost) t->cost = cost;
-#endif
    t->weight += cost;
  }
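
With t->cost and the partcost plumbing removed, scheduler_reweight() only has
to produce an estimated cost once per task and fold it into the scheduler
weight; the partition code now times tasks itself. A self-contained sketch of
that simplified pattern, with invented toy types standing in for SWIFT's task
types and cost formulas:

    /* Sketch of the simplified reweight flow; all names are invented. */
    #include <stdio.h>

    enum toy_type { TOY_SELF, TOY_SEND, TOY_OTHER };

    struct toy_task {
      enum toy_type type;
      float count;  /* particle count of the associated cell */
      float weight; /* starts at the max unlocked-task weight */
    };

    static void toy_reweight(struct toy_task *t, float wscale) {
      float cost; /* set on every switch branch, as in the patch */
      switch (t->type) {
        case TOY_SELF:
          cost = wscale * t->count * t->count;
          break;
        case TOY_SEND:
          cost = (t->count < 1e5f) ? 10.f * (wscale * t->count) * t->count
                                   : 2e9f;
          break;
        default:
          cost = 0.f;
          break;
      }
      t->weight += cost;
    }

    int main(void) {
      struct toy_task t = {TOY_SEND, 1000.f, 0.f};
      toy_reweight(&t, 1e-3f);
      printf("weight %.0f\n", t.weight); /* 10 * (1e-3*1000) * 1000 = 10000 */
      return 0;
    }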