diff --git a/configure.ac b/configure.ac index 5c46efc1a1a47837d9d75e4dcfc0632ea9d98b49..c12ea7a51973e4aa495a5809d2687961bacda585 100644 --- a/configure.ac +++ b/configure.ac @@ -199,7 +199,7 @@ fi # Check if task debugging is on. AC_ARG_ENABLE([task-debugging], [AS_HELP_STRING([--enable-task-debugging], - [Store task timing information and generate task dump files @<:@yes/no@:>@] + [Store extra information for generating task dump files @<:@yes/no@:>@] )], [enable_task_debugging="$enableval"], [enable_task_debugging="no"] @@ -335,7 +335,7 @@ AC_ARG_ENABLE([vec], [enable_vec="yes"] ) -# Disable hand written vectorisation. Slightly odd implementation as want +# Disable hand written vectorisation. Slightly odd implementation as want # to describe as --disable-hand-vec, but macro is enable (there is no enable action). AC_ARG_ENABLE([hand-vec], [AS_HELP_STRING([--disable-hand-vec], diff --git a/src/engine.c b/src/engine.c index d0123174764fd67c954fea7b5740053b3059973b..18853bf00c13963d4224ad6005a7bbefe9f2d34a 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1046,65 +1046,56 @@ void engine_repartition_trigger(struct engine *e) { const ticks tic = getticks(); - /* Do nothing if there have not been enough steps since the last - * repartition, don't want to repeat this too often or immediately after - * a repartition step. Also nothing to do when requested. */ + /* Do nothing if there have not been enough steps since the last repartition + * as we don't want to repeat this too often or immediately after a + * repartition step. Also nothing to do when requested. */ if (e->step - e->last_repartition >= 2 && e->reparttype->type != REPART_NONE) { - /* Old style if trigger is >1 or this is the second step (want an early - * repartition following the initial repartition). 
*/ - if (e->reparttype->trigger > 1 || e->step == 2) { - if (e->reparttype->trigger > 1) { - if ((e->step % (int)e->reparttype->trigger) == 0) e->forcerepart = 1; - } else { - e->forcerepart = 1; - } - - } else { - - /* Use cputimes from ranks to estimate the imbalance. */ - /* First check if we are going to skip this stage anyway, if so do that - * now. If is only worth checking the CPU loads when we have processed a - * significant number of all particles. */ - if ((e->updates > 1 && - e->updates >= e->total_nr_parts * e->reparttype->minfrac) || - (e->g_updates > 1 && - e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) { - - /* Get CPU time used since the last call to this function. */ - double elapsed_cputime = - clocks_get_cputime_used() - e->cputime_last_step; - - /* Gather the elapsed CPU times from all ranks for the last step. */ - double elapsed_cputimes[e->nr_nodes]; - MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1, - MPI_DOUBLE, 0, MPI_COMM_WORLD); - if (e->nodeID == 0) { - - /* Get the range and mean of cputimes. */ - double mintime = elapsed_cputimes[0]; - double maxtime = elapsed_cputimes[0]; - double sum = elapsed_cputimes[0]; - for (int k = 1; k < e->nr_nodes; k++) { - if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k]; - if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k]; - sum += elapsed_cputimes[k]; - } - double mean = sum / (double)e->nr_nodes; - - /* Are we out of balance? */ - if (((maxtime - mintime) / mean) > e->reparttype->trigger) { - if (e->verbose) - message("trigger fraction %.3f exceeds %.3f will repartition", - (maxtime - mintime) / mintime, e->reparttype->trigger); - e->forcerepart = 1; - } + /* It is only worth checking the CPU loads when we have processed a + * significant number of all particles as we require all tasks to have + * times. 
*/ + if ((e->updates > 1 && + e->updates >= e->total_nr_parts * e->reparttype->minfrac) || + (e->g_updates > 1 && + e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) { + + /* Get CPU time used since the last call to this function. */ + double elapsed_cputime = clocks_get_cputime_used() - e->cputime_last_step; + + /* Gather the elapsed CPU times from all ranks for the last step. */ + double elapsed_cputimes[e->nr_nodes]; + MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1, + MPI_DOUBLE, 0, MPI_COMM_WORLD); + if (e->nodeID == 0) { + + /* Get the range and mean of cputimes. */ + double mintime = elapsed_cputimes[0]; + double maxtime = elapsed_cputimes[0]; + double sum = elapsed_cputimes[0]; + for (int k = 1; k < e->nr_nodes; k++) { + if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k]; + if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k]; + sum += elapsed_cputimes[k]; + } + double mean = sum / (double)e->nr_nodes; + + /* Are we out of balance? */ + double abs_trigger = fabs(e->reparttype->trigger); + if (((maxtime - mintime) / mean) > abs_trigger) { + // if (e->verbose) + message("trigger fraction %.3f > %.3f will repartition", + (maxtime - mintime) / mean, abs_trigger); + e->forcerepart = 1; + } else { + // if (e->verbose) { + message("trigger fraction %.3f <= %.3f will not repartition", + (maxtime - mintime) / mean, abs_trigger); } - - /* All nodes do this together. */ - MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD); } + + /* All nodes do this together. */ + MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD); } /* Remember we did this. 
*/ @@ -2897,9 +2888,7 @@ void engine_step(struct engine *e) { struct clocks_time time1, time2; clocks_gettime(&time1); -#ifdef SWIFT_DEBUG_TASKS e->tic_step = getticks(); -#endif if (e->nodeID == 0) { @@ -3056,10 +3045,8 @@ void engine_step(struct engine *e) { clocks_gettime(&time2); e->wallclock_time = (float)clocks_diff(&time1, &time2); -#ifdef SWIFT_DEBUG_TASKS /* Time in ticks at the end of this step. */ e->toc_step = getticks(); -#endif } /** diff --git a/src/engine.h b/src/engine.h index eb73dc32d0dd885424335ad598ec93c866a6ccda..98848b50c9da9cca62b7625af44f47d0f60897ef 100644 --- a/src/engine.h +++ b/src/engine.h @@ -289,10 +289,8 @@ struct engine { struct proxy *proxies; int nr_proxies, *proxy_ind; -#ifdef SWIFT_DEBUG_TASKS /* Tic/toc at the start/end of a step. */ ticks tic_step, toc_step; -#endif #ifdef WITH_MPI /* CPU time of the last step. */ diff --git a/src/partition.c b/src/partition.c index 965c5a6142c44b8ddcc06c72ad533dba5e9f2b80..0bcf3ef0e6c5be8607a02bcfca6195ad03538bcd 100644 --- a/src/partition.c +++ b/src/partition.c @@ -1208,7 +1208,11 @@ void partition_gather_weights(void *map_data, int num_elements, t->type == task_type_logger || t->implicit || t->ci == NULL) continue; /* Get the task weight based on fixed cost for this task type. */ - double w = repartition_costs[t->type][t->subtype]; + //double w = repartition_costs[t->type][t->subtype]; + + /* Get the task weight based on costs. */ + double w = (double)t->toc - (double)t->tic; + if (w <= 0.0) continue; /* Get the top-level cells involved. */ struct cell *ci, *cj; @@ -1824,13 +1828,13 @@ void partition_init(struct partition *partition, " than 1"); /* Fraction of particles that should be updated before a repartition - * based on CPU time is considered. */ + * based on CPU time is considered, needs to be high. 
*/ repartition->minfrac = - parser_get_opt_param_float(params, "DomainDecomposition:minfrac", 0.9f); - if (repartition->minfrac <= 0 || repartition->minfrac > 1) + parser_get_opt_param_float(params, "DomainDecomposition:minfrac", 0.95f); + if (repartition->minfrac <= 0.5 || repartition->minfrac > 1) error( - "Invalid DomainDecomposition:minfrac, must be greater than 0 and less " - "than equal to 1"); + "Invalid DomainDecomposition:minfrac, must be greater than 0.5 " + "and less than or equal to 1"); /* Use METIS or ParMETIS when ParMETIS is also available. */ repartition->usemetis = diff --git a/src/scheduler.c b/src/scheduler.c index d40971482a273fd3b3d2d65a166d1340e7cd4c9d..d4ac89ddca85b9acefee3ac5acd85d073f3666d9 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -1595,9 +1595,9 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, t->nr_unlock_tasks = 0; #ifdef SWIFT_DEBUG_TASKS t->rid = -1; +#endif t->tic = 0; t->toc = 0; -#endif /* Add an index for it. */ // lock_lock( &s->lock ); @@ -1816,6 +1816,7 @@ void scheduler_reweight(struct scheduler *s, int verbose) { /* Run through the tasks backwards and set their weights. */ for (int k = nr_tasks - 1; k >= 0; k--) { struct task *t = &tasks[tid[k]]; + float cost; t->weight = 0.f; for (int j = 0; j < t->nr_unlock_tasks; j++) @@ -2031,14 +2032,14 @@ void scheduler_enqueue_mapper(void *map_data, int num_elements, */ void scheduler_start(struct scheduler *s) { -/* Reset all task debugging timers */ -#ifdef SWIFT_DEBUG_TASKS +/* Reset all task timers. */ for (int i = 0; i < s->nr_tasks; ++i) { s->tasks[i].tic = 0; s->tasks[i].toc = 0; +#ifdef SWIFT_DEBUG_TASKS s->tasks[i].rid = -1; - } #endif + } /* Re-wait the tasks. */ if (s->active_count > 1000) { @@ -2290,9 +2291,7 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) { /* Task definitely done, signal any sleeping runners. 
*/ if (!t->implicit) { -#ifdef SWIFT_DEBUG_TASKS t->toc = getticks(); -#endif pthread_mutex_lock(&s->sleep_mutex); atomic_dec(&s->waiting); pthread_cond_broadcast(&s->sleep_cond); @@ -2333,9 +2332,7 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) { /* Task definitely done. */ if (!t->implicit) { -#ifdef SWIFT_DEBUG_TASKS t->toc = getticks(); -#endif pthread_mutex_lock(&s->sleep_mutex); atomic_dec(&s->waiting); pthread_cond_broadcast(&s->sleep_cond); @@ -2419,13 +2416,13 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, } } -#ifdef SWIFT_DEBUG_TASKS /* Start the timer on this task, if we got one. */ if (res != NULL) { res->tic = getticks(); +#ifdef SWIFT_DEBUG_TASKS res->rid = qid; - } #endif + } /* No milk today. */ return res; diff --git a/src/task.h b/src/task.h index 8203c2484345017fc34dd58a78882fc069fb8e4a..2122f757220f2fea6691c45370b513a913aeeac8 100644 --- a/src/task.h +++ b/src/task.h @@ -180,10 +180,10 @@ struct task { /*! Information about the direction of the pair task */ short int sid; +#endif /*! Start and end time of this task */ ticks tic, toc; -#endif #ifdef SWIFT_DEBUG_CHECKS /* When was this task last run? */