diff --git a/configure.ac b/configure.ac index f6e2ea0db8e9829b719a0190eea7a1d891bfbbd6..8cb7fd1b3f09819b7c8b57ce5d70e3b38e32a637 100644 --- a/configure.ac +++ b/configure.ac @@ -170,6 +170,18 @@ if test "x$enable_debug" = "xyes"; then fi fi +# Check if task debugging is on. +AC_ARG_ENABLE([task-debugging], + [AS_HELP_STRING([--enable-task-debugging], + [Store task timing information and generate task dump files @<:@yes/no@:>@] + )], + [enable_task_debugging="$enableval"], + [enable_task_debugging="no"] +) +if test "$enable_task_debugging" = "yes"; then + AC_DEFINE([SWIFT_DEBUG_TASKS],1,[Enable task debugging]) +fi + # Define HAVE_POSIX_MEMALIGN if it works. AX_FUNC_POSIX_MEMALIGN @@ -533,6 +545,7 @@ AC_MSG_RESULT([ libNUMA enabled : $have_numa Using tcmalloc : $have_tcmalloc CPU profiler : $have_profiler + Task debugging : $enable_task_debugging ]) # Generate output. diff --git a/examples/main.c b/examples/main.c index 8bd591ccbd1c2fd860fa8ae2321d2e34132b287f..09b6c785a6dc29ecc1202c949b8f3d03f6f1fa78 100644 --- a/examples/main.c +++ b/examples/main.c @@ -235,6 +235,13 @@ int main(int argc, char *argv[]) { if (myrank == 0) print_help_message(); return 1; } +#ifndef SWIFT_DEBUG_TASKS + if (dump_tasks) { + error( + "Task dumping is only possible if SWIFT was configured with the " + "--enable-task-debugging option."); + } +#endif break; case '?': if (myrank == 0) print_help_message(); @@ -549,6 +556,7 @@ int main(int argc, char *argv[]) { /* Take a step. */ engine_step(&e); +#ifdef SWIFT_DEBUG_TASKS /* Dump the task data using the given frequency. */ if (dump_tasks && (dump_tasks == 1 || j % dump_tasks == 1)) { #ifdef WITH_MPI @@ -626,8 +634,9 @@ int main(int argc, char *argv[]) { } } fclose(file_thread); -#endif +#endif // WITH_MPI } +#endif // SWIFT_DEBUG_TASKS } /* Print the values of the runner histogram. */ diff --git a/src/partition.c b/src/partition.c index 8d17bedf0aaeadc64044b12ffe1bb8887b02d83e..3f5386154497d6901a5330b828007f86d87033a4 100644 --- a/src/partition.c +++ b/src/partition.c @@ -370,7 +370,7 @@ static void pick_metis(struct space *s, int nregions, int *vertexw, int *edgew, /* Dump graph in METIS format */ /* dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, - * weights_v, weights_e, NULL); + * weights_v, NULL, weights_e); */ if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, weights_e, @@ -420,7 +420,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, * assume the same graph structure as used in the part_ calls). */ int nr_cells = s->nr_cells; struct cell *cells = s->cells_top; - float wscale = 1e-3, vscale = 1e-3, wscale_buff = 0.0; + float wscale = 1.f, wscale_buff = 0.0; int wtot = 0; int wmax = 1e9 / nr_nodes; int wmin; @@ -459,15 +459,8 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, t->type != task_type_init) continue; - /* Get the task weight. This can be slightly negative on multiple board - * computers when the runners are not pinned to cores, don't stress just - * make a report and ignore these tasks. */ - int w = (t->toc - t->tic) * wscale; - if (w < 0) { - message("Task toc before tic: -%.3f %s, (try using processor affinity).", - clocks_from_ticks(t->tic - t->toc), clocks_getunit()); - w = 0; - } + /* Get the task weight. */ + int w = t->cost * wscale; /* Do we need to re-scale? */ wtot += w; @@ -616,7 +609,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, if (weights_e[k] == 0) weights_e[k] = 1; if (bothweights) for (int k = 0; k < nr_cells; k++) - if ((weights_v[k] *= vscale) == 0) weights_v[k] = 1; + if (weights_v[k] == 0) weights_v[k] = 1; /* And partition, use both weights or not as requested. */ if (bothweights) diff --git a/src/runner.c b/src/runner.c index f5efc99d492be837509e50bd2674ab6923404446..aaec28e2e6f924f7e912f8156520fac9647bccfe 100644 --- a/src/runner.c +++ b/src/runner.c @@ -1231,7 +1231,9 @@ void *runner_main(void *data) { /* Get the cells. */ struct cell *ci = t->ci; struct cell *cj = t->cj; +#ifdef SWIFT_DEBUG_TASKS t->rid = r->cpuid; +#endif /* Check that we haven't scheduled an inactive task */ #ifdef SWIFT_DEBUG_CHECKS diff --git a/src/scheduler.c b/src/scheduler.c index 44790fcd2fa5f67e6f325ba5849da19e35ab285a..c656470cb596c12cf93639f00e26d96c24403df8 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -708,10 +708,12 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, t->implicit = 0; t->weight = 0; t->rank = 0; - t->tic = 0; - t->toc = 0; t->nr_unlock_tasks = 0; +#ifdef SWIFT_DEBUG_TASKS t->rid = -1; + t->tic = 0; + t->toc = 0; +#endif /* Add an index for it. */ // lock_lock( &s->lock ); @@ -924,55 +926,56 @@ void scheduler_reweight(struct scheduler *s, int verbose) { for (int j = 0; j < t->nr_unlock_tasks; j++) if (t->unlock_tasks[j]->weight > t->weight) t->weight = t->unlock_tasks[j]->weight; - if (!t->implicit && t->tic > 0) - t->weight += wscale * (t->toc - t->tic); - else - switch (t->type) { - case task_type_sort: - t->weight += wscale * intrinsics_popcount(t->flags) * t->ci->count * - (sizeof(int) * 8 - intrinsics_clz(t->ci->count)); - break; - case task_type_self: - t->weight += 1 * wscale * t->ci->count * t->ci->count; - break; - case task_type_pair: - if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) - t->weight += + int cost = 0; + switch (t->type) { + case task_type_sort: + cost = wscale * intrinsics_popcount(t->flags) * t->ci->count * + (sizeof(int) * 8 - intrinsics_clz(t->ci->count)); + break; + case task_type_self: + cost = 1 * wscale * t->ci->count * t->ci->count; + break; + case task_type_pair: + if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) + cost = 3 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags]; + else + cost = 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags]; + break; + case task_type_sub_pair: + if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) { + if (t->flags < 0) + cost = 3 * wscale * t->ci->count * t->cj->count; + else + cost = 3 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags]; + } else { + if (t->flags < 0) + cost = 2 * wscale * t->ci->count * t->cj->count; else - t->weight += + cost = 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags]; - break; - case task_type_sub_pair: - if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) { - if (t->flags < 0) - t->weight += 3 * wscale * t->ci->count * t->cj->count; - else - t->weight += 3 * wscale * t->ci->count * t->cj->count * - sid_scale[t->flags]; - } else { - if (t->flags < 0) - t->weight += 2 * wscale * t->ci->count * t->cj->count; - else - t->weight += 2 * wscale * t->ci->count * t->cj->count * - sid_scale[t->flags]; - } - break; - case task_type_sub_self: - t->weight += 1 * wscale * t->ci->count * t->ci->count; - break; - case task_type_ghost: - if (t->ci == t->ci->super) t->weight += wscale * t->ci->count; - break; - case task_type_kick: - t->weight += wscale * t->ci->count; - break; - case task_type_init: - t->weight += wscale * t->ci->count; - break; - default: - break; - } + } + break; + case task_type_sub_self: + cost = 1 * wscale * t->ci->count * t->ci->count; + break; + case task_type_ghost: + if (t->ci == t->ci->super) cost = wscale * t->ci->count; + break; + case task_type_kick: + cost = wscale * t->ci->count; + break; + case task_type_init: + cost = wscale * t->ci->count; + break; + default: + cost = 0; + break; + } +#if defined(WITH_MPI) && defined(HAVE_METIS) + t->cost = cost; +#endif + t->weight += cost; } if (verbose) @@ -1052,9 +1055,6 @@ void scheduler_start(struct scheduler *s, unsigned int mask, /* Clear all the waits, rids, and times. */ for (int k = 0; k < s->nr_tasks; k++) { s->tasks[k].wait = 1; - s->tasks[k].rid = -1; - s->tasks[k].tic = 0; - s->tasks[k].toc = 0; if (((1 << s->tasks[k].type) & mask) == 0 || ((1 << s->tasks[k].subtype) & s->submask) == 0) s->tasks[k].skip = 1; @@ -1137,9 +1137,6 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { /* The target queue for this task. */ int qid = -1; - /* Fail if this task has already been enqueued before. */ - if (t->rid >= 0) error("Task has already been enqueued."); - /* Ignore skipped tasks and tasks not in the masks. */ if (t->skip || (1 << t->type) & ~(s->mask) || (1 << t->subtype) & ~(s->submask)) { @@ -1268,7 +1265,9 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) { /* Task definitely done, signal any sleeping runners. */ if (!t->implicit) { +#ifdef SWIFT_DEBUG_TASKS t->toc = getticks(); +#endif pthread_mutex_lock(&s->sleep_mutex); atomic_dec(&s->waiting); pthread_cond_broadcast(&s->sleep_cond); @@ -1310,7 +1309,9 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) { /* Task definitely done. */ if (!t->implicit) { +#ifdef SWIFT_DEBUG_TASKS t->toc = getticks(); +#endif pthread_mutex_lock(&s->sleep_mutex); atomic_dec(&s->waiting); pthread_cond_broadcast(&s->sleep_cond); @@ -1394,11 +1395,13 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, } } +#ifdef SWIFT_DEBUG_TASKS /* Start the timer on this task, if we got one. */ if (res != NULL) { res->tic = getticks(); res->rid = qid; } +#endif /* No milk today. */ return res; diff --git a/src/task.h b/src/task.h index f840c0b4b8e807dce28f6f13479dbdf4995ab66d..f6d75598af933adeb045973890866b54f16d84d4 100644 --- a/src/task.h +++ b/src/task.h @@ -105,9 +105,6 @@ struct task { /*! List of tasks unlocked by this one */ struct task **unlock_tasks; - /*! Start and end time of this task */ - ticks tic, toc; - #ifdef WITH_MPI /*! Buffer for this task's communications */ @@ -127,8 +124,10 @@ struct task { /*! Weight of the task */ int weight; - /*! ID of the queue or runner owning this task */ - short int rid; +#if defined(WITH_MPI) && defined(HAVE_METIS) + /*! Individual cost estimate for this task. */ + int cost; +#endif /*! Number of tasks unlocked by this one */ short int nr_unlock_tasks; @@ -151,6 +150,14 @@ struct task { /*! Is this task implicit (i.e. does not do anything) ? */ char implicit; +#ifdef SWIFT_DEBUG_TASKS + /*! ID of the queue or runner owning this task */ + short int rid; + + /*! Start and end time of this task */ + ticks tic, toc; +#endif + } SWIFT_STRUCT_ALIGN; /* Function prototypes. */