Commit 7c2f75fa authored by Matthieu Schaller
Browse files

Merge branch 'master' of gitlab.cosma.dur.ac.uk:swift/swiftsim

parents 7bb7239f a88509cd
...@@ -170,6 +170,18 @@ if test "x$enable_debug" = "xyes"; then ...@@ -170,6 +170,18 @@ if test "x$enable_debug" = "xyes"; then
fi fi
fi fi
# Check if task debugging is on.
AC_ARG_ENABLE([task-debugging],
[AS_HELP_STRING([--enable-task-debugging],
[Store task timing information and generate task dump files @<:@yes/no@:>@]
)],
[enable_task_debugging="$enableval"],
[enable_task_debugging="no"]
)
if test "$enable_task_debugging" = "yes"; then
AC_DEFINE([SWIFT_DEBUG_TASKS],1,[Enable task debugging])
fi
# Define HAVE_POSIX_MEMALIGN if it works. # Define HAVE_POSIX_MEMALIGN if it works.
AX_FUNC_POSIX_MEMALIGN AX_FUNC_POSIX_MEMALIGN
...@@ -533,6 +545,7 @@ AC_MSG_RESULT([ ...@@ -533,6 +545,7 @@ AC_MSG_RESULT([
libNUMA enabled : $have_numa libNUMA enabled : $have_numa
Using tcmalloc : $have_tcmalloc Using tcmalloc : $have_tcmalloc
CPU profiler : $have_profiler CPU profiler : $have_profiler
Task debugging : $enable_task_debugging
]) ])
# Generate output. # Generate output.
......
...@@ -235,6 +235,13 @@ int main(int argc, char *argv[]) { ...@@ -235,6 +235,13 @@ int main(int argc, char *argv[]) {
if (myrank == 0) print_help_message(); if (myrank == 0) print_help_message();
return 1; return 1;
} }
#ifndef SWIFT_DEBUG_TASKS
if (dump_tasks) {
error(
"Task dumping is only possible if SWIFT was configured with the "
"--enable-task-debugging option.");
}
#endif
break; break;
case '?': case '?':
if (myrank == 0) print_help_message(); if (myrank == 0) print_help_message();
...@@ -549,6 +556,7 @@ int main(int argc, char *argv[]) { ...@@ -549,6 +556,7 @@ int main(int argc, char *argv[]) {
/* Take a step. */ /* Take a step. */
engine_step(&e); engine_step(&e);
#ifdef SWIFT_DEBUG_TASKS
/* Dump the task data using the given frequency. */ /* Dump the task data using the given frequency. */
if (dump_tasks && (dump_tasks == 1 || j % dump_tasks == 1)) { if (dump_tasks && (dump_tasks == 1 || j % dump_tasks == 1)) {
#ifdef WITH_MPI #ifdef WITH_MPI
...@@ -626,8 +634,9 @@ int main(int argc, char *argv[]) { ...@@ -626,8 +634,9 @@ int main(int argc, char *argv[]) {
} }
} }
fclose(file_thread); fclose(file_thread);
#endif #endif // WITH_MPI
} }
#endif // SWIFT_DEBUG_TASKS
} }
/* Print the values of the runner histogram. */ /* Print the values of the runner histogram. */
......
...@@ -370,7 +370,7 @@ static void pick_metis(struct space *s, int nregions, int *vertexw, int *edgew, ...@@ -370,7 +370,7 @@ static void pick_metis(struct space *s, int nregions, int *vertexw, int *edgew,
/* Dump graph in METIS format */ /* Dump graph in METIS format */
/* dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy, /* dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy,
* weights_v, weights_e, NULL); * weights_v, NULL, weights_e);
*/ */
if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, weights_e, if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, weights_e,
...@@ -420,7 +420,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, ...@@ -420,7 +420,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
* assume the same graph structure as used in the part_ calls). */ * assume the same graph structure as used in the part_ calls). */
int nr_cells = s->nr_cells; int nr_cells = s->nr_cells;
struct cell *cells = s->cells_top; struct cell *cells = s->cells_top;
float wscale = 1e-3, vscale = 1e-3, wscale_buff = 0.0; float wscale = 1.f, wscale_buff = 0.0;
int wtot = 0; int wtot = 0;
int wmax = 1e9 / nr_nodes; int wmax = 1e9 / nr_nodes;
int wmin; int wmin;
...@@ -459,15 +459,8 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, ...@@ -459,15 +459,8 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
t->type != task_type_init) t->type != task_type_init)
continue; continue;
/* Get the task weight. This can be slightly negative on multiple board /* Get the task weight. */
* computers when the runners are not pinned to cores, don't stress just int w = t->cost * wscale;
* make a report and ignore these tasks. */
int w = (t->toc - t->tic) * wscale;
if (w < 0) {
message("Task toc before tic: -%.3f %s, (try using processor affinity).",
clocks_from_ticks(t->tic - t->toc), clocks_getunit());
w = 0;
}
/* Do we need to re-scale? */ /* Do we need to re-scale? */
wtot += w; wtot += w;
...@@ -616,7 +609,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, ...@@ -616,7 +609,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
if (weights_e[k] == 0) weights_e[k] = 1; if (weights_e[k] == 0) weights_e[k] = 1;
if (bothweights) if (bothweights)
for (int k = 0; k < nr_cells; k++) for (int k = 0; k < nr_cells; k++)
if ((weights_v[k] *= vscale) == 0) weights_v[k] = 1; if (weights_v[k] == 0) weights_v[k] = 1;
/* And partition, use both weights or not as requested. */ /* And partition, use both weights or not as requested. */
if (bothweights) if (bothweights)
......
...@@ -1231,7 +1231,9 @@ void *runner_main(void *data) { ...@@ -1231,7 +1231,9 @@ void *runner_main(void *data) {
/* Get the cells. */ /* Get the cells. */
struct cell *ci = t->ci; struct cell *ci = t->ci;
struct cell *cj = t->cj; struct cell *cj = t->cj;
#ifdef SWIFT_DEBUG_TASKS
t->rid = r->cpuid; t->rid = r->cpuid;
#endif
/* Check that we haven't scheduled an inactive task */ /* Check that we haven't scheduled an inactive task */
#ifdef SWIFT_DEBUG_CHECKS #ifdef SWIFT_DEBUG_CHECKS
......
...@@ -708,10 +708,12 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, ...@@ -708,10 +708,12 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
t->implicit = 0; t->implicit = 0;
t->weight = 0; t->weight = 0;
t->rank = 0; t->rank = 0;
t->tic = 0;
t->toc = 0;
t->nr_unlock_tasks = 0; t->nr_unlock_tasks = 0;
#ifdef SWIFT_DEBUG_TASKS
t->rid = -1; t->rid = -1;
t->tic = 0;
t->toc = 0;
#endif
/* Add an index for it. */ /* Add an index for it. */
// lock_lock( &s->lock ); // lock_lock( &s->lock );
...@@ -924,55 +926,56 @@ void scheduler_reweight(struct scheduler *s, int verbose) { ...@@ -924,55 +926,56 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
for (int j = 0; j < t->nr_unlock_tasks; j++) for (int j = 0; j < t->nr_unlock_tasks; j++)
if (t->unlock_tasks[j]->weight > t->weight) if (t->unlock_tasks[j]->weight > t->weight)
t->weight = t->unlock_tasks[j]->weight; t->weight = t->unlock_tasks[j]->weight;
if (!t->implicit && t->tic > 0) int cost = 0;
t->weight += wscale * (t->toc - t->tic); switch (t->type) {
else case task_type_sort:
switch (t->type) { cost = wscale * intrinsics_popcount(t->flags) * t->ci->count *
case task_type_sort: (sizeof(int) * 8 - intrinsics_clz(t->ci->count));
t->weight += wscale * intrinsics_popcount(t->flags) * t->ci->count * break;
(sizeof(int) * 8 - intrinsics_clz(t->ci->count)); case task_type_self:
break; cost = 1 * wscale * t->ci->count * t->ci->count;
case task_type_self: break;
t->weight += 1 * wscale * t->ci->count * t->ci->count; case task_type_pair:
break; if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID)
case task_type_pair: cost = 3 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) else
t->weight += cost = 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
break;
case task_type_sub_pair:
if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) {
if (t->flags < 0)
cost = 3 * wscale * t->ci->count * t->cj->count;
else
cost =
3 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags]; 3 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
} else {
if (t->flags < 0)
cost = 2 * wscale * t->ci->count * t->cj->count;
else else
t->weight += cost =
2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags]; 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
break; }
case task_type_sub_pair: break;
if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) { case task_type_sub_self:
if (t->flags < 0) cost = 1 * wscale * t->ci->count * t->ci->count;
t->weight += 3 * wscale * t->ci->count * t->cj->count; break;
else case task_type_ghost:
t->weight += 3 * wscale * t->ci->count * t->cj->count * if (t->ci == t->ci->super) cost = wscale * t->ci->count;
sid_scale[t->flags]; break;
} else { case task_type_kick:
if (t->flags < 0) cost = wscale * t->ci->count;
t->weight += 2 * wscale * t->ci->count * t->cj->count; break;
else case task_type_init:
t->weight += 2 * wscale * t->ci->count * t->cj->count * cost = wscale * t->ci->count;
sid_scale[t->flags]; break;
} default:
break; cost = 0;
case task_type_sub_self: break;
t->weight += 1 * wscale * t->ci->count * t->ci->count; }
break; #if defined(WITH_MPI) && defined(HAVE_METIS)
case task_type_ghost: t->cost = cost;
if (t->ci == t->ci->super) t->weight += wscale * t->ci->count; #endif
break; t->weight += cost;
case task_type_kick:
t->weight += wscale * t->ci->count;
break;
case task_type_init:
t->weight += wscale * t->ci->count;
break;
default:
break;
}
} }
if (verbose) if (verbose)
...@@ -1052,9 +1055,6 @@ void scheduler_start(struct scheduler *s, unsigned int mask, ...@@ -1052,9 +1055,6 @@ void scheduler_start(struct scheduler *s, unsigned int mask,
/* Clear all the waits, rids, and times. */ /* Clear all the waits, rids, and times. */
for (int k = 0; k < s->nr_tasks; k++) { for (int k = 0; k < s->nr_tasks; k++) {
s->tasks[k].wait = 1; s->tasks[k].wait = 1;
s->tasks[k].rid = -1;
s->tasks[k].tic = 0;
s->tasks[k].toc = 0;
if (((1 << s->tasks[k].type) & mask) == 0 || if (((1 << s->tasks[k].type) & mask) == 0 ||
((1 << s->tasks[k].subtype) & s->submask) == 0) ((1 << s->tasks[k].subtype) & s->submask) == 0)
s->tasks[k].skip = 1; s->tasks[k].skip = 1;
...@@ -1137,9 +1137,6 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { ...@@ -1137,9 +1137,6 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
/* The target queue for this task. */ /* The target queue for this task. */
int qid = -1; int qid = -1;
/* Fail if this task has already been enqueued before. */
if (t->rid >= 0) error("Task has already been enqueued.");
/* Ignore skipped tasks and tasks not in the masks. */ /* Ignore skipped tasks and tasks not in the masks. */
if (t->skip || (1 << t->type) & ~(s->mask) || if (t->skip || (1 << t->type) & ~(s->mask) ||
(1 << t->subtype) & ~(s->submask)) { (1 << t->subtype) & ~(s->submask)) {
...@@ -1268,7 +1265,9 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) { ...@@ -1268,7 +1265,9 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) {
/* Task definitely done, signal any sleeping runners. */ /* Task definitely done, signal any sleeping runners. */
if (!t->implicit) { if (!t->implicit) {
#ifdef SWIFT_DEBUG_TASKS
t->toc = getticks(); t->toc = getticks();
#endif
pthread_mutex_lock(&s->sleep_mutex); pthread_mutex_lock(&s->sleep_mutex);
atomic_dec(&s->waiting); atomic_dec(&s->waiting);
pthread_cond_broadcast(&s->sleep_cond); pthread_cond_broadcast(&s->sleep_cond);
...@@ -1310,7 +1309,9 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) { ...@@ -1310,7 +1309,9 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) {
/* Task definitely done. */ /* Task definitely done. */
if (!t->implicit) { if (!t->implicit) {
#ifdef SWIFT_DEBUG_TASKS
t->toc = getticks(); t->toc = getticks();
#endif
pthread_mutex_lock(&s->sleep_mutex); pthread_mutex_lock(&s->sleep_mutex);
atomic_dec(&s->waiting); atomic_dec(&s->waiting);
pthread_cond_broadcast(&s->sleep_cond); pthread_cond_broadcast(&s->sleep_cond);
...@@ -1394,11 +1395,13 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, ...@@ -1394,11 +1395,13 @@ struct task *scheduler_gettask(struct scheduler *s, int qid,
} }
} }
#ifdef SWIFT_DEBUG_TASKS
/* Start the timer on this task, if we got one. */ /* Start the timer on this task, if we got one. */
if (res != NULL) { if (res != NULL) {
res->tic = getticks(); res->tic = getticks();
res->rid = qid; res->rid = qid;
} }
#endif
/* No milk today. */ /* No milk today. */
return res; return res;
......
...@@ -105,9 +105,6 @@ struct task { ...@@ -105,9 +105,6 @@ struct task {
/*! List of tasks unlocked by this one */ /*! List of tasks unlocked by this one */
struct task **unlock_tasks; struct task **unlock_tasks;
/*! Start and end time of this task */
ticks tic, toc;
#ifdef WITH_MPI #ifdef WITH_MPI
/*! Buffer for this task's communications */ /*! Buffer for this task's communications */
...@@ -127,8 +124,10 @@ struct task { ...@@ -127,8 +124,10 @@ struct task {
/*! Weight of the task */ /*! Weight of the task */
int weight; int weight;
/*! ID of the queue or runner owning this task */ #if defined(WITH_MPI) && defined(HAVE_METIS)
short int rid; /*! Individual cost estimate for this task. */
int cost;
#endif
/*! Number of tasks unlocked by this one */ /*! Number of tasks unlocked by this one */
short int nr_unlock_tasks; short int nr_unlock_tasks;
...@@ -151,6 +150,14 @@ struct task { ...@@ -151,6 +150,14 @@ struct task {
/*! Is this task implicit (i.e. does not do anything) ? */ /*! Is this task implicit (i.e. does not do anything) ? */
char implicit; char implicit;
#ifdef SWIFT_DEBUG_TASKS
/*! ID of the queue or runner owning this task */
short int rid;
/*! Start and end time of this task */
ticks tic, toc;
#endif
} SWIFT_STRUCT_ALIGN; } SWIFT_STRUCT_ALIGN;
/* Function prototypes. */ /* Function prototypes. */
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment