Commit 7c2f75fa authored by Matthieu Schaller
Browse files

Merge branch 'master' of gitlab.cosma.dur.ac.uk:swift/swiftsim

parents 7bb7239f a88509cd
......@@ -170,6 +170,18 @@ if test "x$enable_debug" = "xyes"; then
fi
fi
# Check if task debugging is on.
# The --enable-task-debugging option defaults to "no". The chosen value is
# kept in the shell variable enable_task_debugging.
AC_ARG_ENABLE([task-debugging],
[AS_HELP_STRING([--enable-task-debugging],
[Store task timing information and generate task dump files @<:@yes/no@:>@]
)],
[enable_task_debugging="$enableval"],
[enable_task_debugging="no"]
)
# Only the exact value "yes" defines SWIFT_DEBUG_TASKS for the C sources.
if test "$enable_task_debugging" = "yes"; then
AC_DEFINE([SWIFT_DEBUG_TASKS],1,[Enable task debugging])
fi
# Define HAVE_POSIX_MEMALIGN if it works.
AX_FUNC_POSIX_MEMALIGN
......@@ -533,6 +545,7 @@ AC_MSG_RESULT([
libNUMA enabled : $have_numa
Using tcmalloc : $have_tcmalloc
CPU profiler : $have_profiler
Task debugging : $enable_task_debugging
])
# Generate output.
......
......@@ -235,6 +235,13 @@ int main(int argc, char *argv[]) {
if (myrank == 0) print_help_message();
return 1;
}
#ifndef SWIFT_DEBUG_TASKS
if (dump_tasks) {
error(
"Task dumping is only possible if SWIFT was configured with the "
"--enable-task-debugging option.");
}
#endif
break;
case '?':
if (myrank == 0) print_help_message();
......@@ -549,6 +556,7 @@ int main(int argc, char *argv[]) {
/* Take a step. */
engine_step(&e);
#ifdef SWIFT_DEBUG_TASKS
/* Dump the task data using the given frequency. */
if (dump_tasks && (dump_tasks == 1 || j % dump_tasks == 1)) {
#ifdef WITH_MPI
......@@ -626,8 +634,9 @@ int main(int argc, char *argv[]) {
}
}
fclose(file_thread);
#endif
#endif // WITH_MPI
}
#endif // SWIFT_DEBUG_TASKS
}
/* Print the values of the runner histogram. */
......
......@@ -370,7 +370,7 @@ static void pick_metis(struct space *s, int nregions, int *vertexw, int *edgew,
/* Dump graph in METIS format */
/* dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy,
* weights_v, weights_e, NULL);
* weights_v, NULL, weights_e);
*/
if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, weights_e,
......@@ -420,7 +420,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
* assume the same graph structure as used in the part_ calls). */
int nr_cells = s->nr_cells;
struct cell *cells = s->cells_top;
float wscale = 1e-3, vscale = 1e-3, wscale_buff = 0.0;
float wscale = 1.f, wscale_buff = 0.0;
int wtot = 0;
int wmax = 1e9 / nr_nodes;
int wmin;
......@@ -459,15 +459,8 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
t->type != task_type_init)
continue;
/* Get the task weight. This can be slightly negative on multiple board
* computers when the runners are not pinned to cores, don't stress just
* make a report and ignore these tasks. */
int w = (t->toc - t->tic) * wscale;
if (w < 0) {
message("Task toc before tic: -%.3f %s, (try using processor affinity).",
clocks_from_ticks(t->tic - t->toc), clocks_getunit());
w = 0;
}
/* Get the task weight. */
int w = t->cost * wscale;
/* Do we need to re-scale? */
wtot += w;
......@@ -616,7 +609,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID,
if (weights_e[k] == 0) weights_e[k] = 1;
if (bothweights)
for (int k = 0; k < nr_cells; k++)
if ((weights_v[k] *= vscale) == 0) weights_v[k] = 1;
if (weights_v[k] == 0) weights_v[k] = 1;
/* And partition, use both weights or not as requested. */
if (bothweights)
......
......@@ -1231,7 +1231,9 @@ void *runner_main(void *data) {
/* Get the cells. */
struct cell *ci = t->ci;
struct cell *cj = t->cj;
#ifdef SWIFT_DEBUG_TASKS
t->rid = r->cpuid;
#endif
/* Check that we haven't scheduled an inactive task */
#ifdef SWIFT_DEBUG_CHECKS
......
......@@ -708,10 +708,12 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
t->implicit = 0;
t->weight = 0;
t->rank = 0;
t->tic = 0;
t->toc = 0;
t->nr_unlock_tasks = 0;
#ifdef SWIFT_DEBUG_TASKS
t->rid = -1;
t->tic = 0;
t->toc = 0;
#endif
/* Add an index for it. */
// lock_lock( &s->lock );
......@@ -924,55 +926,56 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
for (int j = 0; j < t->nr_unlock_tasks; j++)
if (t->unlock_tasks[j]->weight > t->weight)
t->weight = t->unlock_tasks[j]->weight;
if (!t->implicit && t->tic > 0)
t->weight += wscale * (t->toc - t->tic);
else
switch (t->type) {
case task_type_sort:
t->weight += wscale * intrinsics_popcount(t->flags) * t->ci->count *
(sizeof(int) * 8 - intrinsics_clz(t->ci->count));
break;
case task_type_self:
t->weight += 1 * wscale * t->ci->count * t->ci->count;
break;
case task_type_pair:
if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID)
t->weight +=
int cost = 0;
switch (t->type) {
case task_type_sort:
cost = wscale * intrinsics_popcount(t->flags) * t->ci->count *
(sizeof(int) * 8 - intrinsics_clz(t->ci->count));
break;
case task_type_self:
cost = 1 * wscale * t->ci->count * t->ci->count;
break;
case task_type_pair:
if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID)
cost = 3 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
else
cost = 2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
break;
case task_type_sub_pair:
if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) {
if (t->flags < 0)
cost = 3 * wscale * t->ci->count * t->cj->count;
else
cost =
3 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
} else {
if (t->flags < 0)
cost = 2 * wscale * t->ci->count * t->cj->count;
else
t->weight +=
cost =
2 * wscale * t->ci->count * t->cj->count * sid_scale[t->flags];
break;
case task_type_sub_pair:
if (t->ci->nodeID != nodeID || t->cj->nodeID != nodeID) {
if (t->flags < 0)
t->weight += 3 * wscale * t->ci->count * t->cj->count;
else
t->weight += 3 * wscale * t->ci->count * t->cj->count *
sid_scale[t->flags];
} else {
if (t->flags < 0)
t->weight += 2 * wscale * t->ci->count * t->cj->count;
else
t->weight += 2 * wscale * t->ci->count * t->cj->count *
sid_scale[t->flags];
}
break;
case task_type_sub_self:
t->weight += 1 * wscale * t->ci->count * t->ci->count;
break;
case task_type_ghost:
if (t->ci == t->ci->super) t->weight += wscale * t->ci->count;
break;
case task_type_kick:
t->weight += wscale * t->ci->count;
break;
case task_type_init:
t->weight += wscale * t->ci->count;
break;
default:
break;
}
}
break;
case task_type_sub_self:
cost = 1 * wscale * t->ci->count * t->ci->count;
break;
case task_type_ghost:
if (t->ci == t->ci->super) cost = wscale * t->ci->count;
break;
case task_type_kick:
cost = wscale * t->ci->count;
break;
case task_type_init:
cost = wscale * t->ci->count;
break;
default:
cost = 0;
break;
}
#if defined(WITH_MPI) && defined(HAVE_METIS)
t->cost = cost;
#endif
t->weight += cost;
}
if (verbose)
......@@ -1052,9 +1055,6 @@ void scheduler_start(struct scheduler *s, unsigned int mask,
/* Clear all the waits, rids, and times. */
for (int k = 0; k < s->nr_tasks; k++) {
s->tasks[k].wait = 1;
s->tasks[k].rid = -1;
s->tasks[k].tic = 0;
s->tasks[k].toc = 0;
if (((1 << s->tasks[k].type) & mask) == 0 ||
((1 << s->tasks[k].subtype) & s->submask) == 0)
s->tasks[k].skip = 1;
......@@ -1137,9 +1137,6 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) {
/* The target queue for this task. */
int qid = -1;
/* Fail if this task has already been enqueued before. */
if (t->rid >= 0) error("Task has already been enqueued.");
/* Ignore skipped tasks and tasks not in the masks. */
if (t->skip || (1 << t->type) & ~(s->mask) ||
(1 << t->subtype) & ~(s->submask)) {
......@@ -1268,7 +1265,9 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) {
/* Task definitely done, signal any sleeping runners. */
if (!t->implicit) {
#ifdef SWIFT_DEBUG_TASKS
t->toc = getticks();
#endif
pthread_mutex_lock(&s->sleep_mutex);
atomic_dec(&s->waiting);
pthread_cond_broadcast(&s->sleep_cond);
......@@ -1310,7 +1309,9 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) {
/* Task definitely done. */
if (!t->implicit) {
#ifdef SWIFT_DEBUG_TASKS
t->toc = getticks();
#endif
pthread_mutex_lock(&s->sleep_mutex);
atomic_dec(&s->waiting);
pthread_cond_broadcast(&s->sleep_cond);
......@@ -1394,11 +1395,13 @@ struct task *scheduler_gettask(struct scheduler *s, int qid,
}
}
#ifdef SWIFT_DEBUG_TASKS
/* Start the timer on this task, if we got one. */
if (res != NULL) {
res->tic = getticks();
res->rid = qid;
}
#endif
/* No milk today. */
return res;
......
......@@ -105,9 +105,6 @@ struct task {
/*! List of tasks unlocked by this one */
struct task **unlock_tasks;
/*! Start and end time of this task */
ticks tic, toc;
#ifdef WITH_MPI
/*! Buffer for this task's communications */
......@@ -127,8 +124,10 @@ struct task {
/*! Weight of the task */
int weight;
/*! ID of the queue or runner owning this task */
short int rid;
#if defined(WITH_MPI) && defined(HAVE_METIS)
/*! Individual cost estimate for this task. */
int cost;
#endif
/*! Number of tasks unlocked by this one */
short int nr_unlock_tasks;
......@@ -151,6 +150,14 @@ struct task {
/*! Is this task implicit (i.e. does not do anything) ? */
char implicit;
#ifdef SWIFT_DEBUG_TASKS
/*! ID of the queue or runner owning this task */
short int rid;
/*! Start and end time of this task */
ticks tic, toc;
#endif
} SWIFT_STRUCT_ALIGN;
/* Function prototypes. */
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment