Compare revisions
Commits on Source (2)
@@ -181,6 +181,11 @@ void queue_insert(struct queue *q, struct task *t) {
   /* Increase the incoming count. */
   atomic_inc(&q->count_incoming);
+
+#ifdef SWIFT_DEBUG_TASKS
+  /* Start timing how long we are in the queue. */
+  t->queued_tic = getticks();
+#endif
 }

 /**
@@ -268,6 +273,11 @@ struct task *queue_gettask(struct queue *q, const struct task *prev,
         /* Send it down the binary heap. */
         if (queue_sift_down(q, ind) != ind) ind -= 1;
       }
+
+#ifdef SWIFT_DEBUG_TASKS
+      /* One more miss. */
+      qtasks[entries[ind].tid].nr_task_locks++;
+#endif
     }

     /* Did we get a task? */
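Together these two hunks time how long each task waits between being queued and being picked up (queue_insert() stamps queued_tic, and the task's tic is taken when it starts running), and count how often queue_gettask() rejects a task because its locks could not be obtained. A minimal sketch of the derived wait time, with illustrative names and made-up numbers that are not part of the patch:

# Sketch: queue wait from the two tick stamps added by this patch.
def queue_wait_ms(queued_tic, tic, ticks_per_ms):
    """Wait between queue_insert() stamping queued_tic and the task's tic."""
    return (tic - queued_tic) / ticks_per_ms

# Made-up numbers: a 2.6 GHz clock ticks 2.6e6 times per millisecond.
print(queue_wait_ms(queued_tic=1_000_000, tic=6_200_000, ticks_per_ms=2.6e6))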
@@ -1266,6 +1266,10 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
   t->tic = 0;
   t->toc = 0;
   t->total_ticks = 0;
+#ifdef SWIFT_DEBUG_TASKS
+  t->queued_tic = 0;
+  t->nr_task_locks = 0;
+#endif

   if (ci != NULL) cell_set_flag(ci, cell_flag_has_tasks);
   if (cj != NULL) cell_set_flag(cj, cell_flag_has_tasks);
@@ -1082,7 +1082,7 @@ void task_dump_all(struct engine *e, int step) {
     /* Add some information to help with the plots and conversion of ticks to
      * seconds. */
-    fprintf(file_thread, " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 %lld\n",
+    fprintf(file_thread, " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 0 0 %lld\n",
             engine_rank, (long long int)e->tic_step,
             (long long int)e->toc_step, e->updates, e->g_updates,
             e->s_updates, cpufreq);
@@ -1091,7 +1091,8 @@
     if (!e->sched.tasks[l].implicit &&
         e->sched.tasks[l].tic > e->tic_step) {
       fprintf(
-          file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n",
+          file_thread,
+          " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i %lli %zu\n",
           engine_rank, e->sched.tasks[l].rid, e->sched.tasks[l].type,
           e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL),
           (long long int)e->sched.tasks[l].tic,
@@ -1104,7 +1105,8 @@
               : 0,
           (e->sched.tasks[l].cj != NULL) ? e->sched.tasks[l].cj->grav.count
                                          : 0,
-          e->sched.tasks[l].flags, e->sched.tasks[l].sid);
+          e->sched.tasks[l].flags, e->sched.tasks[l].sid,
+          e->sched.tasks[l].queued_tic, e->sched.tasks[l].nr_task_locks);
     }
     count++;
   }
@@ -1124,14 +1126,14 @@
   /* Add some information to help with the plots and conversion of ticks to
    * seconds. */
-  fprintf(file_thread, " %d %d %d %d %lld %lld %lld %lld %lld %d %lld\n", -2,
+  fprintf(file_thread, " %d %d %d %d %lld %lld %lld %lld %lld 0 0 0 %lld\n", -2,
           -1, -1, 1, (unsigned long long)e->tic_step,
           (unsigned long long)e->toc_step, e->updates, e->g_updates,
-          e->s_updates, 0, cpufreq);
+          e->s_updates, cpufreq);

   for (int l = 0; l < e->sched.nr_tasks; l++) {
     if (!e->sched.tasks[l].implicit && e->sched.tasks[l].tic > e->tic_step) {
       fprintf(
-          file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n",
+          file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i %lli %zu\n",
           e->sched.tasks[l].rid, e->sched.tasks[l].type,
           e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL),
           (unsigned long long)e->sched.tasks[l].tic,
@@ -1142,7 +1144,8 @@
               : e->sched.tasks[l].cj->hydro.count,
           (e->sched.tasks[l].ci == NULL) ? 0 : e->sched.tasks[l].ci->grav.count,
           (e->sched.tasks[l].cj == NULL) ? 0 : e->sched.tasks[l].cj->grav.count,
-          e->sched.tasks[l].sid);
+          e->sched.tasks[l].sid, e->sched.tasks[l].queued_tic,
+          e->sched.tasks[l].nr_task_locks);
     }
   }
   fclose(file_thread);
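With these changes every task line written by task_dump_all() carries two extra trailing fields, queued_tic (%lli) and nr_task_locks (%zu), and the header rows are zero-padded to match, which is why the scripts below now test for 15 (MPI) or 13 (non-MPI) columns. A hedged sketch of loading such a dump (the file name is a placeholder; the column indices are the ones the scripts use):

import numpy as np

data = np.loadtxt("thread_info-step1.dat")  # placeholder file name
full_step = data[0, :]
mpimode = full_step.size == 15  # 13 columns in non-MPI dumps after this patch

qticcol = 13 if mpimode else 11      # queued_tic column
nunlockscol = 14 if mpimode else 12  # nr_task_locks column

queued_tics = data[1:, qticcol]          # raw queue-entry tick stamps
lock_rejections = data[1:, nunlockscol]  # per-task lock-rejection counts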
@@ -265,6 +265,14 @@ struct task {
   /* Total time spent running this task */
   ticks total_ticks;

+#ifdef SWIFT_DEBUG_TASKS
+  /*! Time the task was queued. */
+  ticks queued_tic;
+
+  /*! Number of times the task was rejected by locks. */
+  size_t nr_task_locks;
+#endif
+
 #ifdef SWIFT_DEBUG_CHECKS
   /* When was this task last run? */
   integertime_t ti_run;
@@ -190,7 +190,7 @@ full_step = data[0, :]
 # Do we have an MPI file?
 full_step = data[0, :]
-if full_step.size == 13:
+if full_step.size == 15:
     print("# MPI mode")
     mpimode = True
     nranks = int(max(data[:, 0])) + 1
@@ -201,6 +201,8 @@ if full_step.size == 13:
     subtaskcol = 3
     ticcol = 5
     toccol = 6
+    qticcol = 13
+    nunlockscol = 14
     updates = int(full_step[7])
     g_updates = int(full_step[8])
     s_updates = int(full_step[9])
@@ -214,6 +216,8 @@ else:
     subtaskcol = 2
     ticcol = 4
     toccol = 5
+    qticcol = 11
+    nunlockscol = 12
     updates = int(full_step[6])
     g_updates = int(full_step[7])
     s_updates = int(full_step[8])
@@ -271,6 +275,7 @@ for rank in ranks:
     start_t = float(tic_step)
     data[:, ticcol] -= start_t
     data[:, toccol] -= start_t
+    data[:, qticcol] -= start_t
     end_t = (toc_step - start_t) / CPU_CLOCK

     tasks = {}
@@ -284,11 +289,13 @@ for rank in ranks:
         thread = int(data[line, threadscol])
         tic = int(data[line, ticcol]) / CPU_CLOCK
         toc = int(data[line, toccol]) / CPU_CLOCK
+        qtic = int(data[line, qticcol]) / CPU_CLOCK
+        nunlocks = int(data[line, nunlockscol])
         tasktype = int(data[line, taskcol])
         subtype = int(data[line, subtaskcol])
         sid = int(data[line, -1])
-        tasks[thread].append([tic, toc, tasktype, subtype, sid])
+        tasks[thread].append([tic, toc, tasktype, subtype, sid, qtic, nunlocks])

     # Sort by tic and gather used threads.
     threadids = []
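Each task record now carries the queue entry time and the lock-rejection count alongside the run interval, and the statistics below derive the per-task queue wait as tic - qtic. A short sketch of that aggregation, assuming the 7-element records built above:

# Sketch: per-type queue-wait statistics, mirroring the loop that follows.
# Records are [tic, toc, tasktype, subtype, sid, qtic, nunlocks].
def queue_wait_stats(records):
    waits = [r[0] - r[5] for r in records]  # tic - qtic, already in seconds
    return min(waits), max(waits), sum(waits), sum(waits) / len(waits)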
@@ -302,26 +309,51 @@ for rank in ranks:
     print("# Task times:")
     print("# -----------")
     print(
-        "# {0:<17s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s}".format(
-            "type/subtype", "count", "minimum", "maximum", "sum", "mean", "percent"
+        "# {0:<22s}: {1:>7s} {2:>9s} {3:>9s} {4:>9s} {5:>9s} {6:>9s} {7:>9s} {8:>9s} {9:>9s} {10:>9s} {11:>14s} {12:>14s} {13:>14s} {14:>14s} ".format(
+            "type/subtype", "count", "min", "max", "sum", "mean",
+            "percent",
+            "qmin", "qmax", "qsum", "qmean",
+            "unlocks_min", "unlocks_max", "unlocks_sum", "unlocks_mean",
         )
     )

     alltasktimes = {}
+    alltaskqtimes = {}
+    alltaskunlocks = {}
     sidtimes = {}
     for i in threadids:
         tasktimes = {}
+        taskqtimes = {}
+        taskunlocks = {}
         for task in tasks[i]:
             key = TASKTYPES[task[2]] + "/" + SUBTYPES[task[3]]
             dt = task[1] - task[0]
+            dtq = task[0] - task[5]
             if not key in tasktimes:
                 tasktimes[key] = []
             tasktimes[key].append(dt)
+            if not key in taskqtimes:
+                taskqtimes[key] = []
+            taskqtimes[key].append(dtq)
+            if not key in taskunlocks:
+                taskunlocks[key] = []
+            taskunlocks[key].append(task[6])

             if not key in alltasktimes:
                 alltasktimes[key] = []
             alltasktimes[key].append(dt)
+            if not key in alltaskqtimes:
+                alltaskqtimes[key] = []
+            alltaskqtimes[key].append(dtq)
+            if not key in alltaskunlocks:
+                alltaskunlocks[key] = []
+            alltaskunlocks[key].append(task[6])

             my_sid = task[4]
             if my_sid > -1:
                 if not my_sid in sidtimes:
@@ -335,8 +367,14 @@ for rank in ranks:
         taskmin = min(tasktimes[key])
         taskmax = max(tasktimes[key])
         tasksum = sum(tasktimes[key])
+        taskqmin = min(taskqtimes[key])
+        taskqmax = max(taskqtimes[key])
+        taskqsum = sum(taskqtimes[key])
+        taskunlockmin = min(taskunlocks[key])
+        taskunlockmax = max(taskunlocks[key])
+        taskunlocksum = sum(taskunlocks[key])
         print(
-            "{0:24s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format(
+            "{0:24s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f} {7:9.4f} {8:9.4f} {9:9.4f} {10:9.4f} {11:14.4f} {12:14.4f} {13:14.4f} {14:14.4f} ".format(
                 key,
                 len(tasktimes[key]),
                 taskmin,
@@ -344,6 +382,14 @@ for rank in ranks:
                 tasksum,
                 tasksum / len(tasktimes[key]),
                 tasksum / total_t * 100.0,
+                taskqmin,
+                taskqmax,
+                taskqsum,
+                taskqsum / len(tasktimes[key]),
+                taskunlockmin,
+                taskunlockmax,
+                taskunlocksum,
+                taskunlocksum / len(tasktimes[key]),
             )
         )
     print()
@@ -352,20 +398,34 @@ for rank in ranks:
     print('<div id="all"></div>')
     print("# All threads : ")
     for key in sorted(alltasktimes.keys()):
-        taskmin = min(alltasktimes[key])
-        taskmax = max(alltasktimes[key])
-        tasksum = sum(alltasktimes[key])
-        print(
-            "{0:23s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f}".format(
-                key,
-                len(alltasktimes[key]),
-                taskmin,
-                taskmax,
-                tasksum,
-                tasksum / len(alltasktimes[key]),
-                tasksum / (len(threadids) * total_t) * 100.0,
+        taskmin = min(alltasktimes[key])
+        taskmax = max(alltasktimes[key])
+        tasksum = sum(alltasktimes[key])
+        taskqmin = min(alltaskqtimes[key])
+        taskqmax = max(alltaskqtimes[key])
+        taskqsum = sum(alltaskqtimes[key])
+        taskunlockmin = min(alltaskunlocks[key])
+        taskunlockmax = max(alltaskunlocks[key])
+        taskunlocksum = sum(alltaskunlocks[key])
+        print(
+            "{0:24s}: {1:7d} {2:9.4f} {3:9.4f} {4:9.4f} {5:9.4f} {6:9.2f} {7:9.4f} {8:9.4f} {9:9.4f} {10:9.4f} {11:14.4f} {12:14.4f} {13:14.4f} {14:14.4f} ".format(
                key,
+                len(alltasktimes[key]),
+                taskmin,
+                taskmax,
+                tasksum,
+                tasksum / len(alltasktimes[key]),
+                tasksum / total_t * 100.0,
+                taskqmin,
+                taskqmax,
+                taskqsum,
+                taskqsum / len(alltasktimes[key]),
+                taskunlockmin,
+                taskunlockmax,
+                taskunlocksum,
+                taskunlocksum / len(alltasktimes[key]),
             )
         )
     print()

     # For pairs, show stuff sorted by SID
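Note one behavioural change in the all-threads block: the percentage is now divided by total_t alone instead of len(threadids) * total_t, so it expresses time relative to a single thread's window and can exceed 100. A toy comparison of the two normalisations:

# Toy numbers: 4 threads, a 1.0 s window, 2.0 s of this task type summed
# over all threads.
threadids, total_t, tasksum = [0, 1, 2, 3], 1.0, 2.0
print(tasksum / (len(threadids) * total_t) * 100.0)  # old: 50.0
print(tasksum / total_t * 100.0)                     # new: 200.0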
@@ -40,7 +40,7 @@ matplotlib.use('TkAgg')
 import numpy as np
 import matplotlib.backends.backend_tkagg as tkagg
 from matplotlib.figure import Figure
-import Tkinter as tk
+import tkinter as tk
 import matplotlib.collections as collections
 import matplotlib.ticker as plticker
 import pylab as pl
@@ -361,7 +361,7 @@ data = pl.loadtxt(infile)
 # Do we have an MPI file?
 full_step = data[0, :]
-if full_step.size == 13:
+if full_step.size == 15:
     print("# MPI mode")
     mpimode = True
     ranks = list(range(int(max(data[:, 0])) + 1))
@@ -372,6 +372,8 @@ if full_step.size == 13:
     subtaskcol = 3
     ticcol = 5
     toccol = 6
+    qticcol = 13
+    unlockscol = 14
 else:
     print("# non MPI mode")
     mpimode = False
@@ -381,6 +383,8 @@ else:
     subtaskcol = 2
     ticcol = 4
     toccol = 5
+    qticcol = 11
+    unlockscol = 12

 # Get CPU_CLOCK to convert ticks into milliseconds.
 CPU_CLOCK = float(full_step[-1]) / 1000.0
@@ -425,6 +429,7 @@ if data.size == 0:
 start_t = float(tic_step)
 data[:, ticcol] -= start_t
 data[:, toccol] -= start_t
+data[:, qticcol] -= start_t
 end_t = (toc_step - start_t) / CPU_CLOCK

 tasks = {}
@@ -443,8 +448,12 @@ for line in range(num_lines):
     tasks[thread][-1]["subtype"] = subtype
     tic = int(data[line, ticcol]) / CPU_CLOCK
     toc = int(data[line, toccol]) / CPU_CLOCK
+    qtic = int(data[line, qticcol]) / CPU_CLOCK
+    unlocks = int(data[line, unlockscol])
     tasks[thread][-1]["tic"] = tic
     tasks[thread][-1]["toc"] = toc
+    tasks[thread][-1]["qtic"] = qtic
+    tasks[thread][-1]["unlocks"] = unlocks
     if "fof" in tasktype:
         tasks[thread][-1]["colour"] = TASKCOLOURS[tasktype]
     elif ("self" in tasktype) or ("pair" in tasktype) or ("recv" in tasktype) or ("send" in tasktype):
@@ -463,27 +472,37 @@ ax.set_xlim(-delta_t * 0.01 / CPU_CLOCK, delta_t * 1.01 / CPU_CLOCK)
 ax.set_ylim(0.5, nthread + 1.0)

 ltics = []
+lqtics = []
 ltocs = []
 llabels = []
+lunlocks = []
 for i in range(nthread):

     # Collect ranges and colours into arrays. Also indexed lists for lookup tables.
     tictocs = []
+    qtictocs = []
     colours = []
     tics = []
     tocs = []
+    qtics = []
+    unlocks = []
     labels = []
     for task in tasks[i]:
         tictocs.append((task["tic"], task["toc"] - task["tic"]))
+        qtictocs.append((task["qtic"], task["tic"] - task["qtic"]))
         colours.append(task["colour"])
         tics.append(task["tic"])
         tocs.append(task["toc"])
+        qtics.append(task["qtic"])
+        unlocks.append(task["unlocks"])
         labels.append(task["type"] + "/" + task["subtype"])

     # Add to look up tables.
     ltics.append(tics)
     ltocs.append(tocs)
+    lqtics.append(qtics)
+    lunlocks.append(unlocks)
     llabels.append(labels)

     # Now plot.
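The (qtic, tic - qtic) pairs collected in qtictocs are already in the (xmin, xwidth) form that matplotlib's broken_barh expects, so queue waits can be drawn as bars leading into each task's execution bar. A self-contained sketch of that idea, with made-up intervals:

import matplotlib
matplotlib.use("Agg")  # headless backend, just for the sketch
import matplotlib.pyplot as plt

# Made-up (start, width) intervals for one thread, in milliseconds.
qtictocs = [(0.0, 2.0), (5.0, 1.0)]  # time spent waiting in the queue
tictocs = [(2.0, 3.0), (6.0, 2.5)]   # time spent actually running

fig, ax = plt.subplots()
ax.broken_barh(qtictocs, (0.55, 0.9), facecolors="lightgrey")  # waits behind
ax.broken_barh(tictocs, (0.55, 0.9), facecolors="tab:blue")    # runs on top
ax.set_xlabel("Wall-clock time [ms]")
ax.set_yticks([1.0])
ax.set_yticklabels(["Thread 0"])
fig.savefig("queue_wait_sketch.png")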
@@ -502,7 +521,8 @@ ax.yaxis.set_major_locator(loc)
 ax.grid(True, which="major", axis="y", linestyle="-")


 class Container:
-    def __init__(self, window, figure, motion, nthread, ltics, ltocs, llabels):
+    def __init__(self, window, figure, motion, nthread, ltics, ltocs,
+                 llabels, lqtics, lunlocks):
         self.window = window
         self.figure = figure
         self.motion = motion
@@ -510,6 +530,8 @@ class Container:
         self.ltics = ltics
         self.ltocs = ltocs
         self.llabels = llabels
+        self.lqtics = lqtics
+        self.lunlocks = lunlocks

     def plot(self):
         canvas = tkagg.FigureCanvasTkAgg(self.figure, master=self.window)
@@ -517,7 +539,7 @@
         wcanvas.config(width=1000, height=300)
         wcanvas.pack(side=tk.TOP, expand=True, fill=tk.BOTH)
-        toolbar = tkagg.NavigationToolbar2TkAgg(canvas, self.window)
+        toolbar = tkagg.NavigationToolbar2Tk(canvas, self.window)
         toolbar.update()
         self.output = tk.StringVar()
         label = tk.Label(self.window, textvariable=self.output, bg="white", fg="red", bd=2)
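The toolbar rename tracks matplotlib's API: NavigationToolbar2TkAgg was replaced by NavigationToolbar2Tk (matplotlib >= 2.2), alongside the Python 3 Tkinter -> tkinter module rename above. A minimal embedding sketch using the current names:

import tkinter as tk
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk

root = tk.Tk()
canvas = FigureCanvasTkAgg(Figure(), master=root)
canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)
toolbar = NavigationToolbar2Tk(canvas, root)  # toolbar packs itself into root
toolbar.update()
root.mainloop()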
@@ -540,15 +562,20 @@ class Container:
                 tics = self.ltics[thread]
                 tocs = self.ltocs[thread]
                 labels = self.llabels[thread]
+                qtics = self.lqtics[thread]
+                unlocks = self.lunlocks[thread]
                 for i in range(len(tics)):
                     if event.xdata > tics[i] and event.xdata < tocs[i]:
                         tic = "{0:.3f}".format(tics[i])
                         toc = "{0:.3f}".format(tocs[i])
-                        outstr = "task = " + labels[i] + ", tic/toc = " + tic + " / " + toc
+                        qtic = "{0:.3f}".format(qtics[i])
+                        unlock = "{0:d}".format(unlocks[i])
+                        outstr = "task = " + labels[i] + ", tic/toc = " + tic + " / " + toc + " qtic = " + qtic + " unlocks = " + unlock
                         self.output.set(outstr)
                         break
             except TypeError:
                 # Ignore out of bounds.
+                print("out of bounds")
                 pass
@@ -556,6 +583,7 @@ class Container:
 window = tk.Tk()
 window.protocol("WM_DELETE_WINDOW", window.quit)
-container = Container(window, fig, args.motion, nthread, ltics, ltocs, llabels)
+container = Container(window, fig, args.motion, nthread, ltics, ltocs,
+                      llabels, lqtics, lunlocks)
 container.plot()

 window.mainloop()