From 4bae5e243e6b495b3b98661433d30377f676181a Mon Sep 17 00:00:00 2001
From: Aidan Chalk <d74ksy@cosma-e.cosma>
Date: Thu, 10 Dec 2015 13:54:30 +0000
Subject: [PATCH] Potential massive speed increase at a small memory cost. To
 test.

---
 src/qsched.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/src/qsched.c b/src/qsched.c
index 1cdf53a..dbf7b2c 100644
--- a/src/qsched.c
+++ b/src/qsched.c
@@ -2303,6 +2303,24 @@ if(wait_init == NULL)
     error("Failed to allocate wait_init array.");
 qsched_partition_compute_waitinit(s, wait_init);
 
+/* Compute all of the parents. */
+// Make one large array to store all the parents.
+int offsets[s->task_ranks[s->count_ranks] + 1];
+offsets[0] = 0;
+/* Set the offset of each task's first parent slot (running sum of wait_init). */
+for (int i = 0; i < s->task_ranks[s->count_ranks]; i++)
+  offsets[i + 1] = offsets[i] + wait_init[i];
+int all_parents[offsets[s->task_ranks[s->count_ranks]]];
+for (int i = 0; i < s->task_ranks[s->count_ranks]; i++)
+  for (int j = 0; j < s->tasks[gettaskindex(tid[i],s)].nr_unlocks; j++)
+    all_parents[offsets[gettaskindex(s->tasks[gettaskindex(tid[i],s)].unlocks[j], s)]++] = i;
+
+// Reset the offsets, which were advanced while filling all_parents above.
+offsets[0] = 0;
+for (int i = 0; i < s->task_ranks[s->count_ranks]; i++)
+  offsets[i + 1] = offsets[i] + wait_init[i];
+
+
 /* The send task data is as follows:
  * data[0] = Sending Node.
  * data[1] = Receiving Node.
@@ -2409,7 +2427,7 @@ for(i = 0; i < count; i++)
     /*Find the parents. */
     num_parents = 0;
     int max = wait_init[gettaskindex(t->id, s)];
-    for(j = i-1; j >= 0 && num_parents < max; j--)
+/*    for(j = i-1; j >= 0 && num_parents < max; j--)
     {
         const struct task *t2 = &s->tasks[gettaskindex(tid[j], s)];
         const int nr_un = t2->nr_unlocks;
@@ -2417,7 +2435,7 @@ for(i = 0; i < count; i++)
         {
             if(t2->unlocks[k] == ttid)
             {
-                /* If parents array is full we need to extend it.*/
+                 //If parents array is full we need to extend it.
                 if(num_parents == size_parents -1)
                 {
                     size_parents *=2;
@@ -2436,11 +2454,11 @@ for(i = 0; i < count; i++)
                 parents[num_parents++] = j;//We're storing the index in the topological order for parents -- maybe should just be ID?
                 break;
             }
-        }/* k*/
-    }/* j */
+        } //k
+    } //j */
 
     /* We have all the parents now. */
-
+    int *parents = &all_parents[offsets[gettaskindex(ttid,s)]];
     /* Loop over the locked resources */
     for(j = 0; j < t->nr_locks; j++)
     {
-- 
GitLab