From d40765a699607ffb1c85ca3480a58bba3e0ac9f4 Mon Sep 17 00:00:00 2001
From: Pedro Gonnet <pedro.gonnet@durham.ac.uk>
Date: Sun, 2 Dec 2012 18:01:05 +0000
Subject: [PATCH] engine_ranktasks now actually stores the ranked order, no
 real need to re-sort tasks in queues. added engine_prepare, which re-sorts
 the particles into cells and rebuilds the tasks lists and queues if
 necessary. removed sorting along dt for now.

Former-commit-id: 799188eb9e75ce8cdbee53fba2efa69b060ba7cf
---
 src/cell.c   |   2 -
 src/engine.c |  55 +++++++++---
 src/engine.h |   1 +
 src/runner.h |   4 +-
 src/space.c  | 236 +++++++++++++++++++++++++++++++++++++++++++++++----
 src/space.h  |   4 +-
 6 files changed, 267 insertions(+), 35 deletions(-)

diff --git a/src/cell.c b/src/cell.c
index 0092644a5a..ba1a631037 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -257,8 +257,6 @@ void cell_split ( struct cell *c  ) {
     /* Store the counts and offsets. */
     for ( k = 0 ; k < 8 ; k++ ) {
         c->progeny[k]->count = right[k] - left[k] + 1;
-        if ( c->progeny[k]->count < 0 )
-            abort();
         c->progeny[k]->parts = &c->parts[ left[k] ];
         }
         
diff --git a/src/engine.c b/src/engine.c
index c9433f4d11..b5f3794840 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -51,14 +51,50 @@
 #define cell_getid( cdim , i , j , k ) ( (int)(k) + (cdim)[2]*( (int)(j) + (cdim)[1]*(int)(i) ) )
 
 
+/**
+ * @brief Prepare the #engine by re-building the cells and tasks.
+ *
+ * If space_rebuild() reports any changes, the tasks are re-ranked and
+ * dealt out again over the queues; otherwise the queues are left as is.
+ *
+ * @param e The #engine to prepare.
+ * @param force Flag passed on to space_rebuild() to force a re-build.
+ */
+ 
+void engine_prepare ( struct engine *e , int force ) {
+
+    struct space *s = e->s;
+    int ind, tid, qid;
+    int nr_changes = space_rebuild( s , force );
+    printf( "engine_prepare: space_rebuild with %i changes.\n" , nr_changes );
+    
+    /* Nothing changed: leave the task ranks and the queues untouched. */
+    if ( nr_changes == 0 )
+        return;
+    
+    /* Rank the tasks in topological order. */
+    engine_ranktasks( e );
+    
+    /* Empty every queue before re-filling it. */
+    for ( qid = 0 ; qid < e->nr_queues ; qid++ )
+        e->queues[qid].count = 0;
+        
+    /* Deal the ranked tasks out round-robin, skipping tasks of type none. */
+    for ( ind = 0 ; ind < s->nr_tasks ; ind++ ) {
+        tid = s->tasks_ind[ind];
+        if ( s->tasks[tid].type == task_type_none )
+            continue;
+        qid = ind % e->nr_queues;
+        e->queues[qid].tid[ e->queues[qid].count ] = tid;
+        e->queues[qid].count += 1;
+        }
+    }
+
+
 /** 
  * @brief Sort the tasks in topological order over all queues.
  *
  * @param e The #engine.
- *
- * TODO: Return the indices tid as these are the tasks sorted according
- * to their ranks. They can then be dropped into the queues in order
- * of these indices.
  */
  
 void engine_ranktasks ( struct engine *e ) {
@@ -66,7 +102,7 @@ void engine_ranktasks ( struct engine *e ) {
     int i, j = 0, k, temp, left = 0, rank;
     struct task *t;
     struct space *s = e->s;
-    int *tid;
+    int *tid = s->tasks_ind;
 
     /* Run throught the tasks and get all the waits right. */
     for ( k = 0 ; k < s->nr_tasks ; k++ ) {
@@ -74,12 +110,6 @@ void engine_ranktasks ( struct engine *e ) {
             s->tasks[k].unlock_tasks[j]->wait += 1;
         }
         
-    /* Allocate and init the task-ID array. */
-    if ( ( tid = (int *)malloc( sizeof(int) * s->nr_tasks ) ) == NULL )
-        error( "Failed to allocate temporary tid array." );
-    for ( k = 0 ; k < s->nr_tasks ; k++ )
-        tid[k] = k;
-        
     /* Main loop. */
     for ( rank = 0 ; left < s->nr_tasks ; rank++ ) {
         
@@ -106,9 +136,6 @@ void engine_ranktasks ( struct engine *e ) {
             
         }
         
-    /* Release the temporary array. */
-    free(tid);
-    
     }
 
 
diff --git a/src/engine.h b/src/engine.h
index c2ca98abfe..e7ad72d52c 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -61,5 +61,6 @@ struct engine {
 /* Function prototypes. */
 void engine_barrier( struct engine *e );
 void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy );
+void engine_prepare ( struct engine *e , int force );
 void engine_ranktasks ( struct engine *e );
 void engine_run ( struct engine *e , int sort_queues );
diff --git a/src/runner.h b/src/runner.h
index 4b21648853..99b8f5cdd3 100644
--- a/src/runner.h
+++ b/src/runner.h
@@ -88,8 +88,8 @@ extern int runner_counter[ runner_counter_count ];
 
 /* Histogram functions. */
 #define runner_hist_a 1.0
-#define runner_hist_b 1000.0
-#define runner_hist_N 100
+#define runner_hist_b 100.0
+#define runner_hist_N 99
 long long int runner_hist_bins[ runner_hist_N ];
 #define runner_hist_hit( x ) __sync_add_and_fetch( &runner_hist_bins[ (int)fmax( 0.0 , fmin( runner_hist_N-1 , ((x) - runner_hist_a) / (runner_hist_b - runner_hist_a) * runner_hist_N ) ) ] , 1 )
 
diff --git a/src/space.c b/src/space.c
index b43203c97d..6a189ce148 100644
--- a/src/space.c
+++ b/src/space.c
@@ -76,6 +76,209 @@ const int sortlistID[27] = {
     };
     
     
+/**
+ * @brief Recursively dismantle a cell tree.
+ *
+ * @param s The #space handed to space_recycle() for each cell given back.
+ * @param c The #cell whose progeny (and their progeny) are recycled.
+ */
+ 
+void space_rebuild_recycle ( struct space *s , struct cell *c ) {
+    int pid, nr_progeny = ( c->split ) ? 8 : 0;
+    struct cell *child;
+    /* Un-split cells are left alone; otherwise recycle sub-trees first. */
+    for ( pid = 0 ; pid < nr_progeny ; pid++ )
+        if ( ( child = c->progeny[pid] ) != NULL ) {
+            space_rebuild_recycle( s , child );
+            space_recycle( s , child );
+            c->progeny[pid] = NULL;
+            }
+    }
+
+/**
+ * @brief Recursively rebuild a cell tree.
+ *
+ * @param s The #space from which cells are taken and to which they are recycled.
+ * @param c The #cell whose split state and progeny are checked.
+ *
+ * @return The number of changes made to the cell tree at and below c.
+ */
+ 
+int space_rebuild_recurse ( struct space *s , struct cell *c ) {
+    
+    int k, count, changes = 0, wasmt[8];
+    float h, h_limit, h_max = 0.0f;
+    struct cell *temp;
+    
+    /* If the cell is already split, check that the split is still ok. */
+    if ( c->split ) {
+        /* Check the depth. */
+        if ( c->depth > s->maxdepth )
+            s->maxdepth = c->depth;
+
+        /* Set the minimum cutoff. */
+        h_limit = fmin( c->h[0] , fmin( c->h[1] , c->h[2] ) ) / 2;
+
+        /* Count the particles below that. */
+        for ( count = 0 , k = 0 ; k < c->count ; k++ ) {
+            h = c->parts[k].h;
+            if ( h <= h_limit )
+                count += 1;
+            if ( h > h_max )
+                h_max = h;
+            }
+        c->h_max = h_max;
+            
+        /* Un-split? */
+        if ( count < c->count*space_splitratio || c->count < space_splitsize ) {
+            /* Get rid of the progeny and re-set the split flag. */
+            space_rebuild_recycle( s , c );
+            c->split = 0;
+            
+            /* The tree below c changed, count it so that tasks get re-built. */
+            changes += 1;
+            }
+        
+        /* Otherwise, recurse on the kids. */
+        else {
+            /* Populate all progeny. */
+            for ( k = 0 ; k < 8 ; k++ )
+                if ( ( wasmt[k] = ( c->progeny[k] == NULL ) ) ) {
+                    temp = space_getcell( s );
+                    temp->count = 0;
+                    temp->loc[0] = c->loc[0];
+                    temp->loc[1] = c->loc[1];
+                    temp->loc[2] = c->loc[2];
+                    temp->h[0] = c->h[0]/2;
+                    temp->h[1] = c->h[1]/2;
+                    temp->h[2] = c->h[2]/2;
+                    if ( k & 4 )
+                        temp->loc[0] += temp->h[0];
+                    if ( k & 2 )
+                        temp->loc[1] += temp->h[1];
+                    if ( k & 1 )
+                        temp->loc[2] += temp->h[2];
+                    temp->depth = c->depth + 1;
+                    temp->split = 0;
+                    temp->h_max = 0.0;
+                    temp->parent = c;
+                    c->progeny[k] = temp;
+                    }
+        
+            /* Make sure each part is in its place. */
+            cell_split( c );
+            
+            /* Remove empty progeny. */
+            for ( k = 0 ; k < 8 ; k++ )
+                if ( c->progeny[k]->count == 0 ) {
+                    changes += !wasmt[k];
+                    space_recycle( s , c->progeny[k] );
+                    c->progeny[k] = NULL;
+                    }
+                else
+                    changes += wasmt[k];
+        
+            /* Recurse. */
+            for ( k = 0 ; k < 8 ; k++ )
+                if ( c->progeny[k] != NULL )
+                    changes += space_rebuild_recurse( s , c->progeny[k] );
+                    
+            }
+        }
+        
+    /* Otherwise, try to split it anyway. */
+    else {
+        space_split( s , c );
+        changes += c->split;
+        }
+        
+    /* Return the grand total. */
+    return changes;
+    
+    }
+
+/**
+ * @brief Re-build the cells as well as the tasks.
+ *
+ * @param s The #space in which to update the cells.
+ * @param force Flag to force re-building the cells and tasks.
+ *
+ * @return The number of changes made, i.e. zero if nothing was re-built.
+ */
+ 
+int space_rebuild ( struct space *s , int force ) {
+
+    float h_max = 0.0f;
+    int i, j, k, cdim[3];
+    struct cell *c;
+    int changes = 0;
+    
+    /* Run through the parts and get the current h_max. */
+    for ( k = 0 ; k < s->nr_parts ; k++ )
+        if ( s->parts[k].h > h_max )
+            h_max = s->parts[k].h;
+    if ( h_max <= 0.0f )
+        error( "Cannot size the cells, h_max is not positive." );
+    
+    /* Get the new putative cell dimensions, at least one cell per axis. */
+    for ( k = 0 ; k < 3 ; k++ )
+        if ( ( cdim[k] = floor( s->dim[k] / h_max ) ) < 1 )
+            error( "Cannot size the cells, h_max exceeds the space." );
+        
+    /* Do we need to re-build the upper-level cells? */
+    if ( force || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2] ) {
+        /* Free the old cells, if they were allocated. */
+        if ( s->cells != NULL ) {
+            for ( k = 0 ; k < s->nr_cells ; k++ )
+                space_rebuild_recycle( s , &s->cells[k] );
+            free( s->cells );
+            s->maxdepth = 0;
+            }
+            
+        /* Set the new cell dimensions. */
+        for ( k = 0 ; k < 3 ; k++ ) {
+            s->cdim[k] = cdim[k];
+            s->h[k] = s->dim[k] / cdim[k];
+            s->ih[k] = 1.0 / s->h[k];
+            }
+    
+        /* Allocate the highest level of cells. */
+        s->nr_cells = cdim[0] * cdim[1] * cdim[2];
+        if ( posix_memalign( (void *)&s->cells , 64 , s->nr_cells * sizeof(struct cell) ) != 0 )
+            error( "Failed to allocate cells." );
+        bzero( s->cells , s->nr_cells * sizeof(struct cell) );
+        for ( k = 0 ; k < s->nr_cells ; k++ )
+            if ( lock_init( &s->cells[k].lock ) != 0 )
+                error( "Failed to init spinlock." );
+
+        /* Set the cell location and sizes. NOTE(review): the zeroed cells
+           get no parts or count assigned in this function, confirm where. */
+        for ( i = 0 ; i < cdim[0] ; i++ )
+            for ( j = 0 ; j < cdim[1] ; j++ )
+                for ( k = 0 ; k < cdim[2] ; k++ ) {
+                    c = &s->cells[ cell_getid( cdim , i , j , k ) ];
+                    c->loc[0] = i*s->h[0]; c->loc[1] = j*s->h[1]; c->loc[2] = k*s->h[2];
+                    c->h[0] = s->h[0]; c->h[1] = s->h[1]; c->h[2] = s->h[2];
+                    c->depth = 0;
+                    }
+                    
+        /* There were massive changes. */
+        changes = 1;
+        } /* re-build upper-level cells? */
+        
+    /* At this point, we have the upper-level cells, old or new. Now make
+       sure that the parts in each cell are ok. */
+    for ( k = 0 ; k < s->nr_cells ; k++ )
+        changes += space_rebuild_recurse( s , &s->cells[k] );
+        
+    /* Now that we have the cell structure, re-build the tasks. */
+    if ( changes )
+        space_maketasks( s , 1 );
+    /* Return the number of changes. */
+    return changes;
+    }
+
+
 /**
  * @brief Sort the particles according to the given indices.
  *
@@ -855,7 +1058,6 @@ void space_maketasks ( struct space *s , int do_sort ) {
 
     int i, j, k, ii, jj, kk, iii, jjj, kkk, cid, cjd;
     int *cdim = s->cdim;
-    int nr_tasks_old = s->nr_tasks;
     struct task *t , *t2;
     int pts[7][8] = { { -1 , 12 , 10 , 9 , 4 , 3 , 1 , 0 } ,
                       { -1 , -1 , 11 , 10 , 5 , 4 , 2 , 1 } ,
@@ -881,14 +1083,14 @@ void space_maketasks ( struct space *s , int do_sort ) {
         
             if ( do_sort ) {
                 if ( c->count < 1000 ) {
-                    sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x3fff , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
+                    sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x1fff , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
                     for ( k = 0 ; k < 13 ; k++ )
                         c->sorts[k] = sort[0];
                     nr_sort = 1;
                     }
                 else if ( c->count < 5000 ) {
                     sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x7f , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
-                    sort[1] = space_addtask( s , task_type_sort , task_subtype_none , 0x3f80 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
+                    sort[1] = space_addtask( s , task_type_sort , task_subtype_none , 0x1f80 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
                     for ( k = 0 ; k < 7 ; k++ )
                         c->sorts[k] = sort[0];
                     for ( k = 7 ; k < 14 ; k++ )
@@ -902,7 +1104,7 @@ void space_maketasks ( struct space *s , int do_sort ) {
                     c->sorts[6] = c->sorts[7] = sort[3] = space_addtask( s , task_type_sort , task_subtype_none , 0x40 + 0x80 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
                     c->sorts[8] = c->sorts[9] = sort[4] = space_addtask( s , task_type_sort , task_subtype_none , 0x100 + 0x200 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
                     c->sorts[10] = c->sorts[11] = sort[5] = space_addtask( s , task_type_sort , task_subtype_none , 0x400 + 0x800 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
-                    c->sorts[12] = c->sorts[13] = sort[6] = space_addtask( s , task_type_sort , task_subtype_none , 0x1000 + 0x2000 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
+                    c->sorts[12] = c->sorts[13] = sort[6] = space_addtask( s , task_type_sort , task_subtype_none , 0x1000 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 );
                     nr_sort = 7;
                     }
                 }
@@ -957,9 +1159,17 @@ void space_maketasks ( struct space *s , int do_sort ) {
         }
         
     /* Allocate the task-list, if needed. */
-    if ( s->tasks == NULL )
-        if ( posix_memalign( (void *)&s->tasks , 64 , sizeof(struct task) * s->tot_cells * 30 ) != 0 )
+    if ( s->tasks == NULL || s->tasks_size < s->tot_cells * 30 ) {
+        if ( s->tasks != NULL )
+            free( s->tasks );
+        if ( s->tasks_ind != NULL )
+            free( s->tasks_ind );
+        s->tasks_size = s->tot_cells * 30;
+        if ( posix_memalign( (void *)&s->tasks , 64 , sizeof(struct task) * s->tasks_size ) != 0 )
             error( "Failed to allocate task list." );
+        if ( ( s->tasks_ind = (int *)malloc( sizeof(int) * s->tasks_size ) ) == NULL )
+            error( "Failed to allocate task indices." );
+        }
     s->nr_tasks = 0;
     
     /* Loop over the cells and get their sub-tasks. */
@@ -1097,15 +1307,9 @@ void space_maketasks ( struct space *s , int do_sort ) {
             
         }
         
-    /* Did we already create indices? */
-    if ( s->tasks_ind == NULL )
-        if ( ( s->tasks_ind = (int *)malloc( sizeof(int) * s->nr_tasks ) ) == NULL )
-            error( "Failed to allocate task indices." );
-    
-    /* Did the number of tasks change, i.e. do we have to re-index? */
-    if ( nr_tasks_old != s->nr_tasks )
-        for ( k = 0 ; k < s->nr_tasks ; k++ )
-            s->tasks_ind[k] = k;
+    /* Re-set the indices. */
+    for ( k = 0 ; k < s->nr_tasks ; k++ )
+        s->tasks_ind[k] = k;
             
     /* Count the number of each task type. */
     for ( k = 0 ; k < task_type_count ; k++ )
@@ -1196,7 +1400,7 @@ void space_split ( struct space *s , struct cell *c ) {
                 space_recycle( s , c->progeny[k] );
                 c->progeny[k] = NULL;
                 }
-            
+                
         }
         
     /* Otherwise, set the progeny to null. */
diff --git a/src/space.h b/src/space.h
index 32219c8a4f..5586d81f42 100644
--- a/src/space.h
+++ b/src/space.h
@@ -87,7 +87,7 @@ struct space {
     
     /* The list of tasks. */
     struct task *tasks;
-    int nr_tasks, next_task;
+    int nr_tasks, next_task, tasks_size;
     int *tasks_ind;
     lock_type task_lock;
     
@@ -102,7 +102,9 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N ,
 void space_maketasks ( struct space *s , int do_sort );
 void space_map_cells ( struct space *s , int full , void (*fun)( struct cell *c , void *data ) , void *data );
 void space_map_parts ( struct space *s , void (*fun)( struct part *p , struct cell *c , void *data ) , void *data );
+int space_rebuild ( struct space *s , int force );
 void space_recycle ( struct space *s , struct cell *c );
+void space_split ( struct space *s , struct cell *c );
 
 
 
-- 
GitLab