diff --git a/src/Makefile.am b/src/Makefile.am
index 4ab052fabb82f0a7466a564e4c06ffed9c6b6aba..f1376a6258180bd0a72306a60af0beece6e097d8 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -32,9 +32,9 @@ AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0
 # Build the libswiftsim library
 lib_LTLIBRARIES = libswiftsim.la
 libswiftsim_la_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
-    io.c timers.c debug.c
+    io.c timers.c debug.c scheduler.c
 
 # List required headers
 include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
-    engine.h swift.h io.h timers.h debug.h
+    engine.h swift.h io.h timers.h debug.h scheduler.h
 
diff --git a/src/engine.c b/src/engine.c
index cc93613fe32bb7edeef010446a5882410b7c9d9f..1e4afdb89bfa16b967b6d9fddf35ce8803f1cf0d 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -44,6 +44,7 @@
 #include "cell.h"
 #include "space.h"
 #include "queue.h"
+#include "scheduler.h"
 #include "engine.h"
 #include "runner.h"
 #include "runner_iact.h"
@@ -54,67 +55,304 @@
 
 
 /**
- * @brief Prepare the #engine by re-building the cells and tasks.
+ * @brief Fill the #engine's scheduler with the tasks for its #space.
  *
- * @param e The #engine to prepare.
+ * @param e The #engine whose scheduler is reset and re-filled with tasks.
  */
  
-void engine_prepare ( struct engine *e ) {
+void engine_maketasks ( struct engine *e ) {
 
-    int j, k, qid, rebuild;
     struct space *s = e->s;
-    struct queue *q;
+    struct scheduler *sched = &e->sched;
+    int i, j, k, ii, jj, kk, iii, jjj, kkk, cid, cjd, sid;
+    int *cdim = s->cdim;
+    struct task *t, *t2;
+    struct cell *ci, *cj;
+
+    /* Re-set the scheduler. */
+    scheduler_reset( sched , s->tot_cells * space_maxtaskspercell );
+    
+    /* Run through the highest level of cells and add pairs. */
+    for ( i = 0 ; i < cdim[0] ; i++ )
+        for ( j = 0 ; j < cdim[1] ; j++ )
+            for ( k = 0 ; k < cdim[2] ; k++ ) {
+                cid = cell_getid( cdim , i , j , k );
+                if ( s->cells[cid].count == 0 )
+                    continue;
+                ci = &s->cells[cid];
+                if ( ci->count == 0 )
+                    continue;
+                scheduler_addtask( sched , task_type_self , task_subtype_density , 0 , 0 , ci , NULL , 0 );
+                for ( ii = -1 ; ii < 2 ; ii++ ) {
+                    iii = i + ii;
+                    if ( !s->periodic && ( iii < 0 || iii >= cdim[0] ) )
+                        continue;
+                    iii = ( iii + cdim[0] ) % cdim[0];
+                    for ( jj = -1 ; jj < 2 ; jj++ ) {
+                        jjj = j + jj;
+                        if ( !s->periodic && ( jjj < 0 || jjj >= cdim[1] ) )
+                            continue;
+                        jjj = ( jjj + cdim[1] ) % cdim[1];
+                        for ( kk = -1 ; kk < 2 ; kk++ ) {
+                            kkk = k + kk;
+                            if ( !s->periodic && ( kkk < 0 || kkk >= cdim[2] ) )
+                                continue;
+                            kkk = ( kkk + cdim[2] ) % cdim[2];
+                            cjd = cell_getid( cdim , iii , jjj , kkk );
+                            cj = &s->cells[cjd];
+                            if ( cid >= cjd || cj->count == 0 )
+                                continue;
+                            sid = sortlistID[ (kk+1) + 3*( (jj+1) + 3*(ii+1) ) ];
+                            t = scheduler_addtask( sched , task_type_pair , task_subtype_density , sid , 0 , ci , cj , 1 );
+                            }
+                        }
+                    }
+                }
+
+    /* Split the tasks. */
+    scheduler_splittasks( sched );
+    
+    /* Count the number of tasks associated with each cell and
+       store the density tasks in each cell, and make each sort
+       depend on the sorts of its progeny. */
+    // #pragma omp parallel for private(t,j)
+    for ( k = 0 ; k < sched->nr_tasks ; k++ ) {
+        t = &sched->tasks[k];
+        if ( t->skip )
+            continue;
+        if ( t->type == task_type_sort && t->ci->split )
+            for ( j = 0 ; j < 8 ; j++ ) {
+                if ( t->ci->progeny[j] != NULL ) {
+                    if ( t->ci->progeny[j]->sorts == NULL )
+                        t->ci->progeny[j]->sorts = scheduler_addtask( sched , task_type_sort , task_subtype_none , t->flags , 0 , t->ci->progeny[j] , NULL , 0 );
+                    t->ci->progeny[j]->sorts->skip = 0;
+                    task_addunlock( t->ci->progeny[j]->sorts , t );
+                    }
+                }
+        if ( t->type == task_type_self ) {
+            atomic_inc( &t->ci->nr_tasks );
+            if ( t->subtype == task_subtype_density ) {
+                t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t;
+                }
+            }
+        else if ( t->type == task_type_pair ) {
+            atomic_inc( &t->ci->nr_tasks );
+            atomic_inc( &t->cj->nr_tasks );
+            if ( t->subtype == task_subtype_density ) {
+                t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t;
+                t->cj->density[ atomic_inc( &t->cj->nr_density ) ] = t;
+                }
+            }
+        else if ( t->type == task_type_sub ) {
+            atomic_inc( &t->ci->nr_tasks );
+            if ( t->cj != NULL )
+                atomic_inc( &t->cj->nr_tasks );
+            if ( t->subtype == task_subtype_density ) {
+                t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t;
+                if ( t->cj != NULL )
+                    t->cj->density[ atomic_inc( &t->cj->nr_density ) ] = t;
+                }
+            }
+        }
+        
+    /* Append a ghost task to each cell. */
+    space_map_cells_pre( s , 1 , &scheduler_map_mkghosts , sched );
+    
+    /* Run through the tasks and make force tasks for each density task.
+       Each force task depends on the cell ghosts and unlocks the kick2 task
+       of its super-cell. */
+    kk = sched->nr_tasks;
+    // #pragma omp parallel for private(t,t2)
+    for ( k = 0 ; k < kk ; k++ ) {
+    
+        /* Get a pointer to the task. */
+        t = &sched->tasks[k];
+        
+        /* Skip? */
+        if ( t->skip )
+            continue;
+        
+        /* Self-interaction? */
+        if ( t->type == task_type_self && t->subtype == task_subtype_density ) {
+            task_addunlock( t , t->ci->super->ghost );
+            t2 = scheduler_addtask( sched , task_type_self , task_subtype_force , 0 , 0 , t->ci , NULL , 0 );
+            task_addunlock( t->ci->ghost , t2 );
+            task_addunlock( t2 , t->ci->super->kick2 );
+            }
+            
+        /* Otherwise, pair interaction? */
+        else if ( t->type == task_type_pair && t->subtype == task_subtype_density ) {
+            task_addunlock( t , t->ci->super->ghost );
+            if ( t->ci->super != t->cj->super )
+                task_addunlock( t , t->cj->super->ghost );
+            t2 = scheduler_addtask( sched , task_type_pair , task_subtype_force , 0 , 0 , t->ci , t->cj , 0 );
+            task_addunlock( t->ci->ghost , t2 );
+            task_addunlock( t->cj->ghost , t2 );
+            task_addunlock( t2 , t->ci->super->kick2 );
+            if ( t->ci->super != t->cj->super )
+                task_addunlock( t2 , t->cj->super->kick2 );
+            }
+    
+        /* Otherwise, sub interaction? */
+        else if ( t->type == task_type_sub && t->subtype == task_subtype_density ) {
+            task_addunlock( t , t->ci->super->ghost );
+            if ( t->cj != NULL && t->ci->super != t->cj->super )
+                task_addunlock( t , t->cj->super->ghost );
+            t2 = scheduler_addtask( sched , task_type_sub , task_subtype_force , t->flags , 0 , t->ci , t->cj , 0 );
+            task_addunlock( t->ci->ghost , t2 );
+            if ( t->cj != NULL )
+                task_addunlock( t->cj->ghost , t2 );
+            task_addunlock( t2 , t->ci->super->kick2 );
+            if ( t->cj != NULL && t->ci->super != t->cj->super )
+                task_addunlock( t2 , t->cj->super->kick2 );
+            }
+            
+        }
+        
+    /* Rank the tasks. */
+    scheduler_ranktasks( sched );
+            
+    /* Count the number of each task type. */
+    int counts[ task_type_count+1 ];
+    for ( k = 0 ; k <= task_type_count ; k++ )
+        counts[k] = 0;
+    for ( k = 0 ; k < sched->nr_tasks ; k++ )
+        if ( !sched->tasks[k].skip )
+            counts[ (int)sched->tasks[k].type ] += 1;
+        else
+            counts[ task_type_count ] += 1;
+    printf( "engine_maketasks: task counts are [ %s=%i" , taskID_names[0] , counts[0] );
+    for ( k = 1 ; k < task_type_count ; k++ )
+        printf( " %s=%i" , taskID_names[k] , counts[k] );
+    printf( " skipped=%i ]\n" , counts[ task_type_count ] ); fflush(stdout); 
+    
+    }
+    
     
-    TIMER_TIC
 
-    /* Rebuild the space. */
-    // tic = getticks();
-    rebuild = ( space_prepare( e->s ) || e->step == 0 );
-    // printf( "engine_prepare: space_prepare took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
+/**
+ * @brief Mark tasks to be skipped and set the sort flags accordingly.
+ * @param e The #engine whose scheduler tasks are examined.
+ * @return 1 if the space has to be rebuilt, 0 otherwise.
+ */
+ 
+int engine_marktasks ( struct engine *e ) {
+
+    struct scheduler *s = &e->sched;
+    int k, nr_tasks = s->nr_tasks, *ind = s->tasks_ind;
+    struct task *t, *tasks = s->tasks;
+    float dt_step = e->dt_step;
+    struct cell *ci, *cj;
     
-    /* The queues only need to be re-built if we have variable time-steps
-       or the space was rebuilt. */
-    if ( !(e->policy & engine_policy_fixdt) || rebuild ) {
+    /* Run through the tasks and mark as skip or not. */
+    for ( k = 0 ; k < nr_tasks ; k++ ) {
     
-        // tic = getticks();
-        /* Init the queues (round-robin). */
-        for ( qid = 0 ; qid < e->nr_queues ; qid++ )
-            queue_init( &e->queues[qid] , s->nr_tasks , s->tasks );
-
-        /* Fill the queues (round-robin). */
-        for ( qid = 0 , k = 0 ; k < s->nr_tasks ; k++ ) {
-            if ( s->tasks[ s->tasks_ind[k] ].skip )
-                continue;
-            q = &e->queues[qid];
-            qid = ( qid + 1 ) % e->nr_queues;
-            q->tid[ q->count ] = s->tasks_ind[k];
-            q->count += 1;
+        /* Get a handle on the kth task. */
+        t = &tasks[ ind[k] ];
+        
+        /* Sort-task? Note that due to the task ranking, the sorts
+           will all come before the pairs and/or subs. */
+        if ( t->type == task_type_sort ) {
+        
+            /* Re-set the flags. */
+            t->flags = 0;
+            t->skip = 1;
+        
             }
-        // printf( "engine_prepare: re-filling queues took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
         
-        }
+        /* Single-cell task? */
+        else if ( t->type == task_type_self ||
+                  t->type == task_type_ghost ||
+                ( t->type == task_type_sub && t->cj == NULL ) ) {
+             
+            /* Set this task's skip. */
+            t->skip = ( t->ci->dt_min > dt_step );
+            
+            }
         
-    /* Otherwise, just re-set them. */
-    else {
-        for ( qid = 0 ; qid < e->nr_queues ; qid++ )
-            e->queues[qid].next = 0;
+        /* Pair? */
+        else if ( t->type == task_type_pair || ( t->type == task_type_sub && t->cj != NULL ) ) {
+            
+            /* Local pointers. */
+            ci = t->ci;
+            cj = t->cj;
+            
+            /* Set this task's skip. */
+            t->skip = ( ci->dt_min > dt_step && cj->dt_min > dt_step );
+            
+            /* Too much particle movement? */
+            if ( t->tight &&
+                 ( fmaxf( ci->h_max , cj->h_max ) + ci->dx_max + cj->dx_max > cj->dmin || 
+                   ci->dx_max > space_maxreldx*ci->h_max || cj->dx_max > space_maxreldx*cj->h_max ) )
+                return 1;
+                
+            /* Set the sort flags. */
+            if ( !t->skip && t->type == task_type_pair ) {
+                ci->sorts->flags |= (1 << t->flags);
+                ci->sorts->skip = 0;
+                cj->sorts->flags |= (1 << t->flags);
+                cj->sorts->skip = 0;
+                }
+                
+            }
+            
+        /* Kick2? */
+        else if ( t->type == task_type_kick2 )
+            t->skip = 0;
+            
+        /* None? */
+        else if ( t->type == task_type_none )
+            t->skip = 1;
+            
         }
+        
+    /* All is well... */
+    return 0;
+    
+    }
+
+
+/**
+ * @brief Prepare the #engine by re-building the cells and tasks.
+ *
+ * @param e The #engine to prepare.
+ */
+ 
+void engine_prepare ( struct engine *e ) {
+    
+    int rebuild;
+    
+    TIMER_TIC
 
-    /* Run throught the tasks and get all the waits right. */
+    /* Run through the tasks and mark as skip or not. */
     // tic = getticks();
-    #pragma omp parallel for schedule(static) private(j)
-    for ( k = 0 ; k < s->nr_tasks ; k++ ) {
-        if ( s->tasks[k].skip )
-            continue;
-        for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ )
-            atomic_inc( &s->tasks[k].unlock_tasks[j]->wait );
-        }
-    // printf( "engine_prepare: preparing task dependencies took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
+    rebuild = ( e->step == 0 || engine_marktasks( e ) );
+    // printf( "engine_prepare: engine_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
+        
+    /* Did this not go through? */
+    if ( rebuild ) {
     
-    /* Re-set the queues.*/
-    for ( k = 0 ; k < e->nr_queues ; k++ )
-        e->queues[k].next = 0;
+        /* Re-build the space. */
+        tic = getticks();
+        space_rebuild( e->s , 0.0 );
+        printf( "engine_prepare: space_rebuild took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
+    
+        /* Re-build the tasks. */
+        tic = getticks();
+        engine_maketasks( e );
+        printf( "engine_prepare: engine_maketasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
+    
+        /* Run through the tasks and mark as skip or not. */
+        // tic = getticks();
+        if ( engine_marktasks( e ) )
+            error( "engine_marktasks failed after space_rebuild." );
+        // printf( "engine_prepare: engine_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
         
+        }
+
+    /* Start the scheduler. */
+    scheduler_start( &e->sched );
+    
     TIMER_TOC( timer_prepare );
     
     }
@@ -455,7 +693,7 @@ void engine_single_force ( double *dim , long long int pid , struct part *__rest
  * @param sort_queues Flag to try to sort the queues topologically.
  */
  
-void engine_step ( struct engine *e , int sort_queues ) {
+void engine_step ( struct engine *e ) {
 
     int k;
     float dt = e->dt, dt_step, dt_max = 0.0f, dt_min = FLT_MAX;
@@ -509,15 +747,6 @@ void engine_step ( struct engine *e , int sort_queues ) {
     /* Prepare the space. */
     engine_prepare( e );
     
-    /* Sort the queues?*/
-    if ( sort_queues ) {
-        #pragma omp parallel for default(none), shared(e)
-        for ( k = 0 ; k < e->nr_queues ; k++ ) {
-            queue_sort( &e->queues[k] );
-            e->queues[k].next = 0;
-            }
-        }
-        
     // engine_single_density( e->s->dim , 3392063069037 , e->s->parts , e->s->nr_parts , e->s->periodic );
 
     /* Start the clock. */
@@ -634,7 +863,6 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread
     /* Store the values. */
     e->s = s;
     e->nr_threads = nr_threads;
-    e->nr_queues = nr_queues;
     e->policy = policy;
     e->step = 0;
     e->nullstep = 0;
@@ -661,14 +889,8 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread
             dt *= 0.5f;
     e->dt = dt;
     
-    /* Allocate the queues. */
-    if ( posix_memalign( (void *)(&e->queues) , 64 , nr_queues * sizeof(struct queue) ) != 0 )
-        error( "Failed to allocate queues." );
-    bzero( e->queues , nr_queues * sizeof(struct queue) );
-        
-    /* Sort the queues topologically. */
-    // for ( k = 0 ; k < nr_queues ; k++ )
-    //     queue_sort( &e->queues[k] );
+    /* Init the scheduler. */
+    scheduler_init( &e->sched , e->s , nr_queues , scheduler_flag_steal );
         
     /* Allocate and init the threads. */
     if ( ( e->runners = (struct runner *)malloc( sizeof(struct runner) * nr_threads ) ) == NULL )
diff --git a/src/engine.h b/src/engine.h
index 2d3c6a3575ad8d53fdbdf0e8f5768ad0e0128b76..8efae350763a2e5133aa2199ec52207ca2e6b981 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -46,11 +46,8 @@ struct engine {
     /* The running policy. */
     int policy;
     
-    /* The number of queues. */
-    int nr_queues;
-    
-    /* The queues. */
-    struct queue *queues;
+    /* The task scheduler. */
+    struct scheduler sched;
     
     /* The maximum dt to step (current). */
     float dt_step;
@@ -85,4 +82,5 @@ struct engine {
 void engine_barrier( struct engine *e );
 void engine_init ( struct engine *e , struct space *s , float dt , int nr_threads , int nr_queues , int policy );
 void engine_prepare ( struct engine *e );
-void engine_step ( struct engine *e , int sort_queues );
+void engine_step ( struct engine *e );
+void engine_maketasks ( struct engine *e );
diff --git a/src/io.c b/src/io.c
index a229e028e80dce199bafe9ed2b415a37218650ea..c965cf925f1cdb16fe4f2cb9e67b239fcb522da5 100644
--- a/src/io.c
+++ b/src/io.c
@@ -37,6 +37,7 @@
 #include "task.h"
 #include "part.h"
 #include "space.h"
+#include "scheduler.h"
 #include "engine.h"
 #include "error.h"
 #include "kernel.h"
diff --git a/src/queue.c b/src/queue.c
index 95b1eac0d6b29c9645e4018d55451fe3016d4e73..ebf187915e8e6567343be531cfb472ca8c1119de 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -24,12 +24,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <pthread.h>
-#include <math.h>
-#include <float.h>
-#include <limits.h>
-#include <omp.h>
-#include <sched.h>
 
 /* Local headers. */
 #include "cycle.h"
@@ -93,11 +87,30 @@ void queue_insert ( struct queue *q , struct task *t ) {
     if ( lock_lock( &q->lock ) != 0 )
         error( "Failed to get queue lock." );
         
-    /* Swap next task to end. */
-    q->tid[ q->count ] = q->tid[ q->next ];
+    /* Does the queue need to be grown? */
+    if ( q->count == q->size ) {
+        int *temp;
+        q->size *= queue_sizegrow;
+        if ( ( temp = (int *)malloc( sizeof(int) * q->size ) ) == NULL )
+            error( "Failed to allocate new indices." );
+        memcpy( temp , q->tid , sizeof(int) * q->count );
+        free( q->tid );
+        q->tid = temp;
+        }
+        
+    /* Drop the task at the end of the queue. */
+    q->tid[ q->count ] = ( t - q->tasks );
     q->count += 1;
-    q->tid[ q->next ] = t - q->tasks;
-    q->next += 1;
+    
+    /* Shuffle up. */
+    for ( int k = q->count - 1 ; k > 0 ; k /= 2 )
+        if ( q->tasks[ q->tid[k] ].rank < q->tasks[ q->tid[k/2] ].rank ) {
+            int temp = q->tid[k];
+            q->tid[k] = q->tid[k/2];
+            q->tid[k/2] = temp;
+            }
+        else
+            break;
     
     /* Unlock the queue. */
     if ( lock_unlock( &q->lock ) != 0 )
@@ -113,23 +126,18 @@ void queue_insert ( struct queue *q , struct task *t ) {
  * @param tasks List of tasks to which the queue indices refer to.
  */
  
-void queue_init ( struct queue *q , int size , struct task *tasks ) {
+void queue_init ( struct queue *q , struct task *tasks ) {
     
     /* Allocate the task list if needed. */
-    if ( q->tid == NULL || q->size < size ) {
-        if ( q->tid != NULL )
-            free( q->tid );
-        q->size = size;
-        if ( ( q->tid = (int *)malloc( sizeof(int) * size ) ) == NULL )
-            error( "Failed to allocate queue tids." );
-        }
+    q->size = queue_sizeinit;
+    if ( ( q->tid = (int *)malloc( sizeof(int) * q->size ) ) == NULL )
+        error( "Failed to allocate queue tids." );
         
     /* Set the tasks pointer. */
     q->tasks = tasks;
         
     /* Init counters. */
     q->count = 0;
-    q->next = 0;
     
     /* Init the queue lock. */
     if ( lock_init( &q->lock ) != 0 )
@@ -146,157 +154,29 @@ void queue_init ( struct queue *q , int size , struct task *tasks ) {
  * @param keep Remove the returned task from this queue.
  */
  
-struct task *queue_gettask_old ( struct queue *q , int blocking , int keep ) {
+struct task *queue_gettask ( struct queue *q , int qid , int blocking ) {
 
-    int k, tid = -1, qcount, *qtid = q->tid;
+    int k, qcount, *qtid = q->tid;
     lock_type *qlock = &q->lock;
     struct task *qtasks = q->tasks, *res = NULL;
     TIMER_TIC
     
     /* If there are no tasks, leave immediately. */
-    if ( q->next >= q->count ) {
+    if ( q->count == 0 ) {
         TIMER_TOC(queue_timer_gettask);
         return NULL;
         }
 
     /* Main loop, while there are tasks... */
-    while ( q->next < q->count ) {
+    while ( q->count > 0 ) {
     
         /* Grab the task lock. */
-        // if ( blocking ) {
-            if ( lock_lock( qlock ) != 0 )
-                error( "Locking the task_lock failed.\n" );
-        //     }
-        // else {
-        //     if ( lock_trylock( qlock ) != 0 )
-        //         break;
-        //     }
+        if ( lock_lock( qlock ) != 0 )
+            error( "Locking the qlock failed.\n" );
             
         /* Loop over the remaining task IDs. */
         qcount = q->count;
-        for ( k = q->next ; k < qcount ; k++ ) {
-        
-            /* Put a finger on the task. */
-            res = &qtasks[ qtid[k] ];
-            
-            /* Is this task blocked? */
-            if ( res->wait )
-                continue;
-            
-            /* Different criteria for different types. */
-            if ( res->type == task_type_self || res->type == task_type_sort || (res->type == task_type_sub && res->cj == NULL) ) {
-                if ( res->ci->hold || cell_locktree( res->ci ) != 0 )
-                    continue;
-                }
-            else if ( res->type == task_type_pair || (res->type == task_type_sub && res->cj != NULL) ) {
-                if ( res->ci->hold || res->cj->hold || res->ci->wait || res->cj->wait )
-                    continue;
-                if ( cell_locktree( res->ci ) != 0 )
-                    continue;
-                if ( cell_locktree( res->cj ) != 0 ) {
-                    cell_unlocktree( res->ci );
-                    continue;
-                    }
-                }
-            
-            /* If we made it this far, we're safe. */
-            break;
-        
-            } /* loop over the task IDs. */
-            
-        /* Did we get a task? */
-        if ( k < qcount ) {
-        
-            /* Do we need to swap? */
-            if ( k != q->next )
-                COUNT(queue_counter_swap);
-        
-            /* get the task ID. */
-            tid = qtid[k];
-        
-            /* Remove the task? */
-            if ( keep ) {
-            
-                /* Bubble-up. */
-                q->count = qcount - 1;
-                for ( ; k < qcount - 1 ; k++ )
-                    qtid[k] = qtid[k+1];
-            
-                }
-                
-            /* No, leave it in the queue. */
-            else {
-            
-                TIMER_TIC2
-
-                /* Bubble-down the task. */
-                while ( k > q->next ) {
-                    qtid[ k ] = qtid[ k-1 ];
-                    k -= 1;
-                    }
-                qtid[ q->next ] = tid;
-                
-                /* up the counter. */
-                q->next += 1;
-                
-                TIMER_TOC2(queue_timer_bubble);
-            
-                }
-            
-            }
-    
-        /* Release the task lock. */
-        if ( lock_unlock( qlock ) != 0 )
-            error( "Unlocking the task_lock failed.\n" );
-            
-        /* Leave? */
-        if ( tid >= 0 ) {
-            TIMER_TOC(queue_timer_gettask);
-            return &qtasks[tid];
-            }
-        else if ( !blocking )
-            break;
-    
-        } /* while there are tasks. */
-        
-    /* No beef. */
-    TIMER_TOC(queue_timer_gettask);
-    return NULL;
-
-    }
-
-
-struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep ) {
-
-    int k, tid = -1, qcount, *qtid = q->tid, hits;
-    lock_type *qlock = &q->lock;
-    struct task *qtasks = q->tasks, *res = NULL;
-    struct cell *ci_best = NULL, *cj_best = NULL;
-    int ind_best, score_best = -1, score;
-    TIMER_TIC
-    
-    /* If there are no tasks, leave immediately. */
-    if ( q->next >= q->count ) {
-        TIMER_TOC(queue_timer_gettask);
-        return NULL;
-        }
-
-    /* Main loop, while there are tasks... */
-    while ( q->next < q->count ) {
-    
-        /* Grab the task lock. */
-        // if ( blocking ) {
-            if ( lock_lock( qlock ) != 0 )
-                error( "Locking the qlock failed.\n" );
-        //     }
-        // else {
-        //     if ( lock_trylock( qlock ) != 0 )
-        //         break;
-        //     }
-            
-        /* Loop over the remaining task IDs. */
-        qcount = q->count; ind_best = -1; hits = 0;
-        for ( k = q->next ; k < qcount && hits < queue_maxhits ; k++ ) {
+        for ( k = 0 ; k < qcount ; k++ ) {
         
             /* Put a finger on the task. */
             res = &qtasks[ qtid[k] ];
@@ -305,109 +185,60 @@ struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep
             if ( res->wait )
                 continue;
                 
-            /* Get the score for this task. */
-            if ( res->cj == NULL )
-                score = 2 * ( res->ci->super->owner == rid );
-            else
-                score = ( res->ci->super->owner == rid ) + ( res->cj->super->owner == rid );
-            if ( score <= score_best )
-                continue;
-                
             /* Try to lock ci. */
             if ( res->type == task_type_self || 
                  res->type == task_type_sort || 
                  (res->type == task_type_sub && res->cj == NULL) ) {
-                if ( res->ci != ci_best && res->ci != cj_best && cell_locktree( res->ci ) != 0 )
+                if ( cell_locktree( res->ci ) != 0 )
                     continue;
                 }
             else if ( res->type == task_type_pair || (res->type == task_type_sub && res->cj != NULL) ) {
                 if ( res->ci->hold || res->cj->hold || res->ci->wait || res->cj->wait )
                     continue;
-                if ( res->ci != ci_best && res->ci != cj_best && cell_locktree( res->ci ) != 0 )
+                if ( cell_locktree( res->ci ) != 0 )
                     continue;
-                if ( res->cj != ci_best && res->cj != cj_best && cell_locktree( res->cj ) != 0 ) {
-                    if ( res->ci != ci_best && res->ci != cj_best )
-                        cell_unlocktree( res->ci );
+                if ( cell_locktree( res->cj ) != 0 ) {
+                    cell_unlocktree( res->ci );
                     continue;
                     }
                 }
             
-            /* If we owned a previous task, unlock it. */
-            if ( ind_best >= 0 ) {
-                res = &qtasks[ qtid[ ind_best ] ];
-                if ( res->type == task_type_self || res->type == task_type_sort || res->type == task_type_pair || res->type == task_type_sub )
-                    if ( res->ci != ci_best && res->ci != cj_best )
-                        cell_unlocktree( res->ci );
-                if ( res->type == task_type_pair || (res->type == task_type_sub && res->cj != NULL) )
-                    if ( res->cj != ci_best && res->cj != cj_best )
-                        cell_unlocktree( res->cj );
-                }
-            
             /* If we made it this far, we're safe. */
-            ind_best = k;
-            ci_best = qtasks[ qtid[ k ] ].ci;
-            cj_best = qtasks[ qtid[ k ] ].cj;
-            score_best = score;
-            hits += 1;
-            
-            /* Should we bother looking any farther? */
-            if ( score_best == 2 );
-                break;
+            break;
         
             } /* loop over the task IDs. */
             
         /* Did we get a task? */
-        if ( ind_best >= 0 ) {
+        if ( k >= qcount ) res = NULL; else { /* nothing lockable: res must not keep the last task probed */
         
-            /* Do we need to swap? */
-            if ( ind_best != q->next )
-                COUNT(queue_counter_swap);
+            /* Another one bites the dust. */
+            q->count -= 1;
         
-            /* get the task ID. */
-            tid = qtid[ ind_best ];
-            
             /* Own the cells involved. */
-            qtasks[ tid ].ci->super->owner = rid;
-            if ( qtasks[ tid ].cj != NULL )
-                qtasks[ tid ].cj->super->owner = rid;
-        
-            /* Remove the task? */
-            if ( keep ) {
-            
-                /* Bubble-up. */
-                /* q->count = qcount - 1;
-                for ( k = ind_best ; k < qcount - 1 ; k++ )
-                    qtid[k] = qtid[k+1]; */
-                    
-                /* Swap with last task. */
-                q->count = qcount - 1;
-                qtid[ ind_best ] = qtid[ q->count ];
-            
-                }
-                
-            /* No, leave it in the queue. */
-            else {
-            
-                TIMER_TIC2
-
-                /* Bubble-down the task. */
-                /* for ( k = ind_best ; k > q->next ; k-- )
-                    qtid[ k ] = qtid[ k-1 ];
-                qtid[ q->next ] = tid; */
+            res->ci->super->owner = qid;
+            if ( res->cj != NULL )
+                res->cj->super->owner = qid;
                 
-                /* Swap with the first task. */
-                if ( ind_best != q->next ) {
-                    qtid[ ind_best ] = qtid[ q->next ];
-                    qtid[ q->next ] = tid;
+            /* Swap this task with the last task and re-heap. */
+            if ( k < q->count ) {
+                qtid[ k ] = qtid[ q->count ];
+                while ( 1 ) {
+                    int i = 2*k;
+                    if ( i >= q->count )
+                        break;
+                    if ( i+1 < q->count && qtasks[ qtid[i+1] ].rank < qtasks[ qtid[i] ].rank )
+                        i += 1;
+                    if ( qtasks[ qtid[i] ].rank < qtasks[ qtid[k] ].rank ) {
+                        int temp = qtid[i];
+                        qtid[i] = qtid[k];
+                        qtid[k] = temp;
+                        k = i;
+                        }
+                    else
+                        break;
                     }
-                
-                /* up the counter. */
-                q->next += 1;
-                
-                TIMER_TOC2(queue_timer_bubble);
-            
                 }
-            
+        
             }
     
         /* Release the task lock. */
@@ -415,136 +246,15 @@ struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep
             error( "Unlocking the qlock failed.\n" );
             
         /* Leave? */
-        if ( tid >= 0 ) {
-            TIMER_TOC(queue_timer_gettask);
-            return &qtasks[tid];
-            }
-        else if ( !blocking )
+        if ( res != NULL || !blocking )
             break;
     
         } /* while there are tasks. */
         
     /* No beef. */
     TIMER_TOC(queue_timer_gettask);
-    return NULL;
+    return res;
 
     }
 
 
-/**
- * @brief Sort the tasks IDs according to their weight and constraints.
- *
- * @param q The #queue.
- */
- 
-void queue_sort ( struct queue *q ) {
-
-    struct {
-        short int lo, hi;
-        } qstack[20];
-    int qpos, i, j, k, lo, hi, imin, temp;
-    int pivot_weight, pivot_wait;
-    int *weight, *wait;
-    int *data = q->tid;
-    struct task *t;
-    
-    printf( "queue_sort: sorting queue with %i tasks.\n" , q->count );
-        
-    /* Allocate and pre-compute each task's weight. */
-    if ( ( weight = (int *)alloca( sizeof(int) * q->count ) ) == NULL ||
-         ( wait = (int *)alloca( sizeof(int) * q->count ) ) == NULL )
-        error( "Failed to allocate weight buffer." );
-    for ( k = 0 ; k < q->count ; k++ ) {
-        t = &q->tasks[ q->tid[k] ];
-        switch ( t->type ) {
-            case task_type_self:
-                wait[k] = t->rank;
-                weight[k] = 0; // t->ci->count * t->ci->count;
-                break;
-            case task_type_pair:
-                wait[k] = t->rank;
-                weight[k] = 0; // t->ci->count * t->cj->count;
-                break;
-            case task_type_sub:
-                wait[k] = t->rank;
-                weight[k] = 0; // (t->cj == NULL) ? t->ci->count * t->ci->count : t->ci->count * t->cj->count;
-                break;
-            case task_type_sort:
-                wait[k] = t->rank;
-                weight[k] = 0; // t->ci->count;
-                break;
-            case task_type_ghost:
-                wait[k] = t->rank;
-                weight[k] = 0; // t->ci->count;
-                break;
-            }
-        }
-        
-    /* Sort tasks. */
-    qstack[0].lo = 0; qstack[0].hi = q->count - 1; qpos = 0;
-    while ( qpos >= 0 ) {
-        lo = qstack[qpos].lo; hi = qstack[qpos].hi;
-        qpos -= 1;
-        if ( hi - lo < 15 ) {
-            for ( i = lo ; i < hi ; i++ ) {
-                imin = i;
-                for ( j = i+1 ; j <= hi ; j++ )
-                    if ( ( wait[ j ] < wait[ imin ] ) ||
-                         ( wait[ j ] == wait[ imin ] && weight[ j ] > weight[ imin ] ) )
-                if ( imin != i ) {
-                    temp = data[imin]; data[imin] = data[i]; data[i] = temp;
-                    temp = wait[imin]; wait[imin] = wait[i]; wait[i] = temp;
-                    temp = weight[imin]; weight[imin] = weight[i]; weight[i] = temp;
-                    }
-                }
-            }
-        else {
-            pivot_weight = weight[ ( lo + hi ) / 2 ];
-            pivot_wait = wait[ ( lo + hi ) / 2 ];
-            i = lo; j = hi;
-            while ( i <= j ) {
-                while ( ( wait[ i ] < pivot_wait ) ||
-                        ( wait[ i ] == pivot_wait && weight[ i ] > pivot_weight ) )
-                    i++;
-                while ( ( wait[ j ] > pivot_wait ) ||
-                        ( wait[ j ] == pivot_wait && weight[ j ] < pivot_weight ) )
-                    j--;
-                if ( i <= j ) {
-                    if ( i < j ) {
-                        temp = data[i]; data[i] = data[j]; data[j] = temp;
-                        temp = wait[i]; wait[i] = wait[j]; wait[j] = temp;
-                        temp = weight[i]; weight[i] = weight[j]; weight[j] = temp;
-                        }
-                    i += 1; j -= 1;
-                    }
-                }
-            if ( j > ( lo + hi ) / 2 ) {
-                if ( lo < j ) {
-                    qpos += 1;
-                    qstack[qpos].lo = lo;
-                    qstack[qpos].hi = j;
-                    }
-                if ( i < hi ) {
-                    qpos += 1;
-                    qstack[qpos].lo = i;
-                    qstack[qpos].hi = hi;
-                    }
-                }
-            else {
-                if ( i < hi ) {
-                    qpos += 1;
-                    qstack[qpos].lo = i;
-                    qstack[qpos].hi = hi;
-                    }
-                if ( lo < j ) {
-                    qpos += 1;
-                    qstack[qpos].lo = lo;
-                    qstack[qpos].hi = j;
-                    }
-                }
-            }
-        }
-                
-    }
-    
-    
diff --git a/src/queue.h b/src/queue.h
index 2db5b57c04f92ef5b3809ca77167d0fbce8ae780..f15664be93a4a00516614911b9826cfe41458220 100644
--- a/src/queue.h
+++ b/src/queue.h
@@ -20,6 +20,8 @@
 
 /* Some constants. */
 #define queue_maxhits            10
+#define queue_sizeinit           100
+#define queue_sizegrow           2
 
 
 /* The queue timers themselves. */
@@ -47,7 +49,7 @@ struct queue {
     lock_type lock;
 
     /* Size, count and next element. */
-    int size, count, next;
+    int size, count;
     
     /* The actual tasks to which the indices refer. */
     struct task *tasks;
@@ -59,8 +61,6 @@ struct queue {
     
 
 /* Function prototypes. */
-struct task *queue_gettask_old ( struct queue *q , int blocking , int keep );
-struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep );
-void queue_init ( struct queue *q , int size , struct task *tasks );
+struct task *queue_gettask ( struct queue *q , int qid , int blocking );
+void queue_init ( struct queue *q , struct task *tasks );
 void queue_insert ( struct queue *q , struct task *t );
-void queue_sort ( struct queue *q );
diff --git a/src/runner.c b/src/runner.c
index e55895ea739e0c118fe7610bb31948ae2d8fd0c7..90c34007d66183a77793157fa6bfbef52c6baa8d 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -29,7 +29,6 @@
 #include <float.h>
 #include <limits.h>
 #include <omp.h>
-#include <sched.h>
 
 /* Local headers. */
 #include "cycle.h"
@@ -42,6 +41,7 @@
 #include "cell.h"
 #include "space.h"
 #include "queue.h"
+#include "scheduler.h"
 #include "engine.h"
 #include "runner.h"
 #include "runner_iact.h"
@@ -617,15 +617,10 @@ void *runner_main ( void *data ) {
 
     struct runner *r = (struct runner *)data;
     struct engine *e = r->e;
+    struct scheduler *sched = &e->sched;
     int threadID = r->id;
-    int k, qid, naq, keep, tpq;
-    struct queue *queues[ e->nr_queues ], *myq;
     struct task *t;
     struct cell *ci, *cj;
-    unsigned int myseed = rand() + r->id;
-    #ifdef TIMER
-        ticks stalled;
-    #endif
     
     /* Main loop. */
     while ( 1 ) {
@@ -633,88 +628,17 @@ void *runner_main ( void *data ) {
         /* Wait at the barrier. */
         engine_barrier( e );
         
-        /* Set some convenient local data. */
-        keep = e->policy & engine_policy_keep;
-        myq = &e->queues[ threadID * e->nr_queues / e->nr_threads ];
-        tpq = ceil( ((double)e->nr_threads) / e->nr_queues );
-        #ifdef TIMER
-            stalled = 0;
-        #endif
-        
-        /* Set up the local list of active queues. */
-        naq = e->nr_queues;
-        for ( k = 0 ; k < naq ; k++ )
-            queues[k] = &e->queues[k];
-    
-        /* Set up the local list of active queues. */
-        naq = e->nr_queues;
-        for ( k = 0 ; k < naq ; k++ )
-            queues[k] = &e->queues[k];
-    
         /* Loop while there are tasks... */
         while ( 1 ) {
         
-            /* Remove any inactive queues. */
-            for ( k = 0 ; k < naq ; k++ )
-                if ( queues[k]->next == queues[k]->count ) {
-                    naq -= 1;
-                    queues[k] = queues[naq];
-                    k -= 1;
-                    }
-            if ( naq == 0 )
-                break;
-        
             /* Get a task, how and from where depends on the policy. */
             TIMER_TIC
-            t = NULL;
-            if ( e->nr_queues == 1 ) {
-                t = queue_gettask_old( &e->queues[0] , 1 , 0 );
-                }
-            else if ( e->policy & engine_policy_steal ) {
-                if ( ( myq->next == myq->count ) ||
-                     ( t = queue_gettask( myq , r->id , 0 , 0 ) ) == NULL ) {
-                    TIMER_TIC2
-                    qid = rand_r( &myseed ) % naq;
-                    keep = ( e->policy & engine_policy_keep ) &&
-                           ( myq->count <= myq->size-tpq );
-                    if ( myq->next == myq->count )
-                        COUNT(runner_counter_steal_empty);
-                    else
-                        COUNT(runner_counter_steal_stall);
-                    t = queue_gettask( queues[qid] , r->id , 0 , keep );
-                    if ( t != NULL && keep )
-                        queue_insert( myq , t );
-                    TIMER_TOC2(timer_steal);
-                    }
-                }
-            else if ( e->policy & engine_policy_rand ) {
-                qid = rand_r( &myseed ) % naq;
-                t = queue_gettask( queues[qid] , r->id , e->policy & engine_policy_block , 0 );
-                }
-            else {
-                t = queue_gettask( &e->queues[threadID] , r->id , e->policy & engine_policy_block , 0 );
-                }
+            t = scheduler_gettask( sched , threadID );
             TIMER_TOC(timer_getpair);
             
             /* Did I get anything? */
-            if ( t == NULL ) {
-                COUNT(runner_counter_stall);
-                #ifdef TIMER
-                    if ( !stalled )
-                        stalled = getticks();
-                #endif
-                continue;
-                }
-            #ifdef TIMER
-            else if ( stalled ) {
-                timers_toc( timer_stalled , stalled );
-                #ifdef TIMER_VERBOSE
-                    printf( "runner_main[%02i]: stalled %.3f ms\n" , r->id , ((double)stalled) / CPU_TPS * 1000 );
-                    fflush(stdout);
-                #endif
-                stalled = 0;
-                }
-            #endif
+            if ( t == NULL )
+                break;
         
             /* Get the cells. */
             ci = t->ci;
@@ -731,7 +655,6 @@ void *runner_main ( void *data ) {
                         runner_doself2_force( r , ci );
                     else
                         error( "Unknown task subtype." );
-                    cell_unlocktree( ci );
                     break;
                 case task_type_pair:
                     if ( t->subtype == task_subtype_density )
@@ -740,12 +663,9 @@ void *runner_main ( void *data ) {
                         runner_dopair2_force( r , ci , cj );
                     else
                         error( "Unknown task subtype." );
-                    cell_unlocktree( ci );
-                    cell_unlocktree( cj );
                     break;
                 case task_type_sort:
                     runner_dosort( r , ci , t->flags , 1 );
-                    cell_unlocktree( ci );
                     break;
                 case task_type_sub:
                     if ( t->subtype == task_subtype_density )
@@ -754,9 +674,6 @@ void *runner_main ( void *data ) {
                         runner_dosub2_force( r , ci , cj , t->flags );
                     else
                         error( "Unknown task subtype." );
-                    cell_unlocktree( ci );
-                    if ( cj != NULL )
-                        cell_unlocktree( cj );
                     break;
                 case task_type_ghost:
                     if ( ci->super == ci )
@@ -769,26 +686,12 @@ void *runner_main ( void *data ) {
                     error( "Unknown task type." );
                 }
             t->toc = getticks();
+            
+            /* We're done with this task. */
+            scheduler_done( sched , t );
                 
-            /* Resolve any dependencies. */
-            for ( k = 0 ; k < t->nr_unlock_tasks ; k++ )
-                if ( atomic_dec( &t->unlock_tasks[k]->wait ) == 0 )
-                    error( "Task negative wait." );
-        
             } /* main loop. */
             
-    	/* Any leftover stalls? */    
-        #ifdef TIMER
-        if ( stalled ) {
-            timers_toc( timer_stalled , stalled );
-            #ifdef TIMER_VERBOSE
-                printf( "runner_main[%02i]: stalled %.3f ms\n" , r->id , ((double)stalled) / CPU_TPS * 1000 );
-                fflush(stdout);
-            #endif
-            stalled = 0;
-            }
-        #endif
-            
         }
         
     /* Be kind, rewind. */
diff --git a/src/scheduler.c b/src/scheduler.c
new file mode 100644
index 0000000000000000000000000000000000000000..8ffc16a48057e4042f5e6a967c2a3f88848905c2
--- /dev/null
+++ b/src/scheduler.c
@@ -0,0 +1,713 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ * 
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ * 
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+/* Local headers. */
+#include "error.h"
+#include "cycle.h"
+#include "atomic.h"
+#include "timers.h"
+#include "const.h"
+#include "vector.h"
+#include "lock.h"
+#include "task.h"
+#include "part.h"
+#include "debug.h"
+#include "cell.h"
+#include "space.h"
+#include "queue.h"
+#include "kernel.h"
+#include "scheduler.h"
+
+
+/**
+ * @brief Mapping function to append a ghost task to each cell.
+ *
+ * Finds each cell's super cell, i.e. its highest ancestor with at least one
+ * task (or the cell itself); ghosts below a super cell depend on their parent's
+ * ghost, and each super cell with tasks of its own also gets a kick2 task.
+ * @param c The #cell to process.
+ * @param data The #scheduler to add tasks to, passed as a void pointer.
+ */
+
+void scheduler_map_mkghosts ( struct cell *c , void *data ) {
+
+    struct scheduler *s = (struct scheduler *)data;
+    struct cell *finger;
+
+    /* Find the super cell, i.e. the highest cell hierarchically above
+       this one to still have at least one task associated with it. */
+    c->super = c;
+    for ( finger = c->parent ; finger != NULL ; finger = finger->parent )
+        if ( finger->nr_tasks > 0 )
+            c->super = finger;
+            
+    /* Make the ghost task if we lie below a super cell or have tasks. */
+    if ( c->super != c || c->nr_tasks > 0 )
+        c->ghost = scheduler_addtask( s , task_type_ghost , task_subtype_none , 0 , 0 , c , NULL , 0 );
+
+    /* Append a kick2 task if we are our own super cell and have tasks. */
+    if ( c->super == c && c->nr_tasks > 0 )
+        c->kick2 = scheduler_addtask( s , task_type_kick2 , task_subtype_none , 0 , 0 , c , NULL , 0 );
+    
+    /* If we are not the super cell ourselves, make our ghost depend
+       on the ghost of our parent cell. */
+    if ( c->super != c )
+        task_addunlock( c->parent->ghost , c->ghost );
+        
+    }
+
+
+/**
+ * @brief Split tasks that may be too large and attach sorts to unsplit pair tasks.
+ *
+ * @param s The #scheduler whose tasks are traversed; new tasks are added to it.
+ */
+ 
+void scheduler_splittasks ( struct scheduler *s ) {
+
+    int j, k, ind, sid, tid = 0, redo;
+    struct cell *ci, *cj;
+    double hi, hj, shift[3];
+    struct task *t, *t_old;
+    // float dt_step = s->dt_step;
+    int pts[7][8] = { { -1 , 12 , 10 ,  9 ,  4 ,  3 ,  1 ,  0 } ,
+                      { -1 , -1 , 11 , 10 ,  5 ,  4 ,  2 ,  1 } ,
+                      { -1 , -1 , -1 , 12 ,  7 ,  6 ,  4 ,  3 } , 
+                      { -1 , -1 , -1 , -1 ,  8 ,  7 ,  5 ,  4 } ,
+                      { -1 , -1 , -1 , -1 , -1 , 12 , 10 ,  9 } ,
+                      { -1 , -1 , -1 , -1 , -1 , -1 , 11 , 10 } ,
+                      { -1 , -1 , -1 , -1 , -1 , -1 , -1 , 12 } };
+
+    /* Loop through the tasks... */
+    // #pragma omp parallel default(none) shared(s,tid,pts,space_subsize) private(ind,j,k,t,t_old,redo,ci,cj,hi,hj,sid,shift)
+    {
+    redo = 0; t_old = t = NULL;
+    while ( 1 ) {
+    
+        /* Get a pointer to the task: the recycled one on redo, else the next one. */
+        if ( redo ) {
+            redo = 0;
+            t = t_old;
+            }
+        else {
+            if ( ( ind = atomic_inc( &tid ) ) < s->nr_tasks )
+                t_old = t = &s->tasks[ s->tasks_ind[ ind ] ];
+            else
+                break;
+            }
+        
+        /* Empty task (missing cell)? Flag it to be skipped. */
+        if ( t->ci == NULL || ( t->type == task_type_pair && t->cj == NULL ) ) {
+            t->type = task_type_none;
+            t->skip = 1;
+            continue;
+            }
+        
+        /* Self-interaction? */
+        if ( t->type == task_type_self ) {
+        
+            /* Get a handle on the cell involved. */
+            ci = t->ci;
+            
+            /* Ignore this task? */
+            /* if ( ci->dt_min > dt_step ) {
+                t->skip = 1;
+                continue;
+                } */
+            
+            /* Is this cell even split? */
+            if ( ci->split ) {
+            
+                /* Make a sub? */
+                if ( scheduler_dosub && ci->count < space_subsize && ci->maxdepth - ci->depth < scheduler_maxsubdepth ) {
+
+                    /* Convert to a self-subtask. */
+                    t->type = task_type_sub;
+
+                    }
+
+                /* Otherwise, make tasks explicitly. */
+                else {
+
+                    /* Take a step back (we're going to recycle the current task)... */
+                    redo = 1;
+
+                    /* Add the self tasks: recycle t for the first progeny, add tasks for the rest. */
+                    for ( k = 0 ; ci->progeny[k] == NULL ; k++ );
+                    t->ci = ci->progeny[k];
+                    for ( k += 1 ; k < 8 ; k++ )
+                        if ( ci->progeny[k] != NULL )
+                            scheduler_addtask( s , task_type_self , task_subtype_density , 0 , 0 , ci->progeny[k] , NULL , 0 );
+
+                    /* Make a task for each pair of progeny. */
+                    for ( j = 0 ; j < 8 ; j++ )
+                        if ( ci->progeny[j] != NULL )
+                            for ( k = j + 1 ; k < 8 ; k++ )
+                                if ( ci->progeny[k] != NULL )
+                                    scheduler_addtask( s , task_type_pair , task_subtype_density , pts[j][k] , 0 , ci->progeny[j] , ci->progeny[k] , 0 );
+                    }
+
+                }
+        
+            }
+    
+        /* Pair interaction? */
+        else if ( t->type == task_type_pair ) {
+            
+            /* Get a handle on the cells involved. */
+            ci = t->ci;
+            cj = t->cj;
+            hi = ci->dmin;
+            hj = cj->dmin;
+
+            /* Ignore this task? */
+            /* if ( ci->dt_min > dt_step && cj->dt_min > dt_step ) {
+                t->skip = 1;
+                continue;
+                } */
+            
+            /* Get the sort ID, use space_getsid and not t->flags
+               to make sure we get ci and cj swapped if needed. */
+            sid = space_getsid( s->space , &ci , &cj , shift );
+                
+            /* Should this task be split-up? */
+            if ( ci->split && cj->split &&
+                 ci->h_max*kernel_gamma*space_stretch < hi/2 &&
+                 cj->h_max*kernel_gamma*space_stretch < hj/2 ) {
+                 
+                /* Replace by a single sub-task? */
+                if ( scheduler_dosub &&
+                     ci->count < space_subsize && cj->count < space_subsize &&
+                     ci->maxdepth - ci->depth < scheduler_maxsubdepth && cj->maxdepth - cj->depth < scheduler_maxsubdepth &&
+                     sid != 0 && sid != 2 && sid != 6 && sid != 8 ) {
+                
+                    /* Make this task a sub task. */
+                    t->type = task_type_sub;
+
+                    }
+                    
+                /* Otherwise, split it. */
+                else {
+
+                    /* Take a step back (we're going to recycle the current task)... */
+                    redo = 1;
+
+                    /* For each different sorting type... */
+                    switch ( sid ) {
+
+                        case 0: /* (  1 ,  1 ,  1 ) */
+                            t->ci = ci->progeny[7]; t->cj = cj->progeny[0]; t->flags = 0;
+                            break;
+
+                        case 1: /* (  1 ,  1 ,  0 ) */
+                            t->ci = ci->progeny[6]; t->cj = cj->progeny[0]; t->flags = 1; t->tight = 1;
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[7] , cj->progeny[1] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
+                            break;
+
+                        case 2: /* (  1 ,  1 , -1 ) */
+                            t->ci = ci->progeny[6]; t->cj = cj->progeny[1]; t->flags = 2; t->tight = 1;
+                            break;
+
+                        case 3: /* (  1 ,  0 ,  1 ) */
+                            t->ci = ci->progeny[5]; t->cj = cj->progeny[0]; t->flags = 3; t->tight = 1;
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[7] , cj->progeny[2] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[2] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
+                            break;
+
+                        case 4: /* (  1 ,  0 ,  0 ) */
+                            t->ci = ci->progeny[4]; t->cj = cj->progeny[0]; t->flags = 4; t->tight = 1;
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[5] , cj->progeny[0] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[6] , cj->progeny[0] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[4] , cj->progeny[1] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[5] , cj->progeny[1] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[7] , cj->progeny[1] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[4] , cj->progeny[2] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[5] , cj->progeny[2] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[6] , cj->progeny[2] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[7] , cj->progeny[2] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[4] , cj->progeny[3] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[5] , cj->progeny[3] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[6] , cj->progeny[3] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[7] , cj->progeny[3] , 1 );
+                            break;
+
+                        case 5: /* (  1 ,  0 , -1 ) */
+                            t->ci = ci->progeny[4]; t->cj = cj->progeny[1]; t->flags = 5; t->tight = 1;
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[6] , cj->progeny[3] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[4] , cj->progeny[3] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
+                            break;
+
+                        case 6: /* (  1 , -1 ,  1 ) */
+                            t->ci = ci->progeny[5]; t->cj = cj->progeny[2]; t->flags = 6; t->tight = 1;
+                            break;
+
+                        case 7: /* (  1 , -1 ,  0 ) */
+                            t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 6; t->tight = 1;
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[5] , cj->progeny[2] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[4] , cj->progeny[2] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[5] , cj->progeny[3] , 1 );
+                            break;
+
+                        case 8: /* (  1 , -1 , -1 ) */
+                            t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 8; t->tight = 1;
+                            break;
+
+                        case 9: /* (  0 ,  1 ,  1 ) */
+                            t->ci = ci->progeny[3]; t->cj = cj->progeny[0]; t->flags = 9; t->tight = 1;
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[7] , cj->progeny[4] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[4] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
+                            break;
+
+                        case 10: /* (  0 ,  1 ,  0 ) */
+                            t->ci = ci->progeny[2]; t->cj = cj->progeny[0]; t->flags = 10; t->tight = 1;
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[3] , cj->progeny[0] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[6] , cj->progeny[0] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[2] , cj->progeny[1] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[3] , cj->progeny[1] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[7] , cj->progeny[1] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[2] , cj->progeny[4] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[3] , cj->progeny[4] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[6] , cj->progeny[4] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[7] , cj->progeny[4] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[2] , cj->progeny[5] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[3] , cj->progeny[5] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[6] , cj->progeny[5] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[7] , cj->progeny[5] , 1 );
+                            break;
+
+                        case 11: /* (  0 ,  1 , -1 ) */
+                            t->ci = ci->progeny[2]; t->cj = cj->progeny[1]; t->flags = 11; t->tight = 1;
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[6] , cj->progeny[5] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[2] , cj->progeny[5] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
+                            break;
+
+                        case 12: /* (  0 ,  0 ,  1 ) */
+                            t->ci = ci->progeny[1]; t->cj = cj->progeny[0]; t->flags = 12; t->tight = 1;
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[3] , cj->progeny[0] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[5] , cj->progeny[0] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[1] , cj->progeny[2] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[3] , cj->progeny[2] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[5] , cj->progeny[2] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[7] , cj->progeny[2] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[1] , cj->progeny[4] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[3] , cj->progeny[4] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[5] , cj->progeny[4] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[7] , cj->progeny[4] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[1] , cj->progeny[6] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[3] , cj->progeny[6] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[5] , cj->progeny[6] , 1 );
+                            t = scheduler_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[7] , cj->progeny[6] , 1 );
+                            break;
+
+                        }
+                        
+                    }
+
+                } /* split this task? */
+                
+            /* Otherwise, if not split, stitch-up the sorting. */
+            else {
+            
+                /* Create the sort for ci. */
+                // lock_lock( &ci->lock );
+                if ( ci->sorts == NULL )
+                    ci->sorts = scheduler_addtask( s , task_type_sort , 0 , 1 << sid , 0 , ci , NULL , 0 );
+                else
+                    ci->sorts->flags |= (1 << sid);
+                // lock_unlock_blind( &ci->lock );
+                task_addunlock( ci->sorts , t );
+                
+                /* Create the sort for cj. */
+                // lock_lock( &cj->lock );
+                if ( cj->sorts == NULL )
+                    cj->sorts = scheduler_addtask( s , task_type_sort , 0 , 1 << sid , 0 , cj , NULL , 0 );
+                else
+                    cj->sorts->flags |= (1 << sid);
+                // lock_unlock_blind( &cj->lock );
+                task_addunlock( cj->sorts , t );
+                
+                }
+                
+            } /* pair interaction? */
+    
+        } /* loop over all tasks. */
+        
+        }
+        
+    }
+    
+    
+/**
+ * @brief Add a #task to the #scheduler.
+ *
+ * @param s The #scheduler we are working in.
+ * @param type The type of the task.
+ * @param subtype The sub-type of the task.
+ * @param flags The flags of the task.
+ * @param wait The initial value of the task's wait counter.
+ * @param ci The first cell to interact.
+ * @param cj The second cell to interact, @c NULL for single-cell tasks.
+ * @param tight The value stored in the task's @c tight field.
+ * @return A pointer to the newly initialised #task.
+ */
+ 
+struct task *scheduler_addtask ( struct scheduler *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , int tight ) {
+
+    int ind;
+    struct task *t;
+    
+    /* Get the next free task, the arrays only hold s->size of them. */
+    ind = atomic_inc( &s->tasks_next );
+    if ( ind >= s->size )
+        error( "Task list overflow." );
+    t = &s->tasks[ ind ];
+    
+    /* Copy the data. */
+    t->type = type;
+    t->subtype = subtype;
+    t->flags = flags;
+    t->wait = wait;
+    t->ci = ci;
+    t->cj = cj;
+    t->skip = 0;
+    t->tight = tight;
+    t->nr_unlock_tasks = 0;
+    
+    /* Init the lock. */
+    lock_init( &t->lock );
+    
+    /* Add an index for it. */
+    s->tasks_ind[ atomic_inc( &s->nr_tasks ) ] = ind;
+    
+    return t;
+
+    }
+
+
+
+/** 
+ * @brief Rank the tasks by dependency layer: sets each task's rank and
+ *        reorders the index list s->tasks_ind so lower ranks come first.
+ * @param s The #scheduler.
+ */
+ 
+void scheduler_ranktasks ( struct scheduler *s ) {
+
+    int i, j = 0, k, temp, left = 0, rank;
+    struct task *t, *tasks = s->tasks;
+    int *tid = s->tasks_ind, nr_tasks = s->nr_tasks;
+    
+    /* Reset the index list and add one wait per task that unlocks a task. */
+    for ( i = 0 , k = 0 ; k < nr_tasks ; k++ ) {
+        tid[k] = k;
+        for ( j = 0 ; j < tasks[k].nr_unlock_tasks ; j++ )
+            tasks[k].unlock_tasks[j]->wait += 1;
+        }
+        
+    /* Main loop: each pass ranks one layer, tid[0..left) is already ranked. */
+    for ( j = 0 , rank = 0 ; left < nr_tasks ; rank++ ) {
+        
+        /* Swap the tids of unranked tasks with no waits into tid[left..j). */
+        for ( k = left ; k < nr_tasks ; k++ )
+            if ( tasks[ tid[k] ].wait == 0 ) {
+                temp = tid[j]; tid[j] = tid[k]; tid[k] = temp;
+                j += 1;
+                }
+                
+        /* No task without waits left means the rest can never become ready. */
+        if ( j == left )
+            error( "Unsatisfiable task dependencies detected." );
+
+        /* Rank this layer and take its waits off the tasks it unlocks. */
+        for ( i = left ; i < j ; i++ ) {
+            t = &tasks[ tid[i] ];
+            t->rank = rank;
+            tid[i] = t - tasks;
+            if ( tid[i] >= nr_tasks )
+                error( "Task index overshoot." );
+            /* Every increment made in the first loop is undone here, once
+               the task that caused it has been given its rank. */
+            for ( k = 0 ; k < t->nr_unlock_tasks ; k++ )
+                t->unlock_tasks[k]->wait -= 1;
+            }
+            
+        /* Everything up to j is now ranked. */
+        left = j;
+            
+        }
+        
+    }
+
+
+/**
+ * @brief (Re)allocate the task arrays and reset the task counters.
+ *
+ * @param s The #scheduler.
+ * @param size The maximum number of tasks in the #scheduler.
+ */
+ 
+void scheduler_reset ( struct scheduler *s , int size ) {
+
+    int qid;
+
+    /* Grow the task lists if they are too small, old contents are dropped. */
+    if ( s->size < size ) {
+
+        /* Free the existing lists, free() does nothing for NULL. */
+        free( s->tasks );
+        free( s->tasks_ind );
+
+        /* Allocate the new lists, the indices only if the tasks worked. */
+        s->tasks = (struct task *)malloc( sizeof(struct task) * size );
+        if ( s->tasks != NULL )
+            s->tasks_ind = (int *)malloc( sizeof(int) * size );
+        if ( s->tasks == NULL || s->tasks_ind == NULL )
+            error( "Failed to allocate task lists." );
+
+        }
+
+    /* Reset the counters. */
+    s->waiting = 0;
+    s->tasks_next = 0;
+    s->nr_tasks = 0;
+    s->size = size;
+
+    /* Point every queue at the current task array. */
+    for ( qid = 0 ; qid < s->nr_queues ; qid++ )
+        s->queues[qid].tasks = s->tasks;
+
+    }
+
+
+/**
+ * @brief Start the scheduler, i.e. fill the queues with ready tasks.
+ *
+ * @param s The #scheduler.
+ */
+ 
+void scheduler_start ( struct scheduler *s ) {
+
+    int tid, dep;
+    struct task *task;
+
+    /* Each task that is not skipped adds one wait to every task it unlocks. */
+    for ( tid = 0 ; tid < s->nr_tasks ; tid++ ) {
+        task = &s->tasks[tid];
+        if ( task->skip )
+            continue;
+        for ( dep = 0 ; dep < task->nr_unlock_tasks ; dep++ )
+            atomic_inc( &task->unlock_tasks[dep]->wait );
+        }
+
+    /* Enqueue every task that is neither skipped nor waiting on another. */
+    for ( tid = 0 ; tid < s->nr_tasks ; tid++ ) {
+        task = &s->tasks[tid];
+        if ( !task->skip && task->wait == 0 )
+            scheduler_enqueue( s , task );
+        }
+
+    }
+
+
+/**
+ * @brief Put a task on the queue of its cells' previous owner or, if there
+ *        is none, on the shortest queue. Skipped tasks are ignored.
+ *
+ * @param s The #scheduler.
+ * @param t The #task.
+ */
+ 
+void scheduler_enqueue ( struct scheduler *s , struct task *t ) {
+
+    int k, qid = -1, qjd;
+    
+    /* Ignore skipped tasks. */
+    if ( t->skip )
+        return;
+        
+    /* Find the previous owner for each task type. For two-cell tasks take
+       the owner with the shorter queue; a negative owner means "none" and
+       must never be used as a queue index. */
+    switch ( t->type ) {
+        case task_type_self:
+        case task_type_sort:
+        case task_type_ghost:
+        case task_type_kick2:
+            qid = t->ci->super->owner;
+            break;
+        case task_type_pair:
+        case task_type_sub:
+            qid = t->ci->super->owner;
+            qjd = ( t->cj != NULL ) ? t->cj->super->owner : -1;
+            if ( qjd >= 0 && ( qid < 0 || s->queues[qid].count > s->queues[qjd].count ) )
+                qid = qjd;
+            break;
+        }
+        
+    /* If no previous owner, find the shortest queue. */
+    if ( qid < 0 )
+        for ( qid = 0 , k = 1 ; k < s->nr_queues ; k++ )
+            if ( s->queues[k].count < s->queues[qid].count )
+                qid = k;
+                
+    /* Count the task as waiting, then insert it into that queue. */
+    atomic_inc( &s->waiting );
+    queue_insert( &s->queues[qid] , t );
+    }
+
+
+/**
+ * @brief Take care of a task's dependencies.
+ *
+ * Releases the cell locks of the finished task, hands the tasks it
+ * unlocks to the queues once they are ready, and decrements the
+ * scheduler's count of waiting tasks.
+ *
+ * @param s The #scheduler.
+ * @param t The finished #task.
+ */
+ 
+void scheduler_done ( struct scheduler *s , struct task *t ) {
+
+    int j;
+    struct task *next;
+    int one_cell = ( t->type == task_type_self || t->type == task_type_sort );
+    int two_cells = ( t->type == task_type_pair || t->type == task_type_sub );
+
+    /* Release the cell locks held by self, sort, pair and sub tasks,
+       ci before cj. Any other task type releases nothing here. */
+    if ( one_cell || two_cells )
+        cell_unlocktree( t->ci );
+    if ( two_cells && t->cj != NULL )
+        cell_unlocktree( t->cj );
+
+    /* Take one wait off every task unlocked by this one. A dependent is
+       enqueued when atomic_dec() reports 1 and it is not marked to be
+       skipped. */
+    for ( j = 0 ; j < t->nr_unlock_tasks ; j++ ) {
+        next = t->unlock_tasks[j];
+        if ( atomic_dec( &next->wait ) == 1 && !next->skip )
+            scheduler_enqueue( s , next );
+        }
+
+    /* Task definitely done. */
+    atomic_dec( &s->waiting );
+
+    }
+
+
+/**
+ * @brief Get a task, preferably from the given queue.
+ *
+ * @param s The #scheduler.
+ * @param qid The ID of the preferred #queue.
+ *
+ * @return A pointer to a #task or @c NULL once no tasks are waiting.
+ */
+ 
+struct task *scheduler_gettask ( struct scheduler *s , int qid ) {
+
+    struct task *task;
+    int other, victim, longest;
+
+    /* Keep trying for as long as tasks are waiting. */
+    while ( s->waiting > 0 ) {
+        /* First choice: the queue we were asked to use. */
+        task = queue_gettask( &s->queues[qid] , qid , 0 );
+        if ( task != NULL )
+            return task;
+
+        /* Otherwise, if allowed, steal from the longest other queue. */
+        if ( !( s->flags & scheduler_flag_steal ) )
+            continue;
+        victim = 0; longest = 0;
+        for ( other = 0 ; other < s->nr_queues ; other++ ) {
+            if ( other == qid || s->queues[other].count <= longest )
+                continue;
+            victim = other;
+            longest = s->queues[other].count;
+            }
+        if ( longest > 0 && ( task = queue_gettask( &s->queues[victim] , qid , 0 ) ) != NULL )
+            return task;
+        }
+
+    /* No milk today. */
+    return NULL;
+
+    }
+
+
+/**
+ * @brief Initialize the #scheduler.
+ *
+ * @param s The #scheduler.
+ * @param space The #space associated with this scheduler.
+ * @param nr_queues The number of queues in this scheduler.
+ * @param flags The #scheduler flags.
+ */
+ 
+void scheduler_init ( struct scheduler *s , struct space *space , int nr_queues , unsigned int flags ) {
+    
+    int qid;
+    
+    /* Init the lock. */
+    lock_init( &s->lock );
+
+    /* Allocate the queues. */
+    s->queues = (struct queue *)malloc( sizeof(struct queue) * nr_queues );
+    if ( s->queues == NULL )
+        error( "Failed to allocate queues." );
+        
+    /* Initialize each queue. */
+    for ( qid = 0 ; qid < nr_queues ; qid++ )
+        queue_init( &s->queues[qid] , NULL );
+        
+    /* Store the settings and start out without any task storage. */
+    s->space = space;
+    s->nr_queues = nr_queues;
+    s->flags = flags;
+    s->tasks = NULL;
+    s->tasks_ind = NULL;
+    s->size = 0;
+    s->nr_tasks = 0;
+    s->tasks_next = 0;
+    s->waiting = 0;
+
+    }
+
diff --git a/src/scheduler.h b/src/scheduler.h
new file mode 100644
index 0000000000000000000000000000000000000000..485c9b101b25cb699b7f24e1589884bc94986b72
--- /dev/null
+++ b/src/scheduler.h
@@ -0,0 +1,75 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ * 
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ * 
+ ******************************************************************************/
+
+
+/* Some constants. */
+#define scheduler_maxwait                    3
+#define scheduler_maxunlock                  40
+#define scheduler_dosub                      1
+#define scheduler_maxsubdepth                3
+
+/* Scheduler flags, tested bitwise against the flags field of a scheduler. */
+#define scheduler_flag_none                  0
+#define scheduler_flag_steal                 1
+
+
+
+/* Data of a scheduler. */
+struct scheduler {
+
+    /* Scheduler flags, tested bitwise against the scheduler_flag_* values. */
+    unsigned int flags;
+
+    /* Number of queues in this scheduler. */
+    int nr_queues;
+    
+    /* Array of nr_queues queues, allocated in scheduler_init. */
+    struct queue *queues;
+    
+    /* Number of task indices in use, capacity of the task arrays, next free task. */
+    int nr_tasks, size, tasks_next;
+    
+    /* Number of tasks that were enqueued and are not yet done. */
+    int waiting;
+    
+    /* The task array, (re)allocated in scheduler_reset. */
+    struct task *tasks;
+    
+    /* Indices into the task array, reordered by scheduler_ranktasks. */
+    int *tasks_ind;
+    
+    /* Lock for this scheduler. */
+    lock_type lock;
+    
+    /* The space associated with this scheduler. */
+    struct space *space;
+
+    };
+
+
+/* Function prototypes, parameter names match the definitions in scheduler.c. */
+void scheduler_init ( struct scheduler *s , struct space *space , int nr_queues , unsigned int flags );
+struct task *scheduler_gettask ( struct scheduler *s , int qid );
+void scheduler_enqueue ( struct scheduler *s , struct task *t );
+void scheduler_start ( struct scheduler *s );
+void scheduler_reset ( struct scheduler *s , int size );
+void scheduler_ranktasks ( struct scheduler *s );
+struct task *scheduler_addtask ( struct scheduler *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , int tight );
+void scheduler_splittasks ( struct scheduler *s );
+void scheduler_map_mkghosts ( struct cell *c , void *data );
+void scheduler_done ( struct scheduler *s , struct task *t );
diff --git a/src/space.c b/src/space.c
index 97fad5127eb5f72a13bb6e29ba5eab0098876912..fe719a580754f6bb17fdef34a5b68f06afc6493e 100644
--- a/src/space.c
+++ b/src/space.c
@@ -77,193 +77,6 @@ const int sortlistID[27] = {
     };
     
     
-/**
- * @brief Mark tasks to be skipped and set the sort flags accordingly.
- * 
- * @return 1 if the space has to be rebuilt, 0 otherwise.
- */
- 
-int space_marktasks ( struct space *s ) {
-
-    int k, nr_tasks = s->nr_tasks, *ind = s->tasks_ind;
-    struct task *t, *tasks = s->tasks;
-    float dt_step = s->dt_step;
-    struct cell *ci, *cj;
-    
-    /* Run through the tasks and mark as skip or not. */
-    for ( k = 0 ; k < nr_tasks ; k++ ) {
-    
-        /* Get a handle on the kth task. */
-        t = &tasks[ ind[k] ];
-        
-        /* Sort-task? Note that due to the task ranking, the sorts
-           will all come before the pairs and/or subs. */
-        if ( t->type == task_type_sort ) {
-        
-            /* Re-set the flags. */
-            t->flags = 0;
-            t->skip = 1;
-        
-            }
-        
-        /* Single-cell task? */
-        else if ( t->type == task_type_self ||
-                  t->type == task_type_ghost ||
-                ( t->type == task_type_sub && t->cj == NULL ) ) {
-             
-            /* Set this task's skip. */
-            t->skip = ( t->ci->dt_min > dt_step );
-            
-            }
-        
-        /* Pair? */
-        else if ( t->type == task_type_pair || ( t->type == task_type_sub && t->cj != NULL ) ) {
-            
-            /* Local pointers. */
-            ci = t->ci;
-            cj = t->cj;
-            
-            /* Set this task's skip. */
-            t->skip = ( ci->dt_min > dt_step && cj->dt_min > dt_step );
-            
-            /* Too much particle movement? */
-            if ( t->tight &&
-                 ( fmaxf( ci->h_max , cj->h_max ) + ci->dx_max + cj->dx_max > cj->dmin || 
-                   ci->dx_max > space_maxreldx*ci->h_max || cj->dx_max > space_maxreldx*cj->h_max ) )
-                return 1;
-                
-            /* Set the sort flags. */
-            if ( !t->skip && t->type == task_type_pair ) {
-                ci->sorts->flags |= (1 << t->flags);
-                ci->sorts->skip = 0;
-                cj->sorts->flags |= (1 << t->flags);
-                cj->sorts->skip = 0;
-                }
-                
-            }
-            
-        /* Kick2? */
-        else if ( t->type == task_type_kick2 )
-            t->skip = 0;
-            
-        /* None? */
-        else if ( t->type == task_type_none )
-            t->skip = 1;
-            
-        }
-        
-    /* All is well... */
-    return 0;
-    
-    }
-
-
-/**
- * @brief Check the integrity of the space and rebuild if necessary.
- *
- * @param s The #space.
- *
- * Runs through the tasks and marks those as "skip" which have no
- * effect for the current @c dt_max. Verifies the integrity of the
- * cell tree for those tasks and triggers a rebuild if necessary.
- */
- 
-int space_prepare ( struct space *s ) {
-
-    int k, rebuild;
-    // struct task *t;
-    // float dt_step = s->dt_step;
-    float dx_max = 0.0f;
-    // int counts[ task_type_count + 1 ];
-    ticks tic;
-    
-    /* Get the maximum displacement in the whole system. */
-    for ( k = 0 ; k < s->nr_cells ; k++ )
-        dx_max = fmaxf( dx_max , s->cells[k].dx_max );
-    // printf( "space_prepare: dx_max is %e.\n" , dx_max );
-    
-    /* Run through the tasks and mark as skip or not. */
-    // tic = getticks();
-    rebuild = space_marktasks( s );
-    // printf( "space_prepare: space_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
-        
-    /* Did this not go through? */
-    if ( rebuild ) {
-    
-        /* Re-build the space. */
-        tic = getticks();
-        space_rebuild( s , 0.0 );
-        printf( "space_prepare: space_rebuild took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
-    
-        /* Run through the tasks and mark as skip or not. */
-        // tic = getticks();
-        if ( space_marktasks( s ) )
-            error( "space_marktasks failed after space_rebuild." );
-        // printf( "space_prepare: space_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
-        
-        }
-
-    
-    /* Let whoever cares know if we rebuilt. */
-    return rebuild;
-    
-    }
-    
-    
-/** 
- * @brief Sort the tasks in topological order over all queues.
- *
- * @param s The #space.
- */
- 
-void space_ranktasks ( struct space *s ) {
-
-    int i, j = 0, k, temp, left = 0, rank;
-    struct task *t, *tasks = s->tasks;
-    int *tid = s->tasks_ind, nr_tasks = s->nr_tasks;
-    
-    /* Run throught the tasks and get all the waits right. */
-    for ( i = 0 , k = 0 ; k < nr_tasks ; k++ ) {
-        tid[k] = k;
-        for ( j = 0 ; j < tasks[k].nr_unlock_tasks ; j++ )
-            tasks[k].unlock_tasks[j]->wait += 1;
-        }
-        
-    /* Main loop. */
-    for ( j = 0 , rank = 0 ; left < nr_tasks ; rank++ ) {
-        
-        /* Load the tids of tasks with no waits. */
-        for ( k = left ; k < nr_tasks ; k++ )
-            if ( tasks[ tid[k] ].wait == 0 ) {
-                temp = tid[j]; tid[j] = tid[k]; tid[k] = temp;
-                j += 1;
-                }
-                
-        /* Did we get anything? */
-        if ( j == left )
-            error( "Unsatisfiable task dependencies detected." );
-
-        /* Unlock the next layer of tasks. */
-        for ( i = left ; i < j ; i++ ) {
-            t = &tasks[ tid[i] ];
-            t->rank = rank;
-            tid[i] = t - tasks;
-            if ( tid[i] >= nr_tasks )
-                error( "Task index overshoot." );
-            /* printf( "engine_ranktasks: task %i of type %s has rank %i.\n" , i , 
-                (t->type == task_type_self) ? "self" : (t->type == task_type_pair) ? "pair" : "sort" , rank ); */
-            for ( k = 0 ; k < t->nr_unlock_tasks ; k++ )
-                t->unlock_tasks[k]->wait -= 1;
-            }
-            
-        /* The new left (no, not tony). */
-        left = j;
-            
-        }
-        
-    }
-
-
 /**
  * @brief Get the shift-id of the given pair of cells, swapping them
  *      if need be.
@@ -509,11 +322,6 @@ void space_rebuild ( struct space *s , double cell_max ) {
         }
     // printf( "space_rebuild: space_rebuild_recurse took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
         
-    /* Now that we have the cell structre, re-build the tasks. */
-    // tic = getticks();
-    space_maketasks( s , 1 );
-    // printf( "space_rebuild: maketasks took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
-    
     }
 
 
@@ -645,44 +453,6 @@ void space_map_clearsort ( struct cell *c , void *data ) {
     }
 
 
-/**
- * @brief Mapping function to append a ghost task to each cell.
- *
- * Looks for the super cell, e.g. the highest-level cell above each
- * cell for which a pair is defined. All ghosts below this cell will
- * depend on the ghost of their parents (sounds spooky, but it isn't).
- *
- * A kick2-task is appended to each super cell.
- */
-
-void space_map_mkghosts ( struct cell *c , void *data ) {
-
-    struct space *s = (struct space *)data;
-    struct cell *finger;
-
-    /* Find the super cell, i.e. the highest cell hierarchically above
-       this one to still have at least one task associated with it. */
-    c->super = c;
-    for ( finger = c->parent ; finger != NULL ; finger = finger->parent )
-        if ( finger->nr_tasks > 0 )
-            c->super = finger;
-            
-    /* Make the ghost task */
-    if ( c->super != c || c->nr_tasks > 0 )
-        c->ghost = space_addtask( s , task_type_ghost , task_subtype_none , 0 , 0 , c , NULL , 0 );
-
-    /* Append a kick task if we are the active super cell. */
-    if ( c->super == c && c->nr_tasks > 0 )
-        c->kick2 = space_addtask( s , task_type_kick2 , task_subtype_none , 0 , 0 , c , NULL , 0 );
-    
-    /* If we are not the super cell ourselves, make our ghost depend
-       on our parent cell. */
-    if ( c->super != c )
-        task_addunlock( c->parent->ghost , c->ghost );
-        
-    }
-
-
 /**
  * @brief Map a function to all particles in a aspace.
  *
@@ -812,530 +582,6 @@ void space_map_cells_pre ( struct space *s , int full , void (*fun)( struct cell
     }
 
 
-/**
- * @brief Add a #task to the #space.
- *
- * @param s The #space we are working in.
- * @param type The type of the task.
- * @param subtype The sub-type of the task.
- * @param flags The flags of the task.
- * @param wait 
- * @param ci The first cell to interact.
- * @param cj The second cell to interact.
- * @param tight
- */
- 
-struct task *space_addtask ( struct space *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , int tight ) {
-
-    int ind;
-    struct task *t;
-    
-    /* Get the next free task. */
-    ind = atomic_inc( &s->tasks_next );
-    t = &s->tasks[ ind ];
-    
-    /* Copy the data. */
-    t->type = type;
-    t->subtype = subtype;
-    t->flags = flags;
-    t->wait = wait;
-    t->ci = ci;
-    t->cj = cj;
-    t->skip = 0;
-    t->tight = tight;
-    t->nr_unlock_tasks = 0;
-    
-    /* Init the lock. */
-    lock_init( &t->lock );
-    
-    /* Add an index for it. */
-    // lock_lock( &s->lock );
-    s->tasks_ind[ atomic_inc( &s->nr_tasks ) ] = ind;
-    // lock_unlock_blind( &s->lock );
-    
-    /* Return a pointer to the new task. */
-    return t;
-
-    }
-
-
-
-/**
- * @brief Split tasks that may be too large.
- *
- * @param s The #space we are working in.
- */
- 
-void space_splittasks ( struct space *s ) {
-
-    int j, k, ind, sid, tid = 0, redo;
-    struct cell *ci, *cj;
-    double hi, hj, shift[3];
-    struct task *t, *t_old;
-    // float dt_step = s->dt_step;
-    int pts[7][8] = { { -1 , 12 , 10 ,  9 ,  4 ,  3 ,  1 ,  0 } ,
-                      { -1 , -1 , 11 , 10 ,  5 ,  4 ,  2 ,  1 } ,
-                      { -1 , -1 , -1 , 12 ,  7 ,  6 ,  4 ,  3 } , 
-                      { -1 , -1 , -1 , -1 ,  8 ,  7 ,  5 ,  4 } ,
-                      { -1 , -1 , -1 , -1 , -1 , 12 , 10 ,  9 } ,
-                      { -1 , -1 , -1 , -1 , -1 , -1 , 11 , 10 } ,
-                      { -1 , -1 , -1 , -1 , -1 , -1 , -1 , 12 } };
-
-    /* Loop through the tasks... */
-    // #pragma omp parallel default(none) shared(s,tid,pts,space_subsize) private(ind,j,k,t,t_old,redo,ci,cj,hi,hj,sid,shift)
-    {
-    redo = 0; t_old = t = NULL;
-    while ( 1 ) {
-    
-        /* Get a pointer on the task. */
-        if ( redo ) {
-            redo = 0;
-            t = t_old;
-            }
-        else {
-            if ( ( ind = atomic_inc( &tid ) ) < s->nr_tasks )
-                t_old = t = &s->tasks[ s->tasks_ind[ ind ] ];
-            else
-                break;
-            }
-        
-        /* Empty task? */
-        if ( t->ci == NULL || ( t->type == task_type_pair && t->cj == NULL ) ) {
-            t->type = task_type_none;
-            t->skip = 1;
-            continue;
-            }
-        
-        /* Self-interaction? */
-        if ( t->type == task_type_self ) {
-        
-            /* Get a handle on the cell involved. */
-            ci = t->ci;
-            
-            /* Ingore this task? */
-            /* if ( ci->dt_min > dt_step ) {
-                t->skip = 1;
-                continue;
-                } */
-            
-            /* Is this cell even split? */
-            if ( ci->split ) {
-            
-            /* Make a sub? */
-            if ( space_dosub && ci->count < space_subsize && ci->maxdepth - ci->depth < space_maxsubdepth ) {
-            
-                /* convert to a self-subtask. */
-                t->type = task_type_sub;
-                
-                }
-                
-            /* Otherwise, make tasks explicitly. */
-            else {
-            
-                /* Take a step back (we're going to recycle the current task)... */
-                redo = 1;
-
-                /* Add the self taks. */
-                for ( k = 0 ; ci->progeny[k] == NULL ; k++ );
-                t->ci = ci->progeny[k];
-                for ( k += 1 ; k < 8 ; k++ )
-                    if ( ci->progeny[k] != NULL )
-                        space_addtask( s , task_type_self , task_subtype_density , 0 , 0 , ci->progeny[k] , NULL , 0 );
-            
-                /* Make a task for each pair of progeny. */
-                for ( j = 0 ; j < 8 ; j++ )
-                    if ( ci->progeny[j] != NULL )
-                        for ( k = j + 1 ; k < 8 ; k++ )
-                            if ( ci->progeny[k] != NULL )
-                                space_addtask( s , task_type_pair , task_subtype_density , pts[j][k] , 0 , ci->progeny[j] , ci->progeny[k] , 0 );
-                }
-
-                }
-        
-            }
-    
-        /* Pair interaction? */
-        else if ( t->type == task_type_pair ) {
-            
-            /* Get a handle on the cells involved. */
-            ci = t->ci;
-            cj = t->cj;
-            hi = ci->dmin;
-            hj = cj->dmin;
-
-            /* Ingore this task? */
-            /* if ( ci->dt_min > dt_step && cj->dt_min > dt_step ) {
-                t->skip = 1;
-                continue;
-                } */
-            
-            /* Get the sort ID, use space_getsid and not t->flags
-               to make sure we get ci and cj swapped if needed. */
-            sid = space_getsid( s , &ci , &cj , shift );
-                
-            /* Should this task be split-up? */
-            if ( ci->split && cj->split &&
-                 ci->h_max*kernel_gamma*space_stretch < hi/2 &&
-                 cj->h_max*kernel_gamma*space_stretch < hj/2 ) {
-                 
-                /* Replace by a single sub-task? */
-                if ( space_dosub &&
-                     ci->count < space_subsize && cj->count < space_subsize &&
-                     ci->maxdepth - ci->depth < space_maxsubdepth && cj->maxdepth - cj->depth < space_maxsubdepth &&
-                     sid != 0 && sid != 2 && sid != 6 && sid != 8 ) {
-                
-                    /* Make this task a sub task. */
-                    t->type = task_type_sub;
-
-                    }
-                    
-                /* Otherwise, split it. */
-                else {
-
-                    /* Take a step back (we're going to recycle the current task)... */
-                    redo = 1;
-
-                    /* For each different sorting type... */
-                    switch ( sid ) {
-
-                        case 0: /* (  1 ,  1 ,  1 ) */
-                            t->ci = ci->progeny[7]; t->cj = cj->progeny[0]; t->flags = 0;
-                            break;
-
-                        case 1: /* (  1 ,  1 ,  0 ) */
-                            t->ci = ci->progeny[6]; t->cj = cj->progeny[0]; t->flags = 1; t->tight = 1;
-                            t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[7] , cj->progeny[1] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
-                            break;
-
-                        case 2: /* (  1 ,  1 , -1 ) */
-                            t->ci = ci->progeny[6]; t->cj = cj->progeny[1]; t->flags = 2; t->tight = 1;
-                            break;
-
-                        case 3: /* (  1 ,  0 ,  1 ) */
-                            t->ci = ci->progeny[5]; t->cj = cj->progeny[0]; t->flags = 3; t->tight = 1;
-                            t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[7] , cj->progeny[2] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[5] , cj->progeny[2] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
-                            break;
-
-                        case 4: /* (  1 ,  0 ,  0 ) */
-                            t->ci = ci->progeny[4]; t->cj = cj->progeny[0]; t->flags = 4; t->tight = 1;
-                            t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[5] , cj->progeny[0] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[6] , cj->progeny[0] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[4] , cj->progeny[1] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[5] , cj->progeny[1] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[7] , cj->progeny[1] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[4] , cj->progeny[2] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[5] , cj->progeny[2] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[6] , cj->progeny[2] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[7] , cj->progeny[2] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[4] , cj->progeny[3] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[5] , cj->progeny[3] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[6] , cj->progeny[3] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 4 , 0 , ci->progeny[7] , cj->progeny[3] , 1 );
-                            break;
-
-                        case 5: /* (  1 ,  0 , -1 ) */
-                            t->ci = ci->progeny[4]; t->cj = cj->progeny[1]; t->flags = 5; t->tight = 1;
-                            t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[6] , cj->progeny[3] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[4] , cj->progeny[3] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
-                            break;
-
-                        case 6: /* (  1 , -1 ,  1 ) */
-                            t->ci = ci->progeny[5]; t->cj = cj->progeny[2]; t->flags = 6; t->tight = 1;
-                            break;
-
-                        case 7: /* (  1 , -1 ,  0 ) */
-                            t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 6; t->tight = 1;
-                            t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[5] , cj->progeny[2] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[4] , cj->progeny[2] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[5] , cj->progeny[3] , 1 );
-                            break;
-
-                        case 8: /* (  1 , -1 , -1 ) */
-                            t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 8; t->tight = 1;
-                            break;
-
-                        case 9: /* (  0 ,  1 ,  1 ) */
-                            t->ci = ci->progeny[3]; t->cj = cj->progeny[0]; t->flags = 9; t->tight = 1;
-                            t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[7] , cj->progeny[4] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[3] , cj->progeny[4] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
-                            break;
-
-                        case 10: /* (  0 ,  1 ,  0 ) */
-                            t->ci = ci->progeny[2]; t->cj = cj->progeny[0]; t->flags = 10; t->tight = 1;
-                            t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[3] , cj->progeny[0] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[6] , cj->progeny[0] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[2] , cj->progeny[1] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[3] , cj->progeny[1] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 7 , 0 , ci->progeny[7] , cj->progeny[1] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[2] , cj->progeny[4] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[3] , cj->progeny[4] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[6] , cj->progeny[4] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[7] , cj->progeny[4] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[2] , cj->progeny[5] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 1 , 0 , ci->progeny[3] , cj->progeny[5] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[6] , cj->progeny[5] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 10 , 0 , ci->progeny[7] , cj->progeny[5] , 1 );
-                            break;
-
-                        case 11: /* (  0 ,  1 , -1 ) */
-                            t->ci = ci->progeny[2]; t->cj = cj->progeny[1]; t->flags = 11; t->tight = 1;
-                            t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[6] , cj->progeny[5] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[2] , cj->progeny[5] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[6] , cj->progeny[1] , 1 );
-                            break;
-
-                        case 12: /* (  0 ,  0 ,  1 ) */
-                            t->ci = ci->progeny[1]; t->cj = cj->progeny[0]; t->flags = 12; t->tight = 1;
-                            t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[3] , cj->progeny[0] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[5] , cj->progeny[0] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 2 , 0 , ci->progeny[7] , cj->progeny[0] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[1] , cj->progeny[2] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[3] , cj->progeny[2] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 8 , 0 , ci->progeny[5] , cj->progeny[2] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 5 , 0 , ci->progeny[7] , cj->progeny[2] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[1] , cj->progeny[4] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 6 , 0 , ci->progeny[3] , cj->progeny[4] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[5] , cj->progeny[4] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 11 , 0 , ci->progeny[7] , cj->progeny[4] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 0 , 0 , ci->progeny[1] , cj->progeny[6] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 3 , 0 , ci->progeny[3] , cj->progeny[6] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 9 , 0 , ci->progeny[5] , cj->progeny[6] , 1 );
-                            t = space_addtask( s , task_type_pair , t->subtype , 12 , 0 , ci->progeny[7] , cj->progeny[6] , 1 );
-                            break;
-
-                        }
-                        
-                    }
-
-                } /* split this task? */
-                
-            /* Otherwise, if not spilt, stitch-up the sorting. */
-            else {
-            
-                /* Create the sort for ci. */
-                // lock_lock( &ci->lock );
-                if ( ci->sorts == NULL )
-                    ci->sorts = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , ci , NULL , 0 );
-                ci->sorts->flags |= (1 << sid);
-                // lock_unlock_blind( &ci->lock );
-                task_addunlock( ci->sorts , t );
-                
-                /* Create the sort for cj. */
-                // lock_lock( &cj->lock );
-                if ( cj->sorts == NULL )
-                    cj->sorts = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , cj , NULL , 0 );
-                cj->sorts->flags |= (1 << sid);
-                // lock_unlock_blind( &cj->lock );
-                task_addunlock( cj->sorts , t );
-                
-                }
-                
-            } /* pair interaction? */
-    
-        } /* loop over all tasks. */
-        
-        }
-        
-    }
-    
-    
-/**
- * @brief Fill the #space's task list.
- *
- * @param s The #space we are working in.
- * @param do_sort Flag to add sorting tasks to the list.
- */
- 
-void space_maketasks ( struct space *s , int do_sort ) {
-
-    int i, j, k, ii, jj, kk, iii, jjj, kkk, cid, cjd, sid;
-    int *cdim = s->cdim;
-    struct task *t, *t2;
-    struct cell *ci, *cj;
-
-    /* Allocate the task-list, if needed. */
-    if ( s->tasks == NULL || s->tasks_size < s->tot_cells * space_maxtaskspercell ) {
-        if ( s->tasks != NULL )
-            free( s->tasks );
-        if ( s->tasks_ind != NULL )
-            free( s->tasks_ind );
-        s->tasks_size = s->tot_cells * space_maxtaskspercell;
-        if ( posix_memalign( (void *)&s->tasks , 64 , sizeof(struct task) * s->tasks_size ) != 0 )
-            error( "Failed to allocate task list." );
-        if ( ( s->tasks_ind = (int *)malloc( sizeof(int) * s->tasks_size ) ) == NULL )
-            error( "Failed to allocate task indices." );
-        }
-    s->nr_tasks = 0;
-    s->tasks_next = 0;
-    
-    /* Run through the highest level of cells and add pairs. */
-    for ( i = 0 ; i < cdim[0] ; i++ )
-        for ( j = 0 ; j < cdim[1] ; j++ )
-            for ( k = 0 ; k < cdim[2] ; k++ ) {
-                cid = cell_getid( cdim , i , j , k );
-                if ( s->cells[cid].count == 0 )
-                    continue;
-                ci = &s->cells[cid];
-                if ( ci->count == 0 )
-                    continue;
-                space_addtask( s , task_type_self , task_subtype_density , 0 , 0 , ci , NULL , 0 );
-                for ( ii = -1 ; ii < 2 ; ii++ ) {
-                    iii = i + ii;
-                    if ( !s->periodic && ( iii < 0 || iii >= cdim[0] ) )
-                        continue;
-                    iii = ( iii + cdim[0] ) % cdim[0];
-                    for ( jj = -1 ; jj < 2 ; jj++ ) {
-                        jjj = j + jj;
-                        if ( !s->periodic && ( jjj < 0 || jjj >= cdim[1] ) )
-                            continue;
-                        jjj = ( jjj + cdim[1] ) % cdim[1];
-                        for ( kk = -1 ; kk < 2 ; kk++ ) {
-                            kkk = k + kk;
-                            if ( !s->periodic && ( kkk < 0 || kkk >= cdim[2] ) )
-                                continue;
-                            kkk = ( kkk + cdim[2] ) % cdim[2];
-                            cjd = cell_getid( cdim , iii , jjj , kkk );
-                            cj = &s->cells[cjd];
-                            if ( cid >= cjd || cj->count == 0 )
-                                continue;
-                            sid = sortlistID[ (kk+1) + 3*( (jj+1) + 3*(ii+1) ) ];
-                            t = space_addtask( s , task_type_pair , task_subtype_density , sid , 0 , ci , cj , 1 );
-                            }
-                        }
-                    }
-                }
-
-    /* Split the tasks. */
-    space_splittasks( s );
-    
-    /* Count the number of tasks associated with each cell and
-       store the density tasks in each cell, and make each sort
-       depend on the sorts of its progeny. */
-    // #pragma omp parallel for private(t,j)
-    for ( k = 0 ; k < s->nr_tasks ; k++ ) {
-        t = &s->tasks[k];
-        if ( t->skip )
-            continue;
-        if ( t->type == task_type_sort && t->ci->split )
-            for ( j = 0 ; j < 8 ; j++ ) {
-                if ( t->ci->progeny[j] == NULL )
-                    continue;
-                if ( t->ci->progeny[j]->sorts == NULL )
-                    t->ci->progeny[j]->sorts = space_addtask( s , task_type_sort , task_subtype_none , t->flags , 0 , t->ci->progeny[j] , NULL , 0 );
-                t->ci->progeny[j]->sorts->skip = 0;
-                task_addunlock( t->ci->progeny[j]->sorts , t );
-                }
-        if ( t->type == task_type_self ) {
-            atomic_inc( &t->ci->nr_tasks );
-            if ( t->subtype == task_subtype_density ) {
-                t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t;
-                }
-            }
-        else if ( t->type == task_type_pair ) {
-            atomic_inc( &t->ci->nr_tasks );
-            atomic_inc( &t->cj->nr_tasks );
-            if ( t->subtype == task_subtype_density ) {
-                t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t;
-                t->cj->density[ atomic_inc( &t->cj->nr_density ) ] = t;
-                }
-            }
-        else if ( t->type == task_type_sub ) {
-            atomic_inc( &t->ci->nr_tasks );
-            if ( t->cj != NULL )
-                atomic_inc( &t->cj->nr_tasks );
-            if ( t->subtype == task_subtype_density ) {
-                t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t;
-                if ( t->cj != NULL )
-                    t->cj->density[ atomic_inc( &t->cj->nr_density ) ] = t;
-                }
-            }
-        }
-        
-    /* Append a ghost task to each cell. */
-    space_map_cells_pre( s , 1 , &space_map_mkghosts , s );
-    
-    /* Run through the tasks and make force tasks for each density task.
-       Each force task depends on the cell ghosts and unlocks the kick2 task
-       of its super-cell. */
-    kk = s->nr_tasks;
-    // #pragma omp parallel for private(t,t2)
-    for ( k = 0 ; k < kk ; k++ ) {
-    
-        /* Get a pointer to the task. */
-        t = &s->tasks[k];
-        
-        /* Skip? */
-        if ( t->skip )
-            continue;
-        
-        /* Self-interaction? */
-        if ( t->type == task_type_self && t->subtype == task_subtype_density ) {
-            task_addunlock( t , t->ci->super->ghost );
-            t2 = space_addtask( s , task_type_self , task_subtype_force , 0 , 0 , t->ci , NULL , 0 );
-            task_addunlock( t->ci->ghost , t2 );
-            task_addunlock( t2 , t->ci->super->kick2 );
-            }
-            
-        /* Otherwise, pair interaction? */
-        else if ( t->type == task_type_pair && t->subtype == task_subtype_density ) {
-            task_addunlock( t , t->ci->super->ghost );
-            if ( t->ci->super != t->cj->super )
-                task_addunlock( t , t->cj->super->ghost );
-            t2 = space_addtask( s , task_type_pair , task_subtype_force , 0 , 0 , t->ci , t->cj , 0 );
-            task_addunlock( t->ci->ghost , t2 );
-            task_addunlock( t->cj->ghost , t2 );
-            task_addunlock( t2 , t->ci->super->kick2 );
-            if ( t->ci->super != t->cj->super )
-                task_addunlock( t2 , t->cj->super->kick2 );
-            }
-    
-        /* Otherwise, sub interaction? */
-        else if ( t->type == task_type_sub && t->subtype == task_subtype_density ) {
-            task_addunlock( t , t->ci->super->ghost );
-            if ( t->cj != NULL && t->ci->super != t->cj->super )
-                task_addunlock( t , t->cj->super->ghost );
-            t2 = space_addtask( s , task_type_sub , task_subtype_force , t->flags , 0 , t->ci , t->cj , 0 );
-            task_addunlock( t->ci->ghost , t2 );
-            if ( t->cj != NULL )
-                task_addunlock( t->cj->ghost , t2 );
-            task_addunlock( t2 , t->ci->super->kick2 );
-            if ( t->cj != NULL && t->ci->super != t->cj->super )
-                task_addunlock( t2 , t->cj->super->kick2 );
-            }
-            
-        }
-        
-    /* Rank the tasks. */
-    space_ranktasks( s );
-            
-    /* Count the number of each task type. */
-    int counts[ task_type_count+1 ];
-    for ( k = 0 ; k <= task_type_count ; k++ )
-        counts[k] = 0;
-    for ( k = 0 ; k < s->nr_tasks ; k++ )
-        if ( !s->tasks[k].skip )
-            counts[ (int)s->tasks[k].type ] += 1;
-        else
-            counts[ task_type_count ] += 1;
-    printf( "space_maketasks: task counts are [ %s=%i" , taskID_names[0] , counts[0] );
-    for ( k = 1 ; k < task_type_count ; k++ )
-        printf( " %s=%i" , taskID_names[k] , counts[k] );
-    printf( " skipped=%i ]\n" , counts[ task_type_count ] ); fflush(stdout); 
-    
-    }
-    
-    
-
 /**
  * @brief Split cells that contain too many particles.
  *
@@ -1428,6 +674,7 @@ void space_split ( struct space *s , struct cell *c ) {
             xp->x_old[1] = x[1] = p->x[1];
             xp->x_old[2] = x[2] = p->x[2];
             dt = p->dt;
+            h = p->h;
             if ( h > h_max )
                 h_max = h;
             if ( dt < dt_min )
diff --git a/src/space.h b/src/space.h
index 0e1b504710c70a0e4796880ba89582991582ba17..6e31cd4397da07c01100e3e1766e33161b76bc4b 100644
--- a/src/space.h
+++ b/src/space.h
@@ -26,8 +26,6 @@
 #define space_splitratio                0.875f
 #define space_splitsize_default         400
 #define space_subsize_default           5000
-#define space_maxsubdepth               3
-#define space_dosub                     1
 #define space_stretch                   1.05f
 #define space_maxtaskspercell           31
 #define space_maxreldx                  0.2f
@@ -92,12 +90,6 @@ struct space {
     /* Is the space periodic? */
     int periodic;
     
-    /* The list of tasks. */
-    struct task *tasks;
-    int nr_tasks, tasks_next;
-    int tasks_size;
-    int *tasks_ind;
-    
     /* General-purpose lock for this space. */
     lock_type lock;
     
@@ -105,22 +97,14 @@ struct space {
 
 
 /* function prototypes. */
-void space_addsorts ( struct space *s , struct task *t , struct cell *ci , struct cell *cj , int sid );
 void parts_sort ( struct part *parts , int *ind , int N , int min , int max );
 struct cell *space_getcell ( struct space *s );
-struct task *space_gettask ( struct space *s );
-struct task *space_addtask ( struct space *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , int tight );
 int space_getsid ( struct space *s , struct cell **ci , struct cell **cj , double *shift );
 void space_init ( struct space *s , double dim[3] , struct part *parts , int N , int periodic , double h_max );
-void space_maketasks ( struct space *s , int do_sort );
 void space_map_cells_pre ( struct space *s , int full , void (*fun)( struct cell *c , void *data ) , void *data );
 void space_map_parts ( struct space *s , void (*fun)( struct part *p , struct cell *c , void *data ) , void *data );
 void space_map_cells_post ( struct space *s , int full , void (*fun)( struct cell *c , void *data ) , void *data );
-int space_prepare ( struct space *s );
-void space_ranktasks ( struct space *s );
 void space_rebuild ( struct space *s , double h_max );
 void space_recycle ( struct space *s , struct cell *c );
 void space_split ( struct space *s , struct cell *c );
 
-
-
diff --git a/src/swift.h b/src/swift.h
index e30c1b151709480cefab175e48fa996c30bb1d4b..bb310cbd6924ed78b63c990fb7bf3907959d8354 100644
--- a/src/swift.h
+++ b/src/swift.h
@@ -27,6 +27,7 @@
 #include "atomic.h"
 #include "lock.h"
 #include "task.h"
+#include "scheduler.h"
 #include "part.h"
 #include "cell.h"
 #include "space.h"