diff --git a/src/cell.h b/src/cell.h
index 23565c57694672fd5dd61e331c5d046c1b4727d7..27c4242ffdeac5312863d561f58daa8ead16d93b 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -41,6 +41,9 @@ struct cell {
     /* Max radii in this cell. */
     double h_max;
     
+    /* Minimum and maximum dt in this cell. */
+    double dt_min, dt_max;
+    
     /* The depth of this cell in the tree. */
     int depth, split;
     
diff --git a/src/engine.c b/src/engine.c
index c1af8aa610e4c8822de35d2edacdeece251b4a03..53b62c81e16d953f144e7806c72c8c625e776452 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -76,6 +76,10 @@ void engine_prepare ( struct engine *e , int force ) {
         /* Clear the queues. */
         for ( k = 0 ; k < e->nr_queues ; k++ )
             e->queues[k].count = 0;
+            
+        /* Re-allocate the queue buffers? */
+        for ( k = 0 ; k < e->nr_queues ; k++ )
+            queue_init( &e->queues[k] , s->nr_tasks , s->tasks );
         
         /* Fill the queues (round-robin). */
         for ( k = 0 ; k < s->nr_tasks ; k++ ) {
@@ -213,7 +217,7 @@ void engine_barrier( struct engine *e ) {
  * @param sort_queues Flag to try to sort the queues topologically.
  */
  
-void engine_run ( struct engine *e , int sort_queues ) {
+void engine_run ( struct engine *e , int sort_queues , float dt_max ) {
 
     int k;
 
@@ -225,6 +229,10 @@ void engine_run ( struct engine *e , int sort_queues ) {
             e->queues[k].next = 0;
             }
         }
+        
+    /* Set the maximum dt. */
+    e->dt_max = dt_max;
+    e->s->dt_max = dt_max;
     
     /* Cry havoc and let loose the dogs of war. */
     e->barrier_count = -e->barrier_count;
diff --git a/src/engine.h b/src/engine.h
index e7ad72d52cb788814094b7818bcdc3a3d32e032c..d3319f2d81ed91c0ddfae02710fc6827ff95f7e5 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -50,6 +50,9 @@ struct engine {
     /* The queues. */
     struct queue *queues;
     
+    /* The maximum dt to step. */
+    float dt_max;
+    
     /* Data for the threads' barrier. */
     pthread_mutex_t barrier_mutex;
     pthread_cond_t barrier_cond;
@@ -63,4 +66,4 @@ void engine_barrier( struct engine *e );
 void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy );
 void engine_prepare ( struct engine *e , int force );
 void engine_ranktasks ( struct engine *e );
-void engine_run ( struct engine *e , int sort_queues );
+void engine_run ( struct engine *e , int sort_queues , float dt_max );
diff --git a/src/ic.c b/src/ic.c
index 853311484b75b299d81b62839d88a7515d5a699c..c8cdfe8880b1d364314417c17db1528a88698a21 100644
--- a/src/ic.c
+++ b/src/ic.c
@@ -278,6 +278,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts,  int* N, int*
   /* Allocate memory to store particles */
   if(posix_memalign( (void*)parts , 32 , *N * sizeof(struct part)) != 0)
     error("Error while allocating memory for particles");
+  bzero( *parts , *N * sizeof(struct part) );
 
   printf("read_ic: Allocated %8.2f MB for particles.\n", *N * sizeof(struct part) / (1024.*1024.));
 		  
diff --git a/src/part.h b/src/part.h
index 4caf61ae29f493597ce9ee46183d4fad8b6ae857..32adceb9d058d0f52e91633689c8751bba680a00 100644
--- a/src/part.h
+++ b/src/part.h
@@ -48,9 +48,6 @@ struct part {
     /* Particle velocity. */
     float v[3];
     
-    /* Particle acceleration. */
-    float a[3];
-    
     /* Particle density. */
     float rho;
     
@@ -69,6 +66,9 @@ struct part {
     /* Derivative of the density with respect to this particle's smoothing length. */
     float rho_dh;
     
+    /* Particle acceleration. */
+    float a[3];
+    
     /* Particle number density. */
     // int icount;
     float wcount;
diff --git a/src/queue.c b/src/queue.c
index a507828bf6e88cd010cb1ab4db005a19f1b6bb77..272582519a7e08dc21ba45887c25c8d98cca8bdd 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -124,10 +124,16 @@ void queue_insert ( struct queue *q , struct task *t ) {
  
 void queue_init ( struct queue *q , int size , struct task *tasks ) {
     
-    /* Allocate the task list. */
-    q->size = size;
-    if ( ( q->tid = (int *)malloc( sizeof(int) * size ) ) == NULL )
-        error( "Failed to allocate queue tids." );
+    /* Allocate the task list if needed. */
+    if ( q->tid == NULL || q->size < size ) {
+        if ( q->tid != NULL )
+            free( q->tid );
+        q->size = size;
+        if ( ( q->tid = (int *)malloc( sizeof(int) * size ) ) == NULL )
+            error( "Failed to allocate queue tids." );
+        }
+        
+    /* Set the tasks pointer. */
     q->tasks = tasks;
         
     /* Init counters. */
diff --git a/src/runner.c b/src/runner.c
index 353bb90a7b8e83987a8da80b670e6f6e557650b1..21c4135f44f2b394f248c8e5aae875c993cc33e7 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -325,6 +325,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
     int i, k, redo, count = c->count;
     int *pid;
     float ihg, ihg2;
+    float dt_max = r->e->dt_max;
     TIMER_TIC
     
     /* Recurse? */
@@ -352,28 +353,42 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
 
             /* Get a direct pointer on the part. */
             p = &c->parts[ pid[i] ];
-
-            /* Adjust the computed rho. */
-            ihg = kernel_igamma / p->h;
-            ihg2 = ihg * ihg;
-            p->rho *= ihg * ihg2;
-            p->rho_dh *= ihg2 * ihg2;
             
-            /* Update the smoothing length. */
-            p->h -= ( p->wcount + kernel_root - const_nwneigh ) / p->wcount_dh;
-
-            /* Did we get the right number density? */
-            if ( p->wcount + kernel_root > const_nwneigh + 1 ||
-                 p->wcount + kernel_root < const_nwneigh - 1 ) {
-                // printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%f.\n" , p->id , p->h , c->depth , p->wcount + kernel_root ); fflush(stdout);
-                // p->h += ( p->wcount + kernel_root - const_nwneigh ) / p->wcount_dh;
-                pid[redo] = pid[i];
-                redo += 1;
-                p->wcount = 0.0;
-                p->wcount_dh = 0.0;
-                p->rho = 0.0;
-                p->rho_dh = 0.0;
-                continue;
+            /* Is this part within the timestep? */
+            if ( p->dt <= dt_max ) {
+
+                /* Adjust the computed rho. */
+                ihg = kernel_igamma / p->h;
+                ihg2 = ihg * ihg;
+                p->rho *= ihg * ihg2;
+                p->rho_dh *= ihg2 * ihg2;
+
+                /* Update the smoothing length. */
+                p->h -= ( p->wcount + kernel_root - const_nwneigh ) / p->wcount_dh;
+
+                /* Did we get the right number density? */
+                if ( p->wcount + kernel_root > const_nwneigh + 1 ||
+                     p->wcount + kernel_root < const_nwneigh - 1 ) {
+                    // printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%f.\n" , p->id , p->h , c->depth , p->wcount + kernel_root ); fflush(stdout);
+                    // p->h += ( p->wcount + kernel_root - const_nwneigh ) / p->wcount_dh;
+                    pid[redo] = pid[i];
+                    redo += 1;
+                    p->wcount = 0.0;
+                    p->wcount_dh = 0.0;
+                    p->rho = 0.0;
+                    p->rho_dh = 0.0;
+                    continue;
+                    }
+                    
+                /* Compute this particle's time step. */
+                p->dt = const_cfl * p->h / sqrtf( const_gamma * ( const_gamma - 1.0f ) * p->u );
+
+                /* Compute the pressure. */
+                // p->P = p->rho * p->u * ( const_gamma - 1.0f );
+
+                /* Compute the P/Omega/rho2. */
+                p->POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
+
                 }
 
             /* Reset the acceleration. */
@@ -384,15 +399,6 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
             p->u_dt = 0.0f;
             p->h_dt = 0.0f;
 
-            /* Compute this particle's time step. */
-            p->dt = const_cfl * p->h / sqrtf( const_gamma * ( const_gamma - 1.0f ) * p->u );
-
-            /* Compute the pressure. */
-            // p->P = p->rho * p->u * ( const_gamma - 1.0f );
-
-            /* Compute the P/Omega/rho2. */
-            p->POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
-
             }
             
         /* Re-set the counter for the next loop (potentially). */
@@ -570,18 +576,18 @@ void *runner_main ( void *data ) {
             switch ( t->type ) {
                 case task_type_self:
                     if ( t->subtype == task_subtype_density )
-                        runner_doself_density( r , ci );
+                        runner_doself1_density( r , ci );
                     else if ( t->subtype == task_subtype_force )
-                        runner_doself_force( r , ci );
+                        runner_doself2_force( r , ci );
                     else
                         error( "Unknown task subtype." );
                     cell_unlocktree( ci );
                     break;
                 case task_type_pair:
                     if ( t->subtype == task_subtype_density )
-                        runner_dopair_density( r , ci , cj );
+                        runner_dopair1_density( r , ci , cj );
                     else if ( t->subtype == task_subtype_force )
-                        runner_dopair_force( r , ci , cj );
+                        runner_dopair2_force( r , ci , cj );
                     else
                         error( "Unknown task subtype." );
                     cell_unlocktree( ci );
@@ -592,9 +598,9 @@ void *runner_main ( void *data ) {
                     break;
                 case task_type_sub:
                     if ( t->subtype == task_subtype_density )
-                        runner_dosub_density( r , ci , cj , t->flags );
+                        runner_dosub1_density( r , ci , cj , t->flags );
                     else if ( t->subtype == task_subtype_force )
-                        runner_dosub_force( r , ci , cj , t->flags );
+                        runner_dosub2_force( r , ci , cj , t->flags );
                     else
                         error( "Unknown task subtype." );
                     cell_unlocktree( ci );
diff --git a/src/runner.h b/src/runner.h
index 05906de43a650f83cb952b6d92d06987178ba424..9763a2ae50351427aabe8c01c28ae37d47a6b074 100644
--- a/src/runner.h
+++ b/src/runner.h
@@ -75,6 +75,7 @@ enum {
     runner_counter_steal_stall,
     runner_counter_steal_empty,
     runner_counter_keep,
+    runner_counter_iact,
     runner_counter_count,
     };
 extern int runner_counter[ runner_counter_count ];
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index d97728ff685e5ce4e23acd0e3be568ff29e34e23..fd4dd880116c06a225311df865775f4b765dcbc5 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -26,53 +26,65 @@
 
 #define PASTE(x,y) x ## _ ## y
 
-#define DOPAIR2(f) PASTE(runner_dopair,f)
-#define DOPAIR DOPAIR2(FUNCTION)
+#define _DOPAIR1(f) PASTE(runner_dopair1,f)
+#define DOPAIR1 _DOPAIR1(FUNCTION)
 
-#define DOPAIR_SUBSET2(f) PASTE(runner_dopair_subset,f)
-#define DOPAIR_SUBSET DOPAIR_SUBSET2(FUNCTION)
+#define _DOPAIR2(f) PASTE(runner_dopair2,f)
+#define DOPAIR2 _DOPAIR2(FUNCTION)
 
-#define DOPAIR_NAIVE2(f) PASTE(runner_dopair_naive,f)
-#define DOPAIR_NAIVE DOPAIR_NAIVE2(FUNCTION)
+#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset,f)
+#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)
 
-#define DOSELF2(f) PASTE(runner_doself,f)
-#define DOSELF DOSELF2(FUNCTION)
+#define _DOPAIR_NAIVE(f) PASTE(runner_dopair_naive,f)
+#define DOPAIR_NAIVE _DOPAIR_NAIVE(FUNCTION)
 
-#define DOSELF_SUBSET2(f) PASTE(runner_doself_subset,f)
-#define DOSELF_SUBSET DOSELF_SUBSET2(FUNCTION)
+#define _DOSELF_NAIVE(f) PASTE(runner_doself_naive,f)
+#define DOSELF_NAIVE _DOSELF_NAIVE(FUNCTION)
 
-#define DOSUB2(f) PASTE(runner_dosub,f)
-#define DOSUB DOSUB2(FUNCTION)
+#define _DOSELF1(f) PASTE(runner_doself1,f)
+#define DOSELF1 _DOSELF1(FUNCTION)
 
-#define DOSUB_SUBSET2(f) PASTE(runner_dosub_subset,f)
-#define DOSUB_SUBSET DOSUB_SUBSET2(FUNCTION)
+#define _DOSELF2(f) PASTE(runner_doself2,f)
+#define DOSELF2 _DOSELF2(FUNCTION)
 
-#define IACT_NONSYM2(f) PASTE(runner_iact_nonsym,f)
-#define IACT_NONSYM IACT_NONSYM2(FUNCTION)
+#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset,f)
+#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)
 
-#define IACT2(f) PASTE(runner_iact,f)
-#define IACT IACT2(FUNCTION)
+#define _DOSUB1(f) PASTE(runner_dosub1,f)
+#define DOSUB1 _DOSUB1(FUNCTION)
 
-#define TIMER_DOSELF2(f) PASTE(runner_timer_doself,f)
-#define TIMER_DOSELF TIMER_DOSELF2(FUNCTION)
+#define _DOSUB2(f) PASTE(runner_dosub2,f)
+#define DOSUB2 _DOSUB2(FUNCTION)
 
-#define TIMER_DOPAIR2(f) PASTE(runner_timer_dopair,f)
-#define TIMER_DOPAIR TIMER_DOPAIR2(FUNCTION)
+#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset,f)
+#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)
 
-#define TIMER_DOSUB2(f) PASTE(runner_timer_dosub,f)
-#define TIMER_DOSUB TIMER_DOSUB2(FUNCTION)
+#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym,f)
+#define IACT_NONSYM _IACT_NONSYM(FUNCTION)
 
-#define TIMER_DOSELF_SUBSET2(f) PASTE(runner_timer_doself_subset,f)
-#define TIMER_DOSELF_SUBSET TIMER_DOSELF_SUBSET2(FUNCTION)
+#define _IACT(f) PASTE(runner_iact,f)
+#define IACT _IACT(FUNCTION)
 
-#define TIMER_DOPAIR_SUBSET2(f) PASTE(runner_timer_dopair_subset,f)
-#define TIMER_DOPAIR_SUBSET TIMER_DOPAIR_SUBSET2(FUNCTION)
+#define _TIMER_DOSELF(f) PASTE(runner_timer_doself,f)
+#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)
 
-#define IACT_NONSYM_VEC2(f) PASTE(runner_iact_nonsym_vec,f)
-#define IACT_NONSYM_VEC IACT_NONSYM_VEC2(FUNCTION)
+#define _TIMER_DOPAIR(f) PASTE(runner_timer_dopair,f)
+#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)
 
-#define IACT_VEC2(f) PASTE(runner_iact_vec,f)
-#define IACT_VEC IACT_VEC2(FUNCTION)
+#define _TIMER_DOSUB(f) PASTE(runner_timer_dosub,f)
+#define TIMER_DOSUB _TIMER_DOSUB(FUNCTION)
+
+#define _TIMER_DOSELF_SUBSET(f) PASTE(runner_timer_doself_subset,f)
+#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)
+
+#define _TIMER_DOPAIR_SUBSET(f) PASTE(runner_timer_dopair_subset,f)
+#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)
+
+#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec,f)
+#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)
+
+#define _IACT_VEC(f) PASTE(runner_iact_vec,f)
+#define IACT_VEC _IACT_VEC(FUNCTION)
 
 
 
@@ -94,6 +106,7 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
     struct cpart *restrict cpi, *restrict cparts_i = ci->cparts;
     double pix[3];
     float dx[3], hi, hi2, r2;
+    float dt_max = e->dt_max;
     #ifdef VECTORIZE
         int icount = 0;
         float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
@@ -104,6 +117,10 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
     #endif
     TIMER_TIC
     
+    /* Anything to do here? */
+    if ( ci->dt_min > dt_max && cj->dt_min > dt_max )
+        return;
+    
     /* Get the relative distance between the pairs, wrapping. */
     for ( k = 0 ; k < 3 ; k++ ) {
         if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 )
@@ -191,6 +208,106 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
     }
 
 
+void DOSELF_NAIVE ( struct runner *r , struct cell *restrict c ) {
+
+    int pid, pjd, k, count = c->count;
+    struct part *restrict parts = c->parts;
+    struct cpart *restrict cpi,  *restrict cpj,*restrict cparts = c->cparts;
+    double pix[3];
+    float dx[3], hi, hi2, r2;
+    float dt_max = r->e->dt_max;
+    #ifdef VECTORIZE
+        int icount = 0;
+        float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
+        float hiq[VEC_SIZE] __attribute__ ((aligned (16)));
+        float hjq[VEC_SIZE] __attribute__ ((aligned (16)));
+        float dxq[3*VEC_SIZE] __attribute__ ((aligned (16)));
+        struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
+    #endif
+    TIMER_TIC
+    
+    /* Anything to do here? */
+    if ( c->dt_min > dt_max )
+        return;
+    
+    /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts and shift = [ %g %g %g ].\n" ,
+        ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] ,
+        ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
+    tic = getticks(); */
+    
+    /* Loop over the parts in ci. */
+    for ( pid = 0 ; pid < count ; pid++ ) {
+    
+        /* Get a hold of the ith part in ci. */
+        cpi = &cparts[ pid ];
+        for ( k = 0 ; k < 3 ; k++ )
+            pix[k] = cpi->x[k];
+        hi = cpi->h;
+        hi2 = hi * hi;
+        
+        /* Loop over the parts in cj. */
+        for ( pjd = pid+1 ; pjd < count ; pjd++ ) {
+        
+            /* Get a pointer to the jth particle. */
+            cpj = &cparts[ pjd ];
+        
+            /* Compute the pairwise distance. */
+            r2 = 0.0f;
+            for ( k = 0 ; k < 3 ; k++ ) {
+                dx[k] = pix[k] - cpj->x[k];
+                r2 += dx[k]*dx[k];
+                }
+                
+            /* Hit or miss? */
+            if ( r2 < hi2 || r2 < cpj->h*cpj->h ) {
+            
+                #ifndef VECTORIZE
+                        
+                    IACT( r2 , dx , hi , cpj->h , &parts[ pid ] , &parts[pjd] );
+                
+                #else
+
+                    /* Add this interaction to the queue. */
+                    r2q[icount] = r2;
+                    dxq[3*icount+0] = dx[0];
+                    dxq[3*icount+1] = dx[1];
+                    dxq[3*icount+2] = dx[2];
+                    hiq[icount] = hi;
+                    hjq[icount] = cpj->h;
+                    piq[icount] = &parts[ pid ];
+                    pjq[icount] = &parts[ pjd ];
+                    icount += 1;
+
+                    /* Flush? */
+                    if ( icount == VEC_SIZE ) {
+                        IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
+                        icount = 0;
+                        }
+
+                #endif
+                    
+                }
+        
+            } /* loop over the parts in cj. */
+    
+        } /* loop over the parts in ci. */
+        
+    #ifdef VECTORIZE
+    /* Pick up any leftovers. */
+    if ( icount > 0 )
+        for ( k = 0 ; k < icount ; k++ )
+            IACT( r2q[k] , &dxq[3*k] , hiq[k] , hjq[k] , piq[k] , pjq[k] );
+    #endif
+        
+    #ifdef TIMER_VERBOSE
+        printf( "runner_doself[%02i]: %i parts at depth %i took %.3f ms.\n" , r->id , count , c->depth , ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000 );
+    #else
+        TIMER_TOC(TIMER_DOSELF);
+    #endif
+
+    }
+
+
 /**
  * @brief Compute the interactions between a cell pair, but only for the
  *      given indices in ci.
@@ -507,7 +624,7 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
  * @param cj The second #cell.
  */
  
-void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj ) {
+void DOPAIR1 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj ) {
 
     struct engine *restrict e = r->e;
     int pid, pjd, k, sid;
@@ -522,6 +639,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
     double hi_max, hj_max;
     double di_max, dj_min;
     int count_i, count_j;
+    float dt_max = e->dt_max;
     #ifdef VECTORIZE
         int icount = 0;
         float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
@@ -532,6 +650,10 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
     #endif
     TIMER_TIC
     
+    /* Anything to do here? */
+    if ( ci->dt_min > dt_max && cj->dt_min > dt_max )
+        return;
+    
     /* Get the relative distance between the pairs, wrapping. */
     for ( k = 0 ; k < 3 ; k++ ) {
         if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 )
@@ -594,6 +716,8 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
         /* Get a hold of the ith part in ci. */
         pi = &parts_i[ sort_i[ pid ].i ];
         cpi = &cparts_i[ sort_i[ pid ].i ];
+        if ( cpi->dt > dt_max )
+            continue;
         hi = cpi->h;
         di = sort_i[pid].d + hi - rshift;
         if ( di < dj_min )
@@ -621,7 +745,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
             
                 #ifndef VECTORIZE
                         
-                    IACT( r2 , dx , hi , cpj->h , &parts_i[ sort_i[ pid ].i ] , &parts_j[ sort_j[pjd].i ] );
+                    IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[pjd].i ] );
                 
                 #else
 
@@ -638,7 +762,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
 
                     /* Flush? */
                     if ( icount == VEC_SIZE ) {
-                        IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
+                        IACT_NONSYM_VEC( r2q , dxq , hiq , hjq , piq , pjq );
                         icount = 0;
                         }
 
@@ -659,6 +783,8 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
         /* Get a hold of the jth part in cj. */
         pj = &parts_j[ sort_j[ pjd ].i ];
         cpj = &cparts_j[ sort_j[ pjd ].i ];
+        if ( cpj->dt > dt_max )
+            continue;
         hj = cpj->h;
         dj = sort_j[pjd].d - hj - rshift;
         if ( dj > di_max )
@@ -682,11 +808,11 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
                 }
                 
             /* Hit or miss? */
-            if ( r2 < hj2 && r2 > cpi->h*cpi->h ) {
+            if ( r2 < hj2 ) {
             
                 #ifndef VECTORIZE
                         
-                    IACT( r2 , dx , hj , cpi->h , &parts_j[ sort_j[ pjd ].i ] , &parts_i[ sort_i[pid].i ] );
+                    IACT_NONSYM( r2 , dx , hj , cpi->h , pj , &parts_i[ sort_i[pid].i ] );
                 
                 #else
 
@@ -703,7 +829,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
 
                     /* Flush? */
                     if ( icount == VEC_SIZE ) {
-                        IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
+                        IACT_NONSYM_VEC( r2q , dxq , hiq , hjq , piq , pjq );
                         icount = 0;
                         }
 
@@ -719,7 +845,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
     /* Pick up any leftovers. */
     if ( icount > 0 )
         for ( k = 0 ; k < icount ; k++ )
-            IACT( r2q[k] , &dxq[3*k] , hiq[k] , hjq[k] , piq[k] , pjq[k] );
+            IACT_NONSYM( r2q[k] , &dxq[3*k] , hiq[k] , hjq[k] , piq[k] , pjq[k] );
     #endif
         
     #ifdef TIMER_VERBOSE
@@ -731,20 +857,24 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
     }
 
 
-/**
- * @brief Compute the cell self-interaction.
- *
- * @param r The #runner.
- * @param c The #cell.
- */
-
-void DOSELF ( struct runner *r , struct cell *restrict c ) {
+void DOPAIR2 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj ) {
 
-    int k, pid, pjd, count = c->count;
-    double pix[3];
-    float dx[3], hi, hi2, r2;
-    struct part *restrict parts = c->parts;
-    struct cpart *restrict cpi, *restrict cpj, *restrict cparts = c->cparts;
+    struct engine *restrict e = r->e;
+    int pid, pjd, k, sid;
+    double rshift, shift[3] = { 0.0 , 0.0 , 0.0 };
+    struct cell *temp;
+    struct entry *restrict sort_i, *restrict sort_j;
+    struct entry *restrict sortdt_i = NULL, *restrict sortdt_j = NULL;
+    int countdt_i = 0, countdt_j = 0;
+    struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j;
+    struct cpart *restrict cpi, *restrict cparts_i;
+    struct cpart *restrict cpj, *restrict cparts_j;
+    double pix[3], pjx[3], di, dj;
+    float dx[3], hi, hi2, hj, hj2, r2;
+    double hi_max, hj_max;
+    double di_max, dj_min;
+    int count_i, count_j;
+    float dt_max = e->dt_max;
     #ifdef VECTORIZE
         int icount = 0;
         float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
@@ -755,64 +885,606 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) {
     #endif
     TIMER_TIC
     
-    /* Loop over the particles in the cell. */
-    for ( pid = 0 ; pid < count ; pid++ ) {
-    
-        /* Get a pointer to the ith particle. */
-        cpi = &cparts[pid];
+    /* Anything to do here? */
+    if ( ci->dt_min > dt_max && cj->dt_min > dt_max )
+        return;
     
-        /* Get the particle position and radius. */
+    /* Get the relative distance between the pairs, wrapping. */
+    for ( k = 0 ; k < 3 ; k++ ) {
+        if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 )
+            shift[k] = e->s->dim[k];
+        else if ( cj->loc[k] - ci->loc[k] > e->s->dim[k]/2 )
+            shift[k] = -e->s->dim[k];
+        }
+        
+    /* Get the sorting index. */
+    for ( sid = 0 , k = 0 ; k < 3 ; k++ )
+        sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 );
+
+    /* Switch the cells around? */
+    if ( runner_flip[sid] ) {
+        temp = ci; ci = cj; cj = temp;
         for ( k = 0 ; k < 3 ; k++ )
-            pix[k] = cpi->x[k];
+            shift[k] = -shift[k];
+        }
+    sid = sortlistID[sid];
+    
+    /* Get the cutoff shift. */
+    for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ )
+        rshift += shift[k]*runner_shift[ 3*sid + k ];
+        
+    /* for ( k = 0 ; k < ci->count ; k++ )
+        if ( ci->parts[k].id == 561590 )
+            break;
+    if ( k == ci->count )
+        for ( k = 0 ; k < cj->count ; k++ )
+            if ( cj->parts[k].id == 561590 )
+                break;
+    if ( k < cj->count )
+        printf( "runner_dopair: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts, h_max=%g/%g, and shift = [ %g %g %g ] (rshift=%g).\n" ,
+            ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] ,
+            ci->count , cj->count , ci->h_max , cj->h_max , shift[0] , shift[1] , shift[2] , rshift ); fflush(stdout); */
+    /* for ( hi = 0 , k = 0 ; k < ci->count ; k++ )
+        hi += ci->parts[k].r;
+    for ( hj = 0 , k = 0 ; k < cj->count ; k++ )
+        hj += cj->parts[k].r;
+    printf( "runner_dopair: avg. radii %g/%g for h=%g at depth=%i.\n" , hi/ci->count , hj/cj->count , ci->h[0] , ci->depth ); fflush(stdout); */
+    
+    /* Pick-out the sorted lists. */
+    sort_i = &ci->sort[ sid*(ci->count + 1) ];
+    sort_j = &cj->sort[ sid*(cj->count + 1) ];
+    
+    /* Get some other useful values. */
+    hi_max = ci->h_max - rshift; hj_max = cj->h_max;
+    count_i = ci->count; count_j = cj->count;
+    parts_i = ci->parts; parts_j = cj->parts;
+    cparts_i = ci->cparts; cparts_j = cj->cparts;
+    di_max = sort_i[count_i-1].d - rshift;
+    dj_min = sort_j[0].d;
+    
+    /* Collect the number of parts left and right below dt. */
+    if ( cj->dt_min > dt_max ) {
+        sortdt_i = sort_i;
+        countdt_i = count_i;
+        }
+    else if ( cj->dt_max > dt_max ) {
+        if ( ( sortdt_i = (struct entry *)alloca( sizeof(struct entry) * count_i ) ) == NULL )
+            error( "Failed to allocate dt sortlists." );
+        for ( k = 0 ; k < count_i ; k++ )
+            if ( cparts_i[ sort_i[ k ].i ].dt <= dt_max ) {
+                sortdt_i[ countdt_i ] = sort_i[k];
+                countdt_i += 1;
+                }
+        }
+    if ( ci->dt_min > dt_max ) {
+        sortdt_j = sort_j;
+        countdt_j = count_j;
+        }
+    else if ( ci->dt_max > dt_max ) {
+        if ( ( sortdt_j = (struct entry *)alloca( sizeof(struct entry) * count_j ) ) == NULL )
+            error( "Failed to allocate dt sortlists." );
+        for ( k = 0 ; k < count_j ; k++ )
+            if ( cparts_j[ sort_j[ k ].i ].dt <= dt_max ) {
+                sortdt_j[ countdt_j ] = sort_j[k];
+                countdt_j += 1;
+                }
+        }
+    
+    /* if ( ci->split && cj->split && sid == 4 )
+        printf( "boing!\n" ); */
+    
+    /* Loop over the parts in ci. */
+    for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) {
+    
+        /* Get a hold of the ith part in ci. */
+        pi = &parts_i[ sort_i[ pid ].i ];
+        cpi = &cparts_i[ sort_i[ pid ].i ];
         hi = cpi->h;
+        di = sort_i[pid].d + hi - rshift;
+        if ( di < dj_min )
+            continue;
+            
         hi2 = hi * hi;
+        for ( k = 0 ; k < 3 ; k++ )
+            pix[k] = cpi->x[k] - shift[k];
             
-        /* Loop over the other particles .*/
-        for ( pjd = pid+1 ; pjd < count ; pjd++ ) {
-        
-            /* Get a pointer to the jth particle. */
-            cpj = &cparts[pjd];
+        /* Look at valid dt parts only? */
+        if ( cpi->dt > dt_max ) {
         
-            /* Compute the pairwise distance. */
-            r2 = 0.0f;
-            for ( k = 0 ; k < 3 ; k++ ) {
-                dx[k] = pix[k] - cpj->x[k];
-                r2 += dx[k]*dx[k];
-                }
-                
-            /* Hit or miss? */
-            if ( r2 < hi2 || r2 < cpj->h*cpj->h ) {
-            
-                #ifndef VECTORIZE
-                        
-                    IACT( r2 , dx , hi , cpj->h , &parts[pid] , &parts[pjd] );
-                
-                #else
-
-                    /* Add this interaction to the queue. */
-                    r2q[icount] = r2;
-                    dxq[3*icount+0] = dx[0];
-                    dxq[3*icount+1] = dx[1];
-                    dxq[3*icount+2] = dx[2];
-                    hiq[icount] = hi;
-                    hjq[icount] = cpj->h;
-                    piq[icount] = &parts[pid];
-                    pjq[icount] = &parts[pjd];
-                    icount += 1;
+            /* Loop over the parts in cj within dt. */
+            for ( pjd = 0 ; pjd < countdt_j && sortdt_j[pjd].d < di ; pjd++ ) {
 
-                    /* Flush? */
-                    if ( icount == VEC_SIZE ) {
-                        IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
-                        icount = 0;
-                        }
+                /* Get a pointer to the jth particle. */
+                cpj = &cparts_j[ sortdt_j[pjd].i ];
 
-                #endif
-                
-                }
-        
-            } /* loop over all other particles. */
-    
-        } /* loop over all particles. */
+                /* Compute the pairwise distance. */
+                r2 = 0.0f;
+                for ( k = 0 ; k < 3 ; k++ ) {
+                    dx[k] = pix[k] - cpj->x[k];
+                    r2 += dx[k]*dx[k];
+                    }
+
+                /* Hit or miss? */
+                if ( r2 < hi2 ) {
+
+                    #ifndef VECTORIZE
+
+                        IACT( r2 , dx , hi , cpj->h , pi , &parts_j[ sortdt_j[pjd].i ] );
+
+                    #else
+
+                        /* Add this interaction to the queue. */
+                        r2q[icount] = r2;
+                        dxq[3*icount+0] = dx[0];
+                        dxq[3*icount+1] = dx[1];
+                        dxq[3*icount+2] = dx[2];
+                        hiq[icount] = hi;
+                        hjq[icount] = cpj->h;
+                        piq[icount] = pi;
+                        pjq[icount] = &parts_j[ sortdt_j[ pjd ].i ];
+                        icount += 1;
+
+                        /* Flush? */
+                        if ( icount == VEC_SIZE ) {
+                            IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
+                            icount = 0;
+                            }
+
+                    #endif
+
+                    }
+        
+                } /* loop over the parts in cj. */
+                
+            }
+            
+        /* Otherwise, look at all parts. */
+        else {
+        
+            /* Loop over the parts in cj. */
+            for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d < di ; pjd++ ) {
+
+                /* Get a pointer to the jth particle. */
+                cpj = &cparts_j[ sort_j[pjd].i ];
+
+                /* Compute the pairwise distance. */
+                r2 = 0.0f;
+                for ( k = 0 ; k < 3 ; k++ ) {
+                    dx[k] = pix[k] - cpj->x[k];
+                    r2 += dx[k]*dx[k];
+                    }
+
+                /* Hit or miss? */
+                if ( r2 < hi2 ) {
+
+                    #ifndef VECTORIZE
+
+                        IACT( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[pjd].i ] );
+
+                    #else
+
+                        /* Add this interaction to the queue. */
+                        r2q[icount] = r2;
+                        dxq[3*icount+0] = dx[0];
+                        dxq[3*icount+1] = dx[1];
+                        dxq[3*icount+2] = dx[2];
+                        hiq[icount] = hi;
+                        hjq[icount] = cpj->h;
+                        piq[icount] = pi;
+                        pjq[icount] = &parts_j[ sort_j[ pjd ].i ];
+                        icount += 1;
+
+                        /* Flush? */
+                        if ( icount == VEC_SIZE ) {
+                            IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
+                            icount = 0;
+                            }
+
+                    #endif
+
+                    }
+        
+                } /* loop over the parts in cj. */
+                
+            }
+    
+        } /* loop over the parts in ci. */
+        
+    /* printf( "runner_dopair: first half took %.3f ms...\n" , ((double)(getticks() - tic)) / CPU_TPS * 1000 );
+    tic = getticks(); */
+
+    /* Loop over the parts in cj. */
+    for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max < di_max ; pjd++ ) {
+    
+        /* Get a hold of the jth part in cj. */
+        pj = &parts_j[ sort_j[ pjd ].i ];
+        cpj = &cparts_j[ sort_j[ pjd ].i ];
+        hj = cpj->h;
+        dj = sort_j[pjd].d - hj - rshift;
+        if ( dj > di_max )
+            continue;
+            
+        for ( k = 0 ; k < 3 ; k++ )
+            pjx[k] = cpj->x[k] + shift[k];
+        hj2 = hj * hj;
+        
+        /* Is this particle outside the dt? */
+        if ( cpj->dt > dt_max ) {
+        
+            /* Loop over the parts in ci. */
+            for ( pid = countdt_i-1 ; pid >= 0 && sortdt_i[pid].d > dj ; pid-- ) {
+
+                /* Get a pointer to the jth particle. */
+                cpi = &cparts_i[ sortdt_i[pid].i ];
+
+                /* Compute the pairwise distance. */
+                r2 = 0.0f;
+                for ( k = 0 ; k < 3 ; k++ ) {
+                    dx[k] = cpi->x[k] - pjx[k];
+                    r2 += dx[k]*dx[k];
+                    }
+
+                /* Hit or miss? */
+                if ( r2 < hj2 && r2 > cpi->h*cpi->h ) {
+
+                    #ifndef VECTORIZE
+
+                        IACT( r2 , dx , hj , cpi->h , pj , &parts_i[ sortdt_i[pid].i ] );
+
+                    #else
+
+                        /* Add this interaction to the queue. */
+                        r2q[icount] = r2;
+                        dxq[3*icount+0] = dx[0];
+                        dxq[3*icount+1] = dx[1];
+                        dxq[3*icount+2] = dx[2];
+                        hiq[icount] = hj;
+                        hjq[icount] = cpi->h;
+                        piq[icount] = pj;
+                        pjq[icount] = &parts_i[ sortdt_i[ pid ].i ];
+                        icount += 1;
+
+                        /* Flush? */
+                        if ( icount == VEC_SIZE ) {
+                            IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
+                            icount = 0;
+                            }
+
+                    #endif
+
+                    }
+
+                } /* loop over the parts in cj. */
+            }
+            
+        /* Otherwise, interact with all particles in cj. */
+        else {
+        
+            /* Loop over the parts in ci. */
+            for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d > dj ; pid-- ) {
+
+                /* Get a pointer to the jth particle. */
+                cpi = &cparts_i[ sort_i[pid].i ];
+
+                /* Compute the pairwise distance. */
+                r2 = 0.0f;
+                for ( k = 0 ; k < 3 ; k++ ) {
+                    dx[k] = cpi->x[k] - pjx[k];
+                    r2 += dx[k]*dx[k];
+                    }
+
+                /* Hit or miss? */
+                if ( r2 < hj2 && r2 > cpi->h*cpi->h ) {
+
+                    #ifndef VECTORIZE
+
+                        IACT( r2 , dx , hj , cpi->h , pj , &parts_i[ sort_i[pid].i ] );
+
+                    #else
+
+                        /* Add this interaction to the queue. */
+                        r2q[icount] = r2;
+                        dxq[3*icount+0] = dx[0];
+                        dxq[3*icount+1] = dx[1];
+                        dxq[3*icount+2] = dx[2];
+                        hiq[icount] = hj;
+                        hjq[icount] = cpi->h;
+                        piq[icount] = pj;
+                        pjq[icount] = &parts_i[ sort_i[ pid ].i ];
+                        icount += 1;
+
+                        /* Flush? */
+                        if ( icount == VEC_SIZE ) {
+                            IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
+                            icount = 0;
+                            }
+
+                    #endif
+
+                    }
+
+                } /* loop over the parts in cj. */
+                
+            }
+    
+        } /* loop over the parts in ci. */
+
+    #ifdef VECTORIZE
+    /* Pick up any leftovers. */
+    if ( icount > 0 )
+        for ( k = 0 ; k < icount ; k++ )
+            IACT( r2q[k] , &dxq[3*k] , hiq[k] , hjq[k] , piq[k] , pjq[k] );
+    #endif
+    
+    #ifdef TIMER_VERBOSE
+        printf( "runner_dopair[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f, h=%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->h_max , cj->h_max , fmax(ci->h[0],fmax(ci->h[1],ci->h[2])) , ((double)(TIMER_TOC(TIMER_DOPAIR))) / CPU_TPS * 1000 );
+    #else
+        TIMER_TOC(TIMER_DOPAIR);
+    #endif
+
+    }
+
+
+/**
+ * @brief Compute the cell self-interaction.
+ *
+ * @param r The #runner.
+ * @param c The #cell.
+ */
+
+void DOSELF1 ( struct runner *r , struct cell *restrict c ) {
+
+    int k, pid, pjd, count = c->count;
+    double pix[3];
+    float dx[3], hi, hi2, r2;
+    struct part *restrict parts = c->parts, *restrict pi;
+    struct cpart *restrict cpi, *restrict cpj, *restrict cparts = c->cparts;
+    float dt_max = r->e->dt_max;
+    #ifdef VECTORIZE
+        int icount = 0;
+        float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
+        float hiq[VEC_SIZE] __attribute__ ((aligned (16)));
+        float hjq[VEC_SIZE] __attribute__ ((aligned (16)));
+        float dxq[3*VEC_SIZE] __attribute__ ((aligned (16)));
+        struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
+    #endif
+    TIMER_TIC
+    
+    /* Any work here? */
+    if ( c->dt_min > dt_max )
+        return;
+    
+    /* Loop over the particles in the cell. */
+    for ( pid = 0 ; pid < count ; pid++ ) {
+    
+        /* Get a pointer to the ith particle. */
+        cpi = &cparts[pid];
+        pi = &parts[pid];
+        if ( cpi->dt > dt_max )
+            continue;
+    
+        /* Get the particle position and radius. */
+        for ( k = 0 ; k < 3 ; k++ )
+            pix[k] = cpi->x[k];
+        hi = cpi->h;
+        hi2 = hi * hi;
+            
+        /* Loop over the other particles .*/
+        for ( pjd = 0 ; pjd < count ; pjd++ ) {
+        
+            /* Skip self-interactions. */
+            if ( pid == pjd )
+                continue;
+        
+            /* Get a pointer to the jth particle. */
+            cpj = &cparts[pjd];                
+        
+            /* Compute the pairwise distance. */
+            r2 = 0.0f;
+            for ( k = 0 ; k < 3 ; k++ ) {
+                dx[k] = pix[k] - cpj->x[k];
+                r2 += dx[k]*dx[k];
+                }
+                
+            /* Hit or miss? */
+            if ( r2 < hi2 ) {
+            
+                #ifndef VECTORIZE
+                        
+                    IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts[pjd] );
+                
+                #else
+
+                    /* Add this interaction to the queue. */
+                    r2q[icount] = r2;
+                    dxq[3*icount+0] = dx[0];
+                    dxq[3*icount+1] = dx[1];
+                    dxq[3*icount+2] = dx[2];
+                    hiq[icount] = hi;
+                    hjq[icount] = cpj->h;
+                    piq[icount] = pi;
+                    pjq[icount] = &parts[pjd];
+                    icount += 1;
+
+                    /* Flush? */
+                    if ( icount == VEC_SIZE ) {
+                        IACT_NONSYM_VEC( r2q , dxq , hiq , hjq , piq , pjq );
+                        icount = 0;
+                        }
+
+                #endif
+                
+                }
+        
+            } /* loop over all other particles. */
+    
+        } /* loop over all particles. */
+
+    #ifdef VECTORIZE
+    /* Pick up any leftovers. */
+    if ( icount > 0 )
+        for ( k = 0 ; k < icount ; k++ )
+            IACT_NONSYM( r2q[k] , &dxq[3*k] , hiq[k] , hjq[k] , piq[k] , pjq[k] );
+    #endif
+        
+    #ifdef TIMER_VERBOSE
+        printf( "runner_doself1[%02i]: %i parts at depth %i took %.3f ms.\n" , r->id , count , c->depth , ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000 );
+    #else
+        TIMER_TOC(TIMER_DOSELF);
+    #endif
+
+    }
+
+
+void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
+
+    int k, pid, pjd, count = c->count;
+    double pix[3];
+    float dx[3], hi, hi2, r2;
+    struct part *restrict parts = c->parts, *restrict pi;
+    struct cpart *restrict cpi, *restrict cpj, *restrict cparts = c->cparts;
+    float dt_max = r->e->dt_max;
+    int firstdt = 0, countdt = 0, *indt = NULL;
+    #ifdef VECTORIZE
+        int icount = 0;
+        float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
+        float hiq[VEC_SIZE] __attribute__ ((aligned (16)));
+        float hjq[VEC_SIZE] __attribute__ ((aligned (16)));
+        float dxq[3*VEC_SIZE] __attribute__ ((aligned (16)));
+        struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
+    #endif
+    TIMER_TIC
+    
+    /* Set up indt if needed. */
+    if ( c->dt_min > dt_max )
+        return;
+    else if ( c->dt_max > dt_max ) {
+        if ( ( indt = (int *)alloca( sizeof(int) * count ) ) == NULL )
+            error( "Failed to allocate indt." );
+        for ( k = 0 ; k < count ; k++ )
+            if ( cparts[k].dt <= dt_max ) {
+                indt[ countdt ] = k;
+                countdt += 1;
+                }
+        }
+    
+    /* Loop over the particles in the cell. */
+    for ( pid = 0 ; pid < count ; pid++ ) {
+    
+        /* Get a pointer to the ith particle. */
+        pi = &parts[pid];
+        cpi = &cparts[pid];
+    
+        /* Get the particle position and radius. */
+        for ( k = 0 ; k < 3 ; k++ )
+            pix[k] = cpi->x[k];
+        hi = cpi->h;
+        hi2 = hi * hi;
+        
+        /* Is the ith particle not active? */
+        if ( cpi->dt > dt_max ) {
+        
+            /* Loop over the other particles .*/
+            for ( pjd = firstdt ; pjd < countdt ; pjd++ ) {
+
+                /* Get a pointer to the jth particle. */
+                cpj = &cparts[ indt[ pjd ] ];
+
+                /* Compute the pairwise distance. */
+                r2 = 0.0f;
+                for ( k = 0 ; k < 3 ; k++ ) {
+                    dx[k] = pix[k] - cpj->x[k];
+                    r2 += dx[k]*dx[k];
+                    }
+
+                /* Hit or miss? */
+                if ( r2 < hi2 || r2 < cpj->h*cpj->h ) {
+
+                    #ifndef VECTORIZE
+
+                        IACT( r2 , dx , hi , cpj->h , pi , &parts[indt[pjd]] );
+
+                    #else
+
+                        /* Add this interaction to the queue. */
+                        r2q[icount] = r2;
+                        dxq[3*icount+0] = dx[0];
+                        dxq[3*icount+1] = dx[1];
+                        dxq[3*icount+2] = dx[2];
+                        hiq[icount] = hi;
+                        hjq[icount] = cpj->h;
+                        piq[icount] = pi;
+                        pjq[icount] = &parts[indt[pjd]];
+                        icount += 1;
+
+                        /* Flush? */
+                        if ( icount == VEC_SIZE ) {
+                            IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
+                            icount = 0;
+                            }
+
+                    #endif
+
+                    }
+
+                } /* loop over all other particles. */
+                
+            }
+            
+        /* Otherwise, interact with all candidates. */
+        else {
+        
+            /* We caught a live one! */
+            firstdt += 1;
+            
+            /* Loop over the other particles .*/
+            for ( pjd = pid+1 ; pjd < count ; pjd++ ) {
+
+                /* Get a pointer to the jth particle. */
+                cpj = &cparts[pjd];
+
+                /* Compute the pairwise distance. */
+                r2 = 0.0f;
+                for ( k = 0 ; k < 3 ; k++ ) {
+                    dx[k] = pix[k] - cpj->x[k];
+                    r2 += dx[k]*dx[k];
+                    }
+
+                /* Hit or miss? */
+                if ( r2 < hi2 || r2 < cpj->h*cpj->h ) {
+
+                    #ifndef VECTORIZE
+
+                        IACT( r2 , dx , hi , cpj->h , pi , &parts[pjd] );
+
+                    #else
+
+                        /* Add this interaction to the queue. */
+                        r2q[icount] = r2;
+                        dxq[3*icount+0] = dx[0];
+                        dxq[3*icount+1] = dx[1];
+                        dxq[3*icount+2] = dx[2];
+                        hiq[icount] = hi;
+                        hjq[icount] = cpj->h;
+                        piq[icount] = pi;
+                        pjq[icount] = &parts[pjd];
+                        icount += 1;
+
+                        /* Flush? */
+                        if ( icount == VEC_SIZE ) {
+                            IACT_VEC( r2q , dxq , hiq , hjq , piq , pjq );
+                            icount = 0;
+                            }
+
+                    #endif
+
+                    }
+
+                } /* loop over all other particles. */
+            
+            }
+    
+        } /* loop over all particles. */
 
     #ifdef VECTORIZE
     /* Pick up any leftovers. */
@@ -820,9 +1492,9 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) {
         for ( k = 0 ; k < icount ; k++ )
             IACT( r2q[k] , &dxq[3*k] , hiq[k] , hjq[k] , piq[k] , pjq[k] );
     #endif
-        
+    
     #ifdef TIMER_VERBOSE
-        printf( "runner_doself[%02i]: %i parts at depth %i took %.3f ms.\n" , r->id , count , c->depth , ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000 );
+        printf( "runner_doself2[%02i]: %i parts at depth %i took %.3f ms.\n" , r->id , count , c->depth , ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000 );
     #else
         TIMER_TOC(TIMER_DOSELF);
     #endif
@@ -840,7 +1512,290 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) {
  * redundant computations to find the sid on-the-fly.
  */
 
-void DOSUB ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj , int sid ) {
+void DOSUB1 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj , int sid ) {
+
+    int j, k;
+    double shift[3];
+    float h;
+    struct space *s = r->e->s;
+
+    TIMER_TIC
+    
+    /* Is this a single cell? */
+    if ( cj == NULL ) {
+    
+        /* Recurse? */
+        if ( ci->split ) {
+        
+            /* Loop over all progeny. */
+            for ( k = 0 ; k < 8 ; k++ )
+                if ( ci->progeny[k] != NULL ) {
+                    DOSUB1( r , ci->progeny[k] , NULL , -1 );
+                    for ( j = k+1 ; j < 8 ; j++ )
+                        if ( ci->progeny[j] != NULL )
+                            DOSUB1( r , ci->progeny[k] , ci->progeny[j] , -1 );
+                    }
+        
+            }
+        
+        /* Otherwsie, compute self-interaction. */
+        else
+            DOSELF2( r , ci );
+            
+        } /* self-interaction. */
+        
+    /* Otherwise, it's a pair interaction. */
+    else {
+    
+        /* Get the cell dimensions. */
+        h = fmin( ci->h[0] , fmin( ci->h[1] , ci->h[2] ) );
+        
+        /* Get the type of pair if not specified explicitly. */
+        if ( sid < 0 ) {
+        
+            /* Get the relative distance between the pairs, wrapping. */
+            for ( k = 0 ; k < 3 ; k++ ) {
+                if ( cj->loc[k] - ci->loc[k] < -s->dim[k]/2 )
+                    shift[k] = s->dim[k];
+                else if ( cj->loc[k] - ci->loc[k] > s->dim[k]/2 )
+                    shift[k] = -s->dim[k];
+                else
+                    shift[k] = 0.0;
+                }
+
+            /* Get the sorting index. */
+            for ( sid = 0 , k = 0 ; k < 3 ; k++ )
+                sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 );
+
+            /* Flip? */
+            if ( sid < 13 ) {
+                struct cell *temp = cj; cj = ci; ci = temp;
+                }
+            else
+                sid = 26 - sid;
+                
+            }
+    
+        /* Recurse? */
+        if ( ci->split && cj->split &&
+             ci->h_max*2 < h && cj->h_max*2 < h ) {
+             
+            /* Different types of flags. */
+            switch ( sid ) {
+
+                /* Regular sub-cell interactions of a single cell. */
+                case 0: /* (  1 ,  1 ,  1 ) */
+                    if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                    break;
+                    
+                case 1: /* (  1 ,  1 ,  0 ) */
+                    if ( ci->progeny[6] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[6] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[1] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[1] , -1 );
+                    break;
+
+                case 2: /* (  1 ,  1 , -1 ) */
+                    if ( ci->progeny[6] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[1] , -1 );
+                    break;
+                    
+                case 3: /* (  1 ,  0 ,  1 ) */
+                    if ( ci->progeny[5] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[5] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[5] != NULL && cj->progeny[2] != NULL )
+                        DOSUB1( r , ci->progeny[5] , cj->progeny[2] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[2] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[2] , -1 );
+                    break;
+
+                case 4: /* (  1 ,  0 ,  0 ) */
+                    if ( ci->progeny[4] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[4] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[4] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[4] , cj->progeny[1] , -1 );
+                    if ( ci->progeny[4] != NULL && cj->progeny[2] != NULL )
+                        DOSUB1( r , ci->progeny[4] , cj->progeny[2] , -1 );
+                    if ( ci->progeny[4] != NULL && cj->progeny[3] != NULL )
+                        DOSUB1( r , ci->progeny[4] , cj->progeny[3] , -1 );
+                    if ( ci->progeny[5] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[5] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[5] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[5] , cj->progeny[1] , -1 );
+                    if ( ci->progeny[5] != NULL && cj->progeny[2] != NULL )
+                        DOSUB1( r , ci->progeny[5] , cj->progeny[2] , -1 );
+                    if ( ci->progeny[5] != NULL && cj->progeny[3] != NULL )
+                        DOSUB1( r , ci->progeny[5] , cj->progeny[3] , -1 );
+                    if ( ci->progeny[6] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[6] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[1] , -1 );
+                    if ( ci->progeny[6] != NULL && cj->progeny[2] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[2] , -1 );
+                    if ( ci->progeny[6] != NULL && cj->progeny[3] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[3] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[1] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[2] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[2] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[3] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[3] , -1 );
+                    break;
+
+                case 5: /* (  1 ,  0 , -1 ) */
+                    if ( ci->progeny[4] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[4] , cj->progeny[1] , -1 );
+                    if ( ci->progeny[4] != NULL && cj->progeny[3] != NULL )
+                        DOSUB1( r , ci->progeny[4] , cj->progeny[3] , -1 );
+                    if ( ci->progeny[6] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[1] , -1 );
+                    if ( ci->progeny[6] != NULL && cj->progeny[3] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[3] , -1 );
+                    break;
+
+                case 6: /* (  1 , -1 ,  1 ) */
+                    if ( ci->progeny[5] != NULL && cj->progeny[2] != NULL )
+                        DOSUB1( r , ci->progeny[5] , cj->progeny[2] , -1 );
+                    break;
+                    
+                case 7: /* (  1 , -1 ,  0 ) */
+                    if ( ci->progeny[4] != NULL && cj->progeny[2] != NULL )
+                        DOSUB1( r , ci->progeny[4] , cj->progeny[2] , -1 );
+                    if ( ci->progeny[4] != NULL && cj->progeny[3] != NULL )
+                        DOSUB1( r , ci->progeny[4] , cj->progeny[3] , -1 );
+                    if ( ci->progeny[5] != NULL && cj->progeny[2] != NULL )
+                        DOSUB1( r , ci->progeny[5] , cj->progeny[2] , -1 );
+                    if ( ci->progeny[5] != NULL && cj->progeny[3] != NULL )
+                        DOSUB1( r , ci->progeny[5] , cj->progeny[3] , -1 );
+                    break;
+
+                case 8: /* (  1 , -1 , -1 ) */
+                    if ( ci->progeny[4] != NULL && cj->progeny[3] != NULL )
+                        DOSUB1( r , ci->progeny[4] , cj->progeny[3] , -1 );
+                    break;
+                    
+                case 9: /* (  0 ,  1 ,  1 ) */
+                    if ( ci->progeny[3] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[3] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[3] != NULL && cj->progeny[4] != NULL )
+                        DOSUB1( r , ci->progeny[3] , cj->progeny[4] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[4] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[4] , -1 );
+                    break;
+
+                case 10: /* (  0 ,  1 ,  0 ) */
+                    if ( ci->progeny[2] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[2] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[2] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[2] , cj->progeny[1] , -1 );
+                    if ( ci->progeny[2] != NULL && cj->progeny[4] != NULL )
+                        DOSUB1( r , ci->progeny[2] , cj->progeny[4] , -1 );
+                    if ( ci->progeny[2] != NULL && cj->progeny[5] != NULL )
+                        DOSUB1( r , ci->progeny[2] , cj->progeny[5] , -1 );
+                    if ( ci->progeny[3] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[3] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[3] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[3] , cj->progeny[1] , -1 );
+                    if ( ci->progeny[3] != NULL && cj->progeny[4] != NULL )
+                        DOSUB1( r , ci->progeny[3] , cj->progeny[4] , -1 );
+                    if ( ci->progeny[3] != NULL && cj->progeny[5] != NULL )
+                        DOSUB1( r , ci->progeny[3] , cj->progeny[5] , -1 );
+                    if ( ci->progeny[6] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[6] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[1] , -1 );
+                    if ( ci->progeny[6] != NULL && cj->progeny[4] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[4] , -1 );
+                    if ( ci->progeny[6] != NULL && cj->progeny[5] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[5] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[1] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[4] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[4] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[5] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[5] , -1 );
+                    break;
+
+                case 11: /* (  0 ,  1 , -1 ) */
+                    if ( ci->progeny[2] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[2] , cj->progeny[1] , -1 );
+                    if ( ci->progeny[2] != NULL && cj->progeny[5] != NULL )
+                        DOSUB1( r , ci->progeny[2] , cj->progeny[5] , -1 );
+                    if ( ci->progeny[6] != NULL && cj->progeny[1] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[1] , -1 );
+                    if ( ci->progeny[6] != NULL && cj->progeny[5] != NULL )
+                        DOSUB1( r , ci->progeny[6] , cj->progeny[5] , -1 );
+                    break;
+
+                case 12: /* (  0 ,  0 ,  1 ) */
+                    if ( ci->progeny[1] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[1] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[1] != NULL && cj->progeny[2] != NULL )
+                        DOSUB1( r , ci->progeny[1] , cj->progeny[2] , -1 );
+                    if ( ci->progeny[1] != NULL && cj->progeny[4] != NULL )
+                        DOSUB1( r , ci->progeny[1] , cj->progeny[4] , -1 );
+                    if ( ci->progeny[1] != NULL && cj->progeny[6] != NULL )
+                        DOSUB1( r , ci->progeny[1] , cj->progeny[6] , -1 );
+                    if ( ci->progeny[3] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[3] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[3] != NULL && cj->progeny[2] != NULL )
+                        DOSUB1( r , ci->progeny[3] , cj->progeny[2] , -1 );
+                    if ( ci->progeny[3] != NULL && cj->progeny[4] != NULL )
+                        DOSUB1( r , ci->progeny[3] , cj->progeny[4] , -1 );
+                    if ( ci->progeny[3] != NULL && cj->progeny[6] != NULL )
+                        DOSUB1( r , ci->progeny[3] , cj->progeny[6] , -1 );
+                    if ( ci->progeny[5] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[5] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[5] != NULL && cj->progeny[2] != NULL )
+                        DOSUB1( r , ci->progeny[5] , cj->progeny[2] , -1 );
+                    if ( ci->progeny[5] != NULL && cj->progeny[4] != NULL )
+                        DOSUB1( r , ci->progeny[5] , cj->progeny[4] , -1 );
+                    if ( ci->progeny[5] != NULL && cj->progeny[6] != NULL )
+                        DOSUB1( r , ci->progeny[5] , cj->progeny[6] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[2] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[2] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[4] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[4] , -1 );
+                    if ( ci->progeny[7] != NULL && cj->progeny[6] != NULL )
+                        DOSUB1( r , ci->progeny[7] , cj->progeny[6] , -1 );
+                    break;
+
+                }
+            
+            }
+            
+        /* Otherwise, compute the pair directly. */
+        else
+            DOPAIR1( r , ci , cj );
+    
+        } /* otherwise, pair interaction. */
+    
+
+    #ifdef TIMER_VERBOSE
+        printf( "runner_DOSUB[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , flags , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 );
+    #else
+        TIMER_TOC(TIMER_DOSUB);
+    #endif
+
+    }
+
+
+void DOSUB2 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj , int sid ) {
 
     int j, k;
     double shift[3];
@@ -858,17 +1813,17 @@ void DOSUB ( struct runner *r , struct cell *restrict ci , struct cell *restrict
             /* Loop over all progeny. */
             for ( k = 0 ; k < 8 ; k++ )
                 if ( ci->progeny[k] != NULL ) {
-                    DOSUB( r , ci->progeny[k] , NULL , -1 );
+                    DOSUB2( r , ci->progeny[k] , NULL , -1 );
                     for ( j = k+1 ; j < 8 ; j++ )
                         if ( ci->progeny[j] != NULL )
-                            DOSUB( r , ci->progeny[k] , ci->progeny[j] , -1 );
+                            DOSUB2( r , ci->progeny[k] , ci->progeny[j] , -1 );
                     }
         
             }
         
         /* Otherwsie, compute self-interaction. */
         else
-            DOSELF( r , ci );
+            DOSELF2( r , ci );
             
         } /* self-interaction. */
         
@@ -914,193 +1869,193 @@ void DOSUB ( struct runner *r , struct cell *restrict ci , struct cell *restrict
                 /* Regular sub-cell interactions of a single cell. */
                 case 0: /* (  1 ,  1 ,  1 ) */
                     if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[0] , -1 );
                     break;
                     
                 case 1: /* (  1 ,  1 ,  0 ) */
                     if ( ci->progeny[6] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[0] , -1 );
                     if ( ci->progeny[6] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[1] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[0] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[1] , -1 );
                     break;
 
                 case 2: /* (  1 ,  1 , -1 ) */
                     if ( ci->progeny[6] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[1] , -1 );
                     break;
                     
                 case 3: /* (  1 ,  0 ,  1 ) */
                     if ( ci->progeny[5] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[5] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[5] , cj->progeny[0] , -1 );
                     if ( ci->progeny[5] != NULL && cj->progeny[2] != NULL )
-                        DOSUB( r , ci->progeny[5] , cj->progeny[2] , -1 );
+                        DOSUB2( r , ci->progeny[5] , cj->progeny[2] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[0] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[2] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[2] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[2] , -1 );
                     break;
 
                 case 4: /* (  1 ,  0 ,  0 ) */
                     if ( ci->progeny[4] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[4] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[4] , cj->progeny[0] , -1 );
                     if ( ci->progeny[4] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[4] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[4] , cj->progeny[1] , -1 );
                     if ( ci->progeny[4] != NULL && cj->progeny[2] != NULL )
-                        DOSUB( r , ci->progeny[4] , cj->progeny[2] , -1 );
+                        DOSUB2( r , ci->progeny[4] , cj->progeny[2] , -1 );
                     if ( ci->progeny[4] != NULL && cj->progeny[3] != NULL )
-                        DOSUB( r , ci->progeny[4] , cj->progeny[3] , -1 );
+                        DOSUB2( r , ci->progeny[4] , cj->progeny[3] , -1 );
                     if ( ci->progeny[5] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[5] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[5] , cj->progeny[0] , -1 );
                     if ( ci->progeny[5] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[5] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[5] , cj->progeny[1] , -1 );
                     if ( ci->progeny[5] != NULL && cj->progeny[2] != NULL )
-                        DOSUB( r , ci->progeny[5] , cj->progeny[2] , -1 );
+                        DOSUB2( r , ci->progeny[5] , cj->progeny[2] , -1 );
                     if ( ci->progeny[5] != NULL && cj->progeny[3] != NULL )
-                        DOSUB( r , ci->progeny[5] , cj->progeny[3] , -1 );
+                        DOSUB2( r , ci->progeny[5] , cj->progeny[3] , -1 );
                     if ( ci->progeny[6] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[0] , -1 );
                     if ( ci->progeny[6] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[1] , -1 );
                     if ( ci->progeny[6] != NULL && cj->progeny[2] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[2] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[2] , -1 );
                     if ( ci->progeny[6] != NULL && cj->progeny[3] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[3] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[3] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[0] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[1] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[2] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[2] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[2] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[3] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[3] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[3] , -1 );
                     break;
 
                 case 5: /* (  1 ,  0 , -1 ) */
                     if ( ci->progeny[4] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[4] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[4] , cj->progeny[1] , -1 );
                     if ( ci->progeny[4] != NULL && cj->progeny[3] != NULL )
-                        DOSUB( r , ci->progeny[4] , cj->progeny[3] , -1 );
+                        DOSUB2( r , ci->progeny[4] , cj->progeny[3] , -1 );
                     if ( ci->progeny[6] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[1] , -1 );
                     if ( ci->progeny[6] != NULL && cj->progeny[3] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[3] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[3] , -1 );
                     break;
 
                 case 6: /* (  1 , -1 ,  1 ) */
                     if ( ci->progeny[5] != NULL && cj->progeny[2] != NULL )
-                        DOSUB( r , ci->progeny[5] , cj->progeny[2] , -1 );
+                        DOSUB2( r , ci->progeny[5] , cj->progeny[2] , -1 );
                     break;
                     
                 case 7: /* (  1 , -1 ,  0 ) */
                     if ( ci->progeny[4] != NULL && cj->progeny[2] != NULL )
-                        DOSUB( r , ci->progeny[4] , cj->progeny[2] , -1 );
+                        DOSUB2( r , ci->progeny[4] , cj->progeny[2] , -1 );
                     if ( ci->progeny[4] != NULL && cj->progeny[3] != NULL )
-                        DOSUB( r , ci->progeny[4] , cj->progeny[3] , -1 );
+                        DOSUB2( r , ci->progeny[4] , cj->progeny[3] , -1 );
                     if ( ci->progeny[5] != NULL && cj->progeny[2] != NULL )
-                        DOSUB( r , ci->progeny[5] , cj->progeny[2] , -1 );
+                        DOSUB2( r , ci->progeny[5] , cj->progeny[2] , -1 );
                     if ( ci->progeny[5] != NULL && cj->progeny[3] != NULL )
-                        DOSUB( r , ci->progeny[5] , cj->progeny[3] , -1 );
+                        DOSUB2( r , ci->progeny[5] , cj->progeny[3] , -1 );
                     break;
 
                 case 8: /* (  1 , -1 , -1 ) */
                     if ( ci->progeny[4] != NULL && cj->progeny[3] != NULL )
-                        DOSUB( r , ci->progeny[4] , cj->progeny[3] , -1 );
+                        DOSUB2( r , ci->progeny[4] , cj->progeny[3] , -1 );
                     break;
                     
                 case 9: /* (  0 ,  1 ,  1 ) */
                     if ( ci->progeny[3] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[3] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[3] , cj->progeny[0] , -1 );
                     if ( ci->progeny[3] != NULL && cj->progeny[4] != NULL )
-                        DOSUB( r , ci->progeny[3] , cj->progeny[4] , -1 );
+                        DOSUB2( r , ci->progeny[3] , cj->progeny[4] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[0] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[4] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[4] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[4] , -1 );
                     break;
 
                 case 10: /* (  0 ,  1 ,  0 ) */
                     if ( ci->progeny[2] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[2] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[2] , cj->progeny[0] , -1 );
                     if ( ci->progeny[2] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[2] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[2] , cj->progeny[1] , -1 );
                     if ( ci->progeny[2] != NULL && cj->progeny[4] != NULL )
-                        DOSUB( r , ci->progeny[2] , cj->progeny[4] , -1 );
+                        DOSUB2( r , ci->progeny[2] , cj->progeny[4] , -1 );
                     if ( ci->progeny[2] != NULL && cj->progeny[5] != NULL )
-                        DOSUB( r , ci->progeny[2] , cj->progeny[5] , -1 );
+                        DOSUB2( r , ci->progeny[2] , cj->progeny[5] , -1 );
                     if ( ci->progeny[3] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[3] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[3] , cj->progeny[0] , -1 );
                     if ( ci->progeny[3] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[3] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[3] , cj->progeny[1] , -1 );
                     if ( ci->progeny[3] != NULL && cj->progeny[4] != NULL )
-                        DOSUB( r , ci->progeny[3] , cj->progeny[4] , -1 );
+                        DOSUB2( r , ci->progeny[3] , cj->progeny[4] , -1 );
                     if ( ci->progeny[3] != NULL && cj->progeny[5] != NULL )
-                        DOSUB( r , ci->progeny[3] , cj->progeny[5] , -1 );
+                        DOSUB2( r , ci->progeny[3] , cj->progeny[5] , -1 );
                     if ( ci->progeny[6] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[0] , -1 );
                     if ( ci->progeny[6] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[1] , -1 );
                     if ( ci->progeny[6] != NULL && cj->progeny[4] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[4] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[4] , -1 );
                     if ( ci->progeny[6] != NULL && cj->progeny[5] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[5] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[5] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[0] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[1] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[4] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[4] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[4] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[5] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[5] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[5] , -1 );
                     break;
 
                 case 11: /* (  0 ,  1 , -1 ) */
                     if ( ci->progeny[2] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[2] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[2] , cj->progeny[1] , -1 );
                     if ( ci->progeny[2] != NULL && cj->progeny[5] != NULL )
-                        DOSUB( r , ci->progeny[2] , cj->progeny[5] , -1 );
+                        DOSUB2( r , ci->progeny[2] , cj->progeny[5] , -1 );
                     if ( ci->progeny[6] != NULL && cj->progeny[1] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[1] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[1] , -1 );
                     if ( ci->progeny[6] != NULL && cj->progeny[5] != NULL )
-                        DOSUB( r , ci->progeny[6] , cj->progeny[5] , -1 );
+                        DOSUB2( r , ci->progeny[6] , cj->progeny[5] , -1 );
                     break;
 
                 case 12: /* (  0 ,  0 ,  1 ) */
                     if ( ci->progeny[1] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[1] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[1] , cj->progeny[0] , -1 );
                     if ( ci->progeny[1] != NULL && cj->progeny[2] != NULL )
-                        DOSUB( r , ci->progeny[1] , cj->progeny[2] , -1 );
+                        DOSUB2( r , ci->progeny[1] , cj->progeny[2] , -1 );
                     if ( ci->progeny[1] != NULL && cj->progeny[4] != NULL )
-                        DOSUB( r , ci->progeny[1] , cj->progeny[4] , -1 );
+                        DOSUB2( r , ci->progeny[1] , cj->progeny[4] , -1 );
                     if ( ci->progeny[1] != NULL && cj->progeny[6] != NULL )
-                        DOSUB( r , ci->progeny[1] , cj->progeny[6] , -1 );
+                        DOSUB2( r , ci->progeny[1] , cj->progeny[6] , -1 );
                     if ( ci->progeny[3] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[3] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[3] , cj->progeny[0] , -1 );
                     if ( ci->progeny[3] != NULL && cj->progeny[2] != NULL )
-                        DOSUB( r , ci->progeny[3] , cj->progeny[2] , -1 );
+                        DOSUB2( r , ci->progeny[3] , cj->progeny[2] , -1 );
                     if ( ci->progeny[3] != NULL && cj->progeny[4] != NULL )
-                        DOSUB( r , ci->progeny[3] , cj->progeny[4] , -1 );
+                        DOSUB2( r , ci->progeny[3] , cj->progeny[4] , -1 );
                     if ( ci->progeny[3] != NULL && cj->progeny[6] != NULL )
-                        DOSUB( r , ci->progeny[3] , cj->progeny[6] , -1 );
+                        DOSUB2( r , ci->progeny[3] , cj->progeny[6] , -1 );
                     if ( ci->progeny[5] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[5] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[5] , cj->progeny[0] , -1 );
                     if ( ci->progeny[5] != NULL && cj->progeny[2] != NULL )
-                        DOSUB( r , ci->progeny[5] , cj->progeny[2] , -1 );
+                        DOSUB2( r , ci->progeny[5] , cj->progeny[2] , -1 );
                     if ( ci->progeny[5] != NULL && cj->progeny[4] != NULL )
-                        DOSUB( r , ci->progeny[5] , cj->progeny[4] , -1 );
+                        DOSUB2( r , ci->progeny[5] , cj->progeny[4] , -1 );
                     if ( ci->progeny[5] != NULL && cj->progeny[6] != NULL )
-                        DOSUB( r , ci->progeny[5] , cj->progeny[6] , -1 );
+                        DOSUB2( r , ci->progeny[5] , cj->progeny[6] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[0] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[0] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[0] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[2] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[2] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[2] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[4] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[4] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[4] , -1 );
                     if ( ci->progeny[7] != NULL && cj->progeny[6] != NULL )
-                        DOSUB( r , ci->progeny[7] , cj->progeny[6] , -1 );
+                        DOSUB2( r , ci->progeny[7] , cj->progeny[6] , -1 );
                     break;
 
                 }
@@ -1109,7 +2064,7 @@ void DOSUB ( struct runner *r , struct cell *restrict ci , struct cell *restrict
             
         /* Otherwise, compute the pair directly. */
         else
-            DOPAIR( r , ci , cj );
+            DOPAIR2( r , ci , cj );
     
         } /* otherwise, pair interaction. */
     
diff --git a/src/runner_iact.h b/src/runner_iact.h
index 750f44d02a9f417cdd9b06e90e4109ba774871bc..fff1382fc08bb8070fc1e4fb4545eb56c39eb30c 100644
--- a/src/runner_iact.h
+++ b/src/runner_iact.h
@@ -89,9 +89,10 @@ __attribute__ ((always_inline)) INLINE static void kernel_deval ( float x , floa
     }
 
 
+#ifdef VECTORIZE
+
 __attribute__ ((always_inline)) INLINE static void kernel_deval_vec ( vector *x , vector *w , vector *dw_dx ) {
     
-#ifdef VECTORIZE
     vector ind, c[kernel_degree+1];
     int j, k;
     
@@ -113,15 +114,9 @@ __attribute__ ((always_inline)) INLINE static void kernel_deval_vec ( vector *x
         w->v = ( x->v * w->v ) + c[k].v;
         }
         
-#else
+    }
     
-    /* Fake vectorization, i.e. hope the compiler gets it. */
-    for ( int k = 0 ; k < VEC_SIZE ; k++ )
-        kernel_deval( x.f[k] , &w.f[k] , &dw_dx.f[k] );
-
 #endif
-    
-    }
 
 
 __attribute__ ((always_inline)) INLINE static void kernel_eval ( float x , float *W ) {
diff --git a/src/space.c b/src/space.c
index bd34b1e0e8c8f5cf8937a0009bddc8e85a0db4ff..51eec54bf80d70041423e317ad6de5d2530e2968 100644
--- a/src/space.c
+++ b/src/space.c
@@ -104,7 +104,7 @@ void space_rebuild_recycle ( struct space *s , struct cell *c ) {
 int space_rebuild_recurse ( struct space *s , struct cell *c ) {
     
     int k, count, changes = 0, wasmt[8];
-    float h, h_limit, h_max = 0.0f;
+    float h, h_limit, h_max = 0.0f, dt_min = c->parts[0].dt, dt_max = dt_min;
     struct cell *temp;
     
     /* If the cell is already split, check that the split is still ok. */
@@ -124,8 +124,14 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) {
                 count += 1;
             if ( h > h_max )
                 h_max = h;
+            if ( c->cparts[k].dt < dt_min )
+                dt_min = c->cparts[k].dt;
+            if ( c->cparts[k].dt > dt_max )
+                dt_max = c->cparts[k].dt;
             }
         c->h_max = h_max;
+        c->dt_min = dt_min;
+        c->dt_max = dt_max;
             
         /* Un-split? */
         if ( count < c->count*space_splitratio || c->count < space_splitsize ) {
@@ -209,7 +215,7 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) {
  
 int space_rebuild ( struct space *s , int force , double cell_max ) {
 
-    float h_max = s->parts[0].h, h_min = s->parts[0].h;
+    float h_max = s->cell_min, h_min = s->parts[0].h;
     int i, j, k, cdim[3];
     struct cell *c;
     struct part *finger;
@@ -235,19 +241,22 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
     
         /* Free the old cells, if they were allocated. */
         if ( s->cells != NULL ) {
-            for ( k = 0 ; k < s->nr_cells ; k++ )
+            for ( k = 0 ; k < s->nr_cells ; k++ ) {
                 space_rebuild_recycle( s , &s->cells[k] );
+                if ( s->cells[k].sort != NULL )
+                    free( s->cells[k].sort );
+                }
             free( s->cells );
             s->maxdepth = 0;
             }
             
-        /* Set the new cell dimensions. */
+        /* Set the new cell dimensions only if smaller. */
         for ( k = 0 ; k < 3 ; k++ ) {
             s->cdim[k] = cdim[k];
             s->h[k] = s->dim[k] / cdim[k];
             s->ih[k] = 1.0 / s->h[k];
             }
-    
+
         /* Allocate the highest level of cells. */
         s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
         if ( posix_memalign( (void *)&s->cells , 64 , s->nr_cells * sizeof(struct cell) ) != 0 )
@@ -274,7 +283,8 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
         
         
     /* Run through the particles and get their cell index. */
-    ind = (int *)alloca( sizeof(int) * s->nr_parts );
+    if ( ( ind = (int *)malloc( sizeof(int) * s->nr_parts ) ) == NULL )
+        error( "Failed to allocate temporary particle indices." );
     for ( k = 0 ; k < s->nr_cells ; k++ )
         s->cells[ k ].count = 0;
     for ( k = 0 ; k < s->nr_parts ; k++ )  {
@@ -283,7 +293,10 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
         }
 
     /* Sort the parts according to their cells. */
-    parts_sort( s->parts , ind , s->nr_parts , 0 , s->nr_cells );        
+    parts_sort( s->parts , ind , s->nr_parts , 0 , s->nr_cells );    
+    
+    /* We no longer need the indices as of here. */
+    free( ind );    
 
     /* Update the condensed particle data. */         
     for ( k = 0 ; k < s->nr_parts ; k++ ) {
@@ -295,7 +308,9 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
         }
 
     /* Hook the cells up to the parts. */
-    for ( finger = s->parts , cfinger = s->cparts , k = 0 ; k < s->nr_cells ; k++ ) {
+    finger = s->parts;
+    cfinger = s->cparts;
+    for ( k = 0 ; k < s->nr_cells ; k++ ) {
         c = &s->cells[ k ];
         c->parts = finger;
         c->cparts = cfinger;
@@ -623,7 +638,7 @@ void space_splittasks ( struct space *s ) {
                 continue;
             
             /* Make a sub? */
-            if ( ci->count < space_subsize ) {
+            if ( space_dosub && ci->count < space_subsize ) {
             
                 /* convert to a self-subtask. */
                 t->type = task_type_sub;
@@ -701,22 +716,19 @@ void space_splittasks ( struct space *s ) {
                     sid = 26 - sid;
 
                 /* Replace by a single sub-task? */
-                if ( ci->count < space_subsize && cj->count < space_subsize &&
+                if ( space_dosub &&
+                     ci->count < space_subsize && cj->count < space_subsize &&
                      sid != 0 && sid != 2 && sid != 6 && sid != 8 ) {
                 
                     /* Make this task a sub task. */
                     t->type = task_type_sub;
                     t->flags = sid;
                     
-                    /* Make it depend on all the sorts of its sub-cells. */
-                    for ( j = 0 ; j < 8 ; j++ ) {
-                        if ( ci->progeny[j] != NULL )
-                            for ( k = 0 ; k < 14 ; k++ )
-                                task_addunlock( ci->progeny[j]->sorts[k] , t );
-                        if ( cj->progeny[j] != NULL )
-                            for ( k = 0 ; k < 14 ; k++ )
-                                task_addunlock( cj->progeny[j]->sorts[k] , t );
-                        }
+                    /* Make it depend on all the sorts of its two cells. */
+                    for ( k = 0 ; k < 14 ; k++ )
+                        task_addunlock( ci->sorts[k] , t );
+                    for ( k = 0 ; k < 14 ; k++ )
+                        task_addunlock( cj->sorts[k] , t );
                     
                     /* Don't go any further. */
                     continue;
@@ -1070,12 +1082,12 @@ void space_maketasks ( struct space *s , int do_sort ) {
         
         
     /* Allocate the task-list, if needed. */
-    if ( s->tasks == NULL || s->tasks_size < s->tot_cells * 43 ) {
+    if ( s->tasks == NULL || s->tasks_size < s->tot_cells * space_maxtaskspercell ) {
         if ( s->tasks != NULL )
             free( s->tasks );
         if ( s->tasks_ind != NULL )
             free( s->tasks_ind );
-        s->tasks_size = s->tot_cells * 43;
+        s->tasks_size = s->tot_cells * space_maxtaskspercell;
         if ( posix_memalign( (void *)&s->tasks , 64 , sizeof(struct task) * s->tasks_size ) != 0 )
             error( "Failed to allocate task list." );
         if ( ( s->tasks_ind = (int *)malloc( sizeof(int) * s->tasks_size ) ) == NULL )
@@ -1128,6 +1140,34 @@ void space_maketasks ( struct space *s , int do_sort ) {
     /* Split the tasks. */
     space_splittasks( s );
     
+    /* Remove pairs and self-interactions for cells with no parts in dt. */
+    for ( k = 0 ; k < s->nr_tasks ; k++ ) {
+        t = &s->tasks[k];
+        if ( t->type == task_type_self ) {
+            if ( t->ci->dt_min > s->dt_max )
+                t->type = task_type_none;
+            }
+        else if ( t->type == task_type_pair ) {
+            if ( t->ci->dt_min > s->dt_max && t->cj->dt_min > s->dt_max ) {
+                t->type = task_type_none;
+                for ( j = 0 ; j < 13 ; j++ )
+                    task_rmunlock_blind( t->ci->sorts[j] , t );
+                for ( j = 0 ; j < 13 ; j++ )
+                    task_rmunlock_blind( t->cj->sorts[j] , t );
+                }
+            }
+        else if ( t->type == task_type_sub ) {
+            if ( t->ci->dt_min > s->dt_max && ( t->cj == NULL || t->cj->dt_min > s->dt_max ) ) {
+                t->type = task_type_none;
+                for ( j = 0 ; j < 13 ; j++ )
+                    task_rmunlock_blind( t->ci->sorts[j] , t );
+                if ( t->cj != NULL )
+                    for ( j = 0 ; j < 13 ; j++ )
+                        task_rmunlock_blind( t->cj->sorts[j] , t );
+                }
+            }
+        }
+    
     /* Remove sort tasks with no dependencies. */
     for ( k = 0 ; k < s->nr_tasks ; k++ ) {
         t = &s->tasks[k];
@@ -1247,7 +1287,7 @@ void space_maketasks ( struct space *s , int do_sort ) {
 void space_split ( struct space *s , struct cell *c ) {
 
     int k, count;
-    double h, h_limit, h_max = 0.0;
+    double h, h_limit, h_max = 0.0, dt_min = c->parts[0].dt, dt_max = dt_min;
     struct cell *temp;
     
     /* Check the depth. */
@@ -1264,8 +1304,14 @@ void space_split ( struct space *s , struct cell *c ) {
             count += 1;
         if ( h > h_max )
             h_max = h;
+        if ( c->cparts[k].dt < dt_min )
+            dt_min = c->cparts[k].dt;
+        if ( c->cparts[k].dt > dt_max )
+            dt_max = c->cparts[k].dt;
         }
     c->h_max = h_max;
+    c->dt_min = dt_min;
+    c->dt_max = dt_max;
             
     /* Split or let it be? */
     if ( count > c->count*space_splitratio && c->count > space_splitsize ) {
@@ -1420,11 +1466,12 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N ,
     s->periodic = periodic;
     s->nr_parts = N;
     s->parts = parts;
+    s->cell_min = h_max;
     
     /* Allocate the cparts array. */
     if ( posix_memalign( (void *)&s->cparts , 32 ,  N * sizeof(struct cpart) ) != 0 )
         error( "Failed to allocate cparts." );
-    
+        
     /* Init the space lock. */
     if ( lock_init( &s->lock ) != 0 )
         error( "Failed to create space spin-lock." );
diff --git a/src/space.h b/src/space.h
index c4fb6db77411537942ea5e50314a541dfccf8e2f..0c52849363cae498a2d58930d4906e013bb3dc28 100644
--- a/src/space.h
+++ b/src/space.h
@@ -26,8 +26,9 @@
 #define space_splitratio                0.875
 #define space_splitsize_default         400
 #define space_subsize_default           1000
-#define space_dosub                     1
+#define space_dosub                     0
 #define space_stretch                   1.0
+#define space_maxtaskspercell           43
 
 
 /* Convert cell location to ID. */
@@ -58,10 +59,10 @@ struct space {
     double h[3], ih[3];
     
     /* The minimum and maximum cutoff radii. */
-    double h_min, h_max;
+    double h_min, h_max, cell_min;
     
     /* Current time step for particles. */
-    float dt;
+    float dt_max;
     
     /* Number of cells. */
     int nr_cells, tot_cells;
@@ -79,9 +80,6 @@ struct space {
     struct part *parts;
     struct cpart *cparts;
     
-    /* The sortlist data (cells hold pointers to these). */
-    struct entry *sortlist;
-    
     /* The total number of parts in the space. */
     int nr_parts;