diff --git a/src/cell.h b/src/cell.h
index fc4954ea99d7e30421d15c5d077327b475cceffd..a560fcb1a54000188b454913b2776308c0fce24b 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -51,7 +51,7 @@ struct cell {
     float slack;
     
     /* Maximum particle movement in this cell. */
-    float dx_max;
+    float dx_max, h2dx_max;
     
     /* The depth of this cell in the tree. */
     int depth, split;
diff --git a/src/debug.c b/src/debug.c
index 2a32a443b569b90c2b77ac17edeac40463bd147f..5b3b1f7cad619257e5f738a129b3b4fa8d0acfd8 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -49,7 +49,7 @@ void printParticle ( struct part *parts , long long int id, int N ) {
             parts[i].a[0], parts[i].a[1], parts[i].a[2],
             parts[i].h,
             parts[i].force.h_dt,
-            parts[i].wcount,
+            parts[i].density.wcount,
             parts[i].mass,
             parts[i].rho, parts[i].rho_dh,
             parts[i].density.div_v,
diff --git a/src/engine.c b/src/engine.c
index 07a9d51e0099e3a34358b0879c70b611caae5728..5d23f431b9ee657cd857118b2cd59de6d842333f 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -164,7 +164,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
     int j, k;
     struct engine *e = (struct engine *)data;
     float pdt, dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt;
-    float dt_min, dt_max, h_max, dx_max, a[3], v[3], u, u_dt, h, h_dt, rho, v_old[3];
+    float dt_min, dt_max, h_max, dx, h2dx_max, dx_max, a[3], v[3], u, u_dt, h, h_dt, rho, v_old[3];
     double x[3], x_old[3];
     struct part *restrict p;
     struct xpart *restrict xp;
@@ -178,6 +178,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
         dt_max = 0.0f;
         h_max = 0.0f;
         dx_max = 0.0f;
+        h2dx_max = 0.0f;
     
         /* Loop over parts. */
         for ( k = 0 ; k < c->count ; k++ ) {
@@ -212,9 +213,11 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
             p->x[0] = x[0] += dt * v_old[0];
             p->x[1] = x[1] += dt * v_old[1];
             p->x[2] = x[2] += dt * v_old[2];
-            dx_max = fmaxf( dx_max , sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) +
-                                            (x[1] - x_old[1])*(x[1] - x_old[1]) +
-                                            (x[2] - x_old[2])*(x[2] - x_old[2]) )*2 + h );
+            dx = sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) +
+                        (x[1] - x_old[1])*(x[1] - x_old[1]) +
+                        (x[2] - x_old[2])*(x[2] - x_old[2]) );
+            dx_max = fmaxf( dx_max , dx );
+            h2dx_max = fmaxf( h2dx_max , dx*2 + h );
 
             /* Update positions and energies at the half-step. */
             p->v[0] = v[0] += dt * a[0];
@@ -236,11 +239,10 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
             /* Init fields for density calculation. */
             if ( pdt > dt_step ) {
                 rho = p->rho *= expf( -3.0f * h_dt / h * dt );
-                // rho = p->rho += cbrt( h_dt ) * dt;
                 p->force.POrho2 = u * ( const_gamma - 1.0f ) / ( rho + h * p->rho_dh / 3.0f );
                 }
             else {
-                p->wcount = 0.0f;
+                p->density.wcount = 0.0f;
                 p->density.wcount_dh = 0.0f;
                 p->rho = 0.0f;
                 p->rho_dh = 0.0f;
@@ -262,6 +264,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
         dt_max = c->progeny[k]->dt_max;
         h_max = c->progeny[k]->h_max;
         dx_max = c->progeny[k]->dx_max;
+        h2dx_max = c->progeny[k]->h2dx_max;
         
         /* Loop over the remaining progeny. */
         for ( k += 1 ; k < 8 ; k++ )
@@ -270,6 +273,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
                 dt_max = fmaxf( dt_max , c->progeny[k]->dt_max );
                 h_max = fmaxf( h_max , c->progeny[k]->h_max );
                 dx_max = fmaxf( dx_max , c->progeny[k]->dx_max );
+                h2dx_max = fmaxf( h2dx_max , c->progeny[k]->h2dx_max );
                 }
     
         }
@@ -279,7 +283,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
     c->dt_max = dt_max;
     c->h_max = h_max;
     c->dx_max = dx_max;
-    c->sorted = 0;
+    c->h2dx_max = h2dx_max;
     
     /* Clean out the task pointers. */
     // c->sorts[0] = NULL;
@@ -340,7 +344,7 @@ void engine_single ( double *dim , long long int pid , struct part *__restrict__
         
     /* Dump the result. */
     ihg = kernel_igamma / p.h;
-    printf( "pairs_single: part %lli (h=%e) has wcount=%f, rho=%f.\n" , p.id , p.h , p.wcount + 32.0/3 , ihg * ihg * ihg * ( p.rho + p.mass*kernel_root ) );
+    printf( "pairs_single: part %lli (h=%e) has wcount=%f, rho=%f.\n" , p.id , p.h , p.density.wcount + 32.0/3 , ihg * ihg * ihg * ( p.rho + p.mass*kernel_root ) );
     fflush(stdout);
     
     }
@@ -365,6 +369,8 @@ void engine_step ( struct engine *e , int sort_queues ) {
     double lang[3], ang[3] = { 0.0 , 0.0 , 0.0 };
     // double lent, ent = 0.0;
     int threadID, nthreads, count = 0, lcount;
+    
+    TIMER_TIC2
 
     /* Get the maximum dt. */
     dt_step = 2.0f*dt;
@@ -375,7 +381,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
     /* Set the maximum dt. */
     e->dt_step = dt_step;
     e->s->dt_step = dt_step;
-    printf( "engine_step: dt_step set to %.3e.\n" , dt_step ); fflush(stdout);
+    printf( "engine_step: dt_step set to %.3e (dt=%.3e).\n" , dt_step , e->dt ); fflush(stdout);
     
     // printParticle( parts , 432626 );
     
@@ -415,7 +421,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
             error( "Error while waiting for barrier." );
             
     /* Stop the clock. */
-    TIMER_TOC(timer_step);
+    TIMER_TOC(timer_runners);
     
     // for(k=0; k<10; ++k)
     //   printParticle(parts, k);
@@ -523,6 +529,8 @@ void engine_step ( struct engine *e , int sort_queues ) {
         e->step /= 2;
         printf( "engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.\n" , e->dt );
         }
+        
+    TIMER_TOC2(timer_step);
     
     }
     
@@ -533,12 +541,13 @@ void engine_step ( struct engine *e , int sort_queues ) {
  *
  * @param e The #engine.
  * @param s The #space in which this #runner will run.
+ * @param dt The initial time step to use.
  * @param nr_threads The number of threads to spawn.
  * @param nr_queues The number of task queues to create.
  * @param policy The queueing policy to use.
  */
  
-void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy ) {
+void engine_init ( struct engine *e , struct space *s , float dt , int nr_threads , int nr_queues , int policy ) {
 
     #if defined(HAVE_SETAFFINITY)
         cpu_set_t cpuset;
@@ -550,7 +559,6 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_
     e->nr_threads = nr_threads;
     e->nr_queues = nr_queues;
     e->policy = policy;
-    e->dt_min = 0.0f;
     e->step = 0;
     
     /* First of all, init the barrier and lock it. */
@@ -562,6 +570,13 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_
         error( "Failed to lock barrier mutex." );
     e->barrier_count = 0;
     
+    /* Run through the parts and get the minimum time step. */
+    for ( k = 0 ; k < s->nr_parts ; k++ )
+        if ( s->parts[k].dt > 0.0f )
+            while ( s->parts[k].dt < dt )
+                dt *= 0.5;
+    e->dt_min = dt;
+    
     /* Allocate the queues. */
     if ( posix_memalign( (void *)(&e->queues) , 64 , nr_queues * sizeof(struct queue) ) != 0 )
         error( "Failed to allocate queues." );
diff --git a/src/engine.h b/src/engine.h
index f872f3cef703dfc58b4ed6dddfc365cc9268cfe3..cb96b81510da307fbe0b4c8a55aa5f058041a766 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -72,6 +72,6 @@ struct engine {
 
 /* Function prototypes. */
 void engine_barrier( struct engine *e );
-void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy );
+void engine_init ( struct engine *e , struct space *s , float dt , int nr_threads , int nr_queues , int policy );
 void engine_prepare ( struct engine *e );
 void engine_step ( struct engine *e , int sort_queues );
diff --git a/src/part.h b/src/part.h
index c90d0fe2717200f9290f34c5c1cf0479f7bf1dea..0341fec1e2db3399179f613fa16e3ba246147146 100644
--- a/src/part.h
+++ b/src/part.h
@@ -57,11 +57,23 @@ struct xpart {
 /* Data of a single particle. */
 struct part {
 
+    /* Particle acceleration. */
+    float a[3];
+    
     /* Particle velocity. */
     float v[3];
     
-    /* Particle mass. */
-    float mass;
+    /* Particle position. */
+    double x[3];
+    
+    /* Particle cutoff radius. */
+    float h;
+    
+    /* Particle time-step. */
+    float dt;
+    
+    /* Particle internal energy. */
+    float u;
     
     /* Particle density. */
     float rho;
@@ -83,6 +95,9 @@ struct part {
             /* Particle velocity curl. */
             float curl_v[3];
     
+            /* Particle number density. */
+            float wcount;
+    
             } density;
             
         struct {
@@ -112,14 +127,8 @@ struct part {
     /* Particle pressure. */
     // float P;
     
-    /* Particle acceleration. */
-    float a[3];
-    
-    /* Particle number density. */
-    float wcount;
-    
-    /* Particle internal energy. */
-    float u;
+    /* Particle mass. */
+    float mass;
     
     /* Particle ID. */
     unsigned long long id;
@@ -127,15 +136,6 @@ struct part {
     /* Pointer to extra particle data. */
     struct xpart *xtras;
     
-    /* Particle position. */
-    double x[3];
-    
-    /* Particle cutoff radius. */
-    float h;
-    
-    /* Particle time-step. */
-    float dt;
-    
     } __attribute__((aligned (32)));
     
 
diff --git a/src/runner.c b/src/runner.c
index 6a74534237ebd6d034a3a4707e84993975018b5c..8fa2d2fbede7e343b53e69e1f15f2966b09780b5 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -165,9 +165,11 @@ void runner_dosort_ascending ( struct entry *sort , int N ) {
  * @param r The #runner.
  * @param c The #cell.
  * @param flags Cell flag.
+ * @param clock Flag indicating whether to record the timing or not, needed
+ *      for recursive calls.
  */
  
-void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
+void runner_dosort ( struct runner *r , struct cell *c , int flags , int clock ) {
 
     struct entry *finger;
     struct entry *fingers[8];
@@ -176,8 +178,14 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
     int i, ind, off[8], inds[8], temp_i, missing;
     // float shift[3];
     float buff[8], px[3];
+    
     TIMER_TIC
     
+    /* Clean-up the flags, i.e. filter out what's already been sorted. */
+    flags &= ~c->sorted;
+    if ( flags == 0 )
+        return;
+    
     /* start by allocating the entry arrays. */
     if ( lock_lock( &c->lock ) != 0 )
         error( "Failed to lock cell." );
@@ -203,7 +211,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
             else
                 missing = ( c->progeny[k]->sorts->flags ^ flags ) & flags;
             if ( missing )
-                runner_dosort( r , c->progeny[k] , missing );
+                runner_dosort( r , c->progeny[k] , missing , 0 );
             }
     
         /* Loop over the 13 different sort arrays. */
@@ -314,7 +322,8 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
             (flags & 0x1000) >> 12 , (flags & 0x800) >> 11 , (flags & 0x400) >> 10 , (flags & 0x200) >> 9 , (flags & 0x100) >> 8 , (flags & 0x80) >> 7 , (flags & 0x40) >> 6 , (flags & 0x20) >> 5 , (flags & 0x10) >> 4 , (flags & 0x8) >> 3 , (flags & 0x4) >> 2 , (flags & 0x2) >> 1 , (flags & 0x1) >> 0 , 
             ((double)TIMER_TOC(timer_dosort)) / CPU_TPS * 1000 ); fflush(stdout);
     #else
-        TIMER_TOC(timer_dosort);
+        if ( clock )
+            TIMER_TOC(timer_dosort);
     #endif
 
     }
@@ -377,31 +386,35 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
                 ihg4 = ihg2 * ihg2;
 
                 /* Adjust the computed weighted number of neighbours. */
-                p->wcount += kernel_wroot;
+                p->density.wcount += kernel_wroot;
 
 		        /* Final operation on the density. */
                 p->rho = ihg * ihg2 * ( p->rho + p->mass*kernel_root );
                 p->rho_dh *= ihg4;
                     
                 /* Compute the smoothing length update (Newton step). */
-                h_corr = ( const_nwneigh - p->wcount ) / p->density.wcount_dh;
-                
-                /* Truncate to the range [ -p->h/2 , p->h ]. */
-                h_corr = fminf( h_corr , h );
-                h_corr = fmaxf( h_corr , -h/2 );
+                if ( p->density.wcount_dh == 0.0f )
+                    h_corr = p->h;
+                else {
+                    h_corr = ( const_nwneigh - p->density.wcount ) / p->density.wcount_dh;
+
+                    /* Truncate to the range [ -p->h/2 , p->h ]. */
+                    h_corr = fminf( h_corr , h );
+                    h_corr = fmaxf( h_corr , -h/2 );
+                    }
                 
                 /* Apply the correction to p->h. */
                 p->h += h_corr;
                 cp->h = p->h;
 
                 /* Did we get the right number density? */
-                if ( p->wcount > const_nwneigh + const_delta_nwneigh ||
-                     p->wcount < const_nwneigh - const_delta_nwneigh ) {
-                    // printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%.3f.\n" , p->id , p->h , c->depth , p->wcount ); fflush(stdout);
-                    // p->h += ( p->wcount + kernel_root - const_nwneigh ) / p->density.wcount_dh;
+                if ( p->density.wcount > const_nwneigh + const_delta_nwneigh ||
+                     p->density.wcount < const_nwneigh - const_delta_nwneigh ) {
+                    // printf( "runner_doghost: particle %lli (h=%e,depth=%i) has bad wcount=%.3f.\n" , p->id , p->h , c->depth , p->density.wcount ); fflush(stdout);
+                    // p->h += ( p->density.wcount + kernel_root - const_nwneigh ) / p->density.wcount_dh;
                     pid[redo] = pid[i];
                     redo += 1;
-                    p->wcount = 0.0;
+                    p->density.wcount = 0.0;
                     p->density.wcount_dh = 0.0;
                     p->rho = 0.0;
                     p->rho_dh = 0.0;
@@ -632,7 +645,7 @@ void *runner_main ( void *data ) {
                     cell_unlocktree( cj );
                     break;
                 case task_type_sort:
-                    runner_dosort( r , ci , t->flags );
+                    runner_dosort( r , ci , t->flags , 1 );
                     break;
                 case task_type_sub:
                     if ( t->subtype == task_subtype_density )
diff --git a/src/runner.h b/src/runner.h
index cf904a8e1ab926f024192279adc58e31aa37dfb2..f9a8cfd6b07b5b6a454f4765bd922be315ea39a0 100644
--- a/src/runner.h
+++ b/src/runner.h
@@ -81,5 +81,5 @@ void runner_doghost ( struct runner *r , struct cell *c );
 void runner_dopair_density ( struct runner *r , struct cell *ci , struct cell *cj );
 void runner_doself_density ( struct runner *r , struct cell *c );
 void runner_dosub_density ( struct runner *r , struct cell *ci , struct cell *cj , int flags );
-void runner_dosort ( struct runner *r , struct cell *c , int flag );
+void runner_dosort ( struct runner *r , struct cell *c , int flag , int clock );
 void *runner_main ( void *data );
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 15e079f6001abb4ece915eed8e4a058740c50b0e..45c03f597ad8555f8c835156e1628db252601c11 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -328,7 +328,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
     struct part *restrict pi, *restrict parts_j = cj->parts;
     struct cpart *restrict cpj, *restrict cparts_j = cj->cparts;
     double pix[3];
-    float dx[3], hi, hi2, r2, di;
+    float dx[3], hi, hi2, r2, di, dxj;
     struct entry *sort_j;
     #ifdef VECTORIZE
         int icount = 0;
@@ -363,6 +363,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
     
     /* Pick-out the sorted lists. */
     sort_j = &cj->sort[ sid*(cj->count + 1) ];
+    dxj = cj->dx_max;
     
     /* Parts are on the left? */
     if ( !flipped ) {
@@ -376,7 +377,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
                 pix[k] = pi->x[k] - shift[k];
             hi = pi->h;
             hi2 = hi * hi;
-            di = hi + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ];
+            di = hi + dxj + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ];
 
             /* Loop over the parts in cj. */
             for ( pjd = 0 ; pjd < count_j && sort_j[ pjd ].d < di ; pjd++ ) {
@@ -439,7 +440,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
                 pix[k] = pi->x[k] - shift[k];
             hi = pi->h;
             hi2 = hi * hi;
-            di = -hi + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ];
+            di = -hi - dxj + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ];
 
             /* Loop over the parts in cj. */
             for ( pjd = count_j-1 ; pjd >= 0 && di < sort_j[ pjd ].d ; pjd-- ) {
@@ -633,7 +634,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
     struct cpart *restrict cpi, *restrict cparts_i;
     struct cpart *restrict cpj, *restrict cparts_j;
     double pix[3], pjx[3], di, dj;
-    float dx[3], hi, hi2, hj, hj2, r2;
+    float dx[3], hi, hi2, hj, hj2, r2, dx_max;
     double hi_max, hj_max;
     double di_max, dj_min;
     int count_i, count_j;
@@ -674,13 +675,14 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
     cparts_i = ci->cparts; cparts_j = cj->cparts;
     di_max = sort_i[count_i-1].d - rshift;
     dj_min = sort_j[0].d;
+    dx_max = ( ci->dx_max + cj->dx_max );
     
     /* if ( ci->split && cj->split && sid == 4 )
         printf( "boing!\n" ); */
         
 
     /* Loop over the parts in ci. */
-    for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) {
+    for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min ; pid-- ) {
     
         /* Get a hold of the ith part in ci. */
         pi = &parts_i[ sort_i[ pid ].i ];
@@ -688,7 +690,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
         if ( cpi->dt > dt_step )
             continue;
         hi = cpi->h;
-        di = sort_i[pid].d + hi - rshift;
+        di = sort_i[pid].d + hi + dx_max - rshift;
         if ( di < dj_min )
             continue;
             
@@ -747,7 +749,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
     tic = getticks(); */
 
     /* Loop over the parts in cj. */
-    for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max < di_max ; pjd++ ) {
+    for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max ; pjd++ ) {
     
         /* Get a hold of the jth part in cj. */
         pj = &parts_j[ sort_j[ pjd ].i ];
@@ -755,7 +757,7 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
         if ( cpj->dt > dt_step )
             continue;
         hj = cpj->h;
-        dj = sort_j[pjd].d - hj - rshift;
+        dj = sort_j[pjd].d - hj - dx_max - rshift;
         if ( dj > di_max )
             continue;
             
@@ -838,7 +840,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
     struct cpart *restrict cpi, *restrict cparts_i;
     struct cpart *restrict cpj, *restrict cparts_j;
     double pix[3], pjx[3], di, dj;
-    float dx[3], hi, hi2, hj, hj2, r2;
+    float dx[3], hi, hi2, hj, hj2, r2, dx_max;
     double hi_max, hj_max;
     double di_max, dj_min;
     int count_i, count_j;
@@ -885,6 +887,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
     cparts_i = ci->cparts; cparts_j = cj->cparts;
     di_max = sort_i[count_i-1].d - rshift;
     dj_min = sort_j[0].d;
+    dx_max = ( ci->dx_max + cj->dx_max );
     
     /* Collect the number of parts left and right below dt. */
     if ( ci->dt_max <= dt_step ) {
@@ -915,13 +918,13 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
         }
     
     /* Loop over the parts in ci. */
-    for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) {
+    for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min ; pid-- ) {
     
         /* Get a hold of the ith part in ci. */
         pi = &parts_i[ sort_i[ pid ].i ];
         cpi = &cparts_i[ sort_i[ pid ].i ];
         hi = cpi->h;
-        di = sort_i[pid].d + hi - rshift;
+        di = sort_i[pid].d + hi + dx_max - rshift;
         if ( di < dj_min )
             continue;
             
@@ -1065,13 +1068,13 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
     tic = getticks(); */
 
     /* Loop over the parts in cj. */
-    for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max < di_max ; pjd++ ) {
+    for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max ; pjd++ ) {
     
         /* Get a hold of the jth part in cj. */
         pj = &parts_j[ sort_j[ pjd ].i ];
         cpj = &cparts_j[ sort_j[ pjd ].i ];
         hj = cpj->h;
-        dj = sort_j[pjd].d - hj - rshift;
+        dj = sort_j[pjd].d - hj - dx_max - rshift;
         if ( dj > di_max )
             continue;
             
diff --git a/src/runner_iact.h b/src/runner_iact.h
index 5ac5fd6c87d01ba3c07200e3a614d10a4444afd2..79708a311724959a9202ccd009a1b02f5cfc5659 100644
--- a/src/runner_iact.h
+++ b/src/runner_iact.h
@@ -68,7 +68,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
         
         pi->rho += pj->mass * wi;
         pi->rho_dh -= pj->mass * ( 3.0*wi + xi*wi_dx );
-        pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
+        pi->density.wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
         pi->density.wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
         // pi->icount += 1;
 
@@ -87,7 +87,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
         
         pj->rho += pi->mass * wj;
         pj->rho_dh -= pi->mass * ( 3.0*wj + xj*wj_dx );
-        pj->wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
+        pj->density.wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
         pj->density.wcount_dh -= xj * h_inv * wj_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
         // pj->icount += 1;
         
@@ -192,14 +192,14 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_density ( flo
     for ( k = 0 ; k < VEC_SIZE ; k++ ) {
         pi[k]->rho += rhoi.f[k];
         pi[k]->rho_dh -= rhoi_dh.f[k];
-        pi[k]->wcount += wcounti.f[k];
+        pi[k]->density.wcount += wcounti.f[k];
         pi[k]->density.wcount_dh -= wcounti_dh.f[k];
 	    pi[k]->density.div_v += div_vi.f[k];
 	    for( j = 0 ; j < 3 ; j++ )
    	        pi[k]->density.curl_v[j] += curl_vi[j].f[k];
         pj[k]->rho += rhoj.f[k];
         pj[k]->rho_dh -= rhoj_dh.f[k];
-        pj[k]->wcount += wcountj.f[k];
+        pj[k]->density.wcount += wcountj.f[k];
         pj[k]->density.wcount_dh -= wcountj_dh.f[k];
 	    pj[k]->density.div_v += div_vj.f[k];
 	    for( j = 0 ; j < 3 ; j++ )
@@ -251,7 +251,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density (
         
         pi->rho += pj->mass * wi;
         pi->rho_dh -= pj->mass * ( 3.0*wi + xi*wi_dx );
-        pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
+        pi->density.wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
         pi->density.wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
         // pi->icount += 1;
 
@@ -329,7 +329,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_densit
     for ( k = 0 ; k < VEC_SIZE ; k++ ) {
         pi[k]->rho += rhoi.f[k];
         pi[k]->rho_dh -= rhoi_dh.f[k];
-        pi[k]->wcount += wcounti.f[k];
+        pi[k]->density.wcount += wcounti.f[k];
         pi[k]->density.wcount_dh -= wcounti_dh.f[k];
 	    pi[k]->density.div_v += div_vi.f[k];
 	    for( j = 0 ; j < 3 ; j++ )
diff --git a/src/space.c b/src/space.c
index 61d278a4c1274aa0bc07dbbde7db0f9a73ab3bae..f5ae3c8becd35dbbec5ee5c2946200683f505b18 100644
--- a/src/space.c
+++ b/src/space.c
@@ -133,7 +133,7 @@ int space_marktasks ( struct space *s ) {
             
             /* Too much particle movement? */
             if ( !t->skip && t->tight &&
-                 ( t->ci->dx_max > t->ci->dmin || t->cj->dx_max > t->cj->dmin ) )
+                 ( t->ci->h2dx_max > t->ci->dmin || t->cj->h2dx_max > t->cj->dmin ) )
                 return 1;
                 
             /* Set the sort flags. */
@@ -176,94 +176,6 @@ int space_marktasks ( struct space *s ) {
     }
 
 
-/**
- * @brief Mapping function to set dt_min and dt_max.
- */
-
-void space_map_prepare ( struct cell *c , void *data ) {
-
-    int k;
-    float dt_min, dt_max, h_max, dx_max;
-    struct part *restrict p;
-    struct xpart *restrict xp;
-    struct cpart *restrict cp;
-
-    /* No children? */
-    if ( !c->split ) {
-    
-        /* Init with first part. */
-        p = &c->parts[0];
-        xp = p->xtras;
-        cp = &c->cparts[0];
-        
-        dt_min = p->dt;
-        dt_max = p->dt;
-        h_max = p->h;
-        dx_max = sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) +
-                        (p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) +
-                        (p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h;
-        cp->x[0] = p->x[0];
-        cp->x[1] = p->x[1];
-        cp->x[2] = p->x[2];
-        cp->h = p->h;
-        cp->dt = p->dt;
-    
-        /* Loop over parts. */
-        for ( k = 1 ; k < c->count ; k++ ) {
-            p = &c->parts[k];
-            xp = p->xtras;
-            cp = &c->cparts[k];
-            dt_min = fminf( dt_min , p->dt );
-            dt_max = fmaxf( dt_max , p->dt );
-            h_max = fmaxf( h_max , p->h );
-            dx_max = fmaxf( dx_max , sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) +
-                                            (p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) +
-                                            (p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h );
-            cp->x[0] = p->x[0];
-            cp->x[1] = p->x[1];
-            cp->x[2] = p->x[2];
-            cp->h = p->h;
-            cp->dt = p->dt;
-            }
-            
-        }
-        
-    /* Otherwise, agregate from children. */
-    else {
-    
-        /* Init with the first non-null child. */
-        for ( k = 0 ; c->progeny[k] == NULL ; k++ );
-        dt_min = c->progeny[k]->dt_min;
-        dt_max = c->progeny[k]->dt_max;
-        h_max = c->progeny[k]->h_max;
-        dx_max = c->progeny[k]->dx_max;
-        
-        /* Loop over the remaining progeny. */
-        for ( k += 1 ; k < 8 ; k++ )
-            if ( c->progeny[k] != NULL ) {
-                dt_min = fminf( dt_min , c->progeny[k]->dt_min );
-                dt_max = fmaxf( dt_max , c->progeny[k]->dt_max );
-                h_max = fmaxf( h_max , c->progeny[k]->h_max );
-                dx_max = fmaxf( dx_max , c->progeny[k]->dx_max );
-                }
-    
-        }
-
-    /* Store the values. */
-    c->dt_min = dt_min;
-    c->dt_max = dt_max;
-    c->h_max = h_max;
-    c->dx_max = dx_max;
-    c->sorted = 0;
-    
-    /* Clean out the task pointers. */
-    // c->sorts = NULL;
-    // c->nr_tasks = 0;
-    // c->nr_density = 0;
-    
-    }
-
-
 /**
  * @brief Check the integrity of the space and rebuild if necessary.
  *
@@ -669,6 +581,8 @@ void space_rebuild ( struct space *s , double cell_max ) {
             s->cells[k].nr_tasks = 0;
             s->cells[k].nr_density = 0;
             s->cells[k].dx_max = 0.0f;
+            s->cells[k].h2dx_max = 0.0f;
+            s->cells[k].sorted = 0;
             }
         s->maxdepth = 0;
     
diff --git a/src/space.h b/src/space.h
index 8039411cafdd495b5efd96fe6e945703d7ff7e4c..c94bd86129a3ed425698c5b46893d2822cc54a45 100644
--- a/src/space.h
+++ b/src/space.h
@@ -26,7 +26,7 @@
 #define space_splitratio                0.875
 #define space_splitsize_default         400
 #define space_subsize_default           1000
-#define space_dosub                     1
+#define space_dosub                     0
 #define space_stretch                   1.05
 #define space_maxtaskspercell           30
 
@@ -64,6 +64,9 @@ struct space {
     /* Current time step for particles. */
     float dt_step;
     
+    /* Current maximum displacement for particles. */
+    float dx_max;
+    
     /* Number of cells. */
     int nr_cells, tot_cells;
     
diff --git a/src/timers.h b/src/timers.h
index 3f109ae8077813631a0f8979c7cb5f3970ab8199..a2e50a07371f0cccedccd4086b5c06ea03c5a9e7 100644
--- a/src/timers.h
+++ b/src/timers.h
@@ -37,6 +37,7 @@ enum {
     timer_getpair,
     timer_steal,
     timer_stalled,
+    timer_runners,
     timer_step,
     timer_count,
     };