diff --git a/src/cell.h b/src/cell.h
index f7f3246800bcdbc3d55f99120fde96df4606ced9..cdc452f6e7d6a310fdeaa0cd4cb03d0e038515ef 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -20,6 +20,7 @@
 /* Some constants. */
 #define cell_sid_dt                 13
 
+
 /* The queue timers. */
 enum {
     cell_timer_none = 0,
diff --git a/src/const.h b/src/const.h
index 0f68c9be92bb222c1dbaa134a71e3cded9d12511..09e0eb87a814f637cd4cc39f4f042e5615fc8ade 100644
--- a/src/const.h
+++ b/src/const.h
@@ -26,6 +26,7 @@
 /* Time integration constants. */
 #define const_cfl               0.3f
 #define const_ln_max_h_change   0.231111721f    /* Particle can't change volume by more than a factor of 2=1.26^3 over one time step */
+#define const_max_u_change      0.1f    
 
 /* Neighbour search constants. */
 #define const_eta_kernel        1.2348f         /* Corresponds to 48 ngbs with the cubic spline kernel */
diff --git a/src/engine.c b/src/engine.c
index ac906099f53857e1dbff7597b11dd476f5ddc9b7..653255a3bca2ad3f4a1155232a732688832082fd 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -176,7 +176,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
     struct engine *e = (struct engine *)data;
     float pdt, dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt;
     float dt_min, dt_max, h_max, dx, h2dx_max, dx_max;
-    float a[3], v[3], u, u_dt, h, h_dt, rho, v_old[3], w;
+    float a[3], v[3], u, u_dt, h, h_dt, v_old[3], w, rho;
     double x[3], x_old[3];
     struct part *restrict p;
     struct xpart *restrict xp;
@@ -236,14 +236,14 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
             p->v[1] = v[1] += dt * a[1];
             p->v[2] = v[2] += dt * a[2];
             w = u_dt / u * dt;
-            if ( fabsf( w ) < 0.05f )
+            /* if ( fabsf( w ) < 0.05f )
                 p->u = u *= 1.0f + w*( 1.0f + w*( -0.5f + w*1.0f/6.0f ) );
-            else
+            else */
                 p->u = u *= expf( w );
             w = h_dt / h * dt;
-            if ( fabsf( w ) < 0.05f )
+            /* if ( fabsf( w ) < 0.05f )
                 p->h = h *= 1.0f + w*( 1.0f + w*( -0.5f + w*1.0f/6.0f ) );
-            else
+            else */
                 p->h = h *= expf( w );
             h_max = fmaxf( h_max , h );
 
@@ -259,11 +259,11 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
             /* Init fields for density calculation. */
             if ( pdt > dt_step ) {
                 float w = -3.0f * h_dt / h * dt;
-                if ( fabsf( w ) < 0.1f )
+                /* if ( fabsf( w ) < 0.1f )
                     rho = p->rho *= 1.0f + w*( 1.0f + w*( -0.5f + w*(1.0f/6.0f - 1.0f/24.0*w ) ) );
-                else
+                else */
                     rho = p->rho *= expf( w );
-                p->force.POrho2 = u * ( const_gamma - 1.0f ) / ( rho + h * p->rho_dh / 3.0f );
+                p->force.POrho2 = u * ( const_gamma - 1.0f ) / ( rho * rho );
                 }
             else {
                 p->density.wcount = 0.0f;
@@ -407,7 +407,7 @@ void engine_single_density ( double *dim , long long int pid , struct part *__re
     p.rho = ih * ih * ih * ( p.rho + p.mass*kernel_root );
     p.rho_dh = p.rho_dh * ih * ih * ih * ih;
     p.density.wcount = ( p.density.wcount + kernel_root ) * ( 4.0f / 3.0 * M_PI * kernel_gamma3 );
-    printf( "pairs_single: part %lli (h=%e) has wcount=%e, rho=%e, rho_dh=%e.\n" , p.id , p.h , p.density.wcount , p.rho , p.rho_dh );
+    printf( "pairs_single_density: part %lli (h=%e) has wcount=%e, rho=%e, rho_dh=%e.\n" , p.id , p.h , p.density.wcount , p.rho , p.rho_dh );
     fflush(stdout);
     
     }
@@ -447,12 +447,17 @@ void engine_single_force ( double *dim , long long int pid , struct part *__rest
             }
         r2 = fdx[0]*fdx[0] + fdx[1]*fdx[1] + fdx[2]*fdx[2];
         if ( r2 < p.h*p.h*kernel_gamma2 || r2 < parts[k].h*parts[k].h*kernel_gamma2 ) {
+            p.a[0] = 0.0f; p.a[1] = 0.0f; p.a[2] = 0.0f;
+            p.force.u_dt = 0.0f; p.force.h_dt = 0.0f; p.force.v_sig = 0.0f;
             runner_iact_nonsym_force( r2 , fdx , p.h , parts[k].h , &p , &parts[k] );
+            double dvdr = ( (p.v[0]-parts[k].v[0])*fdx[0] + (p.v[1]-parts[k].v[1])*fdx[1] + (p.v[2]-parts[k].v[2])*fdx[2] ) / sqrt(r2);
+            printf( "pairs_single_force: part %lli and %lli interact (r=%.3e,dvdr=%.3e) with a=[%.3e,%.3e,%.3e], dudt=%.3e.\n" ,
+                p.id , parts[k].id , sqrt(r2) , dvdr , p.a[0] , p.a[1], p.a[2] , p.force.u_dt );
             }
         }
         
     /* Dump the result. */
-    printf( "pairs_single: part %lli (h=%e) has a=[%.3e,%.3e,%.3e], udt=%e.\n" , p.id , p.h , p.a[0] , p.a[1] , p.a[2] , p.force.u_dt );
+    // printf( "pairs_single_force: part %lli (h=%e) has a=[%.3e,%.3e,%.3e], udt=%e.\n" , p.id , p.h , p.a[0] , p.a[1] , p.a[2] , p.force.u_dt );
     fflush(stdout);
     
     }
@@ -478,18 +483,18 @@ void engine_step ( struct engine *e , int sort_queues ) {
     TIMER_TIC2
 
     /* Get the maximum dt. */
-    if ( e->policy & engine_policy_fixdt )
-        dt_step = FLT_MAX;
-    else {
+    if ( e->policy & engine_policy_multistep ) {
         dt_step = 2.0f*dt;
         for ( k = 0 ; k < 32 && (e->step & (1 << k)) == 0 ; k++ )
             dt_step *= 2;
         }
+    else
+        dt_step = FLT_MAX;
         
     /* Set the maximum dt. */
     e->dt_step = dt_step;
     e->s->dt_step = dt_step;
-    printf( "engine_step: dt_step set to %.3e (dt=%.3e).\n" , dt_step , e->dt ); fflush(stdout);
+    // printf( "engine_step: dt_step set to %.3e (dt=%.3e).\n" , dt_step , e->dt ); fflush(stdout);
     
     // printParticle( parts , 432626 );
     
@@ -500,7 +505,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
         
     // for(k=0; k<10; ++k)
     //   printParticle(parts, k);
-    // printParticle( e->s->parts , 382557 , e->s->nr_parts );
+    // printParticle( e->s->parts , 3392063069037 , e->s->nr_parts );
  
     /* Prepare the space. */
     engine_prepare( e );
@@ -514,7 +519,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
             }
         }
         
-    // engine_single_density( e->s->dim , 7503966371841 , e->s->parts , e->s->nr_parts , e->s->periodic );
+    // engine_single_density( e->s->dim , 3392063069037 , e->s->parts , e->s->nr_parts , e->s->periodic );
 
     /* Start the clock. */
     TIMER_TIC_ND
@@ -532,12 +537,13 @@ void engine_step ( struct engine *e , int sort_queues ) {
     /* Stop the clock. */
     TIMER_TOC(timer_runners);
 
-    // engine_single_force( e->s->dim , 7503966371841 , e->s->parts , e->s->nr_parts , e->s->periodic );
+    // engine_single_force( e->s->dim , 5497479945069 , e->s->parts , e->s->nr_parts , e->s->periodic );
     
     // for(k=0; k<10; ++k)
     //   printParticle(parts, k);
     // printParticle( parts , 432626 );
-    // printParticle( e->s->parts , 7503966371841 , e->s->nr_parts );
+    // printParticle( e->s->parts , 3392063069037 , e->s->nr_parts );
+    // printParticle( e->s->parts , 5497479945069 , e->s->nr_parts );
 
     /* Collect the cell data from the second kick. */
     for ( k = 0 ; k < e->s->nr_cells ; k++ ) {
@@ -553,13 +559,17 @@ void engine_step ( struct engine *e , int sort_queues ) {
         }
     
     e->dt_min = dt_min;
+    e->dt_max = dt_max;
+    e->count_step = count;
+    e->ekin = ekin;
+    e->epot = epot;
     // printParticle( e->s->parts , 382557 , e->s->nr_parts );
-    printf( "engine_step: dt_min/dt_max is %e/%e.\n" , dt_min , dt_max ); fflush(stdout);
-    printf( "engine_step: etot is %e (ekin=%e, epot=%e).\n" , ekin+epot , ekin , epot ); fflush(stdout);
-    printf( "engine_step: total momentum is [ %e , %e , %e ].\n" , mom[0] , mom[1] , mom[2] ); fflush(stdout);
-    printf( "engine_step: total angular momentum is [ %e , %e , %e ].\n" , ang[0] , ang[1] , ang[2] ); fflush(stdout);
+    // printf( "engine_step: dt_min/dt_max is %e/%e.\n" , dt_min , dt_max ); fflush(stdout);
+    // printf( "engine_step: etot is %e (ekin=%e, epot=%e).\n" , ekin+epot , ekin , epot ); fflush(stdout);
+    // printf( "engine_step: total momentum is [ %e , %e , %e ].\n" , mom[0] , mom[1] , mom[2] ); fflush(stdout);
+    // printf( "engine_step: total angular momentum is [ %e , %e , %e ].\n" , ang[0] , ang[1] , ang[2] ); fflush(stdout);
     // printf( "engine_step: total entropic function is %e .\n", ent ); fflush(stdout);
-    printf( "engine_step: updated %i parts (dt_step=%.3e).\n" , count , dt_step ); fflush(stdout);
+    // printf( "engine_step: updated %i parts (dt_step=%.3e).\n" , count , dt_step ); fflush(stdout);
         
     /* Increase the step. */
     e->step += 1;
@@ -576,18 +586,20 @@ void engine_step ( struct engine *e , int sort_queues ) {
                 e->dt *= 0.5;
             while ( dt_min > 2*e->dt )
                 e->dt *= 2.0;
-            printf( "engine_step: dt_min=%.3e, adjusting time step to dt=%e.\n" , dt_min , e->dt );
+            // printf( "engine_step: dt_min=%.3e, adjusting time step to dt=%e.\n" , dt_min , e->dt );
             }
         else {
             while ( dt_min < e->dt ) {
                 e->dt *= 0.5;
                 e->step *= 2;
-                printf( "engine_step: dt_min dropped below time step, adjusting to dt=%e.\n" , e->dt );
+                e->nullstep *= 2;
+                // printf( "engine_step: dt_min dropped below time step, adjusting to dt=%e.\n" , e->dt );
                 }
             while ( dt_min > 2*e->dt && (e->step & 1) == 0 ) {
                 e->dt *= 2.0;
                 e->step /= 2;
-                printf( "engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.\n" , e->dt );
+                e->nullstep /= 2;
+                // printf( "engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.\n" , e->dt );
                 }
             }
         } 
diff --git a/src/engine.h b/src/engine.h
index 4e29a7b670790d094fb7616777c39ea9ef7da464..2d3c6a3575ad8d53fdbdf0e8f5768ad0e0128b76 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -26,6 +26,7 @@
 #define engine_policy_keep          4
 #define engine_policy_block         8
 #define engine_policy_fixdt         16
+#define engine_policy_multistep     32
 
 #define engine_queue_scale          1.2
 
@@ -55,14 +56,20 @@ struct engine {
     float dt_step;
     
     /* The minimum dt over all particles in the system. */
-    float dt_min;
+    float dt_min, dt_max;
     
     /* The system time step. */
     float dt, dt_orig;
     
+    /* The system energies from the previous step. */
+    double ekin, epot;
+    
     /* The current step number. */
     int step, nullstep;
     
+    /* The number of particles updated in the previous step. */
+    int count_step;
+    
     /* The current system time. */
     float time;
     
diff --git a/src/runner.c b/src/runner.c
index bc9345c05a79f99f27ecd9bb2ea92c4c8feffb6f..d8c032b3917a30f5a528bd9c959f6cf72b84c175 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -519,7 +519,8 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) {
     float mom[3] = { 0.0f , 0.0f , 0.0f }, ang[3] = { 0.0f , 0.0f , 0.0f };
     float x[3], v[3], u, h, pdt, m;
     float dt_step = r->e->dt_step, dt = r->e->dt, hdt = 0.5f*dt;
-    float dt_cfl, dt_h_change;
+    float dt_cfl, dt_h_change, dt_u_change, dt_new;
+    float h_dt, u_dt;
     struct part *p, *parts = c->parts;
     struct xpart *xp;
     
@@ -534,31 +535,36 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) {
 
         /* Get local copies of particle data. */
         pdt = p->dt;
+        u_dt = p->force.u_dt;
         h = p->h;
         m = p->mass;
         x[0] = p->x[0]; x[1] = p->x[1]; x[2] = p->x[2];
 
         /* Scale the derivatives if they're freshly computed. */
         if ( pdt <= dt_step ) {
-            p->force.h_dt *= h * 0.333333333f;
+            h_dt = p->force.h_dt *= h * 0.333333333f;
             count += 1;
             }
+        else
+            h_dt = p->force.h_dt;
 
         /* Update the particle's time step. */
         dt_cfl = const_cfl * h / p->force.v_sig;
-        dt_h_change = fabsf( const_ln_max_h_change * h / p->force.h_dt );
+        dt_h_change = ( h_dt != 0.0f ) ? fabsf( const_ln_max_h_change * h / h_dt ) : FLT_MAX;
+        dt_u_change = ( u_dt != 0.0f ) ? fabsf( const_max_u_change * p->u / u_dt ) : FLT_MAX;
+        dt_new = fminf( dt_cfl , fminf( dt_h_change , dt_u_change ) );
         if ( pdt == 0.0f )
-            p->dt = pdt = fminf( dt_cfl , dt_h_change );
+            p->dt = pdt = dt_new;
         else if ( pdt <= dt_step )
-            p->dt = pdt = fminf( fminf( dt_cfl , dt_h_change ) , 2.0f*pdt );
+            p->dt = pdt = fminf( dt_new , 2.0f*pdt );
         else
-            p->dt = pdt = fminf( fminf( dt_cfl , dt_h_change ) , pdt );
+            p->dt = pdt = fminf( dt_new , pdt );
 
         /* Update positions and energies at the half-step. */
         p->v[0] = v[0] = xp->v_old[0] + hdt * p->a[0];
         p->v[1] = v[1] = xp->v_old[1] + hdt * p->a[1];
         p->v[2] = v[2] = xp->v_old[2] + hdt * p->a[2];
-        p->u = u = xp->u_old + hdt * p->force.u_dt;
+        p->u = u = xp->u_old + hdt * u_dt;
 
         /* Get the smallest/largest dt. */
         dt_min = fminf( dt_min , pdt );
diff --git a/src/runner_iact.h b/src/runner_iact.h
index 76f253048f4387df1eb1a3b70c391e404ca2ac0c..05c73c047b7b627ad582ca22a34e296c9e7a072c 100644
--- a/src/runner_iact.h
+++ b/src/runner_iact.h
@@ -366,7 +366,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
     float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr;
     float mi, mj, POrho2i, POrho2j, rhoi, rhoj;
     float v_sig, omega_ij, Pi_ij;
-    float dt_max;
+    // float dt_max;
     float f;
     int k;
     
@@ -407,9 +407,9 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
     Pi_ij *= ( pi->force.balsara + pj->force.balsara );
 
     /* Volker's modified viscosity */
-    dt_max = fmaxf(pi->dt, pj->dt);
-    if(dt_max > 0 && (wi_dr + wj_dr) < 0.)
-        Pi_ij = fminf( Pi_ij, 2.f * omega_ij / ( ( mi + mj ) * ( wi_dr + wj_dr ) * dt_max ) );
+    /* dt_max = fmaxf(pi->dt, pj->dt);
+    if( dt_max > 0 && (wi_dr + wj_dr) < 0. )
+        Pi_ij = fminf( Pi_ij, 2.f * omega_ij / ( ( mi + mj ) * ( wi_dr + wj_dr ) * dt_max ) ); */
 
     /* Get the common factor out. */
     w = ri * ( ( POrho2i * wi_dr + POrho2j * wj_dr ) + 0.25f * Pi_ij * ( wi_dr + wj_dr ) );
@@ -600,14 +600,15 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
     float hi_inv, hi2_inv;
     float hj_inv, hj2_inv;
     float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr;
-    float mi, mj, POrho2i, POrho2j, rhoi, rhoj;
+    float /*mi,*/ mj, POrho2i, POrho2j, rhoi, rhoj;
     float v_sig, omega_ij, Pi_ij;
-    float dt_max;
+    // float dt_max;
     float f;
     int k;
     
     /* Get some values in local variables. */
-    mi = pi->mass; mj = pj->mass;
+    // mi = pi->mass;
+    mj = pj->mass;
     rhoi = pi->rho; rhoj = pj->rho;
     POrho2i = pi->force.POrho2;
     POrho2j = pj->force.POrho2;
@@ -643,9 +644,9 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl
     Pi_ij *= ( pi->force.balsara + pj->force.balsara );
 
     /* Volker's modified viscosity */
-    dt_max = fmaxf(pi->dt, pj->dt);
+    /* dt_max = fmaxf(pi->dt, pj->dt);
     if(dt_max > 0 && (wi_dr + wj_dr) < 0.)
-        Pi_ij = fminf( Pi_ij, 2.f * omega_ij / ( ( mi + mj ) * ( wi_dr + wj_dr ) * dt_max ) );
+        Pi_ij = fminf( Pi_ij, 2.f * omega_ij / ( ( mi + mj ) * ( wi_dr + wj_dr ) * dt_max ) ); */
 
     /* Get the common factor out. */
     w = ri * ( ( POrho2i * wi_dr + POrho2j * wj_dr ) + 0.25f * Pi_ij * ( wi_dr + wj_dr ) );
diff --git a/src/space.c b/src/space.c
index a5f9c211210ce218cb4253d705d30c50749116e7..5e7214b9a91359466634e5a25fba8b2972a8cc45 100644
--- a/src/space.c
+++ b/src/space.c
@@ -88,6 +88,7 @@ int space_marktasks ( struct space *s ) {
     int k, nr_tasks = s->nr_tasks, *ind = s->tasks_ind;
     struct task *t, *tasks = s->tasks;
     float dt_step = s->dt_step;
+    struct cell *ci, *cj;
     
     /* Run through the tasks and mark as skip or not. */
     for ( k = 0 ; k < nr_tasks ; k++ ) {
@@ -121,17 +122,22 @@ int space_marktasks ( struct space *s ) {
             /* Set this task's skip. */
             t->skip = ( t->ci->dt_min > dt_step && t->cj->dt_min > dt_step );
             
+            /* Local pointers. */
+            ci = t->ci;
+            cj = t->cj;
+            
             /* Too much particle movement? */
             if ( t->tight &&
-                 ( t->ci->h2dx_max > t->ci->dmin || t->cj->h2dx_max > t->cj->dmin ) )
+                 ( ci->h2dx_max > ci->dmin || cj->h2dx_max > cj->dmin ||
+                   ci->dx_max > space_maxreldx*ci->h_max || cj->dx_max > space_maxreldx*cj->h_max ) )
                 return 1;
                 
             /* Set the sort flags. */
             if ( !t->skip && t->type == task_type_pair ) {
-                t->ci->sorts->flags |= (1 << t->flags);
-                t->ci->sorts->skip = 0;
-                t->cj->sorts->flags |= (1 << t->flags);
-                t->cj->sorts->skip = 0;
+                ci->sorts->flags |= (1 << t->flags);
+                ci->sorts->skip = 0;
+                cj->sorts->flags |= (1 << t->flags);
+                cj->sorts->skip = 0;
                 }
                 
             }
diff --git a/src/space.h b/src/space.h
index d5cb42f14dff889d9c3f885461099bb6014f4ff8..fdd1ab8c5a0443418c98ee8ed08152d584ec6c0f 100644
--- a/src/space.h
+++ b/src/space.h
@@ -29,6 +29,7 @@
 #define space_dosub                     1
 #define space_stretch                   1.05
 #define space_maxtaskspercell           31
+#define space_maxreldx                  0.2f
 
 
 /* Convert cell location to ID. */