diff --git a/src/engine.c b/src/engine.c
index da540a5ed656f4ac393933042fc1cf99a2d6da54..11a56b450b85826474d0123895e3eaccd1179234 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -431,8 +431,8 @@ void engine_single_force ( double *dim , long long int pid , struct part *__rest
     p.force.u_dt = 0.0f; p.force.h_dt = 0.0f; p.force.v_sig = 0.0f;
             
     /* Loop over all particle pairs (force). */
-    // for ( k = 0 ; k < N ; k++ ) {
-    for ( k = N-1 ; k >= 0 ; k-- ) {
+    for ( k = 0 ; k < N ; k++ ) {
+    // for ( k = N-1 ; k >= 0 ; k-- ) {
         if ( parts[k].id == p.id )
             continue;
         for ( i = 0 ; i < 3 ; i++ ) {
@@ -447,13 +447,7 @@ void engine_single_force ( double *dim , long long int pid , struct part *__rest
             }
         r2 = fdx[0]*fdx[0] + fdx[1]*fdx[1] + fdx[2]*fdx[2];
         if ( r2 < p.h*p.h*kernel_gamma2 || r2 < parts[k].h*parts[k].h*kernel_gamma2 ) {
-            p.a[0] = 0.0f; p.a[1] = 0.0f; p.a[2] = 0.0f;
-            p.force.u_dt = 0.0f; p.force.h_dt = 0.0f; p.force.v_sig = 0.0f;
             runner_iact_nonsym_force( r2 , fdx , p.h , parts[k].h , &p , &parts[k] );
-            /* printf( "pairs_simple: interacting particles %lli and %lli (rho=%.3e,r=%e): a=[%.3e,%.3e,%.3e], u_dt=%.3e, h_dt=%.3e, v_sig=%.3e.\n" ,
-                pid , p2.id , p2.rho , sqrtf(r2) ,
-                p.a[0] , p.a[1] , p.a[2] ,
-                p.force.u_dt , p.force.h_dt , p.force.v_sig ); */
             }
         }
         
@@ -520,7 +514,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
             }
         }
         
-    // engine_single_density( e->s->dim , 6178 , e->s->parts , e->s->nr_parts , e->s->periodic );
+    // engine_single_density( e->s->dim , 494748 , e->s->parts , e->s->nr_parts , e->s->periodic );
 
     /* Start the clock. */
     TIMER_TIC_ND
@@ -538,12 +532,12 @@ void engine_step ( struct engine *e , int sort_queues ) {
     /* Stop the clock. */
     TIMER_TOC(timer_runners);
 
-    // engine_single_force( e->s->dim , 6178 , e->s->parts , e->s->nr_parts , e->s->periodic );
+    // engine_single_force( e->s->dim , 494748 , e->s->parts , e->s->nr_parts , e->s->periodic );
     
     // for(k=0; k<10; ++k)
     //   printParticle(parts, k);
     // printParticle( parts , 432626 );
-    // printParticle( e->s->parts , 6178 , e->s->nr_parts );
+    // printParticle( e->s->parts , 494748 , e->s->nr_parts );
 
     /* Collect the cell data from the second kick. */
     for ( k = 0 ; k < e->s->nr_cells ; k++ ) {
diff --git a/src/kernel.h b/src/kernel.h
index 5996e182611ecaf985e3b7f05fad1d7e4bd1bc29..8c856b23b9bf91406be51e036bcf9521d1ea3327 100644
--- a/src/kernel.h
+++ b/src/kernel.h
@@ -35,6 +35,7 @@
 #define kernel_gamma 2.0f
 #define kernel_gamma2 4.0f
 #define kernel_gamma3 8.0f
+#define kernel_igamma 0.5f
 static float kernel_coeffs[ (kernel_degree + 1) * (kernel_ivals + 1) ] __attribute__ ((aligned (16))) =
     { 3.0/4.0*M_1_PI , -3.0/2.0*M_1_PI , 0.0 , M_1_PI , 
       -0.25*M_1_PI , 3.0/2.0*M_1_PI , -3.0*M_1_PI , M_2_PI , 
diff --git a/src/runner_iact.h b/src/runner_iact.h
index ab0887be2674accafc510c814cfd833d09118bf1..9d6c68a5b9942a12112ea8866e59874278845e0a 100644
--- a/src/runner_iact.h
+++ b/src/runner_iact.h
@@ -229,44 +229,40 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density (
     float dv[3], curlvr[3];
     int k;
 
-    if ( r2 < hi*hi*kernel_gamma2 ) {
-        
-        /* Get the masses. */
-        mj = pj->mass;
-    
-        /* Get r and r inverse. */
-        r = sqrtf( r2 );
-        ri = 1.0f / r;
-    
-        /* Compute dv dot r */
-        dv[0] = pi->v[0] - pj->v[0];
-        dv[1] = pi->v[1] - pj->v[1];
-        dv[2] = pi->v[2] - pj->v[2];
-        dvdr = dv[0]*dx[0] + dv[1]*dx[1] + dv[2]*dx[2];
-        dvdr *= ri;
-
-        /* Compute dv cross r */
-        curlvr[0] = dv[1]*dx[2] - dv[2]*dx[1];
-        curlvr[1] = dv[2]*dx[0] - dv[0]*dx[2];
-        curlvr[2] = dv[0]*dx[1] - dv[1]*dx[0];
-        for ( k = 0 ; k < 3 ; k++ )
-            curlvr[k] *= ri;
-            
-        h_inv = 1.0 / hi;
-        xi = r * h_inv;
-        kernel_deval( xi , &wi , &wi_dx );
-        
-        pi->rho += mj * wi;
-        pi->rho_dh -= mj * ( 3.0*wi + xi*wi_dx );
-        pi->density.wcount += wi;
-        pi->density.wcount_dh -= xi * wi_dx;
+    /* Get the masses. */
+    mj = pj->mass;
 
-	    pi->density.div_v += mj * dvdr * wi_dx;
-	    for ( k = 0 ; k < 3 ; k++ )
-	        pi->density.curl_v[k] += mj * curlvr[k] * wi_dx;
-            
-        }
+    /* Get r and r inverse. */
+    r = sqrtf( r2 );
+    ri = 1.0f / r;
 
+    /* Compute dv dot r */
+    dv[0] = pi->v[0] - pj->v[0];
+    dv[1] = pi->v[1] - pj->v[1];
+    dv[2] = pi->v[2] - pj->v[2];
+    dvdr = dv[0]*dx[0] + dv[1]*dx[1] + dv[2]*dx[2];
+    dvdr *= ri;
+
+    /* Compute dv cross r */
+    curlvr[0] = dv[1]*dx[2] - dv[2]*dx[1];
+    curlvr[1] = dv[2]*dx[0] - dv[0]*dx[2];
+    curlvr[2] = dv[0]*dx[1] - dv[1]*dx[0];
+    for ( k = 0 ; k < 3 ; k++ )
+        curlvr[k] *= ri;
+
+    h_inv = 1.0 / hi;
+    xi = r * h_inv;
+    kernel_deval( xi , &wi , &wi_dx );
+
+    pi->rho += mj * wi;
+    pi->rho_dh -= mj * ( 3.0*wi + xi*wi_dx );
+    pi->density.wcount += wi;
+    pi->density.wcount_dh -= xi * wi_dx;
+
+	pi->density.div_v += mj * dvdr * wi_dx;
+	for ( k = 0 ; k < 3 ; k++ )
+	    pi->density.curl_v[k] += mj * curlvr[k] * wi_dx;
+            
     }
     
 /**
diff --git a/src/space.h b/src/space.h
index 51ddbd5c01e945176679529f11e7bc9729e5f0c6..d5cb42f14dff889d9c3f885461099bb6014f4ff8 100644
--- a/src/space.h
+++ b/src/space.h
@@ -26,7 +26,7 @@
 #define space_splitratio                0.875
 #define space_splitsize_default         400
 #define space_subsize_default           5000
-#define space_dosub                     0
+#define space_dosub                     1
 #define space_stretch                   1.05
 #define space_maxtaskspercell           31