diff --git a/src/cell.h b/src/cell.h
index 96d39cb43d1c6e39a621df4766251944a5743f13..920ae422e5eb5fca6baa4c78d90e5559ef34b54a 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -67,6 +67,10 @@ struct cell {
     /* The tasks computing this cell's sorts. */
     struct task *sorts[14];
     
+    /* The tasks computing this cell's density. */
+    struct task *density[27];
+    int nr_density;
+    
     /* The ghost task to link density to interactions. */
     struct task *ghost;
     
diff --git a/src/engine.c b/src/engine.c
index 71004d6bfa8b30068f85b6c2ebeda1f698f29f2f..c9433f4d11b4c310c389873e3737b23a02873f22 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -169,6 +169,14 @@ void engine_run ( struct engine *e , int sort_queues ) {
     int j, k;
     struct space *s = e->s;
     
+    /* Re-set the particle data. */
+    for ( k = 0 ; k < s->nr_parts ; k++ ) {
+        s->parts[k].wcount = 0.0f;
+        s->parts[k].wcount_dh = 0.0f;
+        s->parts[k].rho = 0.0f;
+        s->parts[k].rho_dh = 0.0f;
+        }
+    
     /* Run throught the tasks and get all the waits right. */
     for ( k = 0 ; k < s->nr_tasks ; k++ ) {
         s->tasks[k].done = 0;
diff --git a/src/part.h b/src/part.h
index e369295ce8654b2c8cc8f2f8f845785bbb090b63..b7338ab4b198b9e61e8be4ff7c2cf756ead5761a 100644
--- a/src/part.h
+++ b/src/part.h
@@ -72,6 +72,7 @@ struct part {
     /* Particle number density. */
     int icount;
     float wcount;
+    float wcount_dh;
     
     } __attribute__((aligned (32)));
     
diff --git a/src/runner.c b/src/runner.c
index 2abcfd237b161a61f22e8e1db7b8757af6e4dfdd..4ab5ac78b618f2878c93daaa00470cbf90e0a98e 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -442,6 +442,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
 void runner_doghost ( struct runner *r , struct cell *c ) {
 
     struct part *p;
+    struct cell *finger;
     int i, k, redo, count = c->count;
     int *pid;
     float ihg, ihg2;
@@ -478,14 +479,18 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
             ihg2 = ihg * ihg;
             p->rho *= ihg * ihg2;
             p->rho_dh *= ihg2 * ihg2;
+            
+            /* Update the smoothing length. */
+            p->h -= ( p->wcount + kernel_root - const_nwneigh ) / p->wcount_dh;
 
             /* Did we get the right number density? */
             if ( p->wcount + kernel_root > const_nwneigh + 1 ||
                  p->wcount + kernel_root < const_nwneigh - 1 ) {
-                printf( "runner_doghost: particle %i has bad wcount=%f.\n" , p->id , p->wcount + kernel_root );
+                printf( "runner_doghost: particle %i has bad wcount=%f.\n" , p->id , p->wcount + kernel_root ); fflush(stdout);
                 pid[redo] = pid[i];
                 redo += 1;
                 p->wcount = 0.0;
+                p->wcount_dh = 0.0;
                 p->rho = 0.0;
                 p->rho_dh = 0.0;
                 continue;
@@ -512,8 +517,40 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
             
         /* Re-set the counter for the next loop (potentially). */
         count = redo;
-        if ( count > 0 )
-            error( "Bad smoothing length, fixing this isn't implemented yet." );
+        if ( count > 0 ) {
+        
+            // error( "Bad smoothing length, fixing this isn't implemented yet." );
+            
+            /* Climb up the cell hierarchy. */
+            for ( finger = c ; finger != NULL ; finger = finger->parent ) {
+            
+                /* Run through this cell's density interactions. */
+                for ( k = 0 ; k < finger->nr_density ; k++ ) {
+                
+                    /* Self-interaction? */
+                    if ( finger->density[k]->type == task_type_self )
+                        runner_doself_subset_density( r , finger , c->parts , pid , count );
+                        
+                    /* Otherwise, pair interaction? */
+                    else if ( finger->density[k]->type == task_type_pair ) {
+                    
+                        /* Left or right? */
+                        if ( finger->density[k]->ci == finger )
+                            runner_dopair_subset_density( r , finger , c->parts , pid , count , finger->density[k]->cj );
+                        else
+                            runner_dopair_subset_density( r , finger , c->parts , pid , count , finger->density[k]->ci );
+                        
+                        }
+                
+                    /* Otherwise, sub interaction? */
+                    else if ( finger->density[k]->type == task_type_sub ) 
+                        runner_dosub_subset_density( r , finger->density[k]->ci , finger->density[k]->cj , c , c->parts , pid , count , finger->density[k]->flags );
+                
+                    }
+            
+                }
+        
+            }
             
         }
 
diff --git a/src/runner.h b/src/runner.h
index 4d1a6ec64c6bf77276c5eacf078d54454596bff6..548165edf8a8913065c4629e1de30de04ff128eb 100644
--- a/src/runner.h
+++ b/src/runner.h
@@ -29,6 +29,7 @@ enum {
     runner_timer_dopair_force,
     runner_timer_dosub_density,
     runner_timer_dosub_force,
+    runner_timer_dopair_subset,
     runner_timer_doghost,
     runner_timer_getpair,
     runner_timer_steal,
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 2ef802978894c040fdfa936778fd7e7f12293d18..7ffa3fa14473e544de7668ec01e1476eab99f398 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -29,15 +29,24 @@
 #define DOPAIR2(f) PASTE(runner_dopair,f)
 #define DOPAIR DOPAIR2(FUNCTION)
 
+#define DOPAIR_SUBSET2(f) PASTE(runner_dopair_subset,f)
+#define DOPAIR_SUBSET DOPAIR_SUBSET2(FUNCTION)
+
 #define DOPAIR_NAIVE2(f) PASTE(runner_dopair_naive,f)
 #define DOPAIR_NAIVE DOPAIR_NAIVE2(FUNCTION)
 
 #define DOSELF2(f) PASTE(runner_doself,f)
 #define DOSELF DOSELF2(FUNCTION)
 
+#define DOSELF_SUBSET2(f) PASTE(runner_doself_subset,f)
+#define DOSELF_SUBSET DOSELF_SUBSET2(FUNCTION)
+
 #define DOSUB2(f) PASTE(runner_dosub,f)
 #define DOSUB DOSUB2(FUNCTION)
 
+#define DOSUB_SUBSET2(f) PASTE(runner_dosub_subset,f)
+#define DOSUB_SUBSET DOSUB_SUBSET2(FUNCTION)
+
 #define IACT2(f) PASTE(runner_iact,f)
 #define IACT IACT2(FUNCTION)
 
@@ -50,6 +59,12 @@
 #define TIMER_DOSUB2(f) PASTE(runner_timer_dosub,f)
 #define TIMER_DOSUB TIMER_DOSUB2(FUNCTION)
 
+#define TIMER_DOSELF_SUBSET2(f) PASTE(runner_timer_doself_subset,f)
+#define TIMER_DOSELF_SUBSET TIMER_DOSELF_SUBSET2(FUNCTION)
+
+#define TIMER_DOPAIR_SUBSET2(f) PASTE(runner_timer_dopair_subset,f)
+#define TIMER_DOPAIR_SUBSET TIMER_DOPAIR_SUBSET2(FUNCTION)
+
 
 /**
  * @brief Compute the interactions between a cell pair.
@@ -126,6 +141,158 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *ci , struct cell *cj ) {
     }
 
 
+/**
+ * @brief Compute the interactions between a cell pair, but only for the
+ *      given indices in ci.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param parts_i The #parts to interact with @c cj.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param count The number of particles in @c ind.
+ * @param cj The second #cell.
+ */
+ 
+void DOPAIR_SUBSET ( struct runner *r , struct cell *ci , struct part *parts_i , int *ind , int count , struct cell *cj ) {
+
+    struct engine *e = r->e;
+    int pid, pjd, k, count_j = cj->count;
+    double shift[3] = { 0.0 , 0.0 , 0.0 };
+    struct part *pi, *pj, *parts_j = cj->parts;
+    double pix[3];
+    float dx[3], hi, hi2, r2;
+    TIMER_TIC
+    
+    /* Get the relative distance between the pairs, wrapping. */
+    for ( k = 0 ; k < 3 ; k++ ) {
+        if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 )
+            shift[k] = e->s->dim[k];
+        else if ( cj->loc[k] - ci->loc[k] > e->s->dim[k]/2 )
+            shift[k] = -e->s->dim[k];
+        }
+        
+    /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts and shift = [ %g %g %g ].\n" ,
+        ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] ,
+        ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
+    tic = getticks(); */
+    
+    /* Loop over the parts in ci. */
+    for ( pid = 0 ; pid < count ; pid++ ) {
+    
+        /* Get a hold of the ith part in ci. */
+        pi = &parts_i[ ind[ pid ] ];
+        for ( k = 0 ; k < 3 ; k++ )
+            pix[k] = pi->x[k] - shift[k];
+        hi = pi->h;
+        hi2 = hi * hi;
+        
+        /* Loop over the parts in cj. */
+        for ( pjd = 0 ; pjd < count_j ; pjd++ ) {
+        
+            /* Get a pointer to the jth particle. */
+            pj = &parts_j[ pjd ];
+        
+            /* Compute the pairwise distance. */
+            r2 = 0.0f;
+            for ( k = 0 ; k < 3 ; k++ ) {
+                dx[k] = pix[k] - pj->x[k];
+                r2 += dx[k]*dx[k];
+                }
+                
+            /* Hit or miss? */
+            if ( r2 < hi2 ) {
+            
+                IACT( r2 , dx , hi , 0.0f , pi , pj );
+            
+                }
+        
+            } /* loop over the parts in cj. */
+    
+        } /* loop over the parts in ci. */
+        
+    #ifdef TIMER_VERBOSE
+        printf( "runner_dopair_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->h_max , cj->h_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 );
+    #else
+        TIMER_TOC(runner_timer_dopair_subset);
+    #endif
+
+
+    }
+
+
+/**
+ * @brief Compute the interactions between a cell pair, but only for the
+ *      given indices in ci.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param parts_i The #parts to interact with @c cj.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param count The number of particles in @c ind.
+ * @param cj The second #cell.
+ */
+ 
+void DOSELF_SUBSET ( struct runner *r , struct cell *ci , struct part *parts , int *ind , int count ) {
+
+    int pid, pjd, k, count_i = ci->count;
+    struct part *pi, *pj, *parts_i = ci->parts;
+    double pix[3];
+    float dx[3], hi, hi2, r2;
+    TIMER_TIC
+    
+    /* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts and shift = [ %g %g %g ].\n" ,
+        ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] ,
+        ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
+    tic = getticks(); */
+    
+    /* Loop over the parts in ci. */
+    for ( pid = 0 ; pid < count ; pid++ ) {
+    
+        /* Get a hold of the ith part in ci. */
+        pi = &parts[ ind[ pid ] ];
+        for ( k = 0 ; k < 3 ; k++ )
+            pix[k] = pi->x[k];
+        hi = pi->h;
+        hi2 = hi * hi;
+        
+        /* Loop over the parts in cj. */
+        for ( pjd = 0 ; pjd < count_i ; pjd++ ) {
+        
+            /* Get a pointer to the jth particle. */
+            pj = &parts_i[ pjd ];
+            
+            /* Skip the particle itself. */
+            if ( pj == pi )
+                continue;
+        
+            /* Compute the pairwise distance. */
+            r2 = 0.0f;
+            for ( k = 0 ; k < 3 ; k++ ) {
+                dx[k] = pix[k] - pj->x[k];
+                r2 += dx[k]*dx[k];
+                }
+                
+            /* Hit or miss? */
+            if ( r2 < hi2 ) {
+            
+                IACT( r2 , dx , hi , 0.0f , pi , pj );
+            
+                }
+        
+            } /* loop over the parts in cj. */
+    
+        } /* loop over the parts in ci. */
+        
+    #ifdef TIMER_VERBOSE
+        printf( "runner_doself_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->h_max , cj->h_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 );
+    #else
+        TIMER_TOC(runner_timer_dopair_subset);
+    #endif
+
+
+    }
+
+
 /**
  * @brief Compute the interactions between a cell pair.
  *
@@ -562,3 +729,349 @@ void DOSUB ( struct runner *r , struct cell *ci , struct cell *cj , int flags )
     }
 
 
+void DOSUB_SUBSET ( struct runner *r , struct cell *ci , struct cell *cj , struct cell *sub , struct part *parts_i , int *ind , int count , int flags ) {
+
+    int j, k;
+
+    // TIMER_TIC
+    
+    /* Different types of flags. */
+    switch ( flags ) {
+    
+        /* Regular sub-cell interactions of a single cell. */
+        case 0:
+            for ( j = 0 ; j < 7 ; j++ )
+                for ( k = j + 1 ; k < 8 ; k++ )
+                    if ( ci->progeny[j] == sub && ci->progeny[k] != NULL )
+                        DOPAIR_SUBSET( r , ci->progeny[j] , parts_i , ind , count , ci->progeny[k] );
+                    else if ( ci->progeny[k] == sub && ci->progeny[j] != NULL)
+                        DOPAIR_SUBSET( r , ci->progeny[k] , parts_i , ind , count , ci->progeny[j] );
+            break;
+            
+        case 1: /* (  1 ,  1 ,  0 ) */
+            if ( ci->progeny[6] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[0] );
+            if ( ci->progeny[6] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[1] );
+            if ( ci->progeny[7] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[0] );
+            if ( ci->progeny[7] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[1] );
+            if ( cj->progeny[0] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[6] );
+            if ( cj->progeny[1] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[6] );
+            if ( cj->progeny[0] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[7] );
+            if ( cj->progeny[1] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[7] );
+            break;
+    
+        case 3: /* (  1 ,  0 ,  1 ) */
+            if ( ci->progeny[5] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[5] );
+            if ( ci->progeny[5] == sub && cj->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[2] );
+            if ( cj->progeny[2] == sub && ci->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[5] );
+            if ( ci->progeny[7] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[7] );
+            if ( ci->progeny[7] == sub && cj->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[2] );
+            if ( cj->progeny[2] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[7] );
+            break;
+                    
+        case 4: /* (  1 ,  0 ,  0 ) */
+            if ( ci->progeny[4] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[4] );
+            if ( ci->progeny[4] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[1] );
+            if ( cj->progeny[1] == sub && ci->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[4] );
+            if ( ci->progeny[4] == sub && cj->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[2] );
+            if ( cj->progeny[2] == sub && ci->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[4] );
+            if ( ci->progeny[4] == sub && cj->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[3] );
+            if ( cj->progeny[3] == sub && ci->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[4] );
+            if ( ci->progeny[5] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[5] );
+            if ( ci->progeny[5] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[1] );
+            if ( cj->progeny[1] == sub && ci->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[5] );
+            if ( ci->progeny[5] == sub && cj->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[2] );
+            if ( cj->progeny[2] == sub && ci->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[5] );
+            if ( ci->progeny[5] == sub && cj->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[3] );
+            if ( cj->progeny[3] == sub && ci->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[5] );
+            if ( ci->progeny[6] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[6] );
+            if ( ci->progeny[6] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[1] );
+            if ( cj->progeny[1] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[6] );
+            if ( ci->progeny[6] == sub && cj->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[2] );
+            if ( cj->progeny[2] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[6] );
+            if ( ci->progeny[6] == sub && cj->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[3] );
+            if ( cj->progeny[3] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[6] );
+            if ( ci->progeny[7] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[7] );
+            if ( ci->progeny[7] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[1] );
+            if ( cj->progeny[1] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[7] );
+            if ( ci->progeny[7] == sub && cj->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[2] );
+            if ( cj->progeny[2] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[7] );
+            if ( ci->progeny[7] == sub && cj->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[3] );
+            if ( cj->progeny[3] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[7] );
+            break;
+            
+        case 5: /* (  1 ,  0 , -1 ) */
+            if ( ci->progeny[4] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[1] );
+            if ( cj->progeny[1] == sub && ci->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[4] );
+            if ( ci->progeny[4] == sub && cj->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[3] );
+            if ( cj->progeny[3] == sub && ci->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[4] );
+            if ( ci->progeny[6] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[1] );
+            if ( cj->progeny[1] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[6] );
+            if ( ci->progeny[6] == sub && cj->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[3] );
+            if ( cj->progeny[3] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[6] );
+            break;
+                    
+        case 7: /* (  1 , -1 ,  0 ) */
+            if ( ci->progeny[4] == sub && cj->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[2] );
+            if ( cj->progeny[2] == sub && ci->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[4] );
+            if ( ci->progeny[4] == sub && cj->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[4] , parts_i , ind , count , cj->progeny[3] );
+            if ( cj->progeny[3] == sub && ci->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[4] );
+            if ( ci->progeny[5] == sub && cj->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[2] );
+            if ( cj->progeny[2] == sub && ci->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[5] );
+            if ( ci->progeny[5] == sub && cj->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[3] );
+            if ( cj->progeny[3] == sub && ci->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[3] , parts_i , ind , count , ci->progeny[5] );
+            break;
+                    
+        case 9: /* (  0 ,  1 ,  1 ) */
+            if ( ci->progeny[3] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[3] );
+            if ( ci->progeny[3] == sub && cj->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[4] );
+            if ( cj->progeny[4] == sub && ci->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[3] );
+            if ( ci->progeny[7] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[7] );
+            if ( ci->progeny[7] == sub && cj->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[4] );
+            if ( cj->progeny[4] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[7] );
+            break;
+                    
+        case 10: /* (  0 ,  1 ,  0 ) */
+            if ( ci->progeny[2] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[2] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[2] );
+            if ( ci->progeny[2] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[2] , parts_i , ind , count , cj->progeny[1] );
+            if ( cj->progeny[1] == sub && ci->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[2] );
+            if ( ci->progeny[2] == sub && cj->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[2] , parts_i , ind , count , cj->progeny[4] );
+            if ( cj->progeny[4] == sub && ci->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[2] );
+            if ( ci->progeny[2] == sub && cj->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[2] , parts_i , ind , count , cj->progeny[5] );
+            if ( cj->progeny[5] == sub && ci->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[5] , parts_i , ind , count , ci->progeny[2] );
+            if ( ci->progeny[3] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[3] );
+            if ( ci->progeny[3] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[1] );
+            if ( cj->progeny[1] == sub && ci->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[3] );
+            if ( ci->progeny[3] == sub && cj->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[4] );
+            if ( cj->progeny[4] == sub && ci->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[3] );
+            if ( ci->progeny[3] == sub && cj->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[5] );
+            if ( cj->progeny[5] == sub && ci->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[5] , parts_i , ind , count , ci->progeny[3] );
+            if ( ci->progeny[6] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[6] );
+            if ( ci->progeny[6] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[1] );
+            if ( cj->progeny[1] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[6] );
+            if ( ci->progeny[6] == sub && cj->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[4] );
+            if ( cj->progeny[4] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[6] );
+            if ( ci->progeny[6] == sub && cj->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[5] );
+            if ( cj->progeny[5] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[5] , parts_i , ind , count , ci->progeny[6] );
+            if ( ci->progeny[7] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[7] );
+            if ( ci->progeny[7] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[1] );
+            if ( cj->progeny[1] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[7] );
+            if ( ci->progeny[7] == sub && cj->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[4] );
+            if ( cj->progeny[4] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[7] );
+            if ( ci->progeny[7] == sub && cj->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[5] );
+            if ( cj->progeny[5] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[5] , parts_i , ind , count , ci->progeny[7] );
+            break;
+                    
+        case 11: /* (  0 ,  1 , -1 ) */
+            if ( ci->progeny[2] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[2] , parts_i , ind , count , cj->progeny[1] );
+            if ( cj->progeny[1] == sub && ci->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[2] );
+            if ( ci->progeny[2] == sub && cj->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[2] , parts_i , ind , count , cj->progeny[5] );
+            if ( cj->progeny[5] == sub && ci->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[5] , parts_i , ind , count , ci->progeny[2] );
+            if ( ci->progeny[6] == sub && cj->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[1] );
+            if ( cj->progeny[1] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[1] , parts_i , ind , count , ci->progeny[6] );
+            if ( ci->progeny[6] == sub && cj->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[6] , parts_i , ind , count , cj->progeny[5] );
+            if ( cj->progeny[5] == sub && ci->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[5] , parts_i , ind , count , ci->progeny[6] );
+            break;
+                    
+        case 12: /* (  0 ,  0 ,  1 ) */
+            if ( ci->progeny[1] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[1] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[1] );
+            if ( ci->progeny[1] == sub && cj->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[1] , parts_i , ind , count , cj->progeny[2] );
+            if ( cj->progeny[2] == sub && ci->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[1] );
+            if ( ci->progeny[1] == sub && cj->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[1] , parts_i , ind , count , cj->progeny[4] );
+            if ( cj->progeny[4] == sub && ci->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[1] );
+            if ( ci->progeny[1] == sub && cj->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[1] , parts_i , ind , count , cj->progeny[6] );
+            if ( cj->progeny[6] == sub && ci->progeny[1] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[6] , parts_i , ind , count , ci->progeny[1] );
+            if ( ci->progeny[3] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[3] );
+            if ( ci->progeny[3] == sub && cj->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[2] );
+            if ( cj->progeny[2] == sub && ci->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[3] );
+            if ( ci->progeny[3] == sub && cj->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[4] );
+            if ( cj->progeny[4] == sub && ci->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[3] );
+            if ( ci->progeny[3] == sub && cj->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[3] , parts_i , ind , count , cj->progeny[6] );
+            if ( cj->progeny[6] == sub && ci->progeny[3] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[6] , parts_i , ind , count , ci->progeny[3] );
+            if ( ci->progeny[5] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[5] );
+            if ( ci->progeny[5] == sub && cj->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[2] );
+            if ( cj->progeny[2] == sub && ci->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[5] );
+            if ( ci->progeny[5] == sub && cj->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[4] );
+            if ( cj->progeny[4] == sub && ci->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[5] );
+            if ( ci->progeny[5] == sub && cj->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[5] , parts_i , ind , count , cj->progeny[6] );
+            if ( cj->progeny[6] == sub && ci->progeny[5] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[6] , parts_i , ind , count , ci->progeny[5] );
+            if ( ci->progeny[7] == sub && cj->progeny[0] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[0] );
+            if ( cj->progeny[0] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[0] , parts_i , ind , count , ci->progeny[7] );
+            if ( ci->progeny[7] == sub && cj->progeny[2] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[2] );
+            if ( cj->progeny[2] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[2] , parts_i , ind , count , ci->progeny[7] );
+            if ( ci->progeny[7] == sub && cj->progeny[4] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[4] );
+            if ( cj->progeny[4] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[4] , parts_i , ind , count , ci->progeny[7] );
+            if ( ci->progeny[7] == sub && cj->progeny[6] != NULL )
+                DOPAIR_SUBSET( r , ci->progeny[7] , parts_i , ind , count , cj->progeny[6] );
+            if ( cj->progeny[6] == sub && ci->progeny[7] != NULL )
+                DOPAIR_SUBSET( r , cj->progeny[6] , parts_i , ind , count , ci->progeny[7] );
+            break;
+                
+        }
+    
+
+    // #ifdef TIMER_VERBOSE
+    //     printf( "runner_dosub[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , flags , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 );
+    // #else
+    //     TIMER_TOC(timer_dopair_subset);
+    // #endif
+
+    }
+
+
diff --git a/src/runner_iact.h b/src/runner_iact.h
index 64ef8d07643503e1941e99d5023dcf22c520e105..1b5bb787fee6e7f6af505c31e0562c51b408b72f 100644
--- a/src/runner_iact.h
+++ b/src/runner_iact.h
@@ -69,31 +69,35 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
 
     float r = sqrtf( r2 );
     float xi, xj;
-    float hg_inv;
+    float h_inv, hg_inv;
     float wi, wj, wi_dx, wj_dx;
     
     if ( r2 < hi*hi && pi != NULL ) {
         
-        hg_inv = kernel_igamma / hi;
+        h_inv = 1.0 / hi;
+        hg_inv = kernel_igamma * h_inv;
         xi = r * hg_inv;
         kernel_deval( xi , &wi , &wi_dx );
         
         pi->rho += pj->mass * wi;
         pi->rho_dh += -pj->mass * ( 3.0*wi + xi*wi_dx );
         pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
+        pi->wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
         pi->icount += 1;
         
         }
 
     if ( r2 < hj*hj && pj != NULL ) {
         
-        hg_inv = kernel_igamma / hj;
+        h_inv = 1.0 / hj;
+        hg_inv = kernel_igamma * h_inv;
         xj = r * hg_inv;
         kernel_deval( xj , &wj , &wj_dx );
         
         pj->rho += pi->mass * wj;
         pj->rho_dh += -pi->mass * ( 3.0*wj + xj*wj_dx );
         pj->wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
+        pj->wcount_dh -= xj * h_inv * wj_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
         pj->icount += 1;
         
         }
diff --git a/src/space.c b/src/space.c
index 1eaec456a06bec080b06bc00d634155815efe6e2..52c582057086715cec098bf237e94d48008519a2 100644
--- a/src/space.c
+++ b/src/space.c
@@ -129,6 +129,7 @@ void space_map_mkghosts ( struct cell *c , void *data ) {
 void space_map_clearnrtasks ( struct cell *c , void *data ) {
 
     c->nr_tasks = 0;
+    c->nr_density = 0;
 
     }
 
@@ -976,23 +977,43 @@ void space_maketasks ( struct space *s , int do_sort ) {
             }
         }
         
-    /* Count the number of tasks associated with each cell. */
+    /* Count the number of tasks associated with each cell and
+       store the density tasks in each cell. */
     space_map_cells( s , 1 , &space_map_clearnrtasks , NULL );
     for ( k = 0 ; k < s->nr_tasks ; k++ ) {
         t = &s->tasks[k];
-        if ( t->type == task_type_self )
+        if ( t->type == task_type_self ) {
             t->ci->nr_tasks += 1;
+            if ( t->subtype == task_subtype_density ) {
+                t->ci->density[ t->ci->nr_density ] = t;
+                t->ci->nr_density += 1;
+                }
+            }
         else if ( t->type == task_type_pair ) {
             t->ci->nr_tasks += 1;
             t->cj->nr_tasks += 1;
+            if ( t->subtype == task_subtype_density ) {
+                t->ci->density[ t->ci->nr_density ] = t;
+                t->ci->nr_density += 1;
+                t->cj->density[ t->cj->nr_density ] = t;
+                t->cj->nr_density += 1;
+                }
             }
         else if ( t->type == task_type_sub ) {
             t->ci->nr_tasks += 1;
             if ( t->cj != NULL )
                 t->cj->nr_tasks += 1;
+            if ( t->subtype == task_subtype_density ) {
+                t->ci->density[ t->ci->nr_density ] = t;
+                t->ci->nr_density += 1;
+                if ( t->cj != NULL ) {
+                    t->cj->density[ t->cj->nr_density ] = t;
+                    t->cj->nr_density += 1;
+                    }
+                }
             }
         }
-    
+        
     /* Append a ghost task to each cell. */
     space_map_cells( s , 1 , &space_map_mkghosts , s );