diff --git a/src/cell.c b/src/cell.c
index bfcecc94d95f9247a43e50a3f31031d979668f3b..356e4769e120bbf395964ad7a3d0ba8cea6a923e 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -258,8 +258,18 @@ void cell_split ( struct cell *c  ) {
     for ( k = 0 ; k < 8 ; k++ ) {
         c->progeny[k]->count = right[k] - left[k] + 1;
         c->progeny[k]->parts = &c->parts[ left[k] ];
+        c->progeny[k]->cparts = &c->cparts[ left[k] ];
         }
         
+    /* Update the condensed particle data. */         
+    for ( k = 0 ; k < c->count ; k++ ) {
+        c->cparts[k].x[0] = c->parts[k].x[0];
+        c->cparts[k].x[1] = c->parts[k].x[1];
+        c->cparts[k].x[2] = c->parts[k].x[2];
+        c->cparts[k].h = c->parts[k].h;
+        c->cparts[k].dt = c->parts[k].dt;
+        }
+
     /* Verify a few sub-cells. */
     /* for ( k = 0 ; k < c->progeny[0]->count ; k++ )
         if ( c->progeny[0]->parts[k].x[0] > pivot[0] ||
diff --git a/src/cell.h b/src/cell.h
index 8d54374f9b7a9de711e8dd0f2f04c9ae92d93ee2..23565c57694672fd5dd61e331c5d046c1b4727d7 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -49,6 +49,7 @@ struct cell {
     
     /* Pointers to the particle data. */
     struct part *parts;
+    struct cpart *cparts;
     
     /* Pointers for the sorted indices. */
     struct entry *sort;
diff --git a/src/part.h b/src/part.h
index 095e85c2e198a1fcaf3938384053cbee80321b46..f17cddaa47f6b3f150cb8171a4d6715825e3f3f6 100644
--- a/src/part.h
+++ b/src/part.h
@@ -24,9 +24,27 @@
 #define part_dtmax                      10
 
 
+/* Condensed data of a single particle. */
+struct cpart {
+
+    /* Particle position. */
+    double x[3];
+    
+    /* Particle cutoff radius. */
+    float h;
+    
+    /* Particle time-step. */
+    float dt;
+    
+    } __attribute__((aligned (32)));
+    
+
 /* Data of a single particle. */
 struct part {
 
+    /* Particle position. */
+    double x[3];
+    
     /* Particle cutoff radius. */
     float h;
     
@@ -39,9 +57,6 @@ struct part {
     /* Particle ID. */
     unsigned long long id;
     
-    /* Particle position. */
-    double x[3];
-    
     /* Particle velocity. */
     float v[3];
     
@@ -70,7 +85,7 @@ struct part {
     float rho_dh;
     
     /* Particle number density. */
-    int icount;
+    // int icount;
     float wcount;
     float wcount_dh;
     
diff --git a/src/runner.c b/src/runner.c
index 6805f927fecdafa136b85a932dd0b93a6c3ca8fc..223f1646741bebd3d949b3ca4366cda84c06d588 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -172,7 +172,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
 
     struct entry *finger;
     struct entry *fingers[8];
-    struct part *parts = c->parts;
+    struct cpart *cparts = c->cparts;
     int j, k, count = c->count;
     int i, ind, off[8], inds[8], temp_i;
     // float shift[3];
@@ -262,9 +262,9 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
     
         /* Fill the sort array. */
         for ( k = 0 ; k < count ; k++ ) {
-            px[0] = parts[k].x[0];
-            px[1] = parts[k].x[1];
-            px[2] = parts[k].x[2];
+            px[0] = cparts[k].x[0];
+            px[1] = cparts[k].x[1];
+            px[2] = cparts[k].x[2];
             for ( j = 0 ; j < 13 ; j++ )
                 if ( flags & (1 << j) ) {
                     c->sort[ j*(count + 1) + k].i = k;
@@ -272,7 +272,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
                     }
             if ( flags & (1 << 14) ) {
                 c->sort[ 14*(count + 1) + k ].i = k;
-                c->sort[ 14*(count + 1) + k ].d = parts[k].dt;
+                c->sort[ 14*(count + 1) + k ].d = cparts[k].dt;
                 }
             }
 
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index a9a5710c288f7a888ddf1ecaa22836fe426482ee..f9e0338176a2b5f9fb596c85e44e035bbe791482 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -82,8 +82,9 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
     struct engine *e = r->e;
     int pid, pjd, k, count_i = ci->count, count_j = cj->count;
     double shift[3] = { 0.0 , 0.0 , 0.0 };
-    struct part *restrict pi, *restrict pj;
     struct part *restrict parts_i = ci->parts, *restrict parts_j = cj->parts;
+    struct cpart *restrict cpj, *restrict cparts_j = cj->cparts;
+    struct cpart *restrict cpi, *restrict cparts_i = ci->cparts;
     double pix[3];
     float dx[3], hi, hi2, r2;
     TIMER_TIC
@@ -105,29 +106,29 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
     for ( pid = 0 ; pid < count_i ; pid++ ) {
     
         /* Get a hold of the ith part in ci. */
-        pi = &parts_i[ pid ];
+        cpi = &cparts_i[ pid ];
         for ( k = 0 ; k < 3 ; k++ )
-            pix[k] = pi->x[k] - shift[k];
-        hi = pi->h;
+            pix[k] = cpi->x[k] - shift[k];
+        hi = cpi->h;
         hi2 = hi * hi;
         
         /* Loop over the parts in cj. */
         for ( pjd = 0 ; pjd < count_j ; pjd++ ) {
         
             /* Get a pointer to the jth particle. */
-            pj = &parts_j[ pjd ];
+            cpj = &cparts_j[ pjd ];
         
             /* Compute the pairwise distance. */
             r2 = 0.0f;
             for ( k = 0 ; k < 3 ; k++ ) {
-                dx[k] = pix[k] - pj->x[k];
+                dx[k] = pix[k] - cpj->x[k];
                 r2 += dx[k]*dx[k];
                 }
                 
             /* Hit or miss? */
-            if ( r2 < hi2 || r2 < pj->h*pj->h ) {
+            if ( r2 < hi2 || r2 < cpj->h*cpj->h ) {
             
-                IACT( r2 , dx , hi , pj->h , pi , pj );
+                IACT( r2 , dx , hi , cpj->h , &parts_i[ pid ] , &parts_j[pjd] );
             
                 }
         
@@ -162,7 +163,8 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
     struct engine *e = r->e;
     int pid, pjd, sid, k, count_j = cj->count, flipped;
     double shift[3] = { 0.0 , 0.0 , 0.0 };
-    struct part *restrict pi, *restrict pj, *restrict parts_j = cj->parts;
+    struct part *restrict pi, *restrict parts_j = cj->parts;
+    struct cpart *restrict cpj, *restrict cparts_j = cj->cparts;
     double pix[3];
     float dx[3], hi, hi2, r2, di;
     struct entry *sort_j;
@@ -210,19 +212,19 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
             for ( pjd = 0 ; pjd < count_j && sort_j[ pjd ].d < di ; pjd++ ) {
 
                 /* Get a pointer to the jth particle. */
-                pj = &parts_j[ sort_j[ pjd ].i ];
+                cpj = &cparts_j[ sort_j[ pjd ].i ];
 
                 /* Compute the pairwise distance. */
                 r2 = 0.0f;
                 for ( k = 0 ; k < 3 ; k++ ) {
-                    dx[k] = pix[k] - pj->x[k];
+                    dx[k] = pix[k] - cpj->x[k];
                     r2 += dx[k]*dx[k];
                     }
 
                 /* Hit or miss? */
                 if ( r2 < hi2 ) {
 
-                    IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj );
+                    IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[ pjd ].i ] );
                     
                     }
 
@@ -250,19 +252,19 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
             for ( pjd = count_j-1 ; pjd >= 0 && di < sort_j[ pjd ].d ; pjd-- ) {
 
                 /* Get a pointer to the jth particle. */
-                pj = &parts_j[ sort_j[ pjd ].i ];
+                cpj = &cparts_j[ sort_j[ pjd ].i ];
 
                 /* Compute the pairwise distance. */
                 r2 = 0.0f;
                 for ( k = 0 ; k < 3 ; k++ ) {
-                    dx[k] = pix[k] - pj->x[k];
+                    dx[k] = pix[k] - cpj->x[k];
                     r2 += dx[k]*dx[k];
                     }
 
                 /* Hit or miss? */
                 if ( r2 < hi2 ) {
 
-                    IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj );
+                    IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[ pjd ].i ] );
 
                     }
 
@@ -297,7 +299,9 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
 void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *restrict parts , int *restrict ind , int count ) {
 
     int pid, pjd, k, count_i = ci->count;
-    struct part *restrict pi, *restrict pj, *restrict parts_i = ci->parts;
+    struct part *restrict parts_i = ci->parts;
+    struct part *restrict pi;
+    struct cpart *restrict cpj, *restrict cparts = ci->cparts;
     double pix[3];
     float dx[3], hi, hi2, r2;
     TIMER_TIC
@@ -321,23 +325,19 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
         for ( pjd = 0 ; pjd < count_i ; pjd++ ) {
         
             /* Get a pointer to the jth particle. */
-            pj = &parts_i[ pjd ];
+            cpj = &cparts[ pjd ];
             
-            /* Skip the particle itself. */
-            if ( pj == pi )
-                continue;
-        
             /* Compute the pairwise distance. */
             r2 = 0.0f;
             for ( k = 0 ; k < 3 ; k++ ) {
-                dx[k] = pix[k] - pj->x[k];
+                dx[k] = pix[k] - cpj->x[k];
                 r2 += dx[k]*dx[k];
                 }
                 
             /* Hit or miss? */
-            if ( r2 < hi2 ) {
+            if ( r2 > 0.0f && r2 < hi2 ) {
             
-                IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj );
+                IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_i[ pjd ] );
             
                 }
         
@@ -370,7 +370,9 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
     double rshift, shift[3] = { 0.0 , 0.0 , 0.0 };
     struct cell *temp;
     struct entry *restrict sort_i, *restrict sort_j;
-    struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j;
+    struct part *restrict parts_i, *restrict parts_j;
+    struct cpart *restrict cpi, *restrict cparts_i;
+    struct cpart *restrict cpj, *restrict cparts_j;
     double pix[3], pjx[3], di, dj;
     float dx[3], hi, hi2, hj, hj2, r2;
     double hi_max, hj_max;
@@ -427,6 +429,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
     hi_max = ci->h_max - rshift; hj_max = cj->h_max;
     count_i = ci->count; count_j = cj->count;
     parts_i = ci->parts; parts_j = cj->parts;
+    cparts_i = ci->cparts; cparts_j = cj->cparts;
     di_max = sort_i[count_i-1].d - rshift;
     dj_min = sort_j[0].d;
     
@@ -437,33 +440,33 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
     for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) {
     
         /* Get a hold of the ith part in ci. */
-        pi = &parts_i[ sort_i[ pid ].i ];
-        hi = pi->h;
+        cpi = &cparts_i[ sort_i[ pid ].i ];
+        hi = cpi->h;
         di = sort_i[pid].d + hi - rshift;
         if ( di < dj_min )
             continue;
             
-        hi2 = pi->h * pi->h;
+        hi2 = hi * hi;
         for ( k = 0 ; k < 3 ; k++ )
-            pix[k] = pi->x[k] - shift[k];
+            pix[k] = cpi->x[k] - shift[k];
         
         /* Loop over the parts in cj. */
         for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d < di ; pjd++ ) {
         
             /* Get a pointer to the jth particle. */
-            pj = &parts_j[ sort_j[pjd].i ];
+            cpj = &cparts_j[ sort_j[pjd].i ];
         
             /* Compute the pairwise distance. */
             r2 = 0.0f;
             for ( k = 0 ; k < 3 ; k++ ) {
-                dx[k] = pix[k] - pj->x[k];
+                dx[k] = pix[k] - cpj->x[k];
                 r2 += dx[k]*dx[k];
                 }
                 
             /* Hit or miss? */
             if ( r2 < hi2 ) {
             
-                IACT( r2 , dx , hi , pj->h , pi , pj );
+                IACT( r2 , dx , hi , cpj->h , &parts_i[ sort_i[ pid ].i ] , &parts_j[ sort_j[pjd].i ] );
             
                 }
         
@@ -478,33 +481,33 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
     for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max < di_max ; pjd++ ) {
     
         /* Get a hold of the jth part in cj. */
-        pj = &parts_j[ sort_j[ pjd ].i ];
-        hj = pj->h;
+        cpj = &cparts_j[ sort_j[ pjd ].i ];
+        hj = cpj->h;
         dj = sort_j[pjd].d - hj - rshift;
         if ( dj > di_max )
             continue;
             
         for ( k = 0 ; k < 3 ; k++ )
-            pjx[k] = pj->x[k] + shift[k];
-        hj2 = pj->h * pj->h;
+            pjx[k] = cpj->x[k] + shift[k];
+        hj2 = hj * hj;
         
         /* Loop over the parts in ci. */
         for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d > dj ; pid-- ) {
         
             /* Get a pointer to the jth particle. */
-            pi = &parts_i[ sort_i[pid].i ];
+            cpi = &cparts_i[ sort_i[pid].i ];
             
             /* Compute the pairwise distance. */
             r2 = 0.0f;
             for ( k = 0 ; k < 3 ; k++ ) {
-                dx[k] = pi->x[k] - pjx[k];
+                dx[k] = cpi->x[k] - pjx[k];
                 r2 += dx[k]*dx[k];
                 }
                 
             /* Hit or miss? */
-            if ( r2 < hj2 && r2 > pi->h*pi->h ) {
+            if ( r2 < hj2 && r2 > cpi->h*cpi->h ) {
             
-                IACT( r2 , dx , pi->h , hj , pi , pj );
+                IACT( r2 , dx , cpi->h , hj , &parts_i[ sort_i[pid].i ] , &parts_j[ sort_j[ pjd ].i ] );
             
                 }
         
@@ -533,7 +536,8 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) {
     int k, pid, pjd, count = c->count;
     double pix[3];
     float dx[3], hi, hi2, r2;
-    struct part *restrict pi, *restrict pj, *restrict parts = c->parts;
+    struct part *restrict parts = c->parts;
+    struct cpart *restrict cpi, *restrict cpj, *restrict cparts = c->cparts;
     TIMER_TIC
     
     if ( c->split )
@@ -543,31 +547,31 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) {
     for ( pid = 0 ; pid < count ; pid++ ) {
     
         /* Get a pointer to the ith particle. */
-        pi = &parts[pid];
+        cpi = &cparts[pid];
     
         /* Get the particle position and radius. */
         for ( k = 0 ; k < 3 ; k++ )
-            pix[k] = pi->x[k];
-        hi = pi->h;
+            pix[k] = cpi->x[k];
+        hi = cpi->h;
         hi2 = hi * hi;
             
         /* Loop over the other particles .*/
         for ( pjd = pid+1 ; pjd < count ; pjd++ ) {
         
             /* Get a pointer to the jth particle. */
-            pj = &parts[pjd];
+            cpj = &cparts[pjd];
         
             /* Compute the pairwise distance. */
             r2 = 0.0f;
             for ( k = 0 ; k < 3 ; k++ ) {
-                dx[k] = pix[k] - pj->x[k];
+                dx[k] = pix[k] - cpj->x[k];
                 r2 += dx[k]*dx[k];
                 }
                 
             /* Hit or miss? */
-            if ( r2 < hi2 || r2 < pj->h*pj->h ) {
+            if ( r2 < hi2 || r2 < cpj->h*cpj->h ) {
             
-                IACT( r2 , dx , hi , pj->h , pi , pj );
+                IACT( r2 , dx , hi , cpj->h , &parts[pid] , &parts[pjd] );
                 
                 }
         
diff --git a/src/runner_iact.h b/src/runner_iact.h
index 67db41bbd11d22c8225bdd2efe53928b079dccc0..dae13c2fa6e272a905e8dadfb2c0615218a332f4 100644
--- a/src/runner_iact.h
+++ b/src/runner_iact.h
@@ -83,7 +83,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
         pi->rho_dh += -pj->mass * ( 3.0*wi + xi*wi_dx );
         pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
         pi->wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
-        pi->icount += 1;
+        // pi->icount += 1;
         
         }
 
@@ -98,7 +98,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
         pj->rho_dh += -pi->mass * ( 3.0*wj + xj*wj_dx );
         pj->wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
         pj->wcount_dh -= xj * h_inv * wj_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
-        pj->icount += 1;
+        // pj->icount += 1;
         
         }
         
@@ -135,7 +135,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density (
         pi->rho_dh += -pj->mass * ( 3.0*wi + xi*wi_dx );
         pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
         pi->wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
-        pi->icount += 1;
+        // pi->icount += 1;
         
         }
 
diff --git a/src/space.c b/src/space.c
index 60d2019f19f5c94a8d3e0edbb60b28e5811a0bd6..69239e8a51bb0a798bbec7bc54239fa3fe554f21 100644
--- a/src/space.c
+++ b/src/space.c
@@ -118,7 +118,7 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) {
 
         /* Count the particles below that. */
         for ( count = 0 , k = 0 ; k < c->count ; k++ ) {
-            h = c->parts[k].h;
+            h = c->cparts[k].h;
             if ( h <= h_limit )
                 count += 1;
             if ( h > h_max )
@@ -212,14 +212,16 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
     int i, j, k, cdim[3];
     struct cell *c;
     struct part *finger;
+    struct cpart *cfinger;
     int *ind, changes = 0;
     
     /* Run through the parts and get the current h_max. */
-    for ( k = 0 ; k < s->nr_parts ; k++ )
+    for ( k = 0 ; k < s->nr_parts ; k++ ) {
         if ( s->parts[k].h > h_max )
             h_max = s->parts[k].h;
         else if ( s->parts[k].h < h_min )
             h_min = s->parts[k].h;
+        }
     s->h_min = h_min;
     s->h_max = h_max;
     
@@ -264,28 +266,43 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
                     c->depth = 0;
                     }
                     
-        /* Run through the particles and get their cell index. */
-        ind = (int *)alloca( sizeof(int) * s->nr_parts );
-        for ( k = 0 ; k < s->nr_parts ; k++ )  {
-            ind[k] = cell_getid( cdim , s->parts[k].x[0]*s->ih[0] , s->parts[k].x[1]*s->ih[1] , s->parts[k].x[2]*s->ih[2] );
-            s->cells[ ind[k] ].count += 1;
-            }
-
-        /* Sort the parts according to their cells. */
-        parts_sort( s->parts , ind , s->nr_parts , 0 , s->nr_cells );        
-                    
-        /* Hook the cells up to the parts. */
-        for ( finger = s->parts , k = 0 ; k < s->nr_cells ; k++ ) {
-            c = &s->cells[ k ];
-            c->parts = finger;
-            finger = &finger[ c->count ];
-            }
-        
         /* There were massive changes. */
         changes = 1;
         
         } /* re-build upper-level cells? */
         
+        
+    /* Run through the particles and get their cell index. */
+    ind = (int *)alloca( sizeof(int) * s->nr_parts );
+    for ( k = 0 ; k < s->nr_cells ; k++ )
+        s->cells[ k ].count = 0;
+    for ( k = 0 ; k < s->nr_parts ; k++ )  {
+        ind[k] = cell_getid( s->cdim , s->parts[k].x[0]*s->ih[0] , s->parts[k].x[1]*s->ih[1] , s->parts[k].x[2]*s->ih[2] );
+        s->cells[ ind[k] ].count += 1;
+        }
+
+    /* Sort the parts according to their cells. */
+    parts_sort( s->parts , ind , s->nr_parts , 0 , s->nr_cells );        
+
+    /* Update the condensed particle data. */         
+    for ( k = 0 ; k < s->nr_parts ; k++ ) {
+        s->cparts[k].x[0] = s->parts[k].x[0];
+        s->cparts[k].x[1] = s->parts[k].x[1];
+        s->cparts[k].x[2] = s->parts[k].x[2];
+        s->cparts[k].h = s->parts[k].h;
+        s->cparts[k].dt = s->parts[k].dt;
+        }
+
+    /* Hook the cells up to the parts. */
+    for ( finger = s->parts , cfinger = s->cparts , k = 0 ; k < s->nr_cells ; k++ ) {
+        c = &s->cells[ k ];
+        c->parts = finger;
+        c->cparts = cfinger;
+        finger = &finger[ c->count ];
+        cfinger = &cfinger[ c->count ];
+        }
+        
+        
     /* At this point, we have the upper-level cells, old or new. Now make
        sure that the parts in each cell are ok. */
     #pragma omp parallel for shared(s) reduction(+:changes)
@@ -303,7 +320,7 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
 
 
 /**
- * @brief Sort the particles according to the given indices.
+ * @brief Sort the particles and condensed particles according to the given indices.
  *
  * @param parts The list of #part
  * @param ind The indices with respect to which the parts are sorted.
@@ -321,57 +338,79 @@ void parts_sort ( struct part *parts , int *ind , int N , int min , int max ) {
     int temp_i;
     struct part temp_p;
     
-    /* One pass of quicksort. */
-    while ( i < j ) {
-        while ( i < N && ind[i] <= pivot )
-            i++;
-        while ( j >= 0 && ind[j] > pivot )
-            j--;
-        if ( i < j ) {
-            temp_i = ind[i]; ind[i] = ind[j]; ind[j] = temp_i;
-            temp_p = parts[i]; parts[i] = parts[j]; parts[j] = temp_p;
-            }
+    /* If N is small enough, just do insert sort. */
+    if ( N < 16 ) {
+    
+        for ( i = 1 ; i < N ; i++ )
+            if ( ind[i] < ind[i-1] ) {
+                temp_i = ind[i];
+                temp_p = parts[j];
+                for ( j = i ; j > 0 && ind[j-1] > temp_i ; j-- ) {
+                    ind[j] = ind[j-1];
+                    parts[j] = parts[j-1];
+                    }
+                ind[j] = temp_i;
+                parts[j] = temp_p;
+                }
+    
         }
         
-    /* Verify sort. */
-    for ( int k = 0 ; k <= j ; k++ )
-        if ( ind[k] > pivot ) {
-            printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
-            error( "Sorting failed (<=pivot)." );
+    /* Otherwise, recurse with Quicksort. */
+    else {
+    
+        /* One pass of quicksort. */
+        while ( i < j ) {
+            while ( i < N && ind[i] <= pivot )
+                i++;
+            while ( j >= 0 && ind[j] > pivot )
+                j--;
+            if ( i < j ) {
+                temp_i = ind[i]; ind[i] = ind[j]; ind[j] = temp_i;
+                temp_p = parts[i]; parts[i] = parts[j]; parts[j] = temp_p;
+                }
             }
-    for ( int k = j+1 ; k < N ; k++ )
-        if ( ind[k] <= pivot ) {
-            printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
-            error( "Sorting failed (>pivot)." );
+
+        /* Verify sort. */
+        for ( int k = 0 ; k <= j ; k++ )
+            if ( ind[k] > pivot ) {
+                printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
+                error( "Sorting failed (<=pivot)." );
+                }
+        for ( int k = j+1 ; k < N ; k++ )
+            if ( ind[k] <= pivot ) {
+                printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
+                error( "Sorting failed (>pivot)." );
+                }
+
+        /* Try to recurse in parallel. */
+        if ( N < 100 ) {
+
+            /* Recurse on the left? */
+            if ( j > 0 && pivot > min )
+                parts_sort( parts , ind , j+1 , min , pivot );
+
+            /* Recurse on the right? */
+            if ( i < N && pivot+1 < max )
+                parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max );
+
             }
-        
-    /* Try to recurse in parallel. */
-    if ( N < 100 ) {
-    
-        /* Recurse on the left? */
-        if ( j > 0 && pivot > min )
-            parts_sort( parts , ind , j+1 , min , pivot );
 
-        /* Recurse on the right? */
-        if ( i < N && pivot+1 < max )
-            parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max );
+        else
+        #pragma omp parallel sections
+        {
 
-        }
-        
-    else
-    #pragma omp parallel sections
-    {
-    
-        /* Recurse on the left? */
-        #pragma omp section
-        if ( j > 0 && pivot > min )
-            parts_sort( parts , ind , j+1 , min , pivot );
+            /* Recurse on the left? */
+            #pragma omp section
+            if ( j > 0 && pivot > min )
+                parts_sort( parts , ind , j+1 , min , pivot );
 
-        /* Recurse on the right? */
-        #pragma omp section
-        if ( i < N && pivot+1 < max )
-            parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max );
+            /* Recurse on the right? */
+            #pragma omp section
+            if ( i < N && pivot+1 < max )
+                parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max );
 
+            }
+            
         }
     
     }
@@ -1306,7 +1345,7 @@ void space_split ( struct space *s , struct cell *c ) {
     
     /* Count the particles below that. */
     for ( count = 0 , k = 0 ; k < c->count ; k++ ) {
-        h = c->parts[k].h;
+        h = c->cparts[k].h;
         if ( h <= h_limit )
             count += 1;
         if ( h > h_max )
@@ -1387,6 +1426,9 @@ void space_recycle ( struct space *s , struct cell *c ) {
     /* Clear this cell's sort arrays. */
     if ( c->sort != NULL )
         free( c->sort );
+        
+    /* Clear the cell data. */
+    bzero( c , sizeof(struct cell) );
     
     /* Hook this cell into the buffer. */
     c->next = s->cells_new;
@@ -1465,6 +1507,10 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N ,
     s->nr_parts = N;
     s->parts = parts;
     
+    /* Allocate the cparts array. */
+    if ( posix_memalign( (void *)&s->cparts , 32 ,  N * sizeof(struct cpart) ) != 0 )
+        error( "Failed to allocate cparts." );
+    
     /* Init the space lock. */
     if ( lock_init( &s->lock ) != 0 )
         error( "Failed to create space spin-lock." );
diff --git a/src/space.h b/src/space.h
index 6230cb298f419da4d5412b30acadda735d8ca985..e648b680608f58813b032b05183123cdf3e55040 100644
--- a/src/space.h
+++ b/src/space.h
@@ -75,6 +75,7 @@ struct space {
     
     /* The particle data (cells have pointers to this). */
     struct part *parts;
+    struct cpart *cparts;
     
     /* The sortlist data (cells hold pointers to these). */
     struct entry *sortlist;