diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index 4803666efc7fbadfa9c3938489a216c44c4e59e4..4f5083da52b57f3dbb4c8c743ac726e4c732af34 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -1357,21 +1357,21 @@ void runner_dopair1_density_auto_vec(struct runner *r, struct cell *ci, struct c
 
   /* Get the particle cache from the runner and re-allocate
    * the cache if it is not big enough for the cell. */
-  struct cache *restrict ci_cache = &r->par_cache;
+  //struct cache *restrict ci_cache = &r->par_cache;
 
-  if (ci_cache->count < count_i) {
-    cache_init(ci_cache, count_i);
-  }
-  if (cj_cache.count < count_j) {
-    cache_init(&cj_cache, count_j);
-  }
+  //if (ci_cache->count < count_i) {
+  //  cache_init(ci_cache, count_i);
+  //}
+  //if (cj_cache.count < count_j) {
+  //  cache_init(&cj_cache, count_j);
+  //}
 
   //cache_read_two_cells(ci, cj, ci_cache, &cj_cache, shift);
-  cache_read_two_cells_sorted(ci, cj, ci_cache, &cj_cache, sort_i, sort_j, shift);
+  cache_read_two_cells_sorted(ci, cj, &ci_cache, &cj_cache, sort_i, sort_j, shift);
 
   /* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */
   /* For particles in ci */  
-  populate_max_d(ci, cj, sort_i, sort_j, ci_cache, &cj_cache, dx_max, rshift, max_di, max_dj);
+  populate_max_d(ci, cj, sort_i, sort_j, &ci_cache, &cj_cache, dx_max, rshift, max_di, max_dj);
 
   float di, dj;
 
@@ -1394,7 +1394,7 @@ void runner_dopair1_density_auto_vec(struct runner *r, struct cell *ci, struct c
 
     int ci_cache_idx = pid; //sort_i[pid].i;
 
-    const float hi = ci_cache->h[ci_cache_idx];
+    const float hi = ci_cache.h[ci_cache_idx];
     const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
     if (di < dj_min) continue;
 
@@ -1405,46 +1405,35 @@ void runner_dopair1_density_auto_vec(struct runner *r, struct cell *ci, struct c
     float hi_inv;
 
     /* Fill particle pi vectors. */
-    pix = ci_cache->x[ci_cache_idx];
-    piy = ci_cache->y[ci_cache_idx];
-    piz = ci_cache->z[ci_cache_idx];
-    vix = ci_cache->vx[ci_cache_idx];
-    viy = ci_cache->vy[ci_cache_idx];
-    viz = ci_cache->vz[ci_cache_idx];
+    pix = ci_cache.x[ci_cache_idx];
+    piy = ci_cache.y[ci_cache_idx];
+    piz = ci_cache.z[ci_cache_idx];
+    vix = ci_cache.vx[ci_cache_idx];
+    viy = ci_cache.vy[ci_cache_idx];
+    viz = ci_cache.vz[ci_cache_idx];
 
     /* Get the inverse of hi. */
     hi_inv = 1.0f / hi;
 
-    float rho = 0, rho_dh = 0, wcount = 0, wcount_dh = 0, div_v = 0, curl_vx = 0, curl_vy = 0, curl_vz = 0;
-
     /* Loop over the parts in cj. */
     for (int pjd = 0; pjd <= exit_iteration; pjd++) {
 
       /* Get the cache index to the jth particle. */
-      int cj_cache_idx = pjd; //sort_j[pjd].i;
+      //int cj_cache_idx = pjd; //sort_j[pjd].i;
 
       float dx, dy, dz, r2;
 
       /* Compute the pairwise distance. */
-      dx = pix - cj_cache.x[cj_cache_idx];
-      dy = piy - cj_cache.y[cj_cache_idx];
-      dz = piz - cj_cache.z[cj_cache_idx];
+      dx =  pix - cj_cache.x[pjd];
+      dy =  piy - cj_cache.y[pjd];
+      dz =  piz - cj_cache.z[pjd];
 
       r2 = dx*dx + dy*dy + dz*dz;
 
-      //runner_iact_nonsym_density_jsw(r2, hig2, dx, dy, dz, hi_inv, cj_cache.h[cj_cache_idx], vix, viy, viz, cj_cache.vx[cj_cache_idx], cj_cache.vy[cj_cache_idx], cj_cache.vz[cj_cache_idx], cj_cache.m[cj_cache_idx], &pi->rho, &pi->density.rho_dh, &pi->density.wcount, &pi->density.wcount_dh, &pi->density.div_v, &pi->density.rot_v[0], &pi->density.rot_v[1], &pi->density.rot_v[2]);
-      //runner_iact_nonsym_density_jsw(r2, hig2, dx, dy, dz, hi_inv, cj_cache.h[cj_cache_idx], vix, viy, viz, cj_cache.vx[cj_cache_idx], cj_cache.vy[cj_cache_idx], cj_cache.vz[cj_cache_idx], cj_cache.m[cj_cache_idx], &ci_cache->rho[ci_cache_idx], &ci_cache->rho_dh[ci_cache_idx], &ci_cache->wcount[ci_cache_idx], &ci_cache->wcount_dh[ci_cache_idx], &ci_cache->div_v[ci_cache_idx], &ci_cache->curl_vx[ci_cache_idx], &ci_cache->curl_vy[ci_cache_idx], &ci_cache->curl_vz[ci_cache_idx]);
-      runner_iact_nonsym_density_jsw(r2, hig2, dx, dy, dz, hi_inv, cj_cache.h[cj_cache_idx], vix, viy, viz, cj_cache.vx[cj_cache_idx], cj_cache.vy[cj_cache_idx], cj_cache.vz[cj_cache_idx], cj_cache.m[cj_cache_idx], &rho, &rho_dh, &wcount, &wcount_dh, &div_v, &curl_vx, &curl_vy, &curl_vz);
+      runner_iact_nonsym_density_jsw(r2, hig2, dx, dy, dz, hi_inv, cj_cache.h[pjd], vix, viy, viz, cj_cache.vx[pjd], cj_cache.vy[pjd], cj_cache.vz[pjd], cj_cache.m[pjd], &ci_cache.rho[pid], &ci_cache.rho_dh[pid], &ci_cache.wcount[pid], &ci_cache.wcount_dh[pid], &ci_cache.div_v[pid], &ci_cache.curl_vx[pid], &ci_cache.curl_vy[pid], &ci_cache.curl_vz[pid]);
       
     } /* loop over the parts in cj. */
-    pi->rho += rho;
-    pi->density.rho_dh += rho_dh;
-    pi->density.wcount += wcount;
-    pi->density.wcount_dh += wcount_dh;
-    pi->density.div_v += div_v;
-    pi->density.rot_v[0] += curl_vx;
-    pi->density.rot_v[1] += curl_vy;
-    pi->density.rot_v[2] += curl_vz;
+    
   } /* loop over the parts in ci. */
 
   int max_ind_i = 0;
@@ -1487,8 +1476,6 @@ void runner_dopair1_density_auto_vec(struct runner *r, struct cell *ci, struct c
     /* Get the inverse of hj. */
     hj_inv = 1.0f / hj;
 
-    float rho = 0, rho_dh = 0, wcount = 0, wcount_dh = 0, div_v = 0, curl_vx = 0, curl_vy = 0, curl_vz = 0;
-
     /* Loop over the parts in ci. */
     for (int pid = exit_iteration; pid < count_i; pid++) {
 
@@ -1498,28 +1485,19 @@ void runner_dopair1_density_auto_vec(struct runner *r, struct cell *ci, struct c
       float dx, dy, dz, r2;
 
       /* Compute the pairwise distance. */
-      dx = pjx - ci_cache->x[ci_cache_idx];
-      dy = pjy - ci_cache->y[ci_cache_idx];
-      dz = pjz - ci_cache->z[ci_cache_idx];
+      dx = pjx - ci_cache.x[ci_cache_idx];
+      dy = pjy - ci_cache.y[ci_cache_idx];
+      dz = pjz - ci_cache.z[ci_cache_idx];
 
       r2 = dx*dx + dy*dy + dz*dz;
       
-      //runner_iact_nonsym_density_jsw(r2, hjg2, dx, dy, dz, hj_inv, ci_cache->h[ci_cache_idx], vjx, vjy, vjz, ci_cache->vx[ci_cache_idx], ci_cache->vy[ci_cache_idx], ci_cache->vz[ci_cache_idx], ci_cache->m[ci_cache_idx], &pj->rho, &pj->density.rho_dh, &pj->density.wcount, &pj->density.wcount_dh, &pj->density.div_v, &pj->density.rot_v[0], &pj->density.rot_v[1], &pj->density.rot_v[2]);
-      //runner_iact_nonsym_density_jsw(r2, hjg2, dx, dy, dz, hj_inv, ci_cache->h[ci_cache_idx], vjx, vjy, vjz, ci_cache->vx[ci_cache_idx], ci_cache->vy[ci_cache_idx], ci_cache->vz[ci_cache_idx], ci_cache->m[ci_cache_idx], &cj_cache.rho[cj_cache_idx], &cj_cache.rho_dh[cj_cache_idx], &cj_cache.wcount[cj_cache_idx], &cj_cache.wcount_dh[cj_cache_idx], &cj_cache.div_v[cj_cache_idx], &cj_cache.curl_vx[cj_cache_idx], &cj_cache.curl_vy[cj_cache_idx], &cj_cache.curl_vz[cj_cache_idx]);
-      runner_iact_nonsym_density_jsw(r2, hjg2, dx, dy, dz, hj_inv, ci_cache->h[ci_cache_idx], vjx, vjy, vjz, ci_cache->vx[ci_cache_idx], ci_cache->vy[ci_cache_idx], ci_cache->vz[ci_cache_idx], ci_cache->m[ci_cache_idx], &rho, &rho_dh, &wcount, &wcount_dh, &div_v, &curl_vx, &curl_vy, &curl_vz);
+      runner_iact_nonsym_density_jsw(r2, hjg2, dx, dy, dz, hj_inv, ci_cache.h[ci_cache_idx], vjx, vjy, vjz, ci_cache.vx[ci_cache_idx], ci_cache.vy[ci_cache_idx], ci_cache.vz[ci_cache_idx], ci_cache.m[ci_cache_idx], &cj_cache.rho[cj_cache_idx], &cj_cache.rho_dh[cj_cache_idx], &cj_cache.wcount[cj_cache_idx], &cj_cache.wcount_dh[cj_cache_idx], &cj_cache.div_v[cj_cache_idx], &cj_cache.curl_vx[cj_cache_idx], &cj_cache.curl_vy[cj_cache_idx], &cj_cache.curl_vz[cj_cache_idx]);
       
     } /* loop over the parts in ci. */
-    pj->rho += rho;
-    pj->density.rho_dh += rho_dh;
-    pj->density.wcount += wcount;
-    pj->density.wcount_dh += wcount_dh;
-    pj->density.div_v += div_v;
-    pj->density.rot_v[0] += curl_vx;
-    pj->density.rot_v[1] += curl_vy;
-    pj->density.rot_v[2] += curl_vz;
+    
   } /* loop over the parts in cj. */
-
-  //cache_write_sorted_particles(ci_cache, &cj_cache, ci, cj, sort_i, sort_j);
+    
+  cache_write_sorted_particles(&ci_cache, &cj_cache, ci, cj, sort_i, sort_j);
 
   TIMER_TOC(timer_dopair_density);