diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index 6f081f26249191633e5084e291c18be59ea9234b..f7839261deb3c74b444845ca2eab8ca7eee57880 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -943,8 +943,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
   const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
 
   /* Get some other useful values. */
-  const double hi_max = ci->h_max * kernel_gamma - rshift;
-  const double hj_max = cj->h_max * kernel_gamma;
   const int count_i = ci->count;
   const int count_j = cj->count;
   struct part *restrict parts_i = ci->parts;
@@ -967,14 +965,29 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
   //cache_read_two_cells(ci, cj, ci_cache, &cj_cache, shift);
   cache_read_two_cells_sorted(ci, cj, ci_cache, &cj_cache, sort_i, sort_j, shift);
 
+  /* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */
+  /* For particles in ci */  
+  populate_max_d(ci, cj, sort_i, sort_j, ci_cache, &cj_cache, dx_max, rshift, max_di, max_dj);
+
+  float di, dj;
+
+  int max_ind_j = count_j - 1;
+
   /* Loop over the parts in ci. */
-  for (int pid = count_i - 1;
-       pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
+  for (int pid = count_i - 1; pid >= 0 && max_ind_j >= 0; pid--) {
 
     /* Get a hold of the ith part in ci. */
     struct part *restrict pi = &parts_i[sort_i[pid].i];
     if (!part_is_active(pi, e)) continue;
 
+    dj = sort_j[max_ind_j].d;
+    while(max_ind_j > 0 && max_di[pid] < dj) {
+      max_ind_j--;
+
+      dj = sort_j[max_ind_j].d;
+    }
+    int exit_iteration = max_ind_j;    
+
     int ci_cache_idx = pid; //sort_i[pid].i;
 
     const float hi = ci_cache->h[ci_cache_idx];
@@ -1014,14 +1027,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
     curlvySum.v = vec_setzero();
     curlvzSum.v = vec_setzero();
 
-    int exit_iteration = count_j;
-    for (int pjd = 0; pjd < count_j ; pjd++) {
-      if(sort_j[pjd].d > di) {
-        exit_iteration = pjd;
-        break;
-      }
-    }
-    
     /* Pad cache if there is a serial remainder. */
     int exit_iteration_align = exit_iteration;
     int rem = exit_iteration % (num_vec_proc * VEC_SIZE);
@@ -1129,14 +1134,22 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
 
   } /* loop over the parts in ci. */
 
+  int max_ind_i = 0;
   /* Loop over the parts in cj. */
-  for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
-       pjd++) {
+  for (int pjd = 0; pjd < count_j && max_ind_i < count_i; pjd++) {
 
     /* Get a hold of the jth part in cj. */
     struct part *restrict pj = &parts_j[sort_j[pjd].i];
     if (!part_is_active(pj, e)) continue;
 
+    di = sort_i[max_ind_i].d;
+    while(max_ind_i < count_i - 1 && max_dj[pjd] > di) {
+      max_ind_i++;
+
+      di = sort_i[max_ind_i].d;
+    }
+    int exit_iteration = max_ind_i;
+    
     int cj_cache_idx = pjd;
 
     const float hj = cj_cache.h[cj_cache_idx];
@@ -1177,14 +1190,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
     curlvySum.v = vec_setzero();
     curlvzSum.v = vec_setzero();
 
-    int exit_iteration = 0;
-    for (int pid = count_i - 1; pid >= 0; pid--) {
-      if(sort_i[pid].d < dj) {
-        exit_iteration = pid;
-        break;
-      }
-    }
-
     /* Pad cache if there is a serial remainder. */
     int exit_iteration_align = exit_iteration;
     int rem = exit_iteration % (num_vec_proc * VEC_SIZE);
@@ -1198,7 +1203,6 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
     vector pivx, pivy, pivz, mi;
 
     /* Loop over the parts in ci. */
-    //for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
     for (int pid = count_i - 1; pid >= 0; pid -= VEC_SIZE) {
 
       /* Get the cache index to the ith particle. */
@@ -1306,191 +1310,191 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
  * @param ci The first #cell.
  * @param cj The second #cell.
  */
-void runner_dopair1_density_auto_vec(struct runner *r, struct cell *ci, struct cell *cj) {
-
-#ifdef WITH_VECTORIZATION
-  const struct engine *restrict e = r->e;
-
-  TIMER_TIC;
-
-  /* Anything to do here? */
-  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
-
-#ifdef SWIFT_DEBUG_CHECKS
-  cell_is_drifted(ci, e);
-  cell_is_drifted(cj, e);
-#endif
-
-  /* Get the sort ID. */
-  double shift[3] = {0.0, 0.0, 0.0};
-  const int sid = space_getsid(e->s, &ci, &cj, shift);
-
-  /* Have the cells been sorted? */
-  if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
-    error("Trying to interact unsorted cells.");
-
-  /* Get the cutoff shift. */
-  double rshift = 0.0;
-  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
-
-  /* Pick-out the sorted lists. */
-  const struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)];
-  const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
-
-  /* Get some other useful values. */
-  const int count_i = ci->count;
-  const int count_j = cj->count;
-  struct part *restrict parts_i = ci->parts;
-  struct part *restrict parts_j = cj->parts;
-  const double di_max = sort_i[count_i - 1].d - rshift;
-  const double dj_min = sort_j[0].d;
-  const float dx_max = (ci->dx_max + cj->dx_max);
-
-  /* Get the particle cache from the runner and re-allocate
-   * the cache if it is not big enough for the cell. */
-  //struct cache *restrict ci_cache = &r->par_cache;
-
-  //if (ci_cache->count < count_i) {
-  //  cache_init(ci_cache, count_i);
-  //}
-  //if (cj_cache.count < count_j) {
-  //  cache_init(&cj_cache, count_j);
-  //}
-
-  //cache_read_two_cells(ci, cj, ci_cache, &cj_cache, shift);
-  cache_read_two_cells_sorted(ci, cj, &ci_cache, &cj_cache, sort_i, sort_j, shift);
-
-  /* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */
-  /* For particles in ci */  
-  populate_max_d(ci, cj, sort_i, sort_j, &ci_cache, &cj_cache, dx_max, rshift, max_di, max_dj);
-
-  float di, dj;
-
-  int max_ind_j = count_j - 1;
-
-  /* Loop over the parts in ci. */
-  for (int pid = count_i - 1; pid >= 0 && max_ind_j >= 0; pid--) {
-
-    /* Get a hold of the ith part in ci. */
-    struct part *restrict pi = &parts_i[sort_i[pid].i];
-    if (!part_is_active(pi, e)) continue;
-
-    dj = sort_j[max_ind_j].d;
-    while(max_ind_j > 0 && max_di[pid] < dj) {
-      max_ind_j--;
-
-      dj = sort_j[max_ind_j].d;
-    }
-    int exit_iteration = max_ind_j;    
-
-    int ci_cache_idx = pid; //sort_i[pid].i;
-
-    const float hi = ci_cache.h[ci_cache_idx];
-    const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
-    if (di < dj_min) continue;
-
-    const float hig2 = hi * hi * kernel_gamma2;
-
-    float pix, piy, piz;
-    float vix, viy, viz;
-    float hi_inv;
-
-    /* Fill particle pi vectors. */
-    pix = ci_cache.x[ci_cache_idx];
-    piy = ci_cache.y[ci_cache_idx];
-    piz = ci_cache.z[ci_cache_idx];
-    vix = ci_cache.vx[ci_cache_idx];
-    viy = ci_cache.vy[ci_cache_idx];
-    viz = ci_cache.vz[ci_cache_idx];
-
-    /* Get the inverse of hi. */
-    hi_inv = 1.0f / hi;
-
-    /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd <= exit_iteration; pjd++) {
-
-      /* Get the cache index to the jth particle. */
-      //int cj_cache_idx = pjd; //sort_j[pjd].i;
-
-      float dx, dy, dz, r2;
-
-      /* Compute the pairwise distance. */
-      dx =  pix - cj_cache.x[pjd];
-      dy =  piy - cj_cache.y[pjd];
-      dz =  piz - cj_cache.z[pjd];
-
-      r2 = dx*dx + dy*dy + dz*dz;
-
-      runner_iact_nonsym_density_jsw(r2, hig2, dx, dy, dz, hi_inv, cj_cache.h[pjd], vix, viy, viz, cj_cache.vx[pjd], cj_cache.vy[pjd], cj_cache.vz[pjd], cj_cache.m[pjd], &ci_cache.rho[pid], &ci_cache.rho_dh[pid], &ci_cache.wcount[pid], &ci_cache.wcount_dh[pid], &ci_cache.div_v[pid], &ci_cache.curl_vx[pid], &ci_cache.curl_vy[pid], &ci_cache.curl_vz[pid]);
-      
-    } /* loop over the parts in cj. */
-    
-  } /* loop over the parts in ci. */
-
-  int max_ind_i = 0;
-  /* Loop over the parts in cj. */
-  for (int pjd = 0; pjd < count_j && max_ind_i < count_i; pjd++) {
-
-    /* Get a hold of the jth part in cj. */
-    struct part *restrict pj = &parts_j[sort_j[pjd].i];
-    if (!part_is_active(pj, e)) continue;
-
-    di = sort_i[max_ind_i].d;
-
-    while(max_ind_i < count_i - 1 && max_dj[pjd] > di) {
-      max_ind_i++;
-
-      di = sort_i[max_ind_i].d;
-    }
-    int exit_iteration = max_ind_i;
-
-    int cj_cache_idx = pjd;
-
-    const float hj = cj_cache.h[cj_cache_idx];
-    const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
-    if (dj > di_max) continue;
-
-    const float hjg2 = hj * hj * kernel_gamma2;
-
-    float pjx, pjy, pjz;
-    float vjx, vjy, vjz;
-    float hj_inv;
-
-    /* Fill particle pi vectors. */
-    pjx = cj_cache.x[cj_cache_idx];
-    pjy = cj_cache.y[cj_cache_idx];
-    pjz = cj_cache.z[cj_cache_idx];
-    vjx = cj_cache.vx[cj_cache_idx];
-    vjy = cj_cache.vy[cj_cache_idx];
-    vjz = cj_cache.vz[cj_cache_idx];
-
-    /* Get the inverse of hj. */
-    hj_inv = 1.0f / hj;
-
-    /* Loop over the parts in ci. */
-    for (int pid = exit_iteration; pid < count_i; pid++) {
-
-      /* Get the cache index to the ith particle. */
-      int ci_cache_idx = pid; //sort_i[pid].i;
-
-      float dx, dy, dz, r2;
-
-      /* Compute the pairwise distance. */
-      dx = pjx - ci_cache.x[ci_cache_idx];
-      dy = pjy - ci_cache.y[ci_cache_idx];
-      dz = pjz - ci_cache.z[ci_cache_idx];
-
-      r2 = dx*dx + dy*dy + dz*dz;
-      
-      runner_iact_nonsym_density_jsw(r2, hjg2, dx, dy, dz, hj_inv, ci_cache.h[ci_cache_idx], vjx, vjy, vjz, ci_cache.vx[ci_cache_idx], ci_cache.vy[ci_cache_idx], ci_cache.vz[ci_cache_idx], ci_cache.m[ci_cache_idx], &cj_cache.rho[cj_cache_idx], &cj_cache.rho_dh[cj_cache_idx], &cj_cache.wcount[cj_cache_idx], &cj_cache.wcount_dh[cj_cache_idx], &cj_cache.div_v[cj_cache_idx], &cj_cache.curl_vx[cj_cache_idx], &cj_cache.curl_vy[cj_cache_idx], &cj_cache.curl_vz[cj_cache_idx]);
-      
-    } /* loop over the parts in ci. */
-    
-  } /* loop over the parts in cj. */
-    
-  cache_write_sorted_particles(&ci_cache, &cj_cache, ci, cj, sort_i, sort_j);
-
-  TIMER_TOC(timer_dopair_density);
-
-#endif /* WITH_VECTORIZATION */
-}
+//void runner_dopair1_density_auto_vec(struct runner *r, struct cell *ci, struct cell *cj) {
+//
+//#ifdef WITH_VECTORIZATION
+//  const struct engine *restrict e = r->e;
+//
+//  TIMER_TIC;
+//
+//  /* Anything to do here? */
+//  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
+//
+//#ifdef SWIFT_DEBUG_CHECKS
+//  cell_is_drifted(ci, e);
+//  cell_is_drifted(cj, e);
+//#endif
+//
+//  /* Get the sort ID. */
+//  double shift[3] = {0.0, 0.0, 0.0};
+//  const int sid = space_getsid(e->s, &ci, &cj, shift);
+//
+//  /* Have the cells been sorted? */
+//  if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
+//    error("Trying to interact unsorted cells.");
+//
+//  /* Get the cutoff shift. */
+//  double rshift = 0.0;
+//  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
+//
+//  /* Pick-out the sorted lists. */
+//  const struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)];
+//  const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
+//
+//  /* Get some other useful values. */
+//  const int count_i = ci->count;
+//  const int count_j = cj->count;
+//  struct part *restrict parts_i = ci->parts;
+//  struct part *restrict parts_j = cj->parts;
+//  const double di_max = sort_i[count_i - 1].d - rshift;
+//  const double dj_min = sort_j[0].d;
+//  const float dx_max = (ci->dx_max + cj->dx_max);
+//
+//  /* Get the particle cache from the runner and re-allocate
+//   * the cache if it is not big enough for the cell. */
+//  //struct cache *restrict ci_cache = &r->par_cache;
+//
+//  //if (ci_cache->count < count_i) {
+//  //  cache_init(ci_cache, count_i);
+//  //}
+//  //if (cj_cache.count < count_j) {
+//  //  cache_init(&cj_cache, count_j);
+//  //}
+//
+//  //cache_read_two_cells(ci, cj, ci_cache, &cj_cache, shift);
+//  cache_read_two_cells_sorted(ci, cj, &ci_cache, &cj_cache, sort_i, sort_j, shift);
+//
+//  /* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */
+//  /* For particles in ci */  
+//  populate_max_d(ci, cj, sort_i, sort_j, &ci_cache, &cj_cache, dx_max, rshift, max_di, max_dj);
+//
+//  float di, dj;
+//
+//  int max_ind_j = count_j - 1;
+//
+//  /* Loop over the parts in ci. */
+//  for (int pid = count_i - 1; pid >= 0 && max_ind_j >= 0; pid--) {
+//
+//    /* Get a hold of the ith part in ci. */
+//    struct part *restrict pi = &parts_i[sort_i[pid].i];
+//    if (!part_is_active(pi, e)) continue;
+//
+//    dj = sort_j[max_ind_j].d;
+//    while(max_ind_j > 0 && max_di[pid] < dj) {
+//      max_ind_j--;
+//
+//      dj = sort_j[max_ind_j].d;
+//    }
+//    int exit_iteration = max_ind_j;    
+//
+//    int ci_cache_idx = pid; //sort_i[pid].i;
+//
+//    const float hi = ci_cache.h[ci_cache_idx];
+//    const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
+//    if (di < dj_min) continue;
+//
+//    const float hig2 = hi * hi * kernel_gamma2;
+//
+//    float pix, piy, piz;
+//    float vix, viy, viz;
+//    float hi_inv;
+//
+//    /* Fill particle pi vectors. */
+//    pix = ci_cache.x[ci_cache_idx];
+//    piy = ci_cache.y[ci_cache_idx];
+//    piz = ci_cache.z[ci_cache_idx];
+//    vix = ci_cache.vx[ci_cache_idx];
+//    viy = ci_cache.vy[ci_cache_idx];
+//    viz = ci_cache.vz[ci_cache_idx];
+//
+//    /* Get the inverse of hi. */
+//    hi_inv = 1.0f / hi;
+//
+//    /* Loop over the parts in cj. */
+//    for (int pjd = 0; pjd <= exit_iteration; pjd++) {
+//
+//      /* Get the cache index to the jth particle. */
+//      //int cj_cache_idx = pjd; //sort_j[pjd].i;
+//
+//      float dx, dy, dz, r2;
+//
+//      /* Compute the pairwise distance. */
+//      dx =  pix - cj_cache.x[pjd];
+//      dy =  piy - cj_cache.y[pjd];
+//      dz =  piz - cj_cache.z[pjd];
+//
+//      r2 = dx*dx + dy*dy + dz*dz;
+//
+//      runner_iact_nonsym_density_jsw(r2, hig2, dx, dy, dz, hi_inv, cj_cache.h[pjd], vix, viy, viz, cj_cache.vx[pjd], cj_cache.vy[pjd], cj_cache.vz[pjd], cj_cache.m[pjd], &ci_cache.rho[pid], &ci_cache.rho_dh[pid], &ci_cache.wcount[pid], &ci_cache.wcount_dh[pid], &ci_cache.div_v[pid], &ci_cache.curl_vx[pid], &ci_cache.curl_vy[pid], &ci_cache.curl_vz[pid]);
+//      
+//    } /* loop over the parts in cj. */
+//    
+//  } /* loop over the parts in ci. */
+//
+//  int max_ind_i = 0;
+//  /* Loop over the parts in cj. */
+//  for (int pjd = 0; pjd < count_j && max_ind_i < count_i; pjd++) {
+//
+//    /* Get a hold of the jth part in cj. */
+//    struct part *restrict pj = &parts_j[sort_j[pjd].i];
+//    if (!part_is_active(pj, e)) continue;
+//
+//    di = sort_i[max_ind_i].d;
+//
+//    while(max_ind_i < count_i - 1 && max_dj[pjd] > di) {
+//      max_ind_i++;
+//
+//      di = sort_i[max_ind_i].d;
+//    }
+//    int exit_iteration = max_ind_i;
+//
+//    int cj_cache_idx = pjd;
+//
+//    const float hj = cj_cache.h[cj_cache_idx];
+//    const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
+//    if (dj > di_max) continue;
+//
+//    const float hjg2 = hj * hj * kernel_gamma2;
+//
+//    float pjx, pjy, pjz;
+//    float vjx, vjy, vjz;
+//    float hj_inv;
+//
+//    /* Fill particle pi vectors. */
+//    pjx = cj_cache.x[cj_cache_idx];
+//    pjy = cj_cache.y[cj_cache_idx];
+//    pjz = cj_cache.z[cj_cache_idx];
+//    vjx = cj_cache.vx[cj_cache_idx];
+//    vjy = cj_cache.vy[cj_cache_idx];
+//    vjz = cj_cache.vz[cj_cache_idx];
+//
+//    /* Get the inverse of hj. */
+//    hj_inv = 1.0f / hj;
+//
+//    /* Loop over the parts in ci. */
+//    for (int pid = exit_iteration; pid < count_i; pid++) {
+//
+//      /* Get the cache index to the ith particle. */
+//      int ci_cache_idx = pid; //sort_i[pid].i;
+//
+//      float dx, dy, dz, r2;
+//
+//      /* Compute the pairwise distance. */
+//      dx = pjx - ci_cache.x[ci_cache_idx];
+//      dy = pjy - ci_cache.y[ci_cache_idx];
+//      dz = pjz - ci_cache.z[ci_cache_idx];
+//
+//      r2 = dx*dx + dy*dy + dz*dz;
+//      
+//      runner_iact_nonsym_density_jsw(r2, hjg2, dx, dy, dz, hj_inv, ci_cache.h[ci_cache_idx], vjx, vjy, vjz, ci_cache.vx[ci_cache_idx], ci_cache.vy[ci_cache_idx], ci_cache.vz[ci_cache_idx], ci_cache.m[ci_cache_idx], &cj_cache.rho[cj_cache_idx], &cj_cache.rho_dh[cj_cache_idx], &cj_cache.wcount[cj_cache_idx], &cj_cache.wcount_dh[cj_cache_idx], &cj_cache.div_v[cj_cache_idx], &cj_cache.curl_vx[cj_cache_idx], &cj_cache.curl_vy[cj_cache_idx], &cj_cache.curl_vz[cj_cache_idx]);
+//      
+//    } /* loop over the parts in ci. */
+//    
+//  } /* loop over the parts in cj. */
+//    
+//  cache_write_sorted_particles(&ci_cache, &cj_cache, ci, cj, sort_i, sort_j);
+//
+//  TIMER_TOC(timer_dopair_density);
+//
+//#endif /* WITH_VECTORIZATION */
+//}