diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index 7b418cc9bb82c0d3ce3d2597b70851134a4a7b4a..4803666efc7fbadfa9c3938489a216c44c4e59e4 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -257,6 +257,34 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
   }
 
 }
+
+__attribute__((always_inline)) INLINE static void populate_max_d(const struct cell *ci, const struct cell *cj, const struct entry *restrict sort_i, const struct entry *restrict sort_j, const struct cache *ci_cache, const struct cache *cj_cache, const float dx_max, const float rshift, float *max_di, float *max_dj) {
+
+  float h = ci_cache->h[0];
+  float d;
+  
+  /* For particles in ci */  
+  max_di[0] = sort_i[0].d + h * kernel_gamma + dx_max - rshift;
+
+  for (int k = 1; k < ci->count; k++) {
+    h = ci_cache->h[k];
+    d = sort_i[k].d + h * kernel_gamma + dx_max - rshift;
+    
+    max_di[k] = fmaxf(max_di[k - 1], d);
+  }
+
+  /* For particles in cj */  
+  h = cj_cache->h[0];
+  max_dj[0] = sort_j[0].d - h * kernel_gamma - dx_max - rshift;
+  
+  for (int k = 1; k < cj->count; k++) {
+    h = cj_cache->h[k];
+    d = sort_j[k].d - h * kernel_gamma - dx_max - rshift;
+    
+    max_dj[k] = fmaxf(max_dj[k - 1], d);
+  }
+}
+
 #endif /* WITH_VECTORIZATION */ 
 
 /**
@@ -1277,6 +1305,9 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct cell *
 #endif /* WITH_VECTORIZATION */
 }
 
+float max_di[MAX_NO_OF_PARTS] __attribute__((aligned(sizeof(VEC_SIZE * sizeof(float))))); /* max distance into ci */
+float max_dj[MAX_NO_OF_PARTS] __attribute__((aligned(sizeof(VEC_SIZE * sizeof(float))))); /* max distance into cj */
+
 /**
  * @brief Compute the interactions between a cell pair (non-symmetric).
  *
@@ -1316,8 +1347,6 @@ void runner_dopair1_density_auto_vec(struct runner *r, struct cell *ci, struct c
   const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
 
   /* Get some other useful values. */
-  const double hi_max = ci->h_max * kernel_gamma - rshift;
-  const double hj_max = cj->h_max * kernel_gamma;
   const int count_i = ci->count;
   const int count_j = cj->count;
   struct part *restrict parts_i = ci->parts;
@@ -1340,14 +1369,29 @@ void runner_dopair1_density_auto_vec(struct runner *r, struct cell *ci, struct c
   //cache_read_two_cells(ci, cj, ci_cache, &cj_cache, shift);
   cache_read_two_cells_sorted(ci, cj, ci_cache, &cj_cache, sort_i, sort_j, shift);
 
+  /* Find particles maximum distance into cj, max_di[] and ci, max_dj[]. */
+  /* For particles in ci */  
+  populate_max_d(ci, cj, sort_i, sort_j, ci_cache, &cj_cache, dx_max, rshift, max_di, max_dj);
+
+  float di, dj;
+
+  int max_ind_j = count_j - 1;
+
   /* Loop over the parts in ci. */
-  for (int pid = count_i - 1;
-       pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
+  for (int pid = count_i - 1; pid >= 0 && max_ind_j >= 0; pid--) {
 
     /* Get a hold of the ith part in ci. */
     struct part *restrict pi = &parts_i[sort_i[pid].i];
     if (!part_is_active(pi, e)) continue;
 
+    dj = sort_j[max_ind_j].d;
+    while(max_ind_j > 0 && max_di[pid] < dj) {
+      max_ind_j--;
+
+      dj = sort_j[max_ind_j].d;
+    }
+    int exit_iteration = max_ind_j;    
+
     int ci_cache_idx = pid; //sort_i[pid].i;
 
     const float hi = ci_cache->h[ci_cache_idx];
@@ -1371,18 +1415,10 @@ void runner_dopair1_density_auto_vec(struct runner *r, struct cell *ci, struct c
     /* Get the inverse of hi. */
     hi_inv = 1.0f / hi;
 
-    int exit_iteration = count_j;
-    for (int pjd = 0; pjd < count_j ; pjd++) {
-      if(sort_j[pjd].d >= di) {
-        exit_iteration = pjd;
-        break;
-      }
-    }
-
     float rho = 0, rho_dh = 0, wcount = 0, wcount_dh = 0, div_v = 0, curl_vx = 0, curl_vy = 0, curl_vz = 0;
 
     /* Loop over the parts in cj. */
-    for (int pjd = 0; pjd < exit_iteration; pjd++) {
+    for (int pjd = 0; pjd <= exit_iteration; pjd++) {
 
       /* Get the cache index to the jth particle. */
       int cj_cache_idx = pjd; //sort_j[pjd].i;
@@ -1411,14 +1447,23 @@ void runner_dopair1_density_auto_vec(struct runner *r, struct cell *ci, struct c
     pi->density.rot_v[2] += curl_vz;
   } /* loop over the parts in ci. */
 
+  int max_ind_i = 0;
   /* Loop over the parts in cj. */
-  for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
-       pjd++) {
+  for (int pjd = 0; pjd < count_j && max_ind_i < count_i; pjd++) {
 
     /* Get a hold of the jth part in cj. */
     struct part *restrict pj = &parts_j[sort_j[pjd].i];
     if (!part_is_active(pj, e)) continue;
 
+    di = sort_i[max_ind_i].d;
+
+    while(max_ind_i < count_i - 1 && max_dj[pjd] > di) {
+      max_ind_i++;
+
+      di = sort_i[max_ind_i].d;
+    }
+    int exit_iteration = max_ind_i;
+
     int cj_cache_idx = pjd;
 
     const float hj = cj_cache.h[cj_cache_idx];
@@ -1442,19 +1487,10 @@ void runner_dopair1_density_auto_vec(struct runner *r, struct cell *ci, struct c
     /* Get the inverse of hj. */
     hj_inv = 1.0f / hj;
 
-    int exit_iteration = count_i - 1;
-    for (int pid = count_i - 1; pid >= 0; pid--) {
-      if(sort_i[pid].d <= dj) {
-        exit_iteration = pid;
-        break;
-      }
-    }
-    
     float rho = 0, rho_dh = 0, wcount = 0, wcount_dh = 0, div_v = 0, curl_vx = 0, curl_vy = 0, curl_vz = 0;
 
     /* Loop over the parts in ci. */
-    //for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {
-    for (int pid = count_i - 1; pid > exit_iteration; pid--) {
+    for (int pid = exit_iteration; pid < count_i; pid++) {
 
       /* Get the cache index to the ith particle. */
       int ci_cache_idx = pid; //sort_i[pid].i;