diff --git a/src/cache.h b/src/cache.h
index 6e014dc831bdf5069a2309c4fddc316e70d5d5a4..0e138cc5f0ae19e858aeb62f5172e88c9e6a3a9b 100644
--- a/src/cache.h
+++ b/src/cache.h
@@ -216,6 +216,21 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
 #endif
 }
 
+/**
+ * @brief Populate cache by only reading particles that are within range of
+ * each other within the adjoining cell. Also read the particles into the cache
+ * in sorted order.
+ *
+ * @param ci The i #cell.
+ * @param ci_cache The #cache for cell ci.
+ * @param sort_i The array of sorted particle indices for cell ci.
+ * @param shift The amount to shift the particle positions to account for BCs
+ * @param first_pi The first particle in cell ci that is in range.
+ * @param last_pi The last particle in cell ci that is in range.
+ * @param last_pi The last particle in cell ci that is in range.
+ * @param loc The cell location to remove from the particle positions.
+ * @param flipped Flag to check whether the cells have been flipped or not.
+ */
 __attribute__((always_inline)) INLINE void cache_read_particles_subpair(
     const struct cell *restrict const ci,
     struct cache *restrict const ci_cache, const struct entry *restrict sort_i, int *first_pi, int *last_pi, const double *loc, const int flipped) {
@@ -235,17 +250,19 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subpair(
 
   const struct part *restrict parts = ci->parts;
 
-  /* Shift the particles positions to a local frame so single precision can be
-   * used instead of double precision. */
+  /* The cell is on the right so read the particles 
+   * into the cache from the start of the cell. */
   if(!flipped) {
     int rem = (*last_pi + 1) % VEC_SIZE;
     if (rem != 0) {
       int pad = VEC_SIZE - rem;
 
-      /* Increase last_pj if there are particles in the cell left to read. */
+      /* Increase last_pi if there are particles in the cell left to read. */
       if (*last_pi + pad < ci->count) *last_pi += pad;
     }
 
+    /* Shift the particles positions to a local frame so single precision can be
+     * used instead of double precision. */
     for (int i = 0; i < *last_pi; i++) {
       const int idx = sort_i[i].i;
       x[i] = (float)(parts[idx].x[0] - loc[0]);
@@ -277,8 +294,9 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subpair(
       vz[i] = 1.f;
     }
   }
+  /* The cell is on the left so read the particles 
+   * into the cache from the end of the cell. */
   else {
-
     int rem = (ci->count - *first_pi) % VEC_SIZE;
     if (rem != 0) {
       int pad = VEC_SIZE - rem;
@@ -289,10 +307,8 @@ __attribute__((always_inline)) INLINE void cache_read_particles_subpair(
 
     int ci_cache_count = ci->count - *first_pi;
 
-    //message("ci_cache_count: %d, count_i: %d, first_pi: %d", ci_cache_count, ci->count, *first_pi);
-
-    /* Shift the particles positions to a local frame (ci frame) so single
-     * precision can be used instead of double precision.  */
+    /* Shift the particles positions to a local frame so single precision can be
+     * used instead of double precision. */
     for (int i = 0; i < ci_cache_count; i++) {
       const int idx = sort_i[i + *first_pi].i;
       x[i] = (float)(parts[idx].x[0] - loc[0]);
@@ -402,7 +418,6 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
  * @param shift The amount to shift the particle positions to account for BCs
  * @param first_pi The first particle in cell ci that is in range.
  * @param last_pj The last particle in cell cj that is in range.
- * interaction.
  */
 __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
     const struct cell *restrict const ci, const struct cell *restrict const cj,
@@ -620,7 +635,6 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
  * @param shift The amount to shift the particle positions to account for BCs
  * @param first_pi The first particle in cell ci that is in range.
  * @param last_pj The last particle in cell cj that is in range.
- * interaction.
  */
 __attribute__((always_inline)) INLINE void
 cache_read_two_partial_cells_sorted_force(
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index a605f5d65b20bfeb9b03164ea937ace2cf66b0a5..2b29f8239e7b13b87706e23e7bd424168b7a6503 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -470,6 +470,7 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) {
  * @param ind The list of indices of particles in @c ci to interact with.
  * @param count The number of particles in @c ind.
  * @param cj The second #cell.
+ * @param shift The shift vector to apply to the particles in ci.
  */
 void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
                          struct part *restrict parts_i, int *restrict ind,
@@ -539,6 +540,9 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
  * @param ind The list of indices of particles in @c ci to interact with.
  * @param count The number of particles in @c ind.
  * @param cj The second #cell.
+ * @param sid The direction of the pair.
+ * @param flipped Flag to check whether the cells have been flipped or not.
+ * @param shift The shift vector to apply to the particles in ci.
  */
 void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
                    struct part *restrict parts_i, int *restrict ind, int count,
@@ -569,11 +573,9 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
       const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] +
                         piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
 
-//      int ctr = 0;
       /* Loop over the parts in cj. */
       for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
 
-//        ctr++;
         /* Get a pointer to the jth particle. */
         struct part *restrict pj = &parts_j[sort_j[pjd].i];
         const float hj = pj->h;
@@ -599,7 +601,6 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
           IACT_NONSYM(r2, dx, hi, hj, pi, pj);
         }
       } /* loop over the parts in cj. */
-//      message("pi: %lld, iterations in inner loop: %d", pi->id, ctr);
     }   /* loop over the parts in ci. */
   }
 
@@ -783,7 +784,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
  * @param r The #runner.
  * @param ci The first #cell.
  * @param cj The second #cell.
- * @param sid The direction of the pair
+ * @param sid The direction of the pair.
  * @param shift The shift vector to apply to the particles in ci.
  */
 void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index 1d32d954110996e859ce22e04932b420d251281a..032f3246c6087d5893916d4dbc3a664e6d25e2ab 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -513,29 +513,25 @@ populate_max_index_force(
 }
 
 /**
- * @brief Populates the arrays max_index_i and max_index_j with the maximum
- * indices of
- * particles into their neighbouring cells. Also finds the first pi that
- * interacts with any particle in cj and the last pj that interacts with any
- * particle in ci.
+ * @brief Populates the array max_index_i with the maximum
+ * index of
+ * particles into the neighbouring cell. Also finds the first/last pj that
+ * interacts with any particle in ci.
  *
- * @param ci #cell pointer to ci
- * @param cj #cell pointer to cj
- * @param sort_i #entry array for particle distance in ci
+ * @param count_i The number of particles in ci.
+ * @param count_j The number of particles in cj.
+ * @param parts_i The #part to interact with @c cj.
+ * @param ind The list of indices of particles in @c ci to interact with.
+ * @param total_ci_shift The shift vector to apply to the particles in ci.
+ * @param dxj Maximum particle movement allowed in cell cj.
+ * @param di_shift_correction The correction to di after the particles have been shifted to the frame of cell ci.
+ * @param runner_shift_x The runner_shift in the x direction.
+ * @param runner_shift_y The runner_shift in the y direction.
+ * @param runner_shift_z The runner_shift in the z direction.
  * @param sort_j #entry array for particle distance in cj
- * @param dx_max maximum particle movement allowed in cell
- * @param rshift cutoff shift
- * @param hi_max Maximal smoothing length in cell ci
- * @param hj_max Maximal smoothing length in cell cj
- * @param di_max Maximal position on the axis that can interact in cell ci
- * @param dj_min Minimal position on the axis that can interact in cell ci
- * @param max_index_i array to hold the maximum distances of pi particles into
- * #cell cj
- * @param max_index_j array to hold the maximum distances of pj particles into
- * #cell cj
- * @param init_pi first pi to interact with a pj particle
- * @param init_pj last pj to interact with a pi particle
- * @param max_active_bin The largest time-bin active during this step.
+ * @param max_index_i array to hold the maximum distances of pi particles into #cell cj
+ * @param flipped Flag to check whether the cells have been flipped or not.
+ * @return first_pj/last_pj first or last pj to interact with any particle in ci depending whether the cells have been flipped or not.
  */
 __attribute__((always_inline)) INLINE static int populate_max_index_subset(
     const int count_i,
@@ -551,6 +547,8 @@ __attribute__((always_inline)) INLINE static int populate_max_index_subset(
     const struct entry *restrict sort_j,
     int *max_index_i, const int flipped) {
 
+  /* The cell is on the right so read the particles 
+   * into the cache from the start of the cell. */
   if(!flipped) {
     
     /* Find the rightmost particle in cell j that interacts with any
@@ -573,6 +571,8 @@ __attribute__((always_inline)) INLINE static int populate_max_index_subset(
     }
     return last_pj;
   }
+  /* The cell is on the left so read the particles 
+   * into the cache from the end of the cell. */
   else {
 
     int first_pj = count_j - 1;
@@ -1626,6 +1626,9 @@ __attribute__((always_inline)) INLINE void runner_dopair1_density_vec(struct run
  * @param ind The list of indices of particles in @c ci to interact with.
  * @param count The number of particles in @c ind.
  * @param cj The second #cell.
+ * @param sid The direction of the pair.
+ * @param flipped Flag to check whether the cells have been flipped or not.
+ * @param shift The shift vector to apply to the particles in ci.
  */
 __attribute__((always_inline)) INLINE void runner_dopair_subset_density_vec(struct runner *r, struct cell *restrict ci,
     struct part *restrict parts_i, int *restrict ind, int count,
@@ -1655,6 +1658,7 @@ __attribute__((always_inline)) INLINE void runner_dopair_subset_density_vec(stru
   const double total_ci_shift[3] = {
     ci->loc[0] + shift[0], ci->loc[1] + shift[1], ci->loc[2] + shift[2]};
 
+  /* Calculate the correction to di after the particles have been shifted to the frame of cell ci. */
   const double di_shift_correction = ci->loc[0]*runner_shift_x + 
     ci->loc[1]*runner_shift_y + 
     ci->loc[2]*runner_shift_z;
@@ -1663,7 +1667,6 @@ __attribute__((always_inline)) INLINE void runner_dopair_subset_density_vec(stru
   for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
 
   int *restrict max_index_i SWIFT_CACHE_ALIGN;
-
   max_index_i = r->ci_cache.max_index;
 
   /* Parts are on the left? */
@@ -1715,10 +1718,10 @@ __attribute__((always_inline)) INLINE void runner_dopair_subset_density_vec(stru
       vector v_curlvySum = vector_setzero();
       vector v_curlvzSum = vector_setzero();
 
-      int exit_iteration = max_index_i[pid];
+      int exit_iteration_end = max_index_i[pid] + 1;
 
       /* Loop over the parts in cj. */
-      for (int pjd = 0; pjd <= exit_iteration; pjd += VEC_SIZE) {
+      for (int pjd = 0; pjd < exit_iteration_end; pjd += VEC_SIZE) {
 
         /* Get the cache index to the jth particle. */
         const int cj_cache_idx = pjd;