diff --git a/src/cache.h b/src/cache.h
index cb0f1c62d3473630d8125a9a2c47b28b5a852501..1b675e1cc0da5daab53ef14fa06106eca186bd15 100644
--- a/src/cache.h
+++ b/src/cache.h
@@ -198,8 +198,8 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
   swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
 
   const struct part *restrict parts = ci->parts;
-  const double loc[3] = {ci->loc[0], ci->loc[1],ci->loc[2]};
-  
+  const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
+
   /* Shift the particles positions to a local frame so single precision can be
    * used instead of double precision. */
   for (int i = 0; i < ci->count; i++) {
@@ -250,7 +250,7 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
                             SWIFT_CACHE_ALIGNMENT);
 
   const struct part *restrict parts = ci->parts;
-  const double loc[3] = {ci->loc[0], ci->loc[1],ci->loc[2]};
+  const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
 
   /* Shift the particles positions to a local frame so single precision can be
    * used instead of double precision. */
@@ -296,7 +296,6 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
     const struct entry *restrict sort_j, const double *restrict const shift,
     int *first_pi, int *last_pj) {
 
-  int idx;
   /* Pad number of particles read to the vector size. */
   int rem = (ci->count - *first_pi) % VEC_SIZE;
   if (rem != 0) {
@@ -312,17 +311,17 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
     if (*last_pj + pad < cj->count) *last_pj += pad;
   }
 
+  /* Get some local pointers */
   const int first_pi_align = *first_pi;
   const int last_pj_align = *last_pj;
   const struct part *restrict parts_i = ci->parts;
   const struct part *restrict parts_j = cj->parts;
 
   /* Shift particles to the local frame and account for boundary conditions.*/
-  const double total_ci_shift[3] = {cj->loc[0] + shift[0], cj->loc[1] + shift[1],
-                             cj->loc[2] + shift[2]};
-  const double total_cj_shift[3] = {cj->loc[0], cj->loc[1],
-                             cj->loc[2]};
-  
+  const double total_ci_shift[3] = {
+      cj->loc[0] + shift[0], cj->loc[1] + shift[1], cj->loc[2] + shift[2]};
+  const double total_cj_shift[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};
+
   /* Let the compiler know that the data is aligned and create pointers to the
    * arrays inside the cache. */
   swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
@@ -335,16 +334,11 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
   swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);
 
   int ci_cache_count = ci->count - first_pi_align;
-  
+
   /* Shift the particles positions to a local frame (ci frame) so single
-   * precision
-   * can be
-   * used instead of double precision. Also shift the cell ci, particles
-   * positions
-   * due to BCs but leave cell cj. */
+   * precision can be used instead of double precision. */
   for (int i = 0; i < ci_cache_count; i++) {
-    /* Make sure ci_cache is filled from the first element. */
-    idx = sort_i[i + first_pi_align].i;
+    const int idx = sort_i[i + first_pi_align].i;
     x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
     y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
     z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
@@ -371,30 +365,31 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
           "is not within "
           "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
           "2*space_maxreldx)]. x=%f, ci->width[0]=%f",
-          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, x[i],
-          ci->width[0]);
+          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
+          cj->loc[2], i, x[i], ci->width[0]);
     if (y[i] > shift_threshold_y || y[i] < -shift_threshold_y)
       error(
           "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d y pos "
           "is not within "
           "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
           "2*space_maxreldx)]. y=%f, ci->width[1]=%f",
-          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, y[i],
-          ci->width[1]);
+          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
+          cj->loc[2], i, y[i], ci->width[1]);
     if (z[i] > shift_threshold_z || z[i] < -shift_threshold_z)
       error(
           "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d z pos "
           "is not within "
           "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
           "2*space_maxreldx)]. z=%f, ci->width[2]=%f",
-          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, z[i],
-          ci->width[2]);
+          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
+          cj->loc[2], i, z[i], ci->width[2]);
   }
 #endif
 
   /* Pad cache with fake particles that exist outside the cell so will not
-   * interact.*/
-  const float max_dx = max(ci->dx_max_part, cj->dx_max_part);
+   * interact. We use values of the same magnitude (but negative!) as the real
+   * particles to avoid overflow problems. */
+  const double max_dx = max(ci->dx_max_part, cj->dx_max_part);
   const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
                                -(2. * ci->width[1] + max_dx),
                                -(2. * ci->width[2] + max_dx)};
@@ -425,7 +420,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
   swift_declare_aligned_ptr(float, vzj, cj_cache->vz, SWIFT_CACHE_ALIGNMENT);
 
   for (int i = 0; i <= last_pj_align; i++) {
-    idx = sort_j[i].i;
+    const int idx = sort_j[i].i;
     xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
     yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
     zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
@@ -445,29 +440,30 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
           "pos is not within "
           "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
           "2*space_maxreldx)]. xj=%f, ci->width[0]=%f",
-          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, xj[i],
-          ci->width[0]);
+          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
+          cj->loc[2], i, xj[i], ci->width[0]);
     if (yj[i] > shift_threshold_y || yj[i] < -shift_threshold_y)
       error(
           "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d yj "
           "pos is not within "
           "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
           "2*space_maxreldx)]. yj=%f, ci->width[1]=%f",
-          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, yj[i],
-          ci->width[1]);
+          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
+          cj->loc[2], i, yj[i], ci->width[1]);
     if (zj[i] > shift_threshold_z || zj[i] < -shift_threshold_z)
       error(
           "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d zj "
           "pos is not within "
           "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
           "2*space_maxreldx)]. zj=%f, ci->width[2]=%f",
-          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, zj[i],
-          ci->width[2]);
+          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
+          cj->loc[2], i, zj[i], ci->width[2]);
   }
 #endif
 
   /* Pad cache with fake particles that exist outside the cell so will not
-   * interact.*/
+   * interact. We use values of the same magnitude (but negative!) as the real
+   * particles to avoid overflow problems. */
   const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
                                  -(2. * cj->width[1] + max_dx),
                                  -(2. * cj->width[2] + max_dx)};
@@ -508,7 +504,6 @@ cache_read_two_partial_cells_sorted_force(
     const struct entry *restrict sort_i, const struct entry *restrict sort_j,
     const double *const shift, int *first_pi, int *last_pj) {
 
-  int idx;
   /* Pad number of particles read to the vector size. */
   int rem = (ci->count - *first_pi) % VEC_SIZE;
   if (rem != 0) {
@@ -524,16 +519,16 @@ cache_read_two_partial_cells_sorted_force(
     if (*last_pj + pad < cj->count) *last_pj += pad;
   }
 
+  /* Get some local pointers */
   const int first_pi_align = *first_pi;
   const int last_pj_align = *last_pj;
   const struct part *restrict parts_i = ci->parts;
   const struct part *restrict parts_j = cj->parts;
-  
+
   /* Shift particles to the local frame and account for boundary conditions.*/
-  const double total_ci_shift[3] = {cj->loc[0] + shift[0], cj->loc[1] + shift[1],
-                             cj->loc[2] + shift[2]};
-  const double total_cj_shift[3] = {cj->loc[0], cj->loc[1],
-                             cj->loc[2]};
+  const double total_ci_shift[3] = {
+      cj->loc[0] + shift[0], cj->loc[1] + shift[1], cj->loc[2] + shift[2]};
+  const double total_cj_shift[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};
 
   /* Let the compiler know that the data is aligned and create pointers to the
    * arrays inside the cache. */
@@ -557,14 +552,10 @@ cache_read_two_partial_cells_sorted_force(
 
   int ci_cache_count = ci->count - first_pi_align;
   /* Shift the particles positions to a local frame (ci frame) so single
-   * precision
-   * can be
-   * used instead of double precision. Also shift the cell ci, particles
-   * positions
-   * due to BCs but leave cell cj. */
+   * precision can be used instead of double precision. */
   for (int i = 0; i < ci_cache_count; i++) {
-    /* Make sure ci_cache is filled from the first element. */
-    idx = sort_i[i + first_pi_align].i;
+
+    const int idx = sort_i[i + first_pi_align].i;
     x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
     y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
     z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
@@ -581,8 +572,9 @@ cache_read_two_partial_cells_sorted_force(
   }
 
   /* Pad cache with fake particles that exist outside the cell so will not
-   * interact.*/
-  const float max_dx = max(ci->dx_max_part, cj->dx_max_part);
+   * interact. We use values of the same magnitude (but negative!) as the real
+   * particles to avoid overflow problems. */
+  const double max_dx = max(ci->dx_max_part, cj->dx_max_part);
   const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
                                -(2. * ci->width[1] + max_dx),
                                -(2. * ci->width[2] + max_dx)};
@@ -626,7 +618,7 @@ cache_read_two_partial_cells_sorted_force(
                             SWIFT_CACHE_ALIGNMENT);
 
   for (int i = 0; i <= last_pj_align; i++) {
-    idx = sort_j[i].i;
+    const int idx = sort_j[i].i;
     xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
     yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
     zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
@@ -643,7 +635,8 @@ cache_read_two_partial_cells_sorted_force(
   }
 
   /* Pad cache with fake particles that exist outside the cell so will not
-   * interact.*/
+   * interact. We use values of the same magnitude (but negative!) as the real
+   * particles to avoid overflow problems. */
   const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
                                  -(2. * cj->width[1] + max_dx),
                                  -(2. * cj->width[2] + max_dx)};
diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c
index 6ecf72c1e7444f4a73cf99045978f748bed67bfc..c9b76db8e863e763437cf53aa621e8b636d2ddad 100644
--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -41,9 +41,11 @@ static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
  * gradient update on pi.
  * @param v_wcountSum (return) #vector holding the cumulative sum of the wcount
  * update on pi.
- * @param v_wcount_dhSum (return) #vector holding the cumulative sum of the wcount
+ * @param v_wcount_dhSum (return) #vector holding the cumulative sum of the
+ * wcount
  * gradient update on pi.
- * @param v_div_vSum (return) #vector holding the cumulative sum of the divergence
+ * @param v_div_vSum (return) #vector holding the cumulative sum of the
+ * divergence
  * update on pi.
  * @param v_curlvxSum (return) #vector holding the cumulative sum of the curl of
  * vx update on pi.
@@ -61,9 +63,9 @@ static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
 __attribute__((always_inline)) INLINE static void calcRemInteractions(
     struct c2_cache *const int_cache, const int icount, vector *v_rhoSum,
     vector *v_rho_dhSum, vector *v_wcountSum, vector *v_wcount_dhSum,
-    vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum, vector *v_curlvzSum,
-    vector v_hi_inv, vector v_vix, vector v_viy, vector v_viz,
-    int *icount_align) {
+    vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum,
+    vector *v_curlvzSum, vector v_hi_inv, vector v_vix, vector v_viy,
+    vector v_viz, int *icount_align) {
 
   mask_t int_mask, int_mask2;
 
@@ -108,8 +110,8 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
         v_hi_inv, v_vix, v_viy, v_viz, &int_cache->vxq[*icount_align],
         &int_cache->vyq[*icount_align], &int_cache->vzq[*icount_align],
         &int_cache->mq[*icount_align], v_rhoSum, v_rho_dhSum, v_wcountSum,
-        v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum, int_mask,
-        int_mask2, 1);
+        v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum,
+        int_mask, int_mask2, 1);
   }
 }
 
@@ -127,20 +129,25 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
  * @param int_cache (return) secondary #cache of interactions between two
  * particles.
  * @param icount Interaction count.
- * @param v_rhoSum #vector holding the cumulative sum of the density update on pi.
+ * @param v_rhoSum #vector holding the cumulative sum of the density update on
+ * pi.
  * @param v_rho_dhSum #vector holding the cumulative sum of the density gradient
  * update on pi.
  * @param v_wcountSum #vector holding the cumulative sum of the wcount update on
  * pi.
- * @param v_wcount_dhSum #vector holding the cumulative sum of the wcount gradient
+ * @param v_wcount_dhSum #vector holding the cumulative sum of the wcount
+ * gradient
  * update on pi.
  * @param v_div_vSum #vector holding the cumulative sum of the divergence update
  * on pi.
- * @param v_curlvxSum #vector holding the cumulative sum of the curl of vx update
+ * @param v_curlvxSum #vector holding the cumulative sum of the curl of vx
+ * update
  * on pi.
- * @param v_curlvySum #vector holding the cumulative sum of the curl of vy update
+ * @param v_curlvySum #vector holding the cumulative sum of the curl of vy
+ * update
  * on pi.
- * @param v_curlvzSum #vector holding the cumulative sum of the curl of vz update
+ * @param v_curlvzSum #vector holding the cumulative sum of the curl of vz
+ * update
  * on pi.
  * @param v_hi_inv #vector of 1/h for pi.
  * @param v_vix #vector of x velocity of pi.
@@ -152,8 +159,9 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
     vector *v_dz, const struct cache *const cell_cache,
     struct c2_cache *const int_cache, int *icount, vector *v_rhoSum,
     vector *v_rho_dhSum, vector *v_wcountSum, vector *v_wcount_dhSum,
-    vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum, vector *v_curlvzSum,
-    vector v_hi_inv, vector v_vix, vector v_viy, vector v_viz) {
+    vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum,
+    vector *v_curlvzSum, vector v_hi_inv, vector v_vix, vector v_viy,
+    vector v_viz) {
 
 /* Left-pack values needed into the secondary cache using the interaction mask.
  */
@@ -203,8 +211,9 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
 
     /* Peform remainder interactions. */
     calcRemInteractions(int_cache, *icount, v_rhoSum, v_rho_dhSum, v_wcountSum,
-                        v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum,
-                        v_hi_inv, v_vix, v_viy, v_viz, &icount_align);
+                        v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum,
+                        v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz,
+                        &icount_align);
 
     mask_t int_mask, int_mask2;
     vec_init_mask_true(int_mask);
@@ -216,8 +225,8 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
           &int_cache->r2q[j], &int_cache->dxq[j], &int_cache->dyq[j],
           &int_cache->dzq[j], v_hi_inv, v_vix, v_viy, v_viz, &int_cache->vxq[j],
           &int_cache->vyq[j], &int_cache->vzq[j], &int_cache->mq[j], v_rhoSum,
-          v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum,
-          v_curlvzSum, int_mask, int_mask2, 0);
+          v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
+          v_curlvySum, v_curlvzSum, int_mask, int_mask2, 0);
     }
 
     /* Reset interaction count. */
@@ -574,8 +583,8 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
     v_hig2.v = vec_set1(hig2);
 
     /* Reset cumulative sums of update vectors. */
-    vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
-        v_curlvySum, v_curlvzSum;
+    vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
+        v_curlvxSum, v_curlvySum, v_curlvzSum;
 
     /* Get the inverse of hi. */
     vector v_hi_inv;
@@ -671,24 +680,25 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
        * cache. */
       if (doi_mask) {
         storeInteractions(doi_mask, pjd, &v_r2, &v_dx, &v_dy, &v_dz, cell_cache,
-                          &int_cache, &icount, &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
-                          &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
-                          &v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz);
+                          &int_cache, &icount, &v_rhoSum, &v_rho_dhSum,
+                          &v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
+                          &v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv,
+                          v_vix, v_viy, v_viz);
       }
       if (doi_mask2) {
         storeInteractions(doi_mask2, pjd + VEC_SIZE, &v_r2_2, &v_dx_2, &v_dy_2,
                           &v_dz_2, cell_cache, &int_cache, &icount, &v_rhoSum,
-                          &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
-                          &v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv, v_vix,
-                          v_viy, v_viz);
+                          &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
+                          &v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum,
+                          v_hi_inv, v_vix, v_viy, v_viz);
       }
     }
 
     /* Perform padded vector remainder interactions if any are present. */
-    calcRemInteractions(&int_cache, icount, &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
-                        &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
-                        &v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz,
-                        &icount_align);
+    calcRemInteractions(&int_cache, icount, &v_rhoSum, &v_rho_dhSum,
+                        &v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
+                        &v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv,
+                        v_vix, v_viy, v_viz, &icount_align);
 
     /* Initialise masks to true in case remainder interactions have been
      * performed. */
@@ -702,9 +712,9 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
           &int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd],
           &int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz,
           &int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd],
-          &int_cache.mq[pjd], &v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
-          &v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum, int_mask, int_mask2,
-          0);
+          &int_cache.mq[pjd], &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
+          &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
+          &v_curlvzSum, int_mask, int_mask2, 0);
     }
 
     /* Perform horizontal adds on vector sums and store result in particle pi.
@@ -903,8 +913,8 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
             &cell_cache->grad_h[pjd], &cell_cache->pOrho2[pjd],
             &cell_cache->balsara[pjd], &cell_cache->soundspeed[pjd],
             &cell_cache->m[pjd], v_hi_inv, v_hj_inv, &v_a_hydro_xSum,
-            &v_a_hydro_ySum, &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
-            v_doi_mask);
+            &v_a_hydro_ySum, &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum,
+            &v_entropy_dtSum, v_doi_mask);
       }
 
     } /* Loop over all other particles. */
@@ -1030,8 +1040,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
 
   /* Read the needed particles into the two caches. */
   cache_read_two_partial_cells_sorted(ci, cj, ci_cache, cj_cache, sort_i,
-                                      sort_j, shift, &first_pi,
-                                      &last_pj);
+                                      sort_j, shift, &first_pi, &last_pj);
 
   /* Get the number of particles read into the ci cache. */
   int ci_cache_count = count_i - first_pi;
@@ -1073,8 +1082,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
       v_hig2.v = vec_set1(hig2);
 
       /* Reset cumulative sums of update vectors. */
-      vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
-          v_curlvySum, v_curlvzSum;
+      vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
+          v_curlvxSum, v_curlvySum, v_curlvzSum;
 
       /* Get the inverse of hi. */
       vector v_hi_inv;
@@ -1113,8 +1122,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
 #ifdef SWIFT_DEBUG_CHECKS
         if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 ||
             cj_cache_idx + (VEC_SIZE - 1) > (last_pj + 1 + VEC_SIZE)) {
-          error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d",
-                cj_cache_idx, last_pj);
+          error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d", cj_cache_idx,
+                last_pj);
         }
 #endif
 
@@ -1146,9 +1155,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
           runner_iact_nonsym_1_vec_density(
               &v_r2, &v_dx, &v_dy, &v_dz, v_hi_inv, v_vix, v_viy, v_viz,
               &cj_cache->vx[cj_cache_idx], &cj_cache->vy[cj_cache_idx],
-              &cj_cache->vz[cj_cache_idx], &cj_cache->m[cj_cache_idx], &v_rhoSum,
-              &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum,
-              &v_curlvySum, &v_curlvzSum, v_doi_mask);
+              &cj_cache->vz[cj_cache_idx], &cj_cache->m[cj_cache_idx],
+              &v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
+              &v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum,
+              v_doi_mask);
 
       } /* loop over the parts in cj. */
 
@@ -1203,8 +1213,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
       v_hjg2.v = vec_set1(hjg2);
 
       /* Reset cumulative sums of update vectors. */
-      vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
-          v_curlvySum, v_curlvzSum;
+      vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
+          v_curlvxSum, v_curlvySum, v_curlvzSum;
 
       /* Get the inverse of hj. */
       vector v_hj_inv;
@@ -1238,8 +1248,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
 
 #ifdef SWIFT_DEBUG_CHECKS
         if (ci_cache_idx % VEC_SIZE != 0 || ci_cache_idx < 0 ||
-            ci_cache_idx + (VEC_SIZE - 1) >
-                (count_i - first_pi + VEC_SIZE)) {
+            ci_cache_idx + (VEC_SIZE - 1) > (count_i - first_pi + VEC_SIZE)) {
           error(
               "Unaligned read!!! ci_cache_idx=%d, first_pi=%d, "
               "count_i=%d",
@@ -1277,9 +1286,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
           runner_iact_nonsym_1_vec_density(
               &v_r2, &v_dx, &v_dy, &v_dz, v_hj_inv, v_vjx, v_vjy, v_vjz,
               &ci_cache->vx[ci_cache_idx], &ci_cache->vy[ci_cache_idx],
-              &ci_cache->vz[ci_cache_idx], &ci_cache->m[ci_cache_idx], &v_rhoSum,
-              &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum,
-              &v_curlvySum, &v_curlvzSum, v_doj_mask);
+              &ci_cache->vz[ci_cache_idx], &ci_cache->m[ci_cache_idx],
+              &v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
+              &v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum,
+              v_doj_mask);
 
       } /* loop over the parts in ci. */
 
@@ -1351,7 +1361,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
   /* Check if any particles are active and return if there are none. */
   int numActive = 0;
 
-  /* Use the largest smoothing length to make sure that no interactions are missed. */
+  /* Use the largest smoothing length to make sure that no interactions are
+   * missed. */
   const double h_max = max(hi_max, hj_max);
 
   if (active_ci) {
@@ -1416,8 +1427,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
 
   /* Read the needed particles into the two caches. */
   cache_read_two_partial_cells_sorted_force(ci, cj, ci_cache, cj_cache, sort_i,
-                                            sort_j, shift, &first_pi,
-                                            &last_pj);
+                                            sort_j, shift, &first_pi, &last_pj);
 
   /* Get the number of particles read into the ci cache. */
   int ci_cache_count = count_i - first_pi;
@@ -1465,8 +1475,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
       v_hig2.v = vec_set1(hig2);
 
       /* Reset cumulative sums of update vectors. */
-      vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum, v_sigSum,
-          v_entropy_dtSum;
+      vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum,
+          v_sigSum, v_entropy_dtSum;
 
       /* Get the inverse of hi. */
       vector v_hi_inv;
@@ -1502,8 +1512,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
 #ifdef SWIFT_DEBUG_CHECKS
         if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 ||
             cj_cache_idx + (VEC_SIZE - 1) > (last_pj + 1 + VEC_SIZE)) {
-          error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d",
-                cj_cache_idx, last_pj);
+          error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d", cj_cache_idx,
+                last_pj);
         }
 #endif
 
@@ -1547,8 +1557,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
               &cj_cache->grad_h[cj_cache_idx], &cj_cache->pOrho2[cj_cache_idx],
               &cj_cache->balsara[cj_cache_idx],
               &cj_cache->soundspeed[cj_cache_idx], &cj_cache->m[cj_cache_idx],
-              v_hi_inv, v_hj_inv, &v_a_hydro_xSum, &v_a_hydro_ySum, &v_a_hydro_zSum,
-              &v_h_dtSum, &v_sigSum, &v_entropy_dtSum, v_doi_mask);
+              v_hi_inv, v_hj_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
+              &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
+              v_doi_mask);
         }
 
       } /* loop over the parts in cj. */
@@ -1610,8 +1621,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
       v_hjg2.v = vec_set1(hjg2);
 
       /* Reset cumulative sums of update vectors. */
-      vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum, v_sigSum,
-          v_entropy_dtSum;
+      vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum,
+          v_sigSum, v_entropy_dtSum;
 
       /* Get the inverse of hj. */
       vector v_hj_inv;
@@ -1689,8 +1700,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
               &ci_cache->grad_h[ci_cache_idx], &ci_cache->pOrho2[ci_cache_idx],
               &ci_cache->balsara[ci_cache_idx],
               &ci_cache->soundspeed[ci_cache_idx], &ci_cache->m[ci_cache_idx],
-              v_hj_inv, v_hi_inv, &v_a_hydro_xSum, &v_a_hydro_ySum, &v_a_hydro_zSum,
-              &v_h_dtSum, &v_sigSum, &v_entropy_dtSum, v_doj_mask);
+              v_hj_inv, v_hi_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
+              &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
+              v_doj_mask);
         }
       } /* loop over the parts in ci. */