Code formatting and small style changes. Updates some comments in the cache construction, narrows the scope of the `idx` loop variable, and widens `max_dx` from `float` to `double`.

bcffe08f · Matthieu Schaller · 52507e8b · bcffe08f · bcffe08f
Commit bcffe08f authored 7 years ago by Matthieu Schaller
--- a/src/cache.h
+++ b/src/cache.h
@@ -198,8 +198,8 @@ __attribute__((always_inline)) INLINE void cache_read_particles(
  swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);

  const struct part *restrict parts = ci->parts;
-  const double loc[3] = {ci->loc[0], ci->loc[1],ci->loc[2]};
-  
+  const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
+
  /* Shift the particles positions to a local frame so single precision can be
   * used instead of double precision. */
  for (int i = 0; i < ci->count; i++) {
@@ -250,7 +250,7 @@ __attribute__((always_inline)) INLINE void cache_read_force_particles(
                            SWIFT_CACHE_ALIGNMENT);

  const struct part *restrict parts = ci->parts;
-  const double loc[3] = {ci->loc[0], ci->loc[1],ci->loc[2]};
+  const double loc[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};

  /* Shift the particles positions to a local frame so single precision can be
   * used instead of double precision. */
@@ -296,7 +296,6 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
    const struct entry *restrict sort_j, const double *restrict const shift,
    int *first_pi, int *last_pj) {

-  int idx;
  /* Pad number of particles read to the vector size. */
  int rem = (ci->count - *first_pi) % VEC_SIZE;
  if (rem != 0) {
@@ -312,17 +311,17 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
    if (*last_pj + pad < cj->count) *last_pj += pad;
  }

+  /* Get some local pointers */
  const int first_pi_align = *first_pi;
  const int last_pj_align = *last_pj;
  const struct part *restrict parts_i = ci->parts;
  const struct part *restrict parts_j = cj->parts;

  /* Shift particles to the local frame and account for boundary conditions.*/
-  const double total_ci_shift[3] = {cj->loc[0] + shift[0], cj->loc[1] + shift[1],
-                             cj->loc[2] + shift[2]};
-  const double total_cj_shift[3] = {cj->loc[0], cj->loc[1],
-                             cj->loc[2]};
-  
+  const double total_ci_shift[3] = {
+      cj->loc[0] + shift[0], cj->loc[1] + shift[1], cj->loc[2] + shift[2]};
+  const double total_cj_shift[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};
+
  /* Let the compiler know that the data is aligned and create pointers to the
   * arrays inside the cache. */
  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
@@ -335,16 +334,11 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
  swift_declare_aligned_ptr(float, vz, ci_cache->vz, SWIFT_CACHE_ALIGNMENT);

  int ci_cache_count = ci->count - first_pi_align;
-  
+
  /* Shift the particles positions to a local frame (ci frame) so single
-   * precision
-   * can be
-   * used instead of double precision. Also shift the cell ci, particles
-   * positions
-   * due to BCs but leave cell cj. */
+   * precision can be used instead of double precision. */
  for (int i = 0; i < ci_cache_count; i++) {
-    /* Make sure ci_cache is filled from the first element. */
-    idx = sort_i[i + first_pi_align].i;
+    const int idx = sort_i[i + first_pi_align].i;
    x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
    y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
    z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
@@ -371,30 +365,31 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
          "is not within "
          "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
          "2*space_maxreldx)]. x=%f, ci->width[0]=%f",
-          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, x[i],
-          ci->width[0]);
+          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
+          cj->loc[2], i, x[i], ci->width[0]);
    if (y[i] > shift_threshold_y || y[i] < -shift_threshold_y)
      error(
          "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d y pos "
          "is not within "
          "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
          "2*space_maxreldx)]. y=%f, ci->width[1]=%f",
-          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, y[i],
-          ci->width[1]);
+          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
+          cj->loc[2], i, y[i], ci->width[1]);
    if (z[i] > shift_threshold_z || z[i] < -shift_threshold_z)
      error(
          "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d z pos "
          "is not within "
          "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
          "2*space_maxreldx)]. z=%f, ci->width[2]=%f",
-          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, z[i],
-          ci->width[2]);
+          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
+          cj->loc[2], i, z[i], ci->width[2]);
  }
 #endif

  /* Pad cache with fake particles that exist outside the cell so will not
-   * interact.*/
-  const float max_dx = max(ci->dx_max_part, cj->dx_max_part);
+   * interact. We use values of the same magnitude (but negative!) as the real
+   * particles to avoid overflow problems. */
+  const double max_dx = max(ci->dx_max_part, cj->dx_max_part);
  const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
                               -(2. * ci->width[1] + max_dx),
                               -(2. * ci->width[2] + max_dx)};
@@ -425,7 +420,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
  swift_declare_aligned_ptr(float, vzj, cj_cache->vz, SWIFT_CACHE_ALIGNMENT);

  for (int i = 0; i <= last_pj_align; i++) {
-    idx = sort_j[i].i;
+    const int idx = sort_j[i].i;
    xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
    yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
    zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
@@ -445,29 +440,30 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted(
          "pos is not within "
          "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
          "2*space_maxreldx)]. xj=%f, ci->width[0]=%f",
-          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, xj[i],
-          ci->width[0]);
+          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
+          cj->loc[2], i, xj[i], ci->width[0]);
    if (yj[i] > shift_threshold_y || yj[i] < -shift_threshold_y)
      error(
          "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d yj "
          "pos is not within "
          "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
          "2*space_maxreldx)]. yj=%f, ci->width[1]=%f",
-          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, yj[i],
-          ci->width[1]);
+          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
+          cj->loc[2], i, yj[i], ci->width[1]);
    if (zj[i] > shift_threshold_z || zj[i] < -shift_threshold_z)
      error(
          "Error: ci->loc[%lf,%lf,%lf], cj->loc[%lf,%lf,%lf] Particle %d zj "
          "pos is not within "
          "[-4*ci->width*(1 + 2*space_maxreldx), 4*ci->width*(1 + "
          "2*space_maxreldx)]. zj=%f, ci->width[2]=%f",
-          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1], cj->loc[2], i, zj[i],
-          ci->width[2]);
+          ci->loc[0], ci->loc[1], ci->loc[2], cj->loc[0], cj->loc[1],
+          cj->loc[2], i, zj[i], ci->width[2]);
  }
 #endif

  /* Pad cache with fake particles that exist outside the cell so will not
-   * interact.*/
+   * interact. We use values of the same magnitude (but negative!) as the real
+   * particles to avoid overflow problems. */
  const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
                                 -(2. * cj->width[1] + max_dx),
                                 -(2. * cj->width[2] + max_dx)};
@@ -508,7 +504,6 @@ cache_read_two_partial_cells_sorted_force(
    const struct entry *restrict sort_i, const struct entry *restrict sort_j,
    const double *const shift, int *first_pi, int *last_pj) {

-  int idx;
  /* Pad number of particles read to the vector size. */
  int rem = (ci->count - *first_pi) % VEC_SIZE;
  if (rem != 0) {
@@ -524,16 +519,16 @@ cache_read_two_partial_cells_sorted_force(
    if (*last_pj + pad < cj->count) *last_pj += pad;
  }

+  /* Get some local pointers */
  const int first_pi_align = *first_pi;
  const int last_pj_align = *last_pj;
  const struct part *restrict parts_i = ci->parts;
  const struct part *restrict parts_j = cj->parts;
-  
+
  /* Shift particles to the local frame and account for boundary conditions.*/
-  const double total_ci_shift[3] = {cj->loc[0] + shift[0], cj->loc[1] + shift[1],
-                             cj->loc[2] + shift[2]};
-  const double total_cj_shift[3] = {cj->loc[0], cj->loc[1],
-                             cj->loc[2]};
+  const double total_ci_shift[3] = {
+      cj->loc[0] + shift[0], cj->loc[1] + shift[1], cj->loc[2] + shift[2]};
+  const double total_cj_shift[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};

  /* Let the compiler know that the data is aligned and create pointers to the
   * arrays inside the cache. */
@@ -557,14 +552,10 @@ cache_read_two_partial_cells_sorted_force(

  int ci_cache_count = ci->count - first_pi_align;
  /* Shift the particles positions to a local frame (ci frame) so single
-   * precision
-   * can be
-   * used instead of double precision. Also shift the cell ci, particles
-   * positions
-   * due to BCs but leave cell cj. */
+   * precision can be used instead of double precision. */
  for (int i = 0; i < ci_cache_count; i++) {
-    /* Make sure ci_cache is filled from the first element. */
-    idx = sort_i[i + first_pi_align].i;
+
+    const int idx = sort_i[i + first_pi_align].i;
    x[i] = (float)(parts_i[idx].x[0] - total_ci_shift[0]);
    y[i] = (float)(parts_i[idx].x[1] - total_ci_shift[1]);
    z[i] = (float)(parts_i[idx].x[2] - total_ci_shift[2]);
@@ -581,8 +572,9 @@ cache_read_two_partial_cells_sorted_force(
  }

  /* Pad cache with fake particles that exist outside the cell so will not
-   * interact.*/
-  const float max_dx = max(ci->dx_max_part, cj->dx_max_part);
+   * interact. We use values of the same magnitude (but negative!) as the real
+   * particles to avoid overflow problems. */
+  const double max_dx = max(ci->dx_max_part, cj->dx_max_part);
  const float pos_padded[3] = {-(2. * ci->width[0] + max_dx),
                               -(2. * ci->width[1] + max_dx),
                               -(2. * ci->width[2] + max_dx)};
@@ -626,7 +618,7 @@ cache_read_two_partial_cells_sorted_force(
                            SWIFT_CACHE_ALIGNMENT);

  for (int i = 0; i <= last_pj_align; i++) {
-    idx = sort_j[i].i;
+    const int idx = sort_j[i].i;
    xj[i] = (float)(parts_j[idx].x[0] - total_cj_shift[0]);
    yj[i] = (float)(parts_j[idx].x[1] - total_cj_shift[1]);
    zj[i] = (float)(parts_j[idx].x[2] - total_cj_shift[2]);
@@ -643,7 +635,8 @@ cache_read_two_partial_cells_sorted_force(
  }

  /* Pad cache with fake particles that exist outside the cell so will not
-   * interact.*/
+   * interact. We use values of the same magnitude (but negative!) as the real
+   * particles to avoid overflow problems. */
  const float pos_padded_j[3] = {-(2. * cj->width[0] + max_dx),
                                 -(2. * cj->width[1] + max_dx),
                                 -(2. * cj->width[2] + max_dx)};

--- a/src/runner_doiact_vec.c
+++ b/src/runner_doiact_vec.c
@@ -41,9 +41,11 @@ static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
 * gradient update on pi.
 * @param v_wcountSum (return) #vector holding the cumulative sum of the wcount
 * update on pi.
- * @param v_wcount_dhSum (return) #vector holding the cumulative sum of the wcount
+ * @param v_wcount_dhSum (return) #vector holding the cumulative sum of the
+ * wcount
 * gradient update on pi.
- * @param v_div_vSum (return) #vector holding the cumulative sum of the divergence
+ * @param v_div_vSum (return) #vector holding the cumulative sum of the
+ * divergence
 * update on pi.
 * @param v_curlvxSum (return) #vector holding the cumulative sum of the curl of
 * vx update on pi.
@@ -61,9 +63,9 @@ static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2);
 __attribute__((always_inline)) INLINE static void calcRemInteractions(
    struct c2_cache *const int_cache, const int icount, vector *v_rhoSum,
    vector *v_rho_dhSum, vector *v_wcountSum, vector *v_wcount_dhSum,
-    vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum, vector *v_curlvzSum,
-    vector v_hi_inv, vector v_vix, vector v_viy, vector v_viz,
-    int *icount_align) {
+    vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum,
+    vector *v_curlvzSum, vector v_hi_inv, vector v_vix, vector v_viy,
+    vector v_viz, int *icount_align) {

  mask_t int_mask, int_mask2;

@@ -108,8 +110,8 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
        v_hi_inv, v_vix, v_viy, v_viz, &int_cache->vxq[*icount_align],
        &int_cache->vyq[*icount_align], &int_cache->vzq[*icount_align],
        &int_cache->mq[*icount_align], v_rhoSum, v_rho_dhSum, v_wcountSum,
-        v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum, int_mask,
-        int_mask2, 1);
+        v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum,
+        int_mask, int_mask2, 1);
  }
 }

@@ -127,20 +129,25 @@ __attribute__((always_inline)) INLINE static void calcRemInteractions(
 * @param int_cache (return) secondary #cache of interactions between two
 * particles.
 * @param icount Interaction count.
- * @param v_rhoSum #vector holding the cumulative sum of the density update on pi.
+ * @param v_rhoSum #vector holding the cumulative sum of the density update on
+ * pi.
 * @param v_rho_dhSum #vector holding the cumulative sum of the density gradient
 * update on pi.
 * @param v_wcountSum #vector holding the cumulative sum of the wcount update on
 * pi.
- * @param v_wcount_dhSum #vector holding the cumulative sum of the wcount gradient
+ * @param v_wcount_dhSum #vector holding the cumulative sum of the wcount
+ * gradient
 * update on pi.
 * @param v_div_vSum #vector holding the cumulative sum of the divergence update
 * on pi.
- * @param v_curlvxSum #vector holding the cumulative sum of the curl of vx update
+ * @param v_curlvxSum #vector holding the cumulative sum of the curl of vx
+ * update
 * on pi.
- * @param v_curlvySum #vector holding the cumulative sum of the curl of vy update
+ * @param v_curlvySum #vector holding the cumulative sum of the curl of vy
+ * update
 * on pi.
- * @param v_curlvzSum #vector holding the cumulative sum of the curl of vz update
+ * @param v_curlvzSum #vector holding the cumulative sum of the curl of vz
+ * update
 * on pi.
 * @param v_hi_inv #vector of 1/h for pi.
 * @param v_vix #vector of x velocity of pi.
@@ -152,8 +159,9 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
    vector *v_dz, const struct cache *const cell_cache,
    struct c2_cache *const int_cache, int *icount, vector *v_rhoSum,
    vector *v_rho_dhSum, vector *v_wcountSum, vector *v_wcount_dhSum,
-    vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum, vector *v_curlvzSum,
-    vector v_hi_inv, vector v_vix, vector v_viy, vector v_viz) {
+    vector *v_div_vSum, vector *v_curlvxSum, vector *v_curlvySum,
+    vector *v_curlvzSum, vector v_hi_inv, vector v_vix, vector v_viy,
+    vector v_viz) {

 /* Left-pack values needed into the secondary cache using the interaction mask.
 */
@@ -203,8 +211,9 @@ __attribute__((always_inline)) INLINE static void storeInteractions(

    /* Peform remainder interactions. */
    calcRemInteractions(int_cache, *icount, v_rhoSum, v_rho_dhSum, v_wcountSum,
-                        v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum, v_curlvzSum,
-                        v_hi_inv, v_vix, v_viy, v_viz, &icount_align);
+                        v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum,
+                        v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz,
+                        &icount_align);

    mask_t int_mask, int_mask2;
    vec_init_mask_true(int_mask);
@@ -216,8 +225,8 @@ __attribute__((always_inline)) INLINE static void storeInteractions(
          &int_cache->r2q[j], &int_cache->dxq[j], &int_cache->dyq[j],
          &int_cache->dzq[j], v_hi_inv, v_vix, v_viy, v_viz, &int_cache->vxq[j],
          &int_cache->vyq[j], &int_cache->vzq[j], &int_cache->mq[j], v_rhoSum,
-          v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum, v_curlvySum,
-          v_curlvzSum, int_mask, int_mask2, 0);
+          v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
+          v_curlvySum, v_curlvzSum, int_mask, int_mask2, 0);
    }

    /* Reset interaction count. */
@@ -574,8 +583,8 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
    v_hig2.v = vec_set1(hig2);

    /* Reset cumulative sums of update vectors. */
-    vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
-        v_curlvySum, v_curlvzSum;
+    vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
+        v_curlvxSum, v_curlvySum, v_curlvzSum;

    /* Get the inverse of hi. */
    vector v_hi_inv;
@@ -671,24 +680,25 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
       * cache. */
      if (doi_mask) {
        storeInteractions(doi_mask, pjd, &v_r2, &v_dx, &v_dy, &v_dz, cell_cache,
-                          &int_cache, &icount, &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
-                          &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
-                          &v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz);
+                          &int_cache, &icount, &v_rhoSum, &v_rho_dhSum,
+                          &v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
+                          &v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv,
+                          v_vix, v_viy, v_viz);
      }
      if (doi_mask2) {
        storeInteractions(doi_mask2, pjd + VEC_SIZE, &v_r2_2, &v_dx_2, &v_dy_2,
                          &v_dz_2, cell_cache, &int_cache, &icount, &v_rhoSum,
-                          &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
-                          &v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv, v_vix,
-                          v_viy, v_viz);
+                          &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
+                          &v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum,
+                          v_hi_inv, v_vix, v_viy, v_viz);
      }
    }

    /* Perform padded vector remainder interactions if any are present. */
-    calcRemInteractions(&int_cache, icount, &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
-                        &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
-                        &v_curlvzSum, v_hi_inv, v_vix, v_viy, v_viz,
-                        &icount_align);
+    calcRemInteractions(&int_cache, icount, &v_rhoSum, &v_rho_dhSum,
+                        &v_wcountSum, &v_wcount_dhSum, &v_div_vSum,
+                        &v_curlvxSum, &v_curlvySum, &v_curlvzSum, v_hi_inv,
+                        v_vix, v_viy, v_viz, &icount_align);

    /* Initialise masks to true in case remainder interactions have been
     * performed. */
@@ -702,9 +712,9 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec(
          &int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd],
          &int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz,
          &int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd],
-          &int_cache.mq[pjd], &v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
-          &v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum, int_mask, int_mask2,
-          0);
+          &int_cache.mq[pjd], &v_rhoSum, &v_rho_dhSum, &v_wcountSum,
+          &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum, &v_curlvySum,
+          &v_curlvzSum, int_mask, int_mask2, 0);
    }

    /* Perform horizontal adds on vector sums and store result in particle pi.
@@ -903,8 +913,8 @@ __attribute__((always_inline)) INLINE void runner_doself2_force_vec(
            &cell_cache->grad_h[pjd], &cell_cache->pOrho2[pjd],
            &cell_cache->balsara[pjd], &cell_cache->soundspeed[pjd],
            &cell_cache->m[pjd], v_hi_inv, v_hj_inv, &v_a_hydro_xSum,
-            &v_a_hydro_ySum, &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
-            v_doi_mask);
+            &v_a_hydro_ySum, &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum,
+            &v_entropy_dtSum, v_doi_mask);
      }

    } /* Loop over all other particles. */
@@ -1030,8 +1040,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,

  /* Read the needed particles into the two caches. */
  cache_read_two_partial_cells_sorted(ci, cj, ci_cache, cj_cache, sort_i,
-                                      sort_j, shift, &first_pi,
-                                      &last_pj);
+                                      sort_j, shift, &first_pi, &last_pj);

  /* Get the number of particles read into the ci cache. */
  int ci_cache_count = count_i - first_pi;
@@ -1073,8 +1082,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
      v_hig2.v = vec_set1(hig2);

      /* Reset cumulative sums of update vectors. */
-      vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
-          v_curlvySum, v_curlvzSum;
+      vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
+          v_curlvxSum, v_curlvySum, v_curlvzSum;

      /* Get the inverse of hi. */
      vector v_hi_inv;
@@ -1113,8 +1122,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
 #ifdef SWIFT_DEBUG_CHECKS
        if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 ||
            cj_cache_idx + (VEC_SIZE - 1) > (last_pj + 1 + VEC_SIZE)) {
-          error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d",
-                cj_cache_idx, last_pj);
+          error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d", cj_cache_idx,
+                last_pj);
        }
 #endif

@@ -1146,9 +1155,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
          runner_iact_nonsym_1_vec_density(
              &v_r2, &v_dx, &v_dy, &v_dz, v_hi_inv, v_vix, v_viy, v_viz,
              &cj_cache->vx[cj_cache_idx], &cj_cache->vy[cj_cache_idx],
-              &cj_cache->vz[cj_cache_idx], &cj_cache->m[cj_cache_idx], &v_rhoSum,
-              &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum,
-              &v_curlvySum, &v_curlvzSum, v_doi_mask);
+              &cj_cache->vz[cj_cache_idx], &cj_cache->m[cj_cache_idx],
+              &v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
+              &v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum,
+              v_doi_mask);

      } /* loop over the parts in cj. */

@@ -1203,8 +1213,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
      v_hjg2.v = vec_set1(hjg2);

      /* Reset cumulative sums of update vectors. */
-      vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum, v_curlvxSum,
-          v_curlvySum, v_curlvzSum;
+      vector v_rhoSum, v_rho_dhSum, v_wcountSum, v_wcount_dhSum, v_div_vSum,
+          v_curlvxSum, v_curlvySum, v_curlvzSum;

      /* Get the inverse of hj. */
      vector v_hj_inv;
@@ -1238,8 +1248,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,

 #ifdef SWIFT_DEBUG_CHECKS
        if (ci_cache_idx % VEC_SIZE != 0 || ci_cache_idx < 0 ||
-            ci_cache_idx + (VEC_SIZE - 1) >
-                (count_i - first_pi + VEC_SIZE)) {
+            ci_cache_idx + (VEC_SIZE - 1) > (count_i - first_pi + VEC_SIZE)) {
          error(
              "Unaligned read!!! ci_cache_idx=%d, first_pi=%d, "
              "count_i=%d",
@@ -1277,9 +1286,10 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci,
          runner_iact_nonsym_1_vec_density(
              &v_r2, &v_dx, &v_dy, &v_dz, v_hj_inv, v_vjx, v_vjy, v_vjz,
              &ci_cache->vx[ci_cache_idx], &ci_cache->vy[ci_cache_idx],
-              &ci_cache->vz[ci_cache_idx], &ci_cache->m[ci_cache_idx], &v_rhoSum,
-              &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum, &v_div_vSum, &v_curlvxSum,
-              &v_curlvySum, &v_curlvzSum, v_doj_mask);
+              &ci_cache->vz[ci_cache_idx], &ci_cache->m[ci_cache_idx],
+              &v_rhoSum, &v_rho_dhSum, &v_wcountSum, &v_wcount_dhSum,
+              &v_div_vSum, &v_curlvxSum, &v_curlvySum, &v_curlvzSum,
+              v_doj_mask);

      } /* loop over the parts in ci. */

@@ -1351,7 +1361,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
  /* Check if any particles are active and return if there are none. */
  int numActive = 0;

-  /* Use the largest smoothing length to make sure that no interactions are missed. */
+  /* Use the largest smoothing length to make sure that no interactions are
+   * missed. */
  const double h_max = max(hi_max, hj_max);

  if (active_ci) {
@@ -1416,8 +1427,7 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,

  /* Read the needed particles into the two caches. */
  cache_read_two_partial_cells_sorted_force(ci, cj, ci_cache, cj_cache, sort_i,
-                                            sort_j, shift, &first_pi,
-                                            &last_pj);
+                                            sort_j, shift, &first_pi, &last_pj);

  /* Get the number of particles read into the ci cache. */
  int ci_cache_count = count_i - first_pi;
@@ -1465,8 +1475,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
      v_hig2.v = vec_set1(hig2);

      /* Reset cumulative sums of update vectors. */
-      vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum, v_sigSum,
-          v_entropy_dtSum;
+      vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum,
+          v_sigSum, v_entropy_dtSum;

      /* Get the inverse of hi. */
      vector v_hi_inv;
@@ -1502,8 +1512,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
 #ifdef SWIFT_DEBUG_CHECKS
        if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0 ||
            cj_cache_idx + (VEC_SIZE - 1) > (last_pj + 1 + VEC_SIZE)) {
-          error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d",
-                cj_cache_idx, last_pj);
+          error("Unaligned read!!! cj_cache_idx=%d, last_pj=%d", cj_cache_idx,
+                last_pj);
        }
 #endif

@@ -1547,8 +1557,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
              &cj_cache->grad_h[cj_cache_idx], &cj_cache->pOrho2[cj_cache_idx],
              &cj_cache->balsara[cj_cache_idx],
              &cj_cache->soundspeed[cj_cache_idx], &cj_cache->m[cj_cache_idx],
-              v_hi_inv, v_hj_inv, &v_a_hydro_xSum, &v_a_hydro_ySum, &v_a_hydro_zSum,
-              &v_h_dtSum, &v_sigSum, &v_entropy_dtSum, v_doi_mask);
+              v_hi_inv, v_hj_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
+              &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
+              v_doi_mask);
        }

      } /* loop over the parts in cj. */
@@ -1610,8 +1621,8 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
      v_hjg2.v = vec_set1(hjg2);

      /* Reset cumulative sums of update vectors. */
-      vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum, v_sigSum,
-          v_entropy_dtSum;
+      vector v_a_hydro_xSum, v_a_hydro_ySum, v_a_hydro_zSum, v_h_dtSum,
+          v_sigSum, v_entropy_dtSum;

      /* Get the inverse of hj. */
      vector v_hj_inv;
@@ -1689,8 +1700,9 @@ void runner_dopair2_force_vec(struct runner *r, struct cell *ci,
              &ci_cache->grad_h[ci_cache_idx], &ci_cache->pOrho2[ci_cache_idx],
              &ci_cache->balsara[ci_cache_idx],
              &ci_cache->soundspeed[ci_cache_idx], &ci_cache->m[ci_cache_idx],
-              v_hj_inv, v_hi_inv, &v_a_hydro_xSum, &v_a_hydro_ySum, &v_a_hydro_zSum,
-              &v_h_dtSum, &v_sigSum, &v_entropy_dtSum, v_doj_mask);
+              v_hj_inv, v_hi_inv, &v_a_hydro_xSum, &v_a_hydro_ySum,
+              &v_a_hydro_zSum, &v_h_dtSum, &v_sigSum, &v_entropy_dtSum,
+              v_doj_mask);
        }
      } /* loop over the parts in ci. */