runner_doiact_grav.h 40.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
#ifndef SWIFT_RUNNER_DOIACT_GRAV_H
#define SWIFT_RUNNER_DOIACT_GRAV_H

/* Includes. */
#include "cell.h"
25
#include "gravity.h"
26
#include "inline.h"
27
#include "part.h"
28
#include "timers.h"
29

Matthieu Schaller's avatar
Matthieu Schaller committed
30
31
32
33
34
35
36
37
/**
 * @brief Recursively propagate the multipoles down the tree by applying the
 * L2L and L2P kernels.
 *
 * @param r The #runner.
 * @param c The #cell we are working on.
 * @param timer Are we timing this ?
 */
38
static INLINE void runner_do_grav_down(struct runner *r, struct cell *c, int timer) {
39

40
  /* Some constants */
41
  const struct engine *e = r->e;
42
43

  /* Cell properties */
44
45
  struct gpart *gparts = c->gparts;
  const int gcount = c->gcount;
46

47
  TIMER_TIC;
48

49
50
#ifdef SWIFT_DEBUG_CHECKS
  if (c->ti_old_multipole != e->ti_current) error("c->multipole not drifted.");
51
52
  if (c->multipole->pot.ti_init != e->ti_current)
    error("c->field tensor not initialised");
53
54
#endif

55
  if (c->split) { /* Node case */
56

57
    /* Add the field-tensor to all the 8 progenitors */
58
59
60
    for (int k = 0; k < 8; ++k) {
      struct cell *cp = c->progeny[k];

61
      /* Do we have a progenitor with any active g-particles ? */
62
      if (cp != NULL && cell_is_active_gravity(cp, e)) {
63

64
65
66
#ifdef SWIFT_DEBUG_CHECKS
        if (cp->ti_old_multipole != e->ti_current)
          error("cp->multipole not drifted.");
67
68
        if (cp->multipole->pot.ti_init != e->ti_current)
          error("cp->field tensor not initialised");
69
#endif
70
        struct grav_tensor shifted_tensor;
71

72
73
        /* If the tensor received any contribution, push it down */
        if (c->multipole->pot.interacted) {
74

75
76
77
78
79
80
81
          /* Shift the field tensor */
          gravity_L2L(&shifted_tensor, &c->multipole->pot, cp->multipole->CoM,
                      c->multipole->CoM);

          /* Add it to this level's tensor */
          gravity_field_tensors_add(&cp->multipole->pot, &shifted_tensor);
        }
82

83
        /* Recurse */
84
        runner_do_grav_down(r, cp, 0);
85
86
87
      }
    }

88
  } else { /* Leaf case */
89

90
91
92
    /* We can abort early if no interactions via multipole happened */
    if (!c->multipole->pot.interacted) return;

93
94
    if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");

95
96
    /* Apply accelerations to the particles */
    for (int i = 0; i < gcount; ++i) {
97
98

      /* Get a handle on the gpart */
99
      struct gpart *gp = &gparts[i];
100
101

      /* Update if active */
102
103
104
105
106
107
      if (gpart_is_active(gp, e)) {

#ifdef SWIFT_DEBUG_CHECKS
        /* Check that particles have been drifted to the current time */
        if (gp->ti_drift != e->ti_current)
          error("gpart not drifted to current time");
108
109
        if (c->multipole->pot.ti_init != e->ti_current)
          error("c->field tensor not initialised");
110
111
#endif

112
        /* Apply the kernel */
113
        gravity_L2P(&c->multipole->pot, c->multipole->CoM, gp);
114
      }
115
    }
116
  }
117
118

  if (timer) TIMER_TOC(timer_dograv_down);
119
120
}

121
122
123
124
125
126
127
128
/**
 * @brief Computes the interaction of the field tensor in a cell with the
 * multipole of another cell.
 *
 * @param r The #runner.
 * @param ci The #cell with field tensor to interact.
 * @param cj The #cell with the multipole.
 */
129
static INLINE void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci,
130
                           struct cell *restrict cj) {
131

132
  /* Some constants */
133
  const struct engine *e = r->e;
134
135
136
  const struct space *s = e->s;
  const int periodic = s->periodic;
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
137
  const struct gravity_props *props = e->gravity_properties;
138
139
  // const float a_smooth = e->gravity_properties->a_smooth;
  // const float rlr_inv = 1. / (a_smooth * ci->super->width[0]);
140
141
142

  TIMER_TIC;

143
  /* Anything to do here? */
144
  if (!cell_is_active_gravity(ci, e) || ci->nodeID != engine_rank) return;
145

146
147
148
  /* Short-cut to the multipole */
  const struct multipole *multi_j = &cj->multipole->m_pole;

149
#ifdef SWIFT_DEBUG_CHECKS
150
151
  if (ci == cj) error("Interacting a cell with itself using M2L");

Matthieu Schaller's avatar
Matthieu Schaller committed
152
153
  if (multi_j->num_gpart == 0)
    error("Multipole does not seem to have been set.");
154

155
156
  if (ci->multipole->pot.ti_init != e->ti_current)
    error("ci->grav tensor not initialised.");
157
#endif
158

159
  /* Do we need to drift the multipole ? */
160
161
162
163
164
165
  if (cj->ti_old_multipole != e->ti_current)
    error(
        "Undrifted multipole cj->ti_old_multipole=%lld cj->nodeID=%d "
        "ci->nodeID=%d "
        "e->ti_current=%lld",
        cj->ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current);
166
167

  /* Let's interact at this level */
168
  gravity_M2L(&ci->multipole->pot, multi_j, ci->multipole->CoM,
169
              cj->multipole->CoM, props, periodic, dim);
170
171
172
173

  TIMER_TOC(timer_dopair_grav_mm);
}

174
175
176
177
178
179
180
static INLINE void runner_dopair_grav_pp_full(const struct engine *e,
                                              struct gravity_cache *ci_cache,
                                              struct gravity_cache *cj_cache,
                                              int gcount_i, int gcount_j,
                                              int gcount_padded_j,
                                              struct gpart *restrict gparts_i,
                                              struct gpart *restrict gparts_j) {
Matthieu Schaller's avatar
Matthieu Schaller committed
181

182
183
  TIMER_TIC;

184
185
  /* Loop over all particles in ci... */
  for (int pid = 0; pid < gcount_i; pid++) {
186

187
188
    /* Skip inactive particles */
    if (!ci_cache->active[pid]) continue;
189

190
191
    /* Skip particle that can use the multipole */
    if (ci_cache->use_mpole[pid]) continue;
192

193
194
195
196
#ifdef SWIFT_DEBUG_CHECKS
    if (!gpart_is_active(&gparts_i[pid], e))
      error("Active particle went through the cache");
#endif
197

198
199
200
    const float x_i = ci_cache->x[pid];
    const float y_i = ci_cache->y[pid];
    const float z_i = ci_cache->z[pid];
201

202
203
204
205
206
    /* Some powers of the softening length */
    const float h_i = ci_cache->epsilon[pid];
    const float h2_i = h_i * h_i;
    const float h_inv_i = 1.f / h_i;
    const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
207

208
209
    /* Local accumulators for the acceleration and potential */
    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
210

211
    /* Make the compiler understand we are in happy vectorization land */
Matthieu Schaller's avatar
Matthieu Schaller committed
212
213
214
215
    swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
216
    swift_assume_size(gcount_padded_j, VEC_SIZE);
217

218
219
    /* Loop over every particle in the other cell. */
    for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
220

221
222
223
224
225
      /* Get info about j */
      const float x_j = cj_cache->x[pjd];
      const float y_j = cj_cache->y[pjd];
      const float z_j = cj_cache->z[pjd];
      const float mass_j = cj_cache->m[pjd];
226

227
228
229
230
      /* Compute the pairwise (square) distance. */
      const float dx = x_i - x_j;
      const float dy = y_i - y_j;
      const float dz = z_i - z_j;
231
232
      const float r2 = dx * dx + dy * dy + dz * dz;

233
234
#ifdef SWIFT_DEBUG_CHECKS
      if (r2 == 0.f) error("Interacting particles with 0 distance");
235

236
237
238
239
240
241
      /* Check that particles have been drifted to the current time */
      if (gparts_i[pid].ti_drift != e->ti_current)
        error("gpi not drifted to current time");
      if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
        error("gpj not drifted to current time");
#endif
242

243
      /* Interact! */
244
245
246
      float f_ij, pot_ij;
      runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij,
                               &pot_ij);
247
248
249
250
251

      /* Store it back */
      a_x -= f_ij * dx;
      a_y -= f_ij * dy;
      a_z -= f_ij * dz;
252
      pot += pot_ij;
253
254

#ifdef SWIFT_DEBUG_CHECKS
255
256
      /* Update the interaction counter if it's not a padded gpart */
      if (pjd < gcount_j) gparts_i[pid].num_interacted++;
257
#endif
258
    }
259

260
261
262
263
    /* Store everything back in cache */
    ci_cache->a_x[pid] = a_x;
    ci_cache->a_y[pid] = a_y;
    ci_cache->a_z[pid] = a_z;
264
    ci_cache->pot[pid] = pot;
265
  }
266
267

  TIMER_TOC(timer_dopair_grav_pp);
268
}
269

270
271
272
273
274
static INLINE void runner_dopair_grav_pp_truncated(
    const struct engine *e, const float rlr_inv, struct gravity_cache *ci_cache,
    struct gravity_cache *cj_cache, int gcount_i, int gcount_j,
    int gcount_padded_j, struct gpart *restrict gparts_i,
    struct gpart *restrict gparts_j) {
275

276
277
  TIMER_TIC;

278
279
  /* Loop over all particles in ci... */
  for (int pid = 0; pid < gcount_i; pid++) {
280

281
282
    /* Skip inactive particles */
    if (!ci_cache->active[pid]) continue;
283

284
285
    /* Skip particle that can use the multipole */
    if (ci_cache->use_mpole[pid]) continue;
286
287

#ifdef SWIFT_DEBUG_CHECKS
288
289
    if (!gpart_is_active(&gparts_i[pid], e))
      error("Active particle went through the cache");
290
291
#endif

292
293
294
    const float x_i = ci_cache->x[pid];
    const float y_i = ci_cache->y[pid];
    const float z_i = ci_cache->z[pid];
295

296
297
298
299
300
    /* Some powers of the softening length */
    const float h_i = ci_cache->epsilon[pid];
    const float h2_i = h_i * h_i;
    const float h_inv_i = 1.f / h_i;
    const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
301

302
303
    /* Local accumulators for the acceleration and potential */
    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
304

305
    /* Make the compiler understand we are in happy vectorization land */
Matthieu Schaller's avatar
Matthieu Schaller committed
306
307
308
309
    swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT);
310
    swift_assume_size(gcount_padded_j, VEC_SIZE);
311

312
313
    /* Loop over every particle in the other cell. */
    for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
314

315
      /* Get info about j */
316
317
318
      const float x_j = cj_cache->x[pjd];
      const float y_j = cj_cache->y[pjd];
      const float z_j = cj_cache->z[pjd];
319
      const float mass_j = cj_cache->m[pjd];
Matthieu Schaller's avatar
Matthieu Schaller committed
320

321
322
323
324
      /* Compute the pairwise (square) distance. */
      const float dx = x_i - x_j;
      const float dy = y_i - y_j;
      const float dz = z_i - z_j;
325
326
      const float r2 = dx * dx + dy * dy + dz * dz;

327
328
#ifdef SWIFT_DEBUG_CHECKS
      if (r2 == 0.f) error("Interacting particles with 0 distance");
329

330
331
332
333
334
335
      /* Check that particles have been drifted to the current time */
      if (gparts_i[pid].ti_drift != e->ti_current)
        error("gpi not drifted to current time");
      if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
        error("gpj not drifted to current time");
#endif
336

337
      /* Interact! */
338
      float f_ij, pot_ij;
339
      runner_iact_grav_pp_truncated(r2, h2_i, h_inv_i, h_inv3_i, mass_j,
340
                                    rlr_inv, &f_ij, &pot_ij);
341
342
343
344
345

      /* Store it back */
      a_x -= f_ij * dx;
      a_y -= f_ij * dy;
      a_z -= f_ij * dz;
346
      pot += pot_ij;
347
348

#ifdef SWIFT_DEBUG_CHECKS
349
350
      /* Update the interaction counter if it's not a padded gpart */
      if (pjd < gcount_j) gparts_i[pid].num_interacted++;
351
#endif
352
    }
353

354
355
356
357
    /* Store everything back in cache */
    ci_cache->a_x[pid] = a_x;
    ci_cache->a_y[pid] = a_y;
    ci_cache->a_z[pid] = a_z;
358
    ci_cache->pot[pid] = pot;
359
  }
360
361

  TIMER_TOC(timer_dopair_grav_pp);
362
}
363

364
365
366
367
368
369
static INLINE void runner_dopair_grav_pm(
    const struct engine *restrict e, struct gravity_cache *ci_cache,
    int gcount_i, int gcount_padded_i, struct gpart *restrict gparts_i,
    const float CoM_j[3], const struct multipole *restrict multi_j,
    struct cell *restrict cj) {

370
371
  TIMER_TIC;

372
  /* Make the compiler understand we are in happy vectorization land */
373
374
375
376
377
378
379
380
381
382
383
384
  swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon,
                            SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(int, active, ci_cache->active,
                            SWIFT_CACHE_ALIGNMENT);
  swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole,
                            SWIFT_CACHE_ALIGNMENT);
385
  swift_assume_size(gcount_padded_i, VEC_SIZE);
386

387
388
  /* Loop over all particles in ci... */
  for (int pid = 0; pid < gcount_padded_i; pid++) {
389

390
    /* Skip inactive particles */
391
    if (!active[pid]) continue;
392

393
    /* Skip particle that cannot use the multipole */
394
    if (!use_mpole[pid]) continue;
395
396

#ifdef SWIFT_DEBUG_CHECKS
397
398
    if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e))
      error("Active particle went through the cache");
399
400
#endif

401
402
403
    const float x_i = x[pid];
    const float y_i = y[pid];
    const float z_i = z[pid];
404
405

    /* Some powers of the softening length */
406
    const float h_i = epsilon[pid];
407
408
409
410
411
412
413
    const float h_inv_i = 1.f / h_i;

    /* Distance to the Multipole */
    const float dx = x_i - CoM_j[0];
    const float dy = y_i - CoM_j[1];
    const float dz = z_i - CoM_j[2];
    const float r2 = dx * dx + dy * dy + dz * dz;
Matthieu Schaller's avatar
Matthieu Schaller committed
414

415
416
417
418
419
420
    /* Interact! */
    float f_x, f_y, f_z;
    runner_iact_grav_pm(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y,
                        &f_z);

    /* Store it back */
421
422
423
    a_x[pid] = f_x;
    a_y[pid] = f_y;
    a_z[pid] = f_z;
424
425

#ifdef SWIFT_DEBUG_CHECKS
426
427
428
    /* Update the interaction counter */
    if (pid < gcount_i)
      gparts_i[pid].num_interacted += cj->multipole->m_pole.num_gpart;
429
430
#endif
  }
431
432

  TIMER_TOC(timer_dopair_grav_pm);
433
434
435
436
}

/**
 * @brief Computes the interaction of all the particles in a cell with all the
437
 * particles of another cell (switching function between full and truncated).
438
439
440
441
442
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The other #cell.
 */
443
static INLINE void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) {
444

445
446
447
448
449
  const struct engine *e = r->e;

  TIMER_TIC;

  /* Anything to do here? */
450
  if (!cell_is_active_gravity(ci, e) && !cell_is_active_gravity(cj, e)) return;
451
452
453
454
455
456
457
458
459

  /* Check that we are not doing something stupid */
  if (ci->split || cj->split) error("Running P-P on splitable cells");

  /* Let's start by drifting things */
  if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts");
  if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts");

  /* Recover some useful constants */
460
  struct space *s = e->s;
461
  const int periodic = s->periodic;
462
  const double cell_width = s->width[0];
463
  const float theta_crit2 = e->gravity_properties->theta_crit2;
464
465
  const double a_smooth = e->gravity_properties->a_smooth;
  const double r_cut_min = e->gravity_properties->r_cut_min;
466
  const double rlr = cell_width * a_smooth;
467
  const double min_trunc = rlr * r_cut_min;
468
469
470
471
472
473
  const float rlr_inv = 1. / rlr;

  /* Caches to play with */
  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
  struct gravity_cache *const cj_cache = &r->cj_gravity_cache;

474
475
476
477
478
  /* Get the distance vector between the pairs, wrapping. */
  double cell_shift[3];
  space_getsid(s, &ci, &cj, cell_shift);

  /* Record activity status */
479
480
  const int ci_active = cell_is_active_gravity(ci, e);
  const int cj_active = cell_is_active_gravity(cj, e);
481
482
483

  /* Do we need to drift the multipoles ? */
  if (cj_active && ci->ti_old_multipole != e->ti_current)
484
    error("Un-drifted multipole");
485
  if (ci_active && cj->ti_old_multipole != e->ti_current)
486
    error("Un-drifted multipole");
487
488
489
490
491

  /* Centre of the cell pair */
  const double loc[3] = {ci->loc[0],   // + 0. * ci->width[0],
                         ci->loc[1],   // + 0. * ci->width[1],
                         ci->loc[2]};  // + 0. * ci->width[2]};
492

493
494
495
496
497
498
499
500
501
502
503
504
  /* Shift to apply to the particles in each cell */
  const double shift_i[3] = {loc[0] + cell_shift[0], loc[1] + cell_shift[1],
                             loc[2] + cell_shift[2]};
  const double shift_j[3] = {loc[0], loc[1], loc[2]};

  /* Recover the multipole info and shift the CoM locations */
  const float rmax_i = ci->multipole->r_max;
  const float rmax_j = cj->multipole->r_max;
  const float rmax2_i = rmax_i * rmax_i;
  const float rmax2_j = rmax_j * rmax_j;
  const struct multipole *multi_i = &ci->multipole->m_pole;
  const struct multipole *multi_j = &cj->multipole->m_pole;
505
506
507
508
509
510
  const float CoM_i[3] = {(float)(ci->multipole->CoM[0] - shift_i[0]),
                          (float)(ci->multipole->CoM[1] - shift_i[1]),
                          (float)(ci->multipole->CoM[2] - shift_i[2])};
  const float CoM_j[3] = {(float)(cj->multipole->CoM[0] - shift_j[0]),
                          (float)(cj->multipole->CoM[1] - shift_j[1]),
                          (float)(cj->multipole->CoM[2] - shift_j[2])};
511
512

  /* Start by constructing particle caches */
513
514

  /* Computed the padded counts */
515
516
  const int gcount_i = ci->gcount;
  const int gcount_j = cj->gcount;
517
518
  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
  const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
519

520
#ifdef SWIFT_DEBUG_CHECKS
521
  /* Check that we fit in cache */
Matthieu Schaller's avatar
Matthieu Schaller committed
522
523
524
  if (gcount_i > ci_cache->count || gcount_j > cj_cache->count)
    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i,
          gcount_j);
525
#endif
526

527
528
  /* Fill the caches */
  gravity_cache_populate(e->max_active_bin, ci_cache, ci->gparts, gcount_i,
529
530
                         gcount_padded_i, shift_i, CoM_j, rmax2_j, theta_crit2,
                         ci);
531
  gravity_cache_populate(e->max_active_bin, cj_cache, cj->gparts, gcount_j,
532
533
                         gcount_padded_j, shift_j, CoM_i, rmax2_i, theta_crit2,
                         cj);
534

535
536
  /* Can we use the Newtonian version or do we need the truncated one ? */
  if (!periodic) {
537

538
    /* Not periodic -> Can always use Newtonian potential */
Matthieu Schaller's avatar
Matthieu Schaller committed
539

540
541
    /* Let's updated the active cell(s) only */
    if (ci_active) {
542

543
544
545
      /* First the P2P */
      runner_dopair_grav_pp_full(e, ci_cache, cj_cache, gcount_i, gcount_j,
                                 gcount_padded_j, ci->gparts, cj->gparts);
546

547
548
549
      /* Then the M2P */
      runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i, ci->gparts,
                            CoM_j, multi_j, cj);
550
    }
551
552
553
554
555
556
557
558
    if (cj_active) {

      /* First the P2P */
      runner_dopair_grav_pp_full(e, cj_cache, ci_cache, gcount_j, gcount_i,
                                 gcount_padded_i, cj->gparts, ci->gparts);
      /* Then the M2P */
      runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j, cj->gparts,
                            CoM_i, multi_i, ci);
559
    }
560

561
  } else { /* Periodic BC */
562

563
564
565
566
    /* Get the relative distance between the CoMs */
    const double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1],
                          CoM_j[2] - CoM_i[2]};
    const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
567
568

    /* Get the maximal distance between any two particles */
569
    const double max_r = sqrt(r2) + rmax_i + rmax_j;
570
571

    /* Do we need to use the truncated interactions ? */
572
573
574
575
576
    if (max_r > min_trunc) {

      /* Periodic but far-away cells must use the truncated potential */

      /* Let's updated the active cell(s) only */
577
578
579
      if (ci_active) {

        /* First the (truncated) P2P */
580
581
582
        runner_dopair_grav_pp_truncated(e, rlr_inv, ci_cache, cj_cache,
                                        gcount_i, gcount_j, gcount_padded_j,
                                        ci->gparts, cj->gparts);
583
584
585
586
587
588
589
590

        /* Then the M2P */
        runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i,
                              ci->gparts, CoM_j, multi_j, cj);
      }
      if (cj_active) {

        /* First the (truncated) P2P */
591
592
593
        runner_dopair_grav_pp_truncated(e, rlr_inv, cj_cache, ci_cache,
                                        gcount_j, gcount_i, gcount_padded_i,
                                        cj->gparts, ci->gparts);
594
595
596
597
598
599

        /* Then the M2P */
        runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j,
                              cj->gparts, CoM_i, multi_i, ci);
      }

600
601
602
603
604
    } else {

      /* Periodic but close-by cells can use the full Newtonian potential */

      /* Let's updated the active cell(s) only */
605
606
607
      if (ci_active) {

        /* First the (Newtonian) P2P */
608
609
        runner_dopair_grav_pp_full(e, ci_cache, cj_cache, gcount_i, gcount_j,
                                   gcount_padded_j, ci->gparts, cj->gparts);
610
611
612
613
614
615
616
617

        /* Then the M2P */
        runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i,
                              ci->gparts, CoM_j, multi_j, cj);
      }
      if (cj_active) {

        /* First the (Newtonian) P2P */
618
619
        runner_dopair_grav_pp_full(e, cj_cache, ci_cache, gcount_j, gcount_i,
                                   gcount_padded_i, cj->gparts, ci->gparts);
620
621
622
623
624

        /* Then the M2P */
        runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j,
                              cj->gparts, CoM_i, multi_i, ci);
      }
625
    }
626
  }
627

628
629
630
631
  /* Write back to the particles */
  if (ci_active) gravity_cache_write_back(ci_cache, ci->gparts, gcount_i);
  if (cj_active) gravity_cache_write_back(cj_cache, cj->gparts, gcount_j);

632
  TIMER_TOC(timer_dopair_grav_branch);
633
634
}

635
/**
636
637
 * @brief Computes the interaction of all the particles in a cell using the
 * full Newtonian potential.
638
639
 *
 * @param r The #runner.
Matthieu Schaller's avatar
Matthieu Schaller committed
640
 * @param c The #cell.
641
642
643
 *
 * @todo Use a local cache for the particles.
 */
644
static INLINE void runner_doself_grav_pp_full(struct runner *r, struct cell *c) {
645

646
647
648
649
650
651
652
  /* Some constants */
  const struct engine *const e = r->e;
  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;

  /* Cell properties */
  const int gcount = c->gcount;
  struct gpart *restrict gparts = c->gparts;
653
  const int c_active = cell_is_active_gravity(c, e);
654
655
656
  const double loc[3] = {c->loc[0] + 0.5 * c->width[0],
                         c->loc[1] + 0.5 * c->width[1],
                         c->loc[2] + 0.5 * c->width[2]};
657
658
659
660

  /* Anything to do here ?*/
  if (!c_active) return;

661
#ifdef SWIFT_DEBUG_CHECKS
662
663
  /* Check that we fit in cache */
  if (gcount > ci_cache->count)
664
    error("Not enough space in the cache! gcount=%d", gcount);
665
#endif
666
667
668
669

  /* Computed the padded counts */
  const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE;

670
  gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount,
671
                                  gcount_padded, loc, c);
672
673
674
675
676

  /* Ok... Here we go ! */

  /* Loop over all particles in ci... */
  for (int pid = 0; pid < gcount; pid++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
677

678
    /* Skip inactive particles */
679
    if (!ci_cache->active[pid]) continue;
Matthieu Schaller's avatar
Matthieu Schaller committed
680

681
682
683
    const float x_i = ci_cache->x[pid];
    const float y_i = ci_cache->y[pid];
    const float z_i = ci_cache->z[pid];
Matthieu Schaller's avatar
Matthieu Schaller committed
684

685
686
687
688
689
    /* Some powers of the softening length */
    const float h_i = ci_cache->epsilon[pid];
    const float h2_i = h_i * h_i;
    const float h_inv_i = 1.f / h_i;
    const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
Matthieu Schaller's avatar
Matthieu Schaller committed
690

691
    /* Local accumulators for the acceleration */
692
    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
Matthieu Schaller's avatar
Matthieu Schaller committed
693

694
    /* Make the compiler understand we are in happy vectorization land */
Matthieu Schaller's avatar
Matthieu Schaller committed
695
696
697
698
    swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
699
    swift_assume_size(gcount_padded, VEC_SIZE);
Matthieu Schaller's avatar
Matthieu Schaller committed
700

701
702
    /* Loop over every other particle in the cell. */
    for (int pjd = 0; pjd < gcount_padded; pjd++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
703

704
      /* No self interaction */
Matthieu Schaller's avatar
Matthieu Schaller committed
705
      if (pid == pjd) continue;
706
707
708
709
710
711

      /* Get info about j */
      const float x_j = ci_cache->x[pjd];
      const float y_j = ci_cache->y[pjd];
      const float z_j = ci_cache->z[pjd];
      const float mass_j = ci_cache->m[pjd];
Matthieu Schaller's avatar
Matthieu Schaller committed
712

713
714
715
716
717
      /* Compute the pairwise (square) distance. */
      const float dx = x_i - x_j;
      const float dy = y_i - y_j;
      const float dz = z_i - z_j;
      const float r2 = dx * dx + dy * dy + dz * dz;
Matthieu Schaller's avatar
Matthieu Schaller committed
718

719
720
#ifdef SWIFT_DEBUG_CHECKS
      if (r2 == 0.f) error("Interacting particles with 0 distance");
Matthieu Schaller's avatar
Matthieu Schaller committed
721

722
723
      /* Check that particles have been drifted to the current time */
      if (gparts[pid].ti_drift != e->ti_current)
Matthieu Schaller's avatar
Matthieu Schaller committed
724
        error("gpi not drifted to current time");
725
      if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current)
Matthieu Schaller's avatar
Matthieu Schaller committed
726
        error("gpj not drifted to current time");
727
#endif
Matthieu Schaller's avatar
Matthieu Schaller committed
728

729
      /* Interact! */
730
731
732
      float f_ij, pot_ij;
      runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij,
                               &pot_ij);
Matthieu Schaller's avatar
Matthieu Schaller committed
733

734
735
736
737
      /* Store it back */
      a_x -= f_ij * dx;
      a_y -= f_ij * dy;
      a_z -= f_ij * dz;
738
      pot += pot_ij;
Matthieu Schaller's avatar
Matthieu Schaller committed
739

740
741
742
743
744
#ifdef SWIFT_DEBUG_CHECKS
      /* Update the interaction counter if it's not a padded gpart */
      if (pjd < gcount) gparts[pid].num_interacted++;
#endif
    }
Matthieu Schaller's avatar
Matthieu Schaller committed
745

746
    /* Store everything back in cache */
747
748
749
    ci_cache->a_x[pid] = a_x;
    ci_cache->a_y[pid] = a_y;
    ci_cache->a_z[pid] = a_z;
750
    ci_cache->pot[pid] = pot;
751
752
  }

753
  /* Write back to the particles */
754
  gravity_cache_write_back(ci_cache, gparts, gcount);
755
756
757
758
759
760
761
762
763
764
765
}

/**
 * @brief Computes the interaction of all the particles in a cell using the
 * truncated Newtonian potential.
 *
 * @param r The #runner.
 * @param c The #cell.
 *
 * @todo Use a local cache for the particles.
 */
766
static INLINE void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) {
767
768
769
770
771
772
773
774
775

  /* Some constants */
  const struct engine *const e = r->e;
  const struct space *s = e->s;
  const double cell_width = s->width[0];
  const double a_smooth = e->gravity_properties->a_smooth;
  const double rlr = cell_width * a_smooth;
  const float rlr_inv = 1. / rlr;

776
777
778
779
780
781
  /* Caches to play with */
  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;

  /* Cell properties */
  const int gcount = c->gcount;
  struct gpart *restrict gparts = c->gparts;
782
  const int c_active = cell_is_active_gravity(c, e);
783
784
785
  const double loc[3] = {c->loc[0] + 0.5 * c->width[0],
                         c->loc[1] + 0.5 * c->width[1],
                         c->loc[2] + 0.5 * c->width[2]};
786
787
788
789

  /* Anything to do here ?*/
  if (!c_active) return;

790
#ifdef SWIFT_DEBUG_CHECKS
791
792
  /* Check that we fit in cache */
  if (gcount > ci_cache->count)
793
    error("Not enough space in the caches! gcount=%d", gcount);
794
#endif
795
796
797
798

  /* Computed the padded counts */
  const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE;

799
  gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount,
800
                                  gcount_padded, loc, c);
801
802
803
804
805

  /* Ok... Here we go ! */

  /* Loop over all particles in ci... */
  for (int pid = 0; pid < gcount; pid++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
806

807
    /* Skip inactive particles */
808
    if (!ci_cache->active[pid]) continue;
Matthieu Schaller's avatar
Matthieu Schaller committed
809

810
811
812
    const float x_i = ci_cache->x[pid];
    const float y_i = ci_cache->y[pid];
    const float z_i = ci_cache->z[pid];
Matthieu Schaller's avatar
Matthieu Schaller committed
813

814
815
816
817
818
    /* Some powers of the softening length */
    const float h_i = ci_cache->epsilon[pid];
    const float h2_i = h_i * h_i;
    const float h_inv_i = 1.f / h_i;
    const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
Matthieu Schaller's avatar
Matthieu Schaller committed
819

820
821
    /* Local accumulators for the acceleration and potential */
    float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f;
Matthieu Schaller's avatar
Matthieu Schaller committed
822

823
    /* Make the compiler understand we are in happy vectorization land */
Matthieu Schaller's avatar
Matthieu Schaller committed
824
825
826
827
    swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT);
828
    swift_assume_size(gcount_padded, VEC_SIZE);
Matthieu Schaller's avatar
Matthieu Schaller committed
829

830
831
    /* Loop over every other particle in the cell. */
    for (int pjd = 0; pjd < gcount_padded; pjd++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
832

833
      /* No self interaction */
Matthieu Schaller's avatar
Matthieu Schaller committed
834
      if (pid == pjd) continue;
835
836
837
838
839
840

      /* Get info about j */
      const float x_j = ci_cache->x[pjd];
      const float y_j = ci_cache->y[pjd];
      const float z_j = ci_cache->z[pjd];
      const float mass_j = ci_cache->m[pjd];
Matthieu Schaller's avatar
Matthieu Schaller committed
841

842
843
844
845
846
      /* Compute the pairwise (square) distance. */
      const float dx = x_i - x_j;
      const float dy = y_i - y_j;
      const float dz = z_i - z_j;
      const float r2 = dx * dx + dy * dy + dz * dz;
Matthieu Schaller's avatar
Matthieu Schaller committed
847

848
849
#ifdef SWIFT_DEBUG_CHECKS
      if (r2 == 0.f) error("Interacting particles with 0 distance");
Matthieu Schaller's avatar
Matthieu Schaller committed
850

851
852
      /* Check that particles have been drifted to the current time */
      if (gparts[pid].ti_drift != e->ti_current)
Matthieu Schaller's avatar
Matthieu Schaller committed
853
        error("gpi not drifted to current time");
854
      if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current)
Matthieu Schaller's avatar
Matthieu Schaller committed
855
        error("gpj not drifted to current time");
856
#endif
Matthieu Schaller's avatar
Matthieu Schaller committed
857

858
      /* Interact! */
859
      float f_ij, pot_ij;
860
      runner_iact_grav_pp_truncated(r2, h2_i, h_inv_i, h_inv3_i, mass_j,
861
                                    rlr_inv, &f_ij, &pot_ij);
862
863
864
865
866

      /* Store it back */
      a_x -= f_ij * dx;
      a_y -= f_ij * dy;
      a_z -= f_ij * dz;
867
      pot += pot_ij;
Matthieu Schaller's avatar
Matthieu Schaller committed
868

869
870
871
872
873
#ifdef SWIFT_DEBUG_CHECKS
      /* Update the interaction counter if it's not a padded gpart */
      if (pjd < gcount) gparts[pid].num_interacted++;
#endif
    }
Matthieu Schaller's avatar
Matthieu Schaller committed
874

875
    /* Store everything back in cache */
876
877
878
    ci_cache->a_x[pid] = a_x;
    ci_cache->a_y[pid] = a_y;
    ci_cache->a_z[pid] = a_z;
879
    ci_cache->pot[pid] = pot;
880
881
  }

882
  /* Write back to the particles */
883
  gravity_cache_write_back(ci_cache, gparts, gcount);
884
885
886
887
888
889
890
891
892
}

/**
 * @brief Computes the interaction of all the particles in a cell directly
 * (Switching function between truncated and full)
 *
 * @param r The #runner.
 * @param c The #cell.
 */
893
static INLINE void runner_doself_grav_pp(struct runner *r, struct cell *c) {
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910

  /* Some properties of the space */
  const struct engine *e = r->e;
  const struct space *s = e->s;
  const int periodic = s->periodic;
  const double cell_width = s->width[0];
  const double a_smooth = e->gravity_properties->a_smooth;
  const double r_cut_min = e->gravity_properties->r_cut_min;
  const double min_trunc = cell_width * r_cut_min * a_smooth;

  TIMER_TIC;

#ifdef SWIFT_DEBUG_CHECKS
  if (c->gcount == 0) error("Doing self gravity on an empty cell !");
#endif

  /* Anything to do here? */
911
  if (!cell_is_active_gravity(c, e)) return;
912

913
914
915
  /* Check that we are not doing something stupid */
  if (c->split) error("Running P-P on a splitable cell");

916
  /* Do we need to start by drifting things ? */
917
  if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");
918
919
920
921
922
923
924

  /* Can we use the Newtonian version or do we need the truncated one ? */
  if (!