runner_doiact_grav.h 38.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
#ifndef SWIFT_RUNNER_DOIACT_GRAV_H
#define SWIFT_RUNNER_DOIACT_GRAV_H

/* Includes. */
#include "cell.h"
25
#include "gravity.h"
26
#include "inline.h"
27
28
#include "part.h"

Matthieu Schaller's avatar
Matthieu Schaller committed
29
30
31
32
33
34
35
36
/**
 * @brief Recursively propagate the multipoles down the tree by applying the
 * L2L and L2P kernels.
 *
 * @param r The #runner.
 * @param c The #cell we are working on.
 * @param timer Are we timing this ?
 */
37
38
void runner_do_grav_down(struct runner *r, struct cell *c, int timer) {

39
  /* Some constants */
40
  const struct engine *e = r->e;
41
42

  /* Cell properties */
43
44
  struct gpart *gparts = c->gparts;
  const int gcount = c->gcount;
45

46
  TIMER_TIC;
47

48
49
#ifdef SWIFT_DEBUG_CHECKS
  if (c->ti_old_multipole != e->ti_current) error("c->multipole not drifted.");
50
51
  if (c->multipole->pot.ti_init != e->ti_current)
    error("c->field tensor not initialised");
52
53
#endif

54
  if (c->split) { /* Node case */
55

56
    /* Add the field-tensor to all the 8 progenitors */
57
58
59
    for (int k = 0; k < 8; ++k) {
      struct cell *cp = c->progeny[k];

60
61
      /* Do we have a progenitor with any active g-particles ? */
      if (cp != NULL && cell_is_active(cp, e)) {
62

63
64
65
#ifdef SWIFT_DEBUG_CHECKS
        if (cp->ti_old_multipole != e->ti_current)
          error("cp->multipole not drifted.");
66
67
        if (cp->multipole->pot.ti_init != e->ti_current)
          error("cp->field tensor not initialised");
68
#endif
69
        struct grav_tensor shifted_tensor;
70

71
        /* Shift the field tensor */
72
73
        gravity_L2L(&shifted_tensor, &c->multipole->pot, cp->multipole->CoM,
                    c->multipole->CoM);
74

75
        /* Add it to this level's tensor */
76
        gravity_field_tensors_add(&cp->multipole->pot, &shifted_tensor);
77

78
        /* Recurse */
79
        runner_do_grav_down(r, cp, 0);
80
81
82
      }
    }

83
  } else { /* Leaf case */
84

85
86
    if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");

87
88
    /* Apply accelerations to the particles */
    for (int i = 0; i < gcount; ++i) {
89
90

      /* Get a handle on the gpart */
91
      struct gpart *gp = &gparts[i];
92
93

      /* Update if active */
94
95
96
97
98
99
100
101
      if (gpart_is_active(gp, e)) {

#ifdef SWIFT_DEBUG_CHECKS
        /* Check that particles have been drifted to the current time */
        if (gp->ti_drift != e->ti_current)
          error("gpart not drifted to current time");
#endif

102
        /* Apply the kernel */
103
        gravity_L2P(&c->multipole->pot, c->multipole->CoM, gp);
104
      }
105
    }
106
  }
107
108

  if (timer) TIMER_TOC(timer_dograv_down);
109
110
}

111
112
113
114
115
116
117
118
/**
 * @brief Computes the interaction of the field tensor in a cell with the
 * multipole of another cell.
 *
 * @param r The #runner.
 * @param ci The #cell with field tensor to interact.
 * @param cj The #cell with the multipole.
 */
119
120
void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci,
                           struct cell *restrict cj) {
121

122
  /* Some constants */
123
  const struct engine *e = r->e;
124
125
126
  const struct space *s = e->s;
  const int periodic = s->periodic;
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
127
  const struct gravity_props *props = e->gravity_properties;
128
129
  // const float a_smooth = e->gravity_properties->a_smooth;
  // const float rlr_inv = 1. / (a_smooth * ci->super->width[0]);
130
131
132

  TIMER_TIC;

133
134
135
  /* Anything to do here? */
  if (!cell_is_active(ci, e)) return;

136
137
138
  /* Short-cut to the multipole */
  const struct multipole *multi_j = &cj->multipole->m_pole;

139
#ifdef SWIFT_DEBUG_CHECKS
140
141
  if (ci == cj) error("Interacting a cell with itself using M2L");

142
  if (multi_j->M_000 == 0.f) error("Multipole does not seem to have been set.");
143

144
145
  if (ci->multipole->pot.ti_init != e->ti_current)
    error("ci->grav tensor not initialised.");
146
#endif
147

148
149
150
151
  /* Do we need to drift the multipole ? */
  if (cj->ti_old_multipole != e->ti_current) cell_drift_multipole(cj, e);

  /* Let's interact at this level */
152
  gravity_M2L(&ci->multipole->pot, multi_j, ci->multipole->CoM,
153
              cj->multipole->CoM, props, periodic, dim);
154
155
156
157

  TIMER_TOC(timer_dopair_grav_mm);
}

158
159
160
161
162
163
164
static INLINE void runner_dopair_grav_pp_full(const struct engine *e,
                                              struct gravity_cache *ci_cache,
                                              struct gravity_cache *cj_cache,
                                              int gcount_i, int gcount_j,
                                              int gcount_padded_j,
                                              struct gpart *restrict gparts_i,
                                              struct gpart *restrict gparts_j) {
Matthieu Schaller's avatar
Matthieu Schaller committed
165

166
167
  /* Loop over all particles in ci... */
  for (int pid = 0; pid < gcount_i; pid++) {
168

169
170
    /* Skip inactive particles */
    if (!ci_cache->active[pid]) continue;
171

172
173
    /* Skip particle that can use the multipole */
    if (ci_cache->use_mpole[pid]) continue;
174

175
176
177
178
#ifdef SWIFT_DEBUG_CHECKS
    if (!gpart_is_active(&gparts_i[pid], e))
      error("Active particle went through the cache");
#endif
179

180
181
182
    const float x_i = ci_cache->x[pid];
    const float y_i = ci_cache->y[pid];
    const float z_i = ci_cache->z[pid];
183

184
185
186
187
188
    /* Some powers of the softening length */
    const float h_i = ci_cache->epsilon[pid];
    const float h2_i = h_i * h_i;
    const float h_inv_i = 1.f / h_i;
    const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
189

190
191
    /* Local accumulators for the acceleration */
    float a_x = 0.f, a_y = 0.f, a_z = 0.f;
192

193
194
195
196
197
198
    /* Make the compiler understand we are in happy vectorization land */
    swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT);
    swift_assume_size(gcount_padded_j, VEC_SIZE);
199

200
201
    /* Loop over every particle in the other cell. */
    for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
202

203
204
205
206
207
      /* Get info about j */
      const float x_j = cj_cache->x[pjd];
      const float y_j = cj_cache->y[pjd];
      const float z_j = cj_cache->z[pjd];
      const float mass_j = cj_cache->m[pjd];
208

209
210
211
212
      /* Compute the pairwise (square) distance. */
      const float dx = x_i - x_j;
      const float dy = y_i - y_j;
      const float dz = z_i - z_j;
213
214
      const float r2 = dx * dx + dy * dy + dz * dz;

215
216
#ifdef SWIFT_DEBUG_CHECKS
      if (r2 == 0.f) error("Interacting particles with 0 distance");
217

218
219
220
221
222
223
      /* Check that particles have been drifted to the current time */
      if (gparts_i[pid].ti_drift != e->ti_current)
        error("gpi not drifted to current time");
      if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
        error("gpj not drifted to current time");
#endif
224

225
226
227
228
229
230
231
232
      /* Interact! */
      float f_ij;
      runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij);

      /* Store it back */
      a_x -= f_ij * dx;
      a_y -= f_ij * dy;
      a_z -= f_ij * dz;
233
234

#ifdef SWIFT_DEBUG_CHECKS
235
236
      /* Update the interaction counter if it's not a padded gpart */
      if (pjd < gcount_j) gparts_i[pid].num_interacted++;
237
#endif
238
    }
239

240
241
242
243
244
245
    /* Store everything back in cache */
    ci_cache->a_x[pid] = a_x;
    ci_cache->a_y[pid] = a_y;
    ci_cache->a_z[pid] = a_z;
  }
}
246

247
248
249
250
251
static INLINE void runner_dopair_grav_pp_truncated(
    const struct engine *e, const float rlr_inv, struct gravity_cache *ci_cache,
    struct gravity_cache *cj_cache, int gcount_i, int gcount_j,
    int gcount_padded_j, struct gpart *restrict gparts_i,
    struct gpart *restrict gparts_j) {
252

253
254
  /* Loop over all particles in ci... */
  for (int pid = 0; pid < gcount_i; pid++) {
255

256
257
    /* Skip inactive particles */
    if (!ci_cache->active[pid]) continue;
258

259
260
    /* Skip particle that can use the multipole */
    if (ci_cache->use_mpole[pid]) continue;
261
262

#ifdef SWIFT_DEBUG_CHECKS
263
264
    if (!gpart_is_active(&gparts_i[pid], e))
      error("Active particle went through the cache");
265
266
#endif

267
268
269
    const float x_i = ci_cache->x[pid];
    const float y_i = ci_cache->y[pid];
    const float z_i = ci_cache->z[pid];
270

271
272
273
274
275
    /* Some powers of the softening length */
    const float h_i = ci_cache->epsilon[pid];
    const float h2_i = h_i * h_i;
    const float h_inv_i = 1.f / h_i;
    const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
276

277
278
    /* Local accumulators for the acceleration */
    float a_x = 0.f, a_y = 0.f, a_z = 0.f;
279

280
281
282
283
284
285
    /* Make the compiler understand we are in happy vectorization land */
    swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT);
    swift_assume_size(gcount_padded_j, VEC_SIZE);
286

287
288
    /* Loop over every particle in the other cell. */
    for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
289

290
      /* Get info about j */
291
292
293
      const float x_j = cj_cache->x[pjd];
      const float y_j = cj_cache->y[pjd];
      const float z_j = cj_cache->z[pjd];
294
      const float mass_j = cj_cache->m[pjd];
Matthieu Schaller's avatar
Matthieu Schaller committed
295

296
297
298
299
      /* Compute the pairwise (square) distance. */
      const float dx = x_i - x_j;
      const float dy = y_i - y_j;
      const float dz = z_i - z_j;
300
301
      const float r2 = dx * dx + dy * dy + dz * dz;

302
303
#ifdef SWIFT_DEBUG_CHECKS
      if (r2 == 0.f) error("Interacting particles with 0 distance");
304

305
306
307
308
309
310
      /* Check that particles have been drifted to the current time */
      if (gparts_i[pid].ti_drift != e->ti_current)
        error("gpi not drifted to current time");
      if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
        error("gpj not drifted to current time");
#endif
311

312
313
314
315
316
317
318
319
320
      /* Interact! */
      float f_ij;
      runner_iact_grav_pp_truncated(r2, h2_i, h_inv_i, h_inv3_i, mass_j,
                                    rlr_inv, &f_ij);

      /* Store it back */
      a_x -= f_ij * dx;
      a_y -= f_ij * dy;
      a_z -= f_ij * dz;
321
322

#ifdef SWIFT_DEBUG_CHECKS
323
324
      /* Update the interaction counter if it's not a padded gpart */
      if (pjd < gcount_j) gparts_i[pid].num_interacted++;
325
#endif
326
    }
327

328
329
330
331
332
333
    /* Store everything back in cache */
    ci_cache->a_x[pid] = a_x;
    ci_cache->a_y[pid] = a_y;
    ci_cache->a_z[pid] = a_z;
  }
}
334

335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
static INLINE void runner_dopair_grav_pm(
    const struct engine *restrict e, struct gravity_cache *ci_cache,
    int gcount_i, int gcount_padded_i, struct gpart *restrict gparts_i,
    const float CoM_j[3], const struct multipole *restrict multi_j,
    struct cell *restrict cj) {

  /* Make the compiler understand we are in happy vectorization land */
  swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT);
  swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT);
  swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT);
  swift_align_information(ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT);
  swift_align_information(ci_cache->a_x, SWIFT_CACHE_ALIGNMENT);
  swift_align_information(ci_cache->a_y, SWIFT_CACHE_ALIGNMENT);
  swift_align_information(ci_cache->a_z, SWIFT_CACHE_ALIGNMENT);
  swift_align_information(ci_cache->active, SWIFT_CACHE_ALIGNMENT);
  swift_align_information(ci_cache->use_mpole, SWIFT_CACHE_ALIGNMENT);
  swift_assume_size(gcount_padded_i, VEC_SIZE);
352

353
354
  /* Loop over all particles in ci... */
  for (int pid = 0; pid < gcount_padded_i; pid++) {
355

356
357
    /* Skip inactive particles */
    if (!ci_cache->active[pid]) continue;
358

359
360
    /* Skip particle that cannot use the multipole */
    if (!ci_cache->use_mpole[pid]) continue;
361
362

#ifdef SWIFT_DEBUG_CHECKS
363
364
    if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e))
      error("Active particle went through the cache");
365
366
#endif

367
368
369
370
371
372
373
374
375
376
377
378
379
    const float x_i = ci_cache->x[pid];
    const float y_i = ci_cache->y[pid];
    const float z_i = ci_cache->z[pid];

    /* Some powers of the softening length */
    const float h_i = ci_cache->epsilon[pid];
    const float h_inv_i = 1.f / h_i;

    /* Distance to the Multipole */
    const float dx = x_i - CoM_j[0];
    const float dy = y_i - CoM_j[1];
    const float dz = z_i - CoM_j[2];
    const float r2 = dx * dx + dy * dy + dz * dz;
Matthieu Schaller's avatar
Matthieu Schaller committed
380

381
382
383
384
385
386
387
388
389
    /* Interact! */
    float f_x, f_y, f_z;
    runner_iact_grav_pm(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y,
                        &f_z);

    /* Store it back */
    ci_cache->a_x[pid] = f_x;
    ci_cache->a_y[pid] = f_y;
    ci_cache->a_z[pid] = f_z;
390
391

#ifdef SWIFT_DEBUG_CHECKS
392
393
394
    /* Update the interaction counter */
    if (pid < gcount_i)
      gparts_i[pid].num_interacted += cj->multipole->m_pole.num_gpart;
395
396
#endif
  }
397
398
399
400
}

/**
 * @brief Computes the interaction of all the particles in a cell with all the
401
 * particles of another cell (switching function between full and truncated).
402
403
404
405
406
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The other #cell.
 */
407
void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) {
408

409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
  const struct engine *e = r->e;

  TIMER_TIC;

  /* Anything to do here? */
  const int ci_active = cell_is_active(ci, e);
  const int cj_active = cell_is_active(cj, e);
  if (!ci_active && !cj_active) return;

  /* Check that we are not doing something stupid */
  if (ci->split || cj->split) error("Running P-P on splitable cells");

  /* Let's start by drifting things */
  if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts");
  if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts");

  /* Recover some useful constants */
426
  const struct space *s = e->s;
427
  const int periodic = s->periodic;
428
  const double cell_width = s->width[0];
429
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
430
  const float theta_crit2 = e->gravity_properties->theta_crit2;
431
432
  const double a_smooth = e->gravity_properties->a_smooth;
  const double r_cut_min = e->gravity_properties->r_cut_min;
433
  const double rlr = cell_width * a_smooth;
434
  const double min_trunc = rlr * r_cut_min;
435
436
437
438
439
440
  const float rlr_inv = 1. / rlr;

  /* Caches to play with */
  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
  struct gravity_cache *const cj_cache = &r->cj_gravity_cache;

441
442
443
444
445
446
447
448
449
  /* Centre of the cell pais */
  const double loc_mean[3] = {0.5 * (ci->loc[0] + cj->loc[0]),
                              0.5 * (ci->loc[1] + cj->loc[1]),
                              0.5 * (ci->loc[2] + cj->loc[2])};
  // MATTHIEU deal with periodicity

  /* Star by constructing particle caches */

  /* Computed the padded counts */
450
451
  const int gcount_i = ci->gcount;
  const int gcount_j = cj->gcount;
452
453
  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
  const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
454
455

  /* Check that we fit in cache */
Matthieu Schaller's avatar
Matthieu Schaller committed
456
457
458
  if (gcount_i > ci_cache->count || gcount_j > cj_cache->count)
    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i,
          gcount_j);
459

460
461
462
  /* Recover the multipole info and shift the CoM locations */
  const float rmax_i = ci->multipole->r_max;
  const float rmax_j = cj->multipole->r_max;
463
464
  const float rmax2_i = rmax_i * rmax_i;
  const float rmax2_j = rmax_j * rmax_j;
465
466
  const struct multipole *multi_i = &ci->multipole->m_pole;
  const struct multipole *multi_j = &cj->multipole->m_pole;
467
468
469
470
471
472
  const float CoM_i[3] = {ci->multipole->CoM[0] - loc_mean[0],
                          ci->multipole->CoM[1] - loc_mean[1],
                          ci->multipole->CoM[2] - loc_mean[2]};
  const float CoM_j[3] = {cj->multipole->CoM[0] - loc_mean[0],
                          cj->multipole->CoM[1] - loc_mean[1],
                          cj->multipole->CoM[2] - loc_mean[2]};
473
  // MATTHIEU deal with periodicity
474

475
476
477
478
479
480
481
  /* Fill the caches */
  gravity_cache_populate(e->max_active_bin, ci_cache, ci->gparts, gcount_i,
                         gcount_padded_i, loc_mean, CoM_j, rmax2_j,
                         theta_crit2);
  gravity_cache_populate(e->max_active_bin, cj_cache, cj->gparts, gcount_j,
                         gcount_padded_j, loc_mean, CoM_i, rmax2_i,
                         theta_crit2);
482

483
484
  /* Can we use the Newtonian version or do we need the truncated one ? */
  if (!periodic) {
485

486
    /* Not periodic -> Can always use Newtonian potential */
Matthieu Schaller's avatar
Matthieu Schaller committed
487

488
489
    /* Let's updated the active cell(s) only */
    if (ci_active) {
490

491
492
493
      /* First the P2P */
      runner_dopair_grav_pp_full(e, ci_cache, cj_cache, gcount_i, gcount_j,
                                 gcount_padded_j, ci->gparts, cj->gparts);
494

495
496
497
      /* Then the M2P */
      runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i, ci->gparts,
                            CoM_j, multi_j, cj);
498
    }
499
500
501
502
503
504
505
506
    if (cj_active) {

      /* First the P2P */
      runner_dopair_grav_pp_full(e, cj_cache, ci_cache, gcount_j, gcount_i,
                                 gcount_padded_i, cj->gparts, ci->gparts);
      /* Then the M2P */
      runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j, cj->gparts,
                            CoM_i, multi_i, ci);
507
    }
508

509
  } else { /* Periodic BC */
510

511
    // MATTHIEU deal with periodicity
512
513

    /* Get the relative distance between the pairs, wrapping. */
514
    double shift[3] = {0.0, 0.0, 0.0};
515
516
517
518
519
520
521
    shift[0] = nearest(cj->loc[0] - ci->loc[0], dim[0]);
    shift[1] = nearest(cj->loc[1] - ci->loc[1], dim[1]);
    shift[2] = nearest(cj->loc[2] - ci->loc[2], dim[2]);
    const double r2 =
        shift[0] * shift[0] + shift[1] * shift[1] + shift[2] * shift[2];

    /* Get the maximal distance between any two particles */
522
    const double max_r = sqrt(r2) + rmax_i + rmax_j;
523
524

    /* Do we need to use the truncated interactions ? */
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
    if (max_r > min_trunc) {

      /* Periodic but far-away cells must use the truncated potential */

      /* Let's updated the active cell(s) only */
      if (ci_active)
        runner_dopair_grav_pp_truncated(e, rlr_inv, ci_cache, cj_cache,
                                        gcount_i, gcount_j, gcount_padded_j,
                                        ci->gparts, cj->gparts);
      if (cj_active)
        runner_dopair_grav_pp_truncated(e, rlr_inv, cj_cache, ci_cache,
                                        gcount_j, gcount_i, gcount_padded_i,
                                        cj->gparts, ci->gparts);
    } else {

      /* Periodic but close-by cells can use the full Newtonian potential */

      /* Let's updated the active cell(s) only */
      if (ci_active)
        runner_dopair_grav_pp_full(e, ci_cache, cj_cache, gcount_i, gcount_j,
                                   gcount_padded_j, ci->gparts, cj->gparts);
      if (cj_active)
        runner_dopair_grav_pp_full(e, cj_cache, ci_cache, gcount_j, gcount_i,
                                   gcount_padded_i, cj->gparts, ci->gparts);
    }
550
  }
551

552
553
554
555
  /* Write back to the particles */
  if (ci_active) gravity_cache_write_back(ci_cache, ci->gparts, gcount_i);
  if (cj_active) gravity_cache_write_back(cj_cache, cj->gparts, gcount_j);

556
  TIMER_TOC(timer_dopair_grav_pp);
557
558
}

559
/**
560
561
 * @brief Computes the interaction of all the particles in a cell using the
 * full Newtonian potential.
562
563
 *
 * @param r The #runner.
Matthieu Schaller's avatar
Matthieu Schaller committed
564
 * @param c The #cell.
565
566
567
 *
 * @todo Use a local cache for the particles.
 */
568
void runner_doself_grav_pp_full(struct runner *r, struct cell *c) {
569

570
571
572
573
574
575
576
577
  /* Some constants */
  const struct engine *const e = r->e;
  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;

  /* Cell properties */
  const int gcount = c->gcount;
  struct gpart *restrict gparts = c->gparts;
  const int c_active = cell_is_active(c, e);
578
579
580
  const double loc[3] = {c->loc[0] + 0.5 * c->width[0],
                         c->loc[1] + 0.5 * c->width[1],
                         c->loc[2] + 0.5 * c->width[2]};
581
582
583
584
585
586

  /* Anything to do here ?*/
  if (!c_active) return;

  /* Check that we fit in cache */
  if (gcount > ci_cache->count)
587
    error("Not enough space in the cache! gcount=%d", gcount);
588
589
590
591

  /* Computed the padded counts */
  const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE;

592
593
  gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount,
                                  gcount_padded, loc);
594
595
596
597
598

  /* Ok... Here we go ! */

  /* Loop over all particles in ci... */
  for (int pid = 0; pid < gcount; pid++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
599

600
    /* Skip inactive particles */
601
    if (!ci_cache->active[pid]) continue;
Matthieu Schaller's avatar
Matthieu Schaller committed
602

603
604
605
    const float x_i = ci_cache->x[pid];
    const float y_i = ci_cache->y[pid];
    const float z_i = ci_cache->z[pid];
Matthieu Schaller's avatar
Matthieu Schaller committed
606

607
608
609
610
611
    /* Some powers of the softening length */
    const float h_i = ci_cache->epsilon[pid];
    const float h2_i = h_i * h_i;
    const float h_inv_i = 1.f / h_i;
    const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
Matthieu Schaller's avatar
Matthieu Schaller committed
612

613
614
    /* Local accumulators for the acceleration */
    float a_x = 0.f, a_y = 0.f, a_z = 0.f;
Matthieu Schaller's avatar
Matthieu Schaller committed
615

616
617
618
619
620
621
    /* Make the compiler understand we are in happy vectorization land */
    swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(ci_cache->m, SWIFT_CACHE_ALIGNMENT);
    swift_assume_size(gcount_padded, VEC_SIZE);
Matthieu Schaller's avatar
Matthieu Schaller committed
622

623
624
    /* Loop over every other particle in the cell. */
    for (int pjd = 0; pjd < gcount_padded; pjd++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
625

626
      /* No self interaction */
Matthieu Schaller's avatar
Matthieu Schaller committed
627
      if (pid == pjd) continue;
628
629
630
631
632
633

      /* Get info about j */
      const float x_j = ci_cache->x[pjd];
      const float y_j = ci_cache->y[pjd];
      const float z_j = ci_cache->z[pjd];
      const float mass_j = ci_cache->m[pjd];
Matthieu Schaller's avatar
Matthieu Schaller committed
634

635
636
637
638
639
      /* Compute the pairwise (square) distance. */
      const float dx = x_i - x_j;
      const float dy = y_i - y_j;
      const float dz = z_i - z_j;
      const float r2 = dx * dx + dy * dy + dz * dz;
Matthieu Schaller's avatar
Matthieu Schaller committed
640

641
642
#ifdef SWIFT_DEBUG_CHECKS
      if (r2 == 0.f) error("Interacting particles with 0 distance");
Matthieu Schaller's avatar
Matthieu Schaller committed
643

644
645
      /* Check that particles have been drifted to the current time */
      if (gparts[pid].ti_drift != e->ti_current)
Matthieu Schaller's avatar
Matthieu Schaller committed
646
        error("gpi not drifted to current time");
647
      if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current)
Matthieu Schaller's avatar
Matthieu Schaller committed
648
        error("gpj not drifted to current time");
649
#endif
Matthieu Schaller's avatar
Matthieu Schaller committed
650

651
652
653
      /* Interact! */
      float f_ij;
      runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij);
Matthieu Schaller's avatar
Matthieu Schaller committed
654

655
656
657
658
      /* Store it back */
      a_x -= f_ij * dx;
      a_y -= f_ij * dy;
      a_z -= f_ij * dz;
Matthieu Schaller's avatar
Matthieu Schaller committed
659

660
661
662
663
664
#ifdef SWIFT_DEBUG_CHECKS
      /* Update the interaction counter if it's not a padded gpart */
      if (pjd < gcount) gparts[pid].num_interacted++;
#endif
    }
Matthieu Schaller's avatar
Matthieu Schaller committed
665

666
    /* Store everything back in cache */
667
668
669
    ci_cache->a_x[pid] = a_x;
    ci_cache->a_y[pid] = a_y;
    ci_cache->a_z[pid] = a_z;
670
671
  }

672
  /* Write back to the particles */
673
  gravity_cache_write_back(ci_cache, gparts, gcount);
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
}

/**
 * @brief Computes the interaction of all the particles in a cell using the
 * truncated Newtonian potential.
 *
 * @param r The #runner.
 * @param c The #cell.
 *
 * @todo Use a local cache for the particles.
 */
void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) {

  /* Some constants */
  const struct engine *const e = r->e;
  const struct space *s = e->s;
  const double cell_width = s->width[0];
  const double a_smooth = e->gravity_properties->a_smooth;
  const double rlr = cell_width * a_smooth;
  const float rlr_inv = 1. / rlr;

695
696
697
698
699
700
701
  /* Caches to play with */
  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;

  /* Cell properties */
  const int gcount = c->gcount;
  struct gpart *restrict gparts = c->gparts;
  const int c_active = cell_is_active(c, e);
702
703
704
  const double loc[3] = {c->loc[0] + 0.5 * c->width[0],
                         c->loc[1] + 0.5 * c->width[1],
                         c->loc[2] + 0.5 * c->width[2]};
705
706
707
708
709
710

  /* Anything to do here ?*/
  if (!c_active) return;

  /* Check that we fit in cache */
  if (gcount > ci_cache->count)
711
    error("Not enough space in the caches! gcount=%d", gcount);
712
713
714
715

  /* Computed the padded counts */
  const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE;

716
717
  gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount,
                                  gcount_padded, loc);
718
719
720
721
722

  /* Ok... Here we go ! */

  /* Loop over all particles in ci... */
  for (int pid = 0; pid < gcount; pid++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
723

724
    /* Skip inactive particles */
725
    if (!ci_cache->active[pid]) continue;
Matthieu Schaller's avatar
Matthieu Schaller committed
726

727
728
729
    const float x_i = ci_cache->x[pid];
    const float y_i = ci_cache->y[pid];
    const float z_i = ci_cache->z[pid];
Matthieu Schaller's avatar
Matthieu Schaller committed
730

731
732
733
734
735
    /* Some powers of the softening length */
    const float h_i = ci_cache->epsilon[pid];
    const float h2_i = h_i * h_i;
    const float h_inv_i = 1.f / h_i;
    const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
Matthieu Schaller's avatar
Matthieu Schaller committed
736

737
738
    /* Local accumulators for the acceleration */
    float a_x = 0.f, a_y = 0.f, a_z = 0.f;
Matthieu Schaller's avatar
Matthieu Schaller committed
739

740
741
742
743
744
745
    /* Make the compiler understand we are in happy vectorization land */
    swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(ci_cache->m, SWIFT_CACHE_ALIGNMENT);
    swift_assume_size(gcount_padded, VEC_SIZE);
Matthieu Schaller's avatar
Matthieu Schaller committed
746

747
748
    /* Loop over every other particle in the cell. */
    for (int pjd = 0; pjd < gcount_padded; pjd++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
749

750
      /* No self interaction */
Matthieu Schaller's avatar
Matthieu Schaller committed
751
      if (pid == pjd) continue;
752
753
754
755
756
757

      /* Get info about j */
      const float x_j = ci_cache->x[pjd];
      const float y_j = ci_cache->y[pjd];
      const float z_j = ci_cache->z[pjd];
      const float mass_j = ci_cache->m[pjd];
Matthieu Schaller's avatar
Matthieu Schaller committed
758

759
760
761
762
763
      /* Compute the pairwise (square) distance. */
      const float dx = x_i - x_j;
      const float dy = y_i - y_j;
      const float dz = z_i - z_j;
      const float r2 = dx * dx + dy * dy + dz * dz;
Matthieu Schaller's avatar
Matthieu Schaller committed
764

765
766
#ifdef SWIFT_DEBUG_CHECKS
      if (r2 == 0.f) error("Interacting particles with 0 distance");
Matthieu Schaller's avatar
Matthieu Schaller committed
767

768
769
      /* Check that particles have been drifted to the current time */
      if (gparts[pid].ti_drift != e->ti_current)
Matthieu Schaller's avatar
Matthieu Schaller committed
770
        error("gpi not drifted to current time");
771
      if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current)
Matthieu Schaller's avatar
Matthieu Schaller committed
772
        error("gpj not drifted to current time");
773
#endif
Matthieu Schaller's avatar
Matthieu Schaller committed
774

775
776
777
778
      /* Interact! */
      float f_ij;
      runner_iact_grav_pp_truncated(r2, h2_i, h_inv_i, h_inv3_i, mass_j,
                                    rlr_inv, &f_ij);
779
780
781
782
783

      /* Store it back */
      a_x -= f_ij * dx;
      a_y -= f_ij * dy;
      a_z -= f_ij * dz;
Matthieu Schaller's avatar
Matthieu Schaller committed
784

785
786
787
788
789
#ifdef SWIFT_DEBUG_CHECKS
      /* Update the interaction counter if it's not a padded gpart */
      if (pjd < gcount) gparts[pid].num_interacted++;
#endif
    }
Matthieu Schaller's avatar
Matthieu Schaller committed
790

791
    /* Store everything back in cache */
792
793
794
    ci_cache->a_x[pid] = a_x;
    ci_cache->a_y[pid] = a_y;
    ci_cache->a_z[pid] = a_z;
795
796
  }

797
  /* Write back to the particles */
798
  gravity_cache_write_back(ci_cache, gparts, gcount);
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
}

/**
 * @brief Computes the interaction of all the particles in a cell directly
 * (Switching function between truncated and full)
 *
 * @param r The #runner.
 * @param c The #cell.
 */
void runner_doself_grav_pp(struct runner *r, struct cell *c) {

  /* Some properties of the space */
  const struct engine *e = r->e;
  const struct space *s = e->s;
  const int periodic = s->periodic;
  const double cell_width = s->width[0];
  const double a_smooth = e->gravity_properties->a_smooth;
  const double r_cut_min = e->gravity_properties->r_cut_min;
  const double min_trunc = cell_width * r_cut_min * a_smooth;

  TIMER_TIC;

#ifdef SWIFT_DEBUG_CHECKS
  if (c->gcount == 0) error("Doing self gravity on an empty cell !");
#endif

  /* Anything to do here? */
  if (!cell_is_active(c, e)) return;

828
829
830
  /* Check that we are not doing something stupid */
  if (c->split) error("Running P-P on a splitable cell");

831
  /* Do we need to start by drifting things ? */
832
  if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847

  /* Can we use the Newtonian version or do we need the truncated one ? */
  if (!periodic) {
    runner_doself_grav_pp_full(r, c);
  } else {

    /* Get the maximal distance between any two particles */
    const double max_r = 2 * c->multipole->r_max;

    /* Do we need to use the truncated interactions ? */
    if (max_r > min_trunc)
      runner_doself_grav_pp_truncated(r, c);
    else
      runner_doself_grav_pp_full(r, c);
  }
848

849
  TIMER_TOC(timer_doself_grav_pp);
850
851
}

Matthieu Schaller's avatar
Matthieu Schaller committed
852
853
854
855
856
857
858
/**
 * @brief Computes the interaction of all the particles in a cell with all the
 * particles of another cell.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The other #cell.
859
 * @param gettimer Are we timing this ?
Matthieu Schaller's avatar
Matthieu Schaller committed
860
861
862
 *
 * @todo Use a local cache for the particles.
 */
863
864
void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj,
                        int gettimer) {
Matthieu Schaller's avatar
Matthieu Schaller committed
865

866
867
  /* Some constants */
  const struct engine *e = r->e;
868
869
  const struct space *s = e->s;
  const int periodic = s->periodic;
870
  const double cell_width = s->width[0];
871
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
872
  const struct gravity_props *props = e->gravity_properties;
873
  const double theta_crit2 = props->theta_crit2;
874
875
  const double max_distance = props->a_smooth * props->r_cut_max * cell_width;
  const double max_distance2 = max_distance * max_distance;
876

Matthieu Schaller's avatar
Matthieu Schaller committed
877
#ifdef SWIFT_DEBUG_CHECKS
Matthieu Schaller's avatar
Matthieu Schaller committed
878

Matthieu Schaller's avatar
Matthieu Schaller committed
879
880
881
882
  const int gcount_i = ci->gcount;
  const int gcount_j = cj->gcount;

  /* Early abort? */
Matthieu Schaller's avatar
Matthieu Schaller committed
883
884
  if (gcount_i == 0 || gcount_j == 0)
    error("Doing pair gravity on an empty cell !");
Matthieu Schaller's avatar
Matthieu Schaller committed
885
886

  /* Sanity check */
887
  if (ci == cj) error("Pair interaction between a cell and itself.");
888
889
890
891
892

  if (cell_is_active(ci, e) && ci->ti_old_multipole != e->ti_current)
    error("ci->multipole not drifted.");
  if (cell_is_active(cj, e) && cj->ti_old_multipole != e->ti_current)
    error("cj->multipole not drifted.");
893
894
#endif

895
  TIMER_TIC;
896

897
898
  /* Anything to do here? */
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
Matthieu Schaller's avatar
Matthieu Schaller committed
899

900
901
902
  /* Recover the multipole information */
  struct gravity_tensors *const multi_i = ci->multipole;
  struct gravity_tensors *const multi_j = cj->multipole;
903

904
905
906
907
  /* Get the distance between the CoMs */
  double dx = multi_i->CoM[0] - multi_j->CoM[0];
  double dy = multi_i->CoM[1] - multi_j->CoM[1];
  double dz = multi_i->CoM[2] - multi_j->CoM[2];
908

909
910
911
912
913
  /* Apply BC */
  if (periodic) {
    dx = nearest(dx, dim[0]);
    dy = nearest(dy, dim[1]);
    dz = nearest(dz, dim[2]);
914
  }
915
  const double r2 = dx * dx + dy * dy + dz * dz;
916

917
918
  /* Are we beyond the distance where the truncated forces are 0? */
  if (periodic && r2 > max_distance2) {
919

920
921
922
923
924
925
926
927
928
929
930
931
#ifdef SWIFT_DEBUG_CHECKS
    /* Need to account for the interactions we missed */
    if (cell_is_active(ci, e))
      multi_i->pot.num_interacted += multi_j->m_pole.num_gpart;
    if (cell_is_active(cj, e))
      multi_j->pot.num_interacted += multi_i->m_pole.num_gpart;
#endif
    return;
  }

  /* OK, we actually need to compute this pair. Let's find the cheapest
   * option... */
932

933
  /* Can we use M-M interactions ? */
934
935
  if (gravity_multipole_accept(multi_i->r_max, multi_j->r_max, theta_crit2,
                               r2)) {
936

937
    /* MATTHIEU: make a symmetric M-M interaction function ! */
938
939
940
941
942
943
944
945
946
    runner_dopair_grav_mm(r, ci, cj);
    runner_dopair_grav_mm(r, cj, ci);
  }
  /* We have two leaves. Go P-P. */
  else if (!ci->split && !cj->split) {
    runner_dopair_grav_pp(r, ci, cj);
  }
  /* Alright, we'll have to split and recurse. */
  else {
Matthieu Schaller's avatar
Matthieu Schaller committed
947

948
949
    const double ri_max = multi_i->r_max;
    const double rj_max = multi_j->r_max;
950
951
952
953
954
955
956
957
958
959
960
961

    /* Split the larger of the two cells and start over again */
    if (ri_max > rj_max) {

      /* Can we actually split that interaction ? */
      if (ci->split) {

        /* Loop over ci's children */
        for (int k = 0; k < 8; k++) {
          if (ci->progeny[k] != NULL)
            runner_dopair_grav(r, ci->progeny[k], cj, 0);
        }
Matthieu Schaller's avatar
Matthieu Schaller committed
962

963
      } else if (cj->split) {
964
        /* MATTHIEU: This could maybe be replaced by P-M interactions ?  */
965
966

        /* Loop over cj's children */
Matthieu Schaller's avatar
Matthieu Schaller committed
967
        for (int k = 0; k < 8; k++) {
968
969
970
          if (cj->progeny[k] != NULL)
            runner_dopair_grav(r, ci, cj->progeny[k], 0);
        }
Matthieu Schaller's avatar
Matthieu Schaller committed
971

972
      } else {
973
        error("Fundamental error in the logic");
974
975
      }
    } else {
Matthieu Schaller's avatar
Matthieu Schaller committed
976

977
978
979
980
981
982
983
984
      /* Can we actually split that interaction ? */
      if (cj->split) {

        /* Loop over cj's children */
        for (int k = 0; k < 8; k++) {
          if (cj->progeny[k] != NULL)
            runner_dopair_grav(r, ci, cj->progeny[k], 0);
        }
Matthieu Schaller's avatar
Matthieu Schaller committed
985

986
      } else if (ci->split) {
987
        /* MATTHIEU: This could maybe be replaced by P-M interactions ?  */
Matthieu Schaller's avatar
Matthieu Schaller committed
988

989
990
991
992
        /* Loop over ci's children */
        for (int k = 0; k < 8; k++) {
          if (ci->progeny[k] != NULL)
            runner_dopair_grav(r, ci->progeny[k], cj, 0);
Matthieu Schaller's avatar
Matthieu Schaller committed
993
        }
994
995

      } else {
996
        error("Fundamental error in the logic");
Matthieu Schaller's avatar
Matthieu Schaller committed
997
      }
998
    }
Matthieu Schaller's avatar
Matthieu Schaller committed
999
  }
1000

For faster browsing, not all history is shown. View entire blame