runner_doiact_grav.h 52.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
#ifndef SWIFT_RUNNER_DOIACT_GRAV_H
#define SWIFT_RUNNER_DOIACT_GRAV_H

/* Includes. */
#include "cell.h"
#include "gravity.h"
#include "inline.h"
#include "part.h"

/**
 * @brief Recursively propagate the multipoles down the tree by applying the
 * L2L and L2P kernels.
 *
 * @param r The #runner.
 * @param c The #cell we are working on.
 * @param timer Are we timing this ?
 */
37
38
void runner_do_grav_down(struct runner *r, struct cell *c, int timer) {

39
  /* Some constants */
40
  const struct engine *e = r->e;
41
42

  /* Cell properties */
43
44
  struct gpart *gparts = c->gparts;
  const int gcount = c->gcount;
45

46
  TIMER_TIC;
47

48
49
#ifdef SWIFT_DEBUG_CHECKS
  if (c->ti_old_multipole != e->ti_current) error("c->multipole not drifted.");
50
51
  if (c->multipole->pot.ti_init != e->ti_current)
    error("c->field tensor not initialised");
52
53
#endif

54
  if (c->split) { /* Node case */
55

56
    /* Add the field-tensor to all the 8 progenitors */
57
58
59
    for (int k = 0; k < 8; ++k) {
      struct cell *cp = c->progeny[k];

60
61
      /* Do we have a progenitor with any active g-particles ? */
      if (cp != NULL && cell_is_active(cp, e)) {
62

63
64
65
#ifdef SWIFT_DEBUG_CHECKS
        if (cp->ti_old_multipole != e->ti_current)
          error("cp->multipole not drifted.");
66
67
        if (cp->multipole->pot.ti_init != e->ti_current)
          error("cp->field tensor not initialised");
68
#endif
69
        struct grav_tensor shifted_tensor;
70

71
        /* Shift the field tensor */
72
73
        gravity_L2L(&shifted_tensor, &c->multipole->pot, cp->multipole->CoM,
                    c->multipole->CoM);
74

75
        /* Add it to this level's tensor */
76
        gravity_field_tensors_add(&cp->multipole->pot, &shifted_tensor);
77

78
        /* Recurse */
79
        runner_do_grav_down(r, cp, 0);
80
81
82
      }
    }

83
  } else { /* Leaf case */
84

85
86
    if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts");

87
88
    /* Apply accelerations to the particles */
    for (int i = 0; i < gcount; ++i) {
89
90

      /* Get a handle on the gpart */
91
      struct gpart *gp = &gparts[i];
92
93

      /* Update if active */
94
95
96
97
98
99
100
101
      if (gpart_is_active(gp, e)) {

#ifdef SWIFT_DEBUG_CHECKS
        /* Check that particles have been drifted to the current time */
        if (gp->ti_drift != e->ti_current)
          error("gpart not drifted to current time");
#endif

102
        /* Apply the kernel */
103
        gravity_L2P(&c->multipole->pot, c->multipole->CoM, gp);
104
      }
105
    }
106
  }
107
108

  if (timer) TIMER_TOC(timer_dograv_down);
109
110
}

111
112
113
114
115
116
117
118
/**
 * @brief Computes the interaction of the field tensor in a cell with the
 * multipole of another cell.
 *
 * @param r The #runner.
 * @param ci The #cell with field tensor to interact.
 * @param cj The #cell with the multipole.
 */
119
120
void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci,
                           struct cell *restrict cj) {
121

122
  /* Some constants */
123
  const struct engine *e = r->e;
124
125
126
  const struct space *s = e->s;
  const int periodic = s->periodic;
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
127
  const struct gravity_props *props = e->gravity_properties;
128
129
  // const float a_smooth = e->gravity_properties->a_smooth;
  // const float rlr_inv = 1. / (a_smooth * ci->super->width[0]);
130
131
132

  TIMER_TIC;

133
134
135
  /* Anything to do here? */
  if (!cell_is_active(ci, e)) return;

136
137
138
  /* Short-cut to the multipole */
  const struct multipole *multi_j = &cj->multipole->m_pole;

139
#ifdef SWIFT_DEBUG_CHECKS
140
141
  if (ci == cj) error("Interacting a cell with itself using M2L");

142
  if (multi_j->M_000 == 0.f) error("Multipole does not seem to have been set.");
143

144
145
  if (ci->multipole->pot.ti_init != e->ti_current)
    error("ci->grav tensor not initialised.");
146
#endif
147

148
149
150
151
  /* Do we need to drift the multipole ? */
  if (cj->ti_old_multipole != e->ti_current) cell_drift_multipole(cj, e);

  /* Let's interact at this level */
152
  gravity_M2L(&ci->multipole->pot, multi_j, ci->multipole->CoM,
153
              cj->multipole->CoM, props, periodic, dim);
154
155
156
157

  TIMER_TOC(timer_dopair_grav_mm);
}

158
159
/**
 * @brief Computes the interaction of all the particles in a cell with all the
160
 * particles of another cell using the full Newtonian potential
161
162
163
164
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The other #cell.
165
166
 * @param shift The distance vector (periodically wrapped) between the cell
 * centres.
167
 */
168
169
void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci,
                                struct cell *cj, double shift[3]) {
170

171
172
173
174
175
176
177
178
179
180
181
182
  /* Some constants */
  const struct engine *const e = r->e;
  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
  struct gravity_cache *const cj_cache = &r->cj_gravity_cache;

  /* Cell properties */
  const int gcount_i = ci->gcount;
  const int gcount_j = cj->gcount;
  struct gpart *restrict gparts_i = ci->gparts;
  struct gpart *restrict gparts_j = cj->gparts;
  const int ci_active = cell_is_active(ci, e);
  const int cj_active = cell_is_active(cj, e);
183
184
185
186
187
  const double loc_i[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
  const double loc_j[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};
  const double loc_mean[3] = {0.5 * (loc_i[0] + loc_j[0]),
                              0.5 * (loc_i[1] + loc_j[1]),
                              0.5 * (loc_i[2] + loc_j[2])};
Matthieu Schaller's avatar
Matthieu Schaller committed
188

189
190
191
192
  /* Anything to do here ?*/
  if (!ci_active && !cj_active) return;

  /* Check that we fit in cache */
Matthieu Schaller's avatar
Matthieu Schaller committed
193
194
195
  if (gcount_i > ci_cache->count || gcount_j > cj_cache->count)
    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i,
          gcount_j);
196
197
198
199
200
201

  /* Computed the padded counts */
  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
  const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;

  /* Fill the caches */
202
203
204
205
  gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i,
                         loc_mean);
  gravity_cache_populate(cj_cache, gparts_j, gcount_j, gcount_padded_j,
                         loc_mean);
206
207
208

  /* Ok... Here we go ! */

Matthieu Schaller's avatar
Matthieu Schaller committed
209
  if (ci_active) {
210
211
212
213

    /* Loop over all particles in ci... */
    for (int pid = 0; pid < gcount_i; pid++) {

214
215
216
      /* Skip inactive particles */
      if (!gpart_is_active(&gparts_i[pid], e)) continue;

217
218
219
      const float x_i = ci_cache->x[pid];
      const float y_i = ci_cache->y[pid];
      const float z_i = ci_cache->z[pid];
Matthieu Schaller's avatar
Matthieu Schaller committed
220

221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
      /* Some powers of the softening length */
      const float h_i = ci_cache->epsilon[pid];
      const float h2_i = h_i * h_i;
      const float h_inv_i = 1.f / h_i;
      const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;

      /* Local accumulators for the acceleration */
      float a_x = 0.f, a_y = 0.f, a_z = 0.f;

      /* Make the compiler understand we are in happy vectorization land */
      swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT);
      swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT);
      swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT);
      swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT);
      swift_assume_size(gcount_padded_j, VEC_SIZE);

      /* Loop over every particle in the other cell. */
      for (int pjd = 0; pjd < gcount_padded_j; pjd++) {

Matthieu Schaller's avatar
Matthieu Schaller committed
240
241
242
243
244
        /* Get info about j */
        const float x_j = cj_cache->x[pjd];
        const float y_j = cj_cache->y[pjd];
        const float z_j = cj_cache->z[pjd];
        const float mass_j = cj_cache->m[pjd];
245
246

        /* Compute the pairwise (square) distance. */
Matthieu Schaller's avatar
Matthieu Schaller committed
247
248
249
250
        const float dx = x_i - x_j;
        const float dy = y_i - y_j;
        const float dz = z_i - z_j;
        const float r2 = dx * dx + dy * dy + dz * dz;
251
252

#ifdef SWIFT_DEBUG_CHECKS
Matthieu Schaller's avatar
Matthieu Schaller committed
253
        if (r2 == 0.f) error("Interacting particles with 0 distance");
254
255
256
257
258
259
260
261

        /* Check that particles have been drifted to the current time */
        if (gparts_i[pid].ti_drift != e->ti_current)
          error("gpi not drifted to current time");
        if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
          error("gpj not drifted to current time");
#endif

Matthieu Schaller's avatar
Matthieu Schaller committed
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
        /* Get the inverse distance */
        const float r_inv = 1.f / sqrtf(r2);

        float f_ij, W_ij;

        if (r2 >= h2_i) {

          /* Get Newtonian gravity */
          f_ij = mass_j * r_inv * r_inv * r_inv;

        } else {

          const float r = r2 * r_inv;
          const float ui = r * h_inv_i;

          kernel_grav_eval(ui, &W_ij);

          /* Get softened gravity */
          f_ij = mass_j * h_inv3_i * W_ij;
        }

        /* Store it back */
        a_x -= f_ij * dx;
        a_y -= f_ij * dy;
        a_z -= f_ij * dz;
287
288

#ifdef SWIFT_DEBUG_CHECKS
289
290
        /* Update the interaction counter if it's not a padded gpart */
        if (pjd < gcount_j) gparts_i[pid].num_interacted++;
291
292
293
294
#endif
      }

      /* Store everything back in cache */
295
296
297
      ci_cache->a_x[pid] = a_x;
      ci_cache->a_y[pid] = a_y;
      ci_cache->a_z[pid] = a_z;
298
299
300
301
    }
  }

  /* Now do the opposite loop */
Matthieu Schaller's avatar
Matthieu Schaller committed
302
  if (cj_active) {
303
304
305
306

    /* Loop over all particles in ci... */
    for (int pjd = 0; pjd < gcount_j; pjd++) {

307
308
309
      /* Skip inactive particles */
      if (!gpart_is_active(&gparts_j[pjd], e)) continue;

310
311
312
      const float x_j = cj_cache->x[pjd];
      const float y_j = cj_cache->y[pjd];
      const float z_j = cj_cache->z[pjd];
Matthieu Schaller's avatar
Matthieu Schaller committed
313

314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
      /* Some powers of the softening length */
      const float h_j = cj_cache->epsilon[pjd];
      const float h2_j = h_j * h_j;
      const float h_inv_j = 1.f / h_j;
      const float h_inv3_j = h_inv_j * h_inv_j * h_inv_j;

      /* Local accumulators for the acceleration */
      float a_x = 0.f, a_y = 0.f, a_z = 0.f;

      /* Make the compiler understand we are in happy vectorization land */
      swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT);
      swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT);
      swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT);
      swift_align_information(ci_cache->m, SWIFT_CACHE_ALIGNMENT);
      swift_assume_size(gcount_padded_i, VEC_SIZE);

      /* Loop over every particle in the other cell. */
      for (int pid = 0; pid < gcount_padded_i; pid++) {

Matthieu Schaller's avatar
Matthieu Schaller committed
333
334
335
336
337
        /* Get info about j */
        const float x_i = ci_cache->x[pid];
        const float y_i = ci_cache->y[pid];
        const float z_i = ci_cache->z[pid];
        const float mass_i = ci_cache->m[pid];
338
339

        /* Compute the pairwise (square) distance. */
Matthieu Schaller's avatar
Matthieu Schaller committed
340
341
342
343
        const float dx = x_j - x_i;
        const float dy = y_j - y_i;
        const float dz = z_j - z_i;
        const float r2 = dx * dx + dy * dy + dz * dz;
344
345

#ifdef SWIFT_DEBUG_CHECKS
Matthieu Schaller's avatar
Matthieu Schaller committed
346
        if (r2 == 0.f) error("Interacting particles with 0 distance");
347
348
349
350
351
352
353
354

        /* Check that particles have been drifted to the current time */
        if (gparts_j[pjd].ti_drift != e->ti_current)
          error("gpj not drifted to current time");
        if (pid < gcount_i && gparts_i[pid].ti_drift != e->ti_current)
          error("gpi not drifted to current time");
#endif

Matthieu Schaller's avatar
Matthieu Schaller committed
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
        /* Get the inverse distance */
        const float r_inv = 1.f / sqrtf(r2);

        float f_ji, W_ji;

        if (r2 >= h2_j) {

          /* Get Newtonian gravity */
          f_ji = mass_i * r_inv * r_inv * r_inv;

        } else {

          const float r = r2 * r_inv;
          const float uj = r * h_inv_j;

          kernel_grav_eval(uj, &W_ji);

          /* Get softened gravity */
          f_ji = mass_i * h_inv3_j * W_ji;
        }

        /* Store it back */
        a_x -= f_ji * dx;
        a_y -= f_ji * dy;
        a_z -= f_ji * dz;
380
381

#ifdef SWIFT_DEBUG_CHECKS
382
383
        /* Update the interaction counter if it's not a padded gpart */
        if (pid < gcount_i) gparts_j[pjd].num_interacted++;
384
385
386
387
#endif
      }

      /* Store everything back in cache */
388
389
390
      cj_cache->a_x[pjd] = a_x;
      cj_cache->a_y[pjd] = a_y;
      cj_cache->a_z[pjd] = a_z;
391
392
393
394
    }
  }

  /* Write back to the particles */
Matthieu Schaller's avatar
Matthieu Schaller committed
395
396
397
  if (ci_active) gravity_cache_write_back(ci_cache, gparts_i, gcount_i);
  if (cj_active) gravity_cache_write_back(cj_cache, gparts_j, gcount_j);

398
399
#ifdef MATTHIEU_OLD_STUFF

400
  /* Some constants */
401
  const struct engine *const e = r->e;
402
403

  /* Cell properties */
404
405
406
407
408
  const int gcount_i = ci->gcount;
  const int gcount_j = cj->gcount;
  struct gpart *restrict gparts_i = ci->gparts;
  struct gpart *restrict gparts_j = cj->gparts;

409
  /* MATTHIEU: Should we use local DP accumulators ? */
410

411
412
413
  /* Loop over all particles in ci... */
  if (cell_is_active(ci, e)) {
    for (int pid = 0; pid < gcount_i; pid++) {
414

415
416
      /* Get a hold of the ith part in ci. */
      struct gpart *restrict gpi = &gparts_i[pid];
417

418
      if (!gpart_is_active(gpi, e)) continue;
Matthieu Schaller's avatar
Matthieu Schaller committed
419

420
421
422
      /* Apply boundary condition */
      const double pix[3] = {gpi->x[0] - shift[0], gpi->x[1] - shift[1],
                             gpi->x[2] - shift[2]};
Matthieu Schaller's avatar
Matthieu Schaller committed
423

424
425
      /* Loop over every particle in the other cell. */
      for (int pjd = 0; pjd < gcount_j; pjd++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
426

427
428
        /* Get a hold of the jth part in cj. */
        const struct gpart *restrict gpj = &gparts_j[pjd];
Matthieu Schaller's avatar
Matthieu Schaller committed
429

430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
        /* Compute the pairwise distance. */
        const float dx[3] = {pix[0] - gpj->x[0],   // x
                             pix[1] - gpj->x[1],   // y
                             pix[2] - gpj->x[2]};  // z
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];

#ifdef SWIFT_DEBUG_CHECKS
        /* Check that particles have been drifted to the current time */
        if (gpi->ti_drift != e->ti_current)
          error("gpi not drifted to current time");
        if (gpj->ti_drift != e->ti_current)
          error("gpj not drifted to current time");
#endif

        /* Interact ! */
        runner_iact_grav_pp_nonsym(r2, dx, gpi, gpj);
Matthieu Schaller's avatar
Matthieu Schaller committed
446

447
448
449
450
451
#ifdef SWIFT_DEBUG_CHECKS
        gpi->num_interacted++;
#endif
      }
    }
452
  }
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484

  /* Loop over all particles in cj... */
  if (cell_is_active(cj, e)) {
    for (int pjd = 0; pjd < gcount_j; pjd++) {

      /* Get a hold of the ith part in ci. */
      struct gpart *restrict gpj = &gparts_j[pjd];

      if (!gpart_is_active(gpj, e)) continue;

      /* Apply boundary condition */
      const double pjx[3] = {gpj->x[0] + shift[0], gpj->x[1] + shift[1],
                             gpj->x[2] + shift[2]};

      /* Loop over every particle in the other cell. */
      for (int pid = 0; pid < gcount_i; pid++) {

        /* Get a hold of the ith part in ci. */
        const struct gpart *restrict gpi = &gparts_i[pid];

        /* Compute the pairwise distance. */
        const float dx[3] = {pjx[0] - gpi->x[0],   // x
                             pjx[1] - gpi->x[1],   // y
                             pjx[2] - gpi->x[2]};  // z
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];

#ifdef SWIFT_DEBUG_CHECKS
        /* Check that particles have been drifted to the current time */
        if (gpi->ti_drift != e->ti_current)
          error("gpi not drifted to current time");
        if (gpj->ti_drift != e->ti_current)
          error("gpj not drifted to current time");
485
#endif
Matthieu Schaller's avatar
Matthieu Schaller committed
486

487
488
489
490
491
492
493
494
        /* Interact ! */
        runner_iact_grav_pp_nonsym(r2, dx, gpj, gpi);

#ifdef SWIFT_DEBUG_CHECKS
        gpj->num_interacted++;
#endif
      }
    }
495
  }
496
#endif
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
}

/**
 * @brief Computes the interaction of all the particles in a cell with all the
 * particles of another cell using the truncated Newtonian potential
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The other #cell.
 * @param shift The distance vector (periodically wrapped) between the cell
 * centres.
 */
void runner_dopair_grav_pp_truncated(struct runner *r, struct cell *ci,
                                     struct cell *cj, double shift[3]) {

512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
  /* Some constants */
  const struct engine *const e = r->e;
  const struct space *s = e->s;
  const double cell_width = s->width[0];
  const double a_smooth = e->gravity_properties->a_smooth;
  const double rlr = cell_width * a_smooth;
  const float rlr_inv = 1. / rlr;

  /* Caches to play with */
  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
  struct gravity_cache *const cj_cache = &r->cj_gravity_cache;

  /* Cell properties */
  const int gcount_i = ci->gcount;
  const int gcount_j = cj->gcount;
  struct gpart *restrict gparts_i = ci->gparts;
  struct gpart *restrict gparts_j = cj->gparts;
  const int ci_active = cell_is_active(ci, e);
  const int cj_active = cell_is_active(cj, e);
531
532
533
534
535
  const double loc_i[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
  const double loc_j[3] = {cj->loc[0], cj->loc[1], cj->loc[2]};
  const double loc_mean[3] = {0.5 * (loc_i[0] + loc_j[0]),
                              0.5 * (loc_i[1] + loc_j[1]),
                              0.5 * (loc_i[2] + loc_j[2])};
Matthieu Schaller's avatar
Matthieu Schaller committed
536

537
538
539
540
  /* Anything to do here ?*/
  if (!ci_active && !cj_active) return;

  /* Check that we fit in cache */
Matthieu Schaller's avatar
Matthieu Schaller committed
541
542
543
  if (gcount_i > ci_cache->count || gcount_j > cj_cache->count)
    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i,
          gcount_j);
544
545
546
547
548
549

  /* Computed the padded counts */
  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
  const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;

  /* Fill the caches */
550
551
552
553
  gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i,
                         loc_mean);
  gravity_cache_populate(cj_cache, gparts_j, gcount_j, gcount_padded_j,
                         loc_mean);
554
555
556

  /* Ok... Here we go ! */

Matthieu Schaller's avatar
Matthieu Schaller committed
557
  if (ci_active) {
558
559
560
561

    /* Loop over all particles in ci... */
    for (int pid = 0; pid < gcount_i; pid++) {

562
563
564
      /* Skip inactive particles */
      if (!gpart_is_active(&gparts_i[pid], e)) continue;

565
566
567
      const float x_i = ci_cache->x[pid];
      const float y_i = ci_cache->y[pid];
      const float z_i = ci_cache->z[pid];
Matthieu Schaller's avatar
Matthieu Schaller committed
568

569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
      /* Some powers of the softening length */
      const float h_i = ci_cache->epsilon[pid];
      const float h2_i = h_i * h_i;
      const float h_inv_i = 1.f / h_i;
      const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;

      /* Local accumulators for the acceleration */
      float a_x = 0.f, a_y = 0.f, a_z = 0.f;

      /* Make the compiler understand we are in happy vectorization land */
      swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT);
      swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT);
      swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT);
      swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT);
      swift_assume_size(gcount_padded_j, VEC_SIZE);

      /* Loop over every particle in the other cell. */
      for (int pjd = 0; pjd < gcount_padded_j; pjd++) {

Matthieu Schaller's avatar
Matthieu Schaller committed
588
589
590
591
592
        /* Get info about j */
        const float x_j = cj_cache->x[pjd];
        const float y_j = cj_cache->y[pjd];
        const float z_j = cj_cache->z[pjd];
        const float mass_j = cj_cache->m[pjd];
593
594

        /* Compute the pairwise (square) distance. */
Matthieu Schaller's avatar
Matthieu Schaller committed
595
596
597
598
        const float dx = x_i - x_j;
        const float dy = y_i - y_j;
        const float dz = z_i - z_j;
        const float r2 = dx * dx + dy * dy + dz * dz;
599
600

#ifdef SWIFT_DEBUG_CHECKS
Matthieu Schaller's avatar
Matthieu Schaller committed
601
        if (r2 == 0.f) error("Interacting particles with 0 distance");
602
603
604
605
606
607
608
609

        /* Check that particles have been drifted to the current time */
        if (gparts_i[pid].ti_drift != e->ti_current)
          error("gpi not drifted to current time");
        if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
          error("gpj not drifted to current time");
#endif

Matthieu Schaller's avatar
Matthieu Schaller committed
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
        /* Get the inverse distance */
        const float r_inv = 1.f / sqrtf(r2);
        const float r = r2 * r_inv;

        float f_ij, W_ij, corr_lr;

        if (r2 >= h2_i) {

          /* Get Newtonian gravity */
          f_ij = mass_j * r_inv * r_inv * r_inv;

        } else {

          const float ui = r * h_inv_i;

          kernel_grav_eval(ui, &W_ij);

          /* Get softened gravity */
          f_ij = mass_j * h_inv3_i * W_ij;
        }

        /* Get long-range correction */
        const float u_lr = r * rlr_inv;
        kernel_long_grav_eval(u_lr, &corr_lr);
        f_ij *= corr_lr;

        /* Store it back */
        a_x -= f_ij * dx;
        a_y -= f_ij * dy;
        a_z -= f_ij * dz;
640
641

#ifdef SWIFT_DEBUG_CHECKS
642
643
        /* Update the interaction counter if it's not a padded gpart */
        if (pjd < gcount_j) gparts_i[pid].num_interacted++;
644
645
646
647
#endif
      }

      /* Store everything back in cache */
648
649
650
      ci_cache->a_x[pid] = a_x;
      ci_cache->a_y[pid] = a_y;
      ci_cache->a_z[pid] = a_z;
651
652
653
654
    }
  }

  /* Now do the opposite loop */
Matthieu Schaller's avatar
Matthieu Schaller committed
655
  if (cj_active) {
656
657
658
659

    /* Loop over all particles in ci... */
    for (int pjd = 0; pjd < gcount_j; pjd++) {

660
661
662
      /* Skip inactive particles */
      if (!gpart_is_active(&gparts_j[pjd], e)) continue;

663
664
665
      const float x_j = cj_cache->x[pjd];
      const float y_j = cj_cache->y[pjd];
      const float z_j = cj_cache->z[pjd];
Matthieu Schaller's avatar
Matthieu Schaller committed
666

667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
      /* Some powers of the softening length */
      const float h_j = cj_cache->epsilon[pjd];
      const float h2_j = h_j * h_j;
      const float h_inv_j = 1.f / h_j;
      const float h_inv3_j = h_inv_j * h_inv_j * h_inv_j;

      /* Local accumulators for the acceleration */
      float a_x = 0.f, a_y = 0.f, a_z = 0.f;

      /* Make the compiler understand we are in happy vectorization land */
      swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT);
      swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT);
      swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT);
      swift_align_information(ci_cache->m, SWIFT_CACHE_ALIGNMENT);
      swift_assume_size(gcount_padded_i, VEC_SIZE);

      /* Loop over every particle in the other cell. */
      for (int pid = 0; pid < gcount_padded_i; pid++) {

Matthieu Schaller's avatar
Matthieu Schaller committed
686
687
688
689
690
        /* Get info about j */
        const float x_i = ci_cache->x[pid];
        const float y_i = ci_cache->y[pid];
        const float z_i = ci_cache->z[pid];
        const float mass_i = ci_cache->m[pid];
691
692

        /* Compute the pairwise (square) distance. */
Matthieu Schaller's avatar
Matthieu Schaller committed
693
694
695
696
        const float dx = x_j - x_i;
        const float dy = y_j - y_i;
        const float dz = z_j - z_i;
        const float r2 = dx * dx + dy * dy + dz * dz;
697
698

#ifdef SWIFT_DEBUG_CHECKS
Matthieu Schaller's avatar
Matthieu Schaller committed
699
        if (r2 == 0.f) error("Interacting particles with 0 distance");
700
701
702
703
704
705
706
707

        /* Check that particles have been drifted to the current time */
        if (gparts_j[pjd].ti_drift != e->ti_current)
          error("gpj not drifted to current time");
        if (pid < gcount_i && gparts_i[pid].ti_drift != e->ti_current)
          error("gpi not drifted to current time");
#endif

Matthieu Schaller's avatar
Matthieu Schaller committed
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
        /* Get the inverse distance */
        const float r_inv = 1.f / sqrtf(r2);
        const float r = r2 * r_inv;

        float f_ji, W_ji, corr_lr;

        if (r2 >= h2_j) {

          /* Get Newtonian gravity */
          f_ji = mass_i * r_inv * r_inv * r_inv;

        } else {

          const float uj = r * h_inv_j;

          kernel_grav_eval(uj, &W_ji);

          /* Get softened gravity */
          f_ji = mass_i * h_inv3_j * W_ji;
        }

        /* Get long-range correction */
        const float u_lr = r * rlr_inv;
        kernel_long_grav_eval(u_lr, &corr_lr);
        f_ji *= corr_lr;

        /* Store it back */
        a_x -= f_ji * dx;
        a_y -= f_ji * dy;
        a_z -= f_ji * dz;
738
739

#ifdef SWIFT_DEBUG_CHECKS
740
741
        /* Update the interaction counter if it's not a padded gpart */
        if (pid < gcount_i) gparts_j[pjd].num_interacted++;
742
743
744
745
#endif
      }

      /* Store everything back in cache */
746
747
748
      cj_cache->a_x[pjd] = a_x;
      cj_cache->a_y[pjd] = a_y;
      cj_cache->a_z[pjd] = a_z;
749
750
751
752
    }
  }

  /* Write back to the particles */
Matthieu Schaller's avatar
Matthieu Schaller committed
753
754
  if (ci_active) gravity_cache_write_back(ci_cache, gparts_i, gcount_i);
  if (cj_active) gravity_cache_write_back(cj_cache, gparts_j, gcount_j);
755
756

#ifdef MATTHIEU_OLD_STUFF
757
758
759
760
761
762
763
764
765
766
767
768
769
  /* Some constants */
  const struct engine *const e = r->e;
  const struct space *s = e->s;
  const double cell_width = s->width[0];
  const double a_smooth = e->gravity_properties->a_smooth;
  const double rlr = cell_width * a_smooth;
  const float rlr_inv = 1. / rlr;

  /* Cell properties */
  const int gcount_i = ci->gcount;
  const int gcount_j = cj->gcount;
  struct gpart *restrict gparts_i = ci->gparts;
  struct gpart *restrict gparts_j = cj->gparts;
770

771
772
  /* MATTHIEU: Should we use local DP accumulators ? */

773
  /* Loop over all particles in ci... */
774
775
  if (cell_is_active(ci, e)) {
    for (int pid = 0; pid < gcount_i; pid++) {
776

777
778
      /* Get a hold of the ith part in ci. */
      struct gpart *restrict gpi = &gparts_i[pid];
779

780
      if (!gpart_is_active(gpi, e)) continue;
781

782
783
784
785
      /* Apply boundary condition */
      const double pix[3] = {gpi->x[0] - shift[0], gpi->x[1] - shift[1],
                             gpi->x[2] - shift[2]};

786
787
      /* Loop over every particle in the other cell. */
      for (int pjd = 0; pjd < gcount_j; pjd++) {
788

789
790
        /* Get a hold of the jth part in cj. */
        const struct gpart *restrict gpj = &gparts_j[pjd];
791

792
        /* Compute the pairwise distance. */
793
794
795
        const float dx[3] = {pix[0] - gpj->x[0],   // x
                             pix[1] - gpj->x[1],   // y
                             pix[2] - gpj->x[2]};  // z
796
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
797

798
799
800
801
802
803
804
805
806
#ifdef SWIFT_DEBUG_CHECKS
        /* Check that particles have been drifted to the current time */
        if (gpi->ti_drift != e->ti_current)
          error("gpi not drifted to current time");
        if (gpj->ti_drift != e->ti_current)
          error("gpj not drifted to current time");
#endif

        /* Interact ! */
807
        runner_iact_grav_pp_truncated_nonsym(r2, dx, gpi, gpj, rlr_inv);
808

809
#ifdef SWIFT_DEBUG_CHECKS
810
        gpi->num_interacted++;
811
#endif
812
      }
813
814
    }
  }
815

816
  /* Loop over all particles in cj... */
817
818
  if (cell_is_active(cj, e)) {
    for (int pjd = 0; pjd < gcount_j; pjd++) {
819

820
821
      /* Get a hold of the jth part in cj. */
      struct gpart *restrict gpj = &gparts_j[pjd];
822

823
      if (!gpart_is_active(gpj, e)) continue;
824

825
826
827
828
      /* Apply boundary condition */
      const double pjx[3] = {gpj->x[0] + shift[0], gpj->x[1] + shift[1],
                             gpj->x[2] + shift[2]};

829
830
      /* Loop over every particle in the other cell. */
      for (int pid = 0; pid < gcount_i; pid++) {
831

832
833
        /* Get a hold of the ith part in ci. */
        const struct gpart *restrict gpi = &gparts_i[pid];
834

835
        /* Compute the pairwise distance. */
836
837
838
        const float dx[3] = {pjx[0] - gpi->x[0],   // x
                             pjx[1] - gpi->x[1],   // y
                             pjx[2] - gpi->x[2]};  // z
839
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
840

841
842
843
844
845
846
847
848
849
#ifdef SWIFT_DEBUG_CHECKS
        /* Check that particles have been drifted to the current time */
        if (gpi->ti_drift != e->ti_current)
          error("gpi not drifted to current time");
        if (gpj->ti_drift != e->ti_current)
          error("gpj not drifted to current time");
#endif

        /* Interact ! */
850
        runner_iact_grav_pp_truncated_nonsym(r2, dx, gpj, gpi, rlr_inv);
851
852

#ifdef SWIFT_DEBUG_CHECKS
853
        gpj->num_interacted++;
854
#endif
855
      }
856
857
    }
  }
858
859

#endif
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
}

/**
 * @brief Computes the interaction of all the particles in a cell with all the
 * particles of another cell (switching function between full and truncated).
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The other #cell.
 */
void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) {

  /* Some properties of the space */
  const struct engine *e = r->e;
  const struct space *s = e->s;
  const int periodic = s->periodic;
  const double cell_width = s->width[0];
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
  const double a_smooth = e->gravity_properties->a_smooth;
  const double r_cut_min = e->gravity_properties->r_cut_min;
  const double min_trunc = cell_width * r_cut_min * a_smooth;
  double shift[3] = {0.0, 0.0, 0.0};

  TIMER_TIC;

  /* Anything to do here? */
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;

  /* Let's start by drifting things */
889
890
  if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts");
  if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts");
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912

  /* Can we use the Newtonian version or do we need the truncated one ? */
  if (!periodic) {
    runner_dopair_grav_pp_full(r, ci, cj, shift);
  } else {

    /* Get the relative distance between the pairs, wrapping. */
    shift[0] = nearest(cj->loc[0] - ci->loc[0], dim[0]);
    shift[1] = nearest(cj->loc[1] - ci->loc[1], dim[1]);
    shift[2] = nearest(cj->loc[2] - ci->loc[2], dim[2]);
    const double r2 =
        shift[0] * shift[0] + shift[1] * shift[1] + shift[2] * shift[2];

    /* Get the maximal distance between any two particles */
    const double max_r = sqrt(r2) + ci->multipole->r_max + cj->multipole->r_max;

    /* Do we need to use the truncated interactions ? */
    if (max_r > min_trunc)
      runner_dopair_grav_pp_truncated(r, ci, cj, shift);
    else
      runner_dopair_grav_pp_full(r, ci, cj, shift);
  }
913

914
  TIMER_TOC(timer_dopair_grav_pp);
915
916
}

917
/**
918
919
 * @brief Computes the interaction of all the particles in a cell using the
 * full Newtonian potential.
920
921
 *
 * @param r The #runner.
Matthieu Schaller's avatar
Matthieu Schaller committed
922
 * @param c The #cell.
923
924
925
 *
 * @todo Use a local cache for the particles.
 */
926
void runner_doself_grav_pp_full(struct runner *r, struct cell *c) {
927

928
929
930
931
932
933
934
935
  /* Some constants */
  const struct engine *const e = r->e;
  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;

  /* Cell properties */
  const int gcount = c->gcount;
  struct gpart *restrict gparts = c->gparts;
  const int c_active = cell_is_active(c, e);
936
937
938
  const double loc[3] = {c->loc[0] + 0.5 * c->width[0],
                         c->loc[1] + 0.5 * c->width[1],
                         c->loc[2] + 0.5 * c->width[2]};
939
940
941
942
943
944

  /* Anything to do here ?*/
  if (!c_active) return;

  /* Check that we fit in cache */
  if (gcount > ci_cache->count)
945
    error("Not enough space in the cache! gcount=%d", gcount);
946
947
948
949

  /* Compute the padded counts */
  const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE;

950
  gravity_cache_populate(ci_cache, gparts, gcount, gcount_padded, loc);
951
952
953
954
955

  /* Ok... Here we go ! */

  /* Loop over all particles in ci... */
  for (int pid = 0; pid < gcount; pid++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
956

957
958
    /* Skip inactive particles */
    if (!gpart_is_active(&gparts[pid], e)) continue;
Matthieu Schaller's avatar
Matthieu Schaller committed
959

960
961
962
    const float x_i = ci_cache->x[pid];
    const float y_i = ci_cache->y[pid];
    const float z_i = ci_cache->z[pid];
Matthieu Schaller's avatar
Matthieu Schaller committed
963

964
965
966
967
968
    /* Some powers of the softening length */
    const float h_i = ci_cache->epsilon[pid];
    const float h2_i = h_i * h_i;
    const float h_inv_i = 1.f / h_i;
    const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
Matthieu Schaller's avatar
Matthieu Schaller committed
969

970
971
    /* Local accumulators for the acceleration */
    float a_x = 0.f, a_y = 0.f, a_z = 0.f;
Matthieu Schaller's avatar
Matthieu Schaller committed
972

973
974
975
976
977
978
    /* Make the compiler understand we are in happy vectorization land */
    swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT);
    swift_align_information(ci_cache->m, SWIFT_CACHE_ALIGNMENT);
    swift_assume_size(gcount_padded, VEC_SIZE);
Matthieu Schaller's avatar
Matthieu Schaller committed
979

980
981
    /* Loop over every other particle in the cell. */
    for (int pjd = 0; pjd < gcount_padded; pjd++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
982

983
      /* No self interaction */
Matthieu Schaller's avatar
Matthieu Schaller committed
984
      if (pid == pjd) continue;
985
986
987
988
989
990

      /* Get info about j */
      const float x_j = ci_cache->x[pjd];
      const float y_j = ci_cache->y[pjd];
      const float z_j = ci_cache->z[pjd];
      const float mass_j = ci_cache->m[pjd];
Matthieu Schaller's avatar
Matthieu Schaller committed
991

992
993
994
995
996
      /* Compute the pairwise (square) distance. */
      const float dx = x_i - x_j;
      const float dy = y_i - y_j;
      const float dz = z_i - z_j;
      const float r2 = dx * dx + dy * dy + dz * dz;
Matthieu Schaller's avatar
Matthieu Schaller committed
997

998
999
#ifdef SWIFT_DEBUG_CHECKS
      if (r2 == 0.f) error("Interacting particles with 0 distance");
Matthieu Schaller's avatar
Matthieu Schaller committed
1000

For faster browsing, not all history is shown. View entire blame