runner_doiact.h 95.5 KB
Newer Older
1
/*******************************************************************************
2
 * This file is part of SWIFT.
3
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4
 *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
5
 *
6
7
8
9
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
10
 *
11
12
13
14
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
15
 *
16
17
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
 *
19
20
21
22
23
24
25
26
 ******************************************************************************/

/* Before including this file, define FUNCTION, which is the
   name of the interaction function. This creates the interaction functions
   runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION,
   and runner_dosub_FUNCTION calling the pairwise interaction function
   runner_iact_FUNCTION. */

27
#define PASTE(x, y) x##_##y
28

29
30
31
#define _DOPAIR1_BRANCH(f) PASTE(runner_dopair1_branch, f)
#define DOPAIR1_BRANCH _DOPAIR1_BRANCH(FUNCTION)

32
#define _DOPAIR1(f) PASTE(runner_dopair1, f)
33
#define DOPAIR1 _DOPAIR1(FUNCTION)
34

35
#define _DOPAIR2(f) PASTE(runner_dopair2, f)
36
#define DOPAIR2 _DOPAIR2(FUNCTION)
37

38
#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f)
39
#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)
40

41
42
43
#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f)
#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION)

44
#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f)
Pedro Gonnet's avatar
Pedro Gonnet committed
45
46
#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION)

47
48
49
50
51
#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f)
#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION)

#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f)
#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION)
52

Matthieu Schaller's avatar
Matthieu Schaller committed
53
54
#define _DOSELF2_NAIVE(f) PASTE(runner_doself2_naive, f)
#define DOSELF2_NAIVE _DOSELF2_NAIVE(FUNCTION)
55

56
#define _DOSELF1(f) PASTE(runner_doself1, f)
57
#define DOSELF1 _DOSELF1(FUNCTION)
58

59
#define _DOSELF2(f) PASTE(runner_doself2, f)
60
#define DOSELF2 _DOSELF2(FUNCTION)
61

62
#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f)
63
#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)
64

65
66
67
68
69
70
71
72
#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f)
#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION)

#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f)
#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION)

#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f)
#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION)
73

74
75
#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f)
#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION)
76

77
#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f)
78
#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)
79

80
#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f)
81
#define IACT_NONSYM _IACT_NONSYM(FUNCTION)
82

83
#define _IACT(f) PASTE(runner_iact, f)
84
#define IACT _IACT(FUNCTION)
85

86
87
88
89
90
91
#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)

#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
#define IACT_VEC _IACT_VEC(FUNCTION)

92
#define _TIMER_DOSELF(f) PASTE(timer_doself, f)
93
#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)
94

95
#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f)
96
#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)
Pedro Gonnet's avatar
Pedro Gonnet committed
97

98
99
100
101
102
#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f)
#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION)

#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f)
#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION)
103

104
#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f)
105
106
#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)

107
#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
108
109
#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)

110
/**
Matthieu Schaller's avatar
Matthieu Schaller committed
111
112
113
 * @brief Compute the interactions between a cell pair (non-symmetric case).
 *
 * Inefficient version using a brute-force algorithm.
114
115
116
117
118
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
119
void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci,
120
                   struct cell *restrict cj) {
121
122
123
124

  const struct engine *e = r->e;

#ifndef SWIFT_DEBUG_CHECKS
Matthieu Schaller's avatar
Matthieu Schaller committed
125
  error("Don't use in actual runs ! Slow code !");
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#endif

  TIMER_TIC;

  /* Anything to do here? */
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;

  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;

  /* Get the relative distance between the pairs, wrapping. */
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts in ci. */
  for (int pid = 0; pid < count_i; pid++) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts_i[pid];
152
    const int pi_active = part_is_active(pi, e);
153
154
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
155
156
157
    const float pix[3] = {pi->x[0] - (cj->loc[0] + shift[0]),
                          pi->x[1] - (cj->loc[1] + shift[1]),
                          pi->x[2] - (cj->loc[2] + shift[2])};
158
159
160
161
162
163

    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts_j[pjd];
164
      const float hj = pj->h;
165
      const float hjg2 = hj * hj * kernel_gamma2;
166
      const int pj_active = part_is_active(pj, e);
167
168

      /* Compute the pairwise distance. */
169
170
      const float pjx[3] = {pj->x[0] - cj->loc[0], pj->x[1] - cj->loc[1],
                            pj->x[2] - cj->loc[2]};
171
172
      float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
173
174

      /* Hit or miss? */
175
      if (r2 < hig2 && pi_active) {
176

177
        IACT_NONSYM(r2, dx, hi, hj, pi, pj);
178
      }
179
      if (r2 < hjg2 && pj_active) {
180

181
182
183
        dx[0] = -dx[0];
        dx[1] = -dx[1];
        dx[2] = -dx[2];
184

185
        IACT_NONSYM(r2, dx, hj, hi, pj, pi);
186
187
188
      }

    } /* loop over the parts in cj. */
189
  }   /* loop over the parts in ci. */
190
191
192
193

  TIMER_TOC(TIMER_DOPAIR);
}

Matthieu Schaller's avatar
Matthieu Schaller committed
194
195
196
197
198
199
200
201
202
/**
 * @brief Compute the interactions between a cell pair (symmetric case).
 *
 * Inefficient version using a brute-force algorithm.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
203
void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci,
204
                   struct cell *restrict cj) {
205

206
207
  const struct engine *e = r->e;

208
#ifndef SWIFT_DEBUG_CHECKS
Matthieu Schaller's avatar
Matthieu Schaller committed
209
  error("Don't use in actual runs ! Slow code !");
210
#endif
211

Matthieu Schaller's avatar
Matthieu Schaller committed
212
  TIMER_TIC;
213
214

  /* Anything to do here? */
215
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
216

217
218
219
220
221
  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;

222
  /* Get the relative distance between the pairs, wrapping. */
223
224
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
225
226
227
228
229
230
231
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts in ci. */
232
  for (int pid = 0; pid < count_i; pid++) {
233
234

    /* Get a hold of the ith part in ci. */
235
    struct part *restrict pi = &parts_i[pid];
236
    const int pi_active = part_is_active(pi, e);
237
238
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
239
240
241
    const float pix[3] = {pi->x[0] - (cj->loc[0] + shift[0]),
                          pi->x[1] - (cj->loc[1] + shift[1]),
                          pi->x[2] - (cj->loc[2] + shift[2])};
242
243

    /* Loop over the parts in cj. */
244
    for (int pjd = 0; pjd < count_j; pjd++) {
245
246

      /* Get a pointer to the jth particle. */
247
      struct part *restrict pj = &parts_j[pjd];
248
      const int pj_active = part_is_active(pj, e);
249
      const float hj = pj->h;
250
      const float hjg2 = hj * hj * kernel_gamma2;
251
252

      /* Compute the pairwise distance. */
253
254
      const float pjx[3] = {pj->x[0] - cj->loc[0], pj->x[1] - cj->loc[1],
                            pj->x[2] - cj->loc[2]};
255
256
      float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
257
258

      /* Hit or miss? */
259
      if (r2 < hig2 || r2 < hjg2) {
260

261
        if (pi_active && pj_active) {
262

263
264
          IACT(r2, dx, hi, hj, pi, pj);
        } else if (pi_active) {
265

266
267
          IACT_NONSYM(r2, dx, hi, hj, pi, pj);
        } else if (pj_active) {
268

269
270
271
          dx[0] = -dx[0];
          dx[1] = -dx[1];
          dx[2] = -dx[2];
272

273
          IACT_NONSYM(r2, dx, hj, hi, pj, pi);
274
        }
275
276
      }
    } /* loop over the parts in cj. */
277
  }   /* loop over the parts in ci. */
278
279
280
281

  TIMER_TOC(TIMER_DOPAIR);
}

Matthieu Schaller's avatar
Matthieu Schaller committed
282
283
284
285
286
287
288
289
290
/**
 * @brief Compute the interactions within a cell (symmetric case).
 *
 * Inefficient version using a brute-force algorithm.
 *
 * @param r The #runner.
 * @param c The #cell.
 */
void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) {
291

292
  const struct engine *e = r->e;
293

294
#ifndef SWIFT_DEBUG_CHECKS
295
  error("Don't use in actual runs ! Slow code !");
296
#endif
297

Matthieu Schaller's avatar
Matthieu Schaller committed
298
  TIMER_TIC;
299
300

  /* Anything to do here? */
301
  if (!cell_is_active(c, e)) return;
302

303
304
  const int count = c->count;
  struct part *restrict parts = c->parts;
305
306

  /* Loop over the parts in ci. */
307
  for (int pid = 0; pid < count; pid++) {
308
309

    /* Get a hold of the ith part in ci. */
310
311
312
313
    struct part *restrict pi = &parts[pid];
    const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]};
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
314
    const int pi_active = part_is_active(pi, e);
315

316
    /* Loop over the parts in cj. */
317
    for (int pjd = pid + 1; pjd < count; pjd++) {
318
319

      /* Get a pointer to the jth particle. */
320
      struct part *restrict pj = &parts[pjd];
321
322
      const float hj = pj->h;
      const int pj_active = part_is_active(pj, e);
323
324

      /* Compute the pairwise distance. */
325
326
327
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
328
329
330
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }
331
      const float hjg2 = hj * hj * kernel_gamma2;
332
333

      /* Hit or miss? */
334
      if (r2 < hig2 || r2 < hjg2) {
335

336
        if (pi_active && pj_active) {
337

338
339
          IACT(r2, dx, hi, hj, pi, pj);
        } else if (pi_active) {
340

341
342
          IACT_NONSYM(r2, dx, hi, hj, pi, pj);
        } else if (pj_active) {
343

344
345
346
          dx[0] = -dx[0];
          dx[1] = -dx[1];
          dx[2] = -dx[2];
347

348
          IACT_NONSYM(r2, dx, hj, hi, pj, pi);
349
350
        }
      }
351

352
    } /* loop over the parts in cj. */
353

354
355
356
357
  } /* loop over the parts in ci. */

  TIMER_TOC(TIMER_DOSELF);
}
358

359
360
361
362
/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
Matthieu Schaller's avatar
Matthieu Schaller committed
363
364
 * Version using a brute-force algorithm.
 *
365
366
 * @param r The #runner.
 * @param ci The first #cell.
367
 * @param parts_i The #part to interact with @c cj.
368
369
370
371
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
 */
372
373
374
375
void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
                         struct part *restrict parts_i, int *restrict ind,
                         int count, struct cell *restrict cj) {

376
  const struct engine *e = r->e;
377

Matthieu Schaller's avatar
Matthieu Schaller committed
378
379
380
381
#ifndef SWIFT_DEBUG_CHECKS
  error("Don't use in actual runs ! Slow code !");
#endif

Matthieu Schaller's avatar
Matthieu Schaller committed
382
  TIMER_TIC;
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405

  const int count_j = cj->count;
  struct part *restrict parts_j = cj->parts;

  /* Get the relative distance between the pairs, wrapping. */
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts_i. */
  for (int pid = 0; pid < count; pid++) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts_i[ind[pid]];
    double pix[3];
    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;

406
407
408
409
410
411
#ifdef SWIFT_DEBUG_CHECKS
    if (!part_is_active(pi, e))
      error("Trying to correct smoothing length of inactive particle !");

#endif

412
413
414
415
416
417
418
419
420
421
422
423
424
    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts_j[pjd];

      /* Compute the pairwise distance. */
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }
425

426
427
428
429
430
431
432
433
#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif

434
435
436
437
438
439
      /* Hit or miss? */
      if (r2 < hig2) {

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
      }
    } /* loop over the parts in cj. */
440
  }   /* loop over the parts in ci. */
441

442
  TIMER_TOC(timer_dopair_subset_naive);
443
444
445
446
447
448
449
450
451
452
453
454
455
}

/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param parts_i The #part to interact with @c cj.
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
 */
456
457
458
459
void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts_i, int *restrict ind, int count,
                   struct cell *restrict cj) {

460
  const struct engine *e = r->e;
461

Matthieu Schaller's avatar
Matthieu Schaller committed
462
  TIMER_TIC;
463

464
465
466
  const int count_j = cj->count;
  struct part *restrict parts_j = cj->parts;

467
  /* Get the relative distance between the pairs, wrapping. */
468
469
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
470
471
472
473
474
475
476
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Get the sorting index. */
477
478
  int sid = 0;
  for (int k = 0; k < 3; k++)
479
480
481
482
483
    sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
                         ? 0
                         : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);

  /* Switch the cells around? */
484
  const int flipped = runner_flip[sid];
485
486
  sid = sortlistID[sid];

487
488
489
490
  /* Has the cell cj been sorted? */
  if (!(cj->sorted & (1 << sid)) ||
      cj->dx_max_sort_old > space_maxreldx * cj->dmin)
    error("Interacting unsorted cells.");
Pedro Gonnet's avatar
Pedro Gonnet committed
491

492
  /* Pick-out the sorted lists. */
493
  const struct entry *restrict sort_j = cj->sort[sid];
494
  const float dxj = cj->dx_max_sort;
495
496
497
498
499

  /* Parts are on the left? */
  if (!flipped) {

    /* Loop over the parts_i. */
500
    for (int pid = 0; pid < count; pid++) {
501
502

      /* Get a hold of the ith part in ci. */
503
      struct part *restrict pi = &parts_i[ind[pid]];
504
505
506
      const double pix = pi->x[0] - (shift[0]);
      const double piy = pi->x[1] - (shift[1]);
      const double piz = pi->x[2] - (shift[2]);
507
508
      const float hi = pi->h;
      const float hig2 = hi * hi * kernel_gamma2;
509
      const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] +
510
                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
511
512

      /* Loop over the parts in cj. */
513
      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
514
515

        /* Get a pointer to the jth particle. */
516
        struct part *restrict pj = &parts_j[sort_j[pjd].i];
517
        const float hj = pj->h;
518
519
520
        const double pjx = pj->x[0];
        const double pjy = pj->x[1];
        const double pjz = pj->x[2];
521

522
        /* Compute the pairwise distance. */
523
524
        float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
525
526
527
528

        /* Hit or miss? */
        if (r2 < hig2) {

529
          IACT_NONSYM(r2, dx, hi, hj, pi, pj);
530
        }
531
      } /* loop over the parts in cj. */
532
    }   /* loop over the parts in ci. */
533
534
535
536
537
538
  }

  /* Parts are on the right. */
  else {

    /* Loop over the parts_i. */
539
    for (int pid = 0; pid < count; pid++) {
540
541

      /* Get a hold of the ith part in ci. */
542
      struct part *restrict pi = &parts_i[ind[pid]];
543
544
545
      const double pix = pi->x[0] - (shift[0]);
      const double piy = pi->x[1] - (shift[1]);
      const double piz = pi->x[2] - (shift[2]);
546
547
      const float hi = pi->h;
      const float hig2 = hi * hi * kernel_gamma2;
548
549
      const double di = -hi * kernel_gamma - dxj + pix * runner_shift[sid][0] +
                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
550
551

      /* Loop over the parts in cj. */
552
      for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
553
554

        /* Get a pointer to the jth particle. */
555
        struct part *restrict pj = &parts_j[sort_j[pjd].i];
556
        const float hj = pj->h;
557
558
559
        const double pjx = pj->x[0];
        const double pjy = pj->x[1];
        const double pjz = pj->x[2];
560

561
        /* Compute the pairwise distance. */
562
563
        float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
564

565
566
        /* Hit or miss? */
        if (r2 < hig2) {
567

568
          IACT_NONSYM(r2, dx, hi, hj, pi, pj);
569
570
        }
      } /* loop over the parts in cj. */
571
    }   /* loop over the parts in ci. */
572
573
574
575
  }

  TIMER_TOC(timer_dopair_subset);
}
Pedro Gonnet's avatar
Pedro Gonnet committed
576

577
578
579
580
581
582
/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
583
 * @param parts The #part to interact.
584
585
586
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 */
587
588
589
void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts, int *restrict ind, int count) {

590
#ifdef SWIFT_DEBUG_CHECKS
591
  const struct engine *e = r->e;
592
#endif
593

Matthieu Schaller's avatar
Matthieu Schaller committed
594
  TIMER_TIC;
595

596
597
  const int count_i = ci->count;
  struct part *restrict parts_j = ci->parts;
598
599

  /* Loop over the parts in ci. */
600
  for (int pid = 0; pid < count; pid++) {
601
602

    /* Get a hold of the ith part in ci. */
603
    struct part *pi = &parts[ind[pid]];
Matthieu Schaller's avatar
Matthieu Schaller committed
604
605
    const float pix[3] = {pi->x[0] - ci->loc[0], pi->x[1] - ci->loc[1],
                          pi->x[2] - ci->loc[2]};
606
607
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
608

609
610
611
612
#ifdef SWIFT_DEBUG_CHECKS
    if (!part_is_active(pi, e)) error("Inactive particle in subset function!");
#endif

613
    /* Loop over the parts in cj. */
614
    for (int pjd = 0; pjd < count_i; pjd++) {
615
616

      /* Get a pointer to the jth particle. */
617
      struct part *restrict pj = &parts_j[pjd];
618

619
      /* Compute the pairwise distance. */
Matthieu Schaller's avatar
Matthieu Schaller committed
620
621
      const float pjx[3] = {pj->x[0] - ci->loc[0], pj->x[1] - ci->loc[1],
                            pj->x[2] - ci->loc[2]};
622
623
      float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
624
625

      /* Hit or miss? */
626
      if (r2 > 0.f && r2 < hig2) {
627
628
629
630

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
      }
    } /* loop over the parts in cj. */
631
  }   /* loop over the parts in ci. */
632

633
  TIMER_TOC(timer_doself_subset);
634
}
635

636
/**
637
 * @brief Compute the interactions between a cell pair (non-symmetric).
638
639
640
641
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
642
643
 * @param sid The direction of the pair
 * @param shift The shift vector to apply to the particles in ci.
644
 */
645
646
void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
             const double *shift) {
647

648
  const struct engine *restrict e = r->e;
649

650
651
  // DOPAIR1_NAIVE(r, ci, cj);
  // return;
652

Matthieu Schaller's avatar
Matthieu Schaller committed
653
  TIMER_TIC;
654
655

  /* Get the cutoff shift. */
656
657
  double rshift = 0.0;
  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
658
659

  /* Pick-out the sorted lists. */
660
661
  const struct entry *restrict sort_i = ci->sort[sid];
  const struct entry *restrict sort_j = cj->sort[sid];
662

663
#ifdef SWIFT_DEBUG_CHECKS
664
665
666
667
668
669
670
671
  /* Check that the dx_max_sort values in the cell are indeed an upper
     bound on particle movement. */
  for (int pid = 0; pid < ci->count; pid++) {
    const struct part *p = &ci->parts[sort_i[pid].i];
    const float d = p->x[0] * runner_shift[sid][0] +
                    p->x[1] * runner_shift[sid][1] +
                    p->x[2] * runner_shift[sid][2];
    if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort >
672
        1.0e-4 * max(fabsf(d), ci->dx_max_sort_old))
Pedro Gonnet's avatar
Pedro Gonnet committed
673
674
675
676
677
678
      error(
          "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d "
          "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->dx_max_sort=%e "
          "ci->dx_max_sort_old=%e",
          ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->dx_max_sort,
          ci->dx_max_sort_old);
679
680
681
682
683
684
685
  }
  for (int pjd = 0; pjd < cj->count; pjd++) {
    const struct part *p = &cj->parts[sort_j[pjd].i];
    const float d = p->x[0] * runner_shift[sid][0] +
                    p->x[1] * runner_shift[sid][1] +
                    p->x[2] * runner_shift[sid][2];
    if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort >
686
        1.0e-4 * max(fabsf(d), cj->dx_max_sort_old))
Pedro Gonnet's avatar
Pedro Gonnet committed
687
688
689
690
691
692
      error(
          "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d "
          "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->dx_max_sort=%e "
          "cj->dx_max_sort_old=%e",
          cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->dx_max_sort,
          cj->dx_max_sort_old);
693
  }
694
695
#endif /* SWIFT_DEBUG_CHECKS */

696
  /* Get some other useful values. */
697
698
699
700
701
702
703
704
  const double hi_max = ci->h_max * kernel_gamma - rshift;
  const double hj_max = cj->h_max * kernel_gamma;
  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;
  const double di_max = sort_i[count_i - 1].d - rshift;
  const double dj_min = sort_j[0].d;
705
  const float dx_max = (ci->dx_max_sort + cj->dx_max_sort);
706

707
  if (cell_is_active(ci, e)) {
708

709
710
711
    /* Loop over the parts in ci. */
    for (int pid = count_i - 1;
         pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
712

713
714
715
      /* Get a hold of the ith part in ci. */
      struct part *restrict pi = &parts_i[sort_i[pid].i];
      const float hi = pi->h;
716
717
718
719
720

      /* Skip inactive particles */
      if (!part_is_active(pi, e)) continue;

      /* Is there anything we need to interact with ? */
721
722
      const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
      if (di < dj_min) continue;
723

724
      /* Get some additional information about pi */
725
      const float hig2 = hi * hi * kernel_gamma2;
726
727
728
      const float pix = pi->x[0] - (cj->loc[0] + shift[0]);
      const float piy = pi->x[1] - (cj->loc[1] + shift[1]);
      const float piz = pi->x[2] - (cj->loc[2] + shift[2]);
729

730
731
      /* Loop over the parts in cj. */
      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
732

733
734
735
        /* Recover pj */
        struct part *pj = &parts_j[sort_j[pjd].i];
        const float hj = pj->h;
736
737
738
        const float pjx = pj->x[0] - cj->loc[0];
        const float pjy = pj->x[1] - cj->loc[1];
        const float pjz = pj->x[2] - cj->loc[2];
739
740

        /* Compute the pairwise distance. */
741
742
        float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
743

744
#ifdef SWIFT_DEBUG_CHECKS
745
746
747
748
749
        /* Check that particles have been drifted to the current time */
        if (pi->ti_drift != e->ti_current)
          error("Particle pi not drifted to current time");
        if (pj->ti_drift != e->ti_current)
          error("Particle pj not drifted to current time");
750
751
#endif

752
753
        /* Hit or miss? */
        if (r2 < hig2) {
754

755
          IACT_NONSYM(r2, dx, hi, hj, pi, pj);
756
757
        }
      } /* loop over the parts in cj. */
758
759
    }   /* loop over the parts in ci. */
  }     /* Cell ci is active */
760

761
  if (cell_is_active(cj, e)) {
762

763
764
765
    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
         pjd++) {
766

767
      /* Get a hold of the jth part in cj. */
768
      struct part *pj = &parts_j[sort_j[pjd].i];
769
      const float hj = pj->h;
770
771
772
773
774

      /* Skip inactive particles */
      if (!part_is_active(pj, e)) continue;

      /* Is there anything we need to interact with ? */
775
776
      const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift;
      if (dj - rshift > di_max) continue;
Matthieu Schaller's avatar
Matthieu Schaller committed
777

778
      /* Get some additional information about pj */
779
      const float hjg2 = hj * hj * kernel_gamma2;
780
781
782
      const float pjx = pj->x[0] - cj->loc[0];
      const float pjy = pj->x[1] - cj->loc[1];
      const float pjz = pj->x[2] - cj->loc[2];
783

784
785
786
      /* Loop over the parts in ci. */
      for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {

787
788
789
        /* Recover pi */
        struct part *pi = &parts_i[sort_i[pid].i];
        const float hi = pi->h;
790
791
792
        const float pix = pi->x[0] - (cj->loc[0] + shift[0]);
        const float piy = pi->x[1] - (cj->loc[1] + shift[1]);
        const float piz = pi->x[2] - (cj->loc[2] + shift[2]);
793
794

        /* Compute the pairwise distance. */
795
796
        float dx[3] = {pjx - pix, pjy - piy, pjz - piz};
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
797

798
#ifdef SWIFT_DEBUG_CHECKS
799
800
801
802
803
        /* Check that particles have been drifted to the current time */
        if (pi->ti_drift != e->ti_current)
          error("Particle pi not drifted to current time");
        if (pj->ti_drift != e->ti_current)
          error("Particle pj not drifted to current time");
804
805
#endif

806
807
        /* Hit or miss? */
        if (r2 < hjg2) {
808

809
          IACT_NONSYM(r2, dx, hj, hi, pj, pi);
810
811
        }
      } /* loop over the parts in ci. */
812
813
    }   /* loop over the parts in cj. */
  }     /* Cell cj is active */
814
815
816
817

  TIMER_TOC(TIMER_DOPAIR);
}

818
819
820
/**
 * @brief Determine which version of DOPAIR1 needs to be called depending on the
 * orientation of the cells or whether DOPAIR1 needs to be called at all.
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
 *
 * @param r #runner
 * @param ci #cell ci
 * @param cj #cell cj
 *
 */
void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) {

  const struct engine *restrict e = r->e;

  /* Anything to do here? */
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;

  /* Check that cells are drifted. */
  if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
    error("Interacting undrifted cells.");
837

838
839
840
841
842
  /* Get the sort ID. */
  double shift[3] = {0.0, 0.0, 0.0};
  const int sid = space_getsid(e->s, &ci, &cj, shift);

  /* Have the cells been sorted? */
843
844
  if (!(ci->sorted & (1 << sid)) ||
      ci->dx_max_sort_old > space_maxreldx * ci->dmin)
Matthieu Schaller's avatar
Matthieu Schaller committed
845
    error("Interacting unsorted cells.");
846
847
  if (!(cj->sorted & (1 << sid)) ||
      cj->dx_max_sort_old > space_maxreldx * cj->dmin)
Matthieu Schaller's avatar
Matthieu Schaller committed
848
    error("Interacting unsorted cells.");
849

850
851
852
#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \
    (DOPAIR1_BRANCH == runner_dopair1_density_branch)
  if (!sort_is_corner(sid))
853
854
855
    runner_dopair1_density_vec(r, ci, cj, sid, shift);
  else
    DOPAIR1(r, ci, cj, sid, shift);
856
#else
857
858
859
860
  DOPAIR1(r, ci, cj, sid, shift);
#endif
}

861
862
863
864
865
866
867
/**
 * @brief Compute the interactions between a cell pair (symmetric)
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
868
869
870
void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {

  struct engine *restrict e = r->e;
871
872
873

  // DOPAIR2_NAIVE(r, ci, cj);
  // return;
874

875
  TIMER_TIC;
876

877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
  /* Anything to do here? */
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;

  if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e))
    error("Interacting undrifted cells.");

  /* Get the shift ID. */
  double shift[3] = {0.0, 0.0, 0.0};
  const int sid = space_getsid(e->s, &ci, &cj, shift);

  /* Have the cells been sorted? */
  if (!(ci->sorted & (1 << sid)) ||