runner_doiact.h 114 KB
Newer Older
1
/*******************************************************************************
2
 * This file is part of SWIFT.
3
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4
 *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
5
 *
6
7
8
9
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
10
 *
11
12
13
14
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
15
 *
16
17
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
 *
19
20
21
22
23
24
25
26
 ******************************************************************************/

/* Before including this file, define FUNCTION, which is the
   name of the interaction function. This creates the interaction functions
   runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION,
   and runner_dosub_FUNCTION calling the pairwise interaction function
   runner_iact_FUNCTION. */

27
#define PASTE(x, y) x##_##y
28

29
30
31
#define _DOPAIR1_BRANCH(f) PASTE(runner_dopair1_branch, f)
#define DOPAIR1_BRANCH _DOPAIR1_BRANCH(FUNCTION)

32
#define _DOPAIR1(f) PASTE(runner_dopair1, f)
33
#define DOPAIR1 _DOPAIR1(FUNCTION)
34

35
36
37
#define _DOPAIR2_BRANCH(f) PASTE(runner_dopair2_branch, f)
#define DOPAIR2_BRANCH _DOPAIR2_BRANCH(FUNCTION)

38
#define _DOPAIR2(f) PASTE(runner_dopair2, f)
39
#define DOPAIR2 _DOPAIR2(FUNCTION)
40

41
#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f)
42
#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)
43

44
45
46
#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f)
#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION)

47
#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f)
Pedro Gonnet's avatar
Pedro Gonnet committed
48
49
#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION)

50
51
52
53
54
#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f)
#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION)

#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f)
#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION)
55

56
57
58
#define _DOSELF1_NAIVE(f) PASTE(runner_doself1_naive, f)
#define DOSELF1_NAIVE _DOSELF1_NAIVE(FUNCTION)

Matthieu Schaller's avatar
Matthieu Schaller committed
59
60
#define _DOSELF2_NAIVE(f) PASTE(runner_doself2_naive, f)
#define DOSELF2_NAIVE _DOSELF2_NAIVE(FUNCTION)
61

62
#define _DOSELF1(f) PASTE(runner_doself1, f)
63
#define DOSELF1 _DOSELF1(FUNCTION)
64

65
#define _DOSELF2(f) PASTE(runner_doself2, f)
66
#define DOSELF2 _DOSELF2(FUNCTION)
67

68
#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f)
69
#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)
70

71
72
73
74
75
76
77
78
#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f)
#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION)

#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f)
#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION)

#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f)
#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION)
79

80
81
#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f)
#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION)
82

83
#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f)
84
#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)
85

86
#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f)
87
#define IACT_NONSYM _IACT_NONSYM(FUNCTION)
88

89
#define _IACT(f) PASTE(runner_iact, f)
90
#define IACT _IACT(FUNCTION)
91

92
93
94
95
96
97
#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)

#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
#define IACT_VEC _IACT_VEC(FUNCTION)

98
#define _TIMER_DOSELF(f) PASTE(timer_doself, f)
99
#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)
100

101
#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f)
102
#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)
Pedro Gonnet's avatar
Pedro Gonnet committed
103

104
105
106
107
108
#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f)
#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION)

#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f)
#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION)
109

110
#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f)
111
112
#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)

113
#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
114
115
#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)

116
/**
Matthieu Schaller's avatar
Matthieu Schaller committed
117
118
119
 * @brief Compute the interactions between a cell pair (non-symmetric case).
 *
 * Inefficient version using a brute-force algorithm.
120
121
122
123
124
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
125
void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci,
126
                   struct cell *restrict cj) {
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153

  const struct engine *e = r->e;

  TIMER_TIC;

  /* Anything to do here? */
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;

  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;

  /* Get the relative distance between the pairs, wrapping. */
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts in ci. */
  for (int pid = 0; pid < count_i; pid++) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts_i[pid];
154
    const int pi_active = part_is_active(pi, e);
155
156
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
157
158
159
    const float pix[3] = {pi->x[0] - (cj->loc[0] + shift[0]),
                          pi->x[1] - (cj->loc[1] + shift[1]),
                          pi->x[2] - (cj->loc[2] + shift[2])};
160
161
162
163
164
165

    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts_j[pjd];
166
      const float hj = pj->h;
167
      const float hjg2 = hj * hj * kernel_gamma2;
168
      const int pj_active = part_is_active(pj, e);
169
170

      /* Compute the pairwise distance. */
171
172
      const float pjx[3] = {pj->x[0] - cj->loc[0], pj->x[1] - cj->loc[1],
                            pj->x[2] - cj->loc[2]};
173
174
      float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
175

176
177
178
179
180
181
182
183
#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif

184
      /* Hit or miss? */
185
      if (r2 < hig2 && pi_active) {
186

187
        IACT_NONSYM(r2, dx, hi, hj, pi, pj);
188
      }
189
      if (r2 < hjg2 && pj_active) {
190

191
192
193
        dx[0] = -dx[0];
        dx[1] = -dx[1];
        dx[2] = -dx[2];
194

195
        IACT_NONSYM(r2, dx, hj, hi, pj, pi);
196
197
198
      }

    } /* loop over the parts in cj. */
199
  }   /* loop over the parts in ci. */
200
201
202
203

  TIMER_TOC(TIMER_DOPAIR);
}

Matthieu Schaller's avatar
Matthieu Schaller committed
204
205
206
207
208
209
210
211
212
/**
 * @brief Compute the interactions between a cell pair (symmetric case).
 *
 * Inefficient version using a brute-force algorithm.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
213
void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci,
214
                   struct cell *restrict cj) {
215

216
217
  const struct engine *e = r->e;

Matthieu Schaller's avatar
Matthieu Schaller committed
218
  TIMER_TIC;
219
220

  /* Anything to do here? */
221
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
222

223
224
225
226
227
  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;

228
  /* Get the relative distance between the pairs, wrapping. */
229
230
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
231
232
233
234
235
236
237
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts in ci. */
238
  for (int pid = 0; pid < count_i; pid++) {
239
240

    /* Get a hold of the ith part in ci. */
241
    struct part *restrict pi = &parts_i[pid];
242
    const int pi_active = part_is_active(pi, e);
243
244
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
245
246
247
    const float pix[3] = {pi->x[0] - (cj->loc[0] + shift[0]),
                          pi->x[1] - (cj->loc[1] + shift[1]),
                          pi->x[2] - (cj->loc[2] + shift[2])};
248
249

    /* Loop over the parts in cj. */
250
    for (int pjd = 0; pjd < count_j; pjd++) {
251
252

      /* Get a pointer to the jth particle. */
253
      struct part *restrict pj = &parts_j[pjd];
254
      const int pj_active = part_is_active(pj, e);
255
      const float hj = pj->h;
256
      const float hjg2 = hj * hj * kernel_gamma2;
257
258

      /* Compute the pairwise distance. */
259
260
      const float pjx[3] = {pj->x[0] - cj->loc[0], pj->x[1] - cj->loc[1],
                            pj->x[2] - cj->loc[2]};
261
262
      float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
263

264
265
266
267
268
269
270
271
#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif

272
      /* Hit or miss? */
273
      if (r2 < hig2 || r2 < hjg2) {
274

275
        if (pi_active && pj_active) {
276

277
278
          IACT(r2, dx, hi, hj, pi, pj);
        } else if (pi_active) {
279

280
281
          IACT_NONSYM(r2, dx, hi, hj, pi, pj);
        } else if (pj_active) {
282

283
284
285
          dx[0] = -dx[0];
          dx[1] = -dx[1];
          dx[2] = -dx[2];
286

287
          IACT_NONSYM(r2, dx, hj, hi, pj, pi);
288
        }
289
290
      }
    } /* loop over the parts in cj. */
291
  }   /* loop over the parts in ci. */
292
293
294
295

  TIMER_TOC(TIMER_DOPAIR);
}

Matthieu Schaller's avatar
Matthieu Schaller committed
296
/**
297
 * @brief Compute the interactions within a cell (non-symmetric case).
Matthieu Schaller's avatar
Matthieu Schaller committed
298
299
300
301
302
303
 *
 * Inefficient version using a brute-force algorithm.
 *
 * @param r The #runner.
 * @param c The #cell.
 */
304
void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) {
305

306
  const struct engine *e = r->e;
307

Matthieu Schaller's avatar
Matthieu Schaller committed
308
  TIMER_TIC;
309
310

  /* Anything to do here? */
311
  if (!cell_is_active(c, e)) return;
312

313
314
  const int count = c->count;
  struct part *restrict parts = c->parts;
315
316

  /* Loop over the parts in ci. */
317
  for (int pid = 0; pid < count; pid++) {
318
319

    /* Get a hold of the ith part in ci. */
320
    struct part *restrict pi = &parts[pid];
321
    const int pi_active = part_is_active(pi, e);
322
323
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
324
325
    const float pix[3] = {pi->x[0] - c->loc[0], pi->x[1] - c->loc[1],
                          pi->x[2] - c->loc[2]};
326

327
    /* Loop over the parts in cj. */
328
    for (int pjd = pid + 1; pjd < count; pjd++) {
329
330

      /* Get a pointer to the jth particle. */
331
      struct part *restrict pj = &parts[pjd];
332
      const float hj = pj->h;
333
      const float hjg2 = hj * hj * kernel_gamma2;
334
      const int pj_active = part_is_active(pj, e);
335
336

      /* Compute the pairwise distance. */
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
      const float pjx[3] = {pj->x[0] - c->loc[0], pj->x[1] - c->loc[1],
                            pj->x[2] - c->loc[2]};
      float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];

      const int doi = pi_active && (r2 < hig2);
      const int doj = pj_active && (r2 < hjg2);

#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif

      /* Hit or miss? */
      if (doi && doj) {

        IACT(r2, dx, hi, hj, pi, pj);
      } else if (doi) {

        IACT_NONSYM(r2, dx, hi, hj, pi, pj);
      } else if (doj) {

        dx[0] = -dx[0];
        dx[1] = -dx[1];
        dx[2] = -dx[2];

        IACT_NONSYM(r2, dx, hj, hi, pj, pi);
367
      }
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
    } /* loop over the parts in cj. */
  }   /* loop over the parts in ci. */

  TIMER_TOC(TIMER_DOSELF);
}

/**
 * @brief Compute the interactions within a cell (symmetric case).
 *
 * Inefficient version using a brute-force algorithm.
 *
 * @param r The #runner.
 * @param c The #cell.
 */
void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) {

  const struct engine *e = r->e;

  TIMER_TIC;

  /* Anything to do here? */
  if (!cell_is_active(c, e)) return;

  const int count = c->count;
  struct part *restrict parts = c->parts;

  /* Loop over the parts in ci. */
  for (int pid = 0; pid < count; pid++) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts[pid];
    const int pi_active = part_is_active(pi, e);
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
    const float pix[3] = {pi->x[0] - c->loc[0], pi->x[1] - c->loc[1],
                          pi->x[2] - c->loc[2]};

    /* Loop over the parts in cj. */
    for (int pjd = pid + 1; pjd < count; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts[pjd];
      const float hj = pj->h;
411
      const float hjg2 = hj * hj * kernel_gamma2;
412
      const int pj_active = part_is_active(pj, e);
413

414
415
416
417
418
      /* Compute the pairwise distance. */
      const float pjx[3] = {pj->x[0] - c->loc[0], pj->x[1] - c->loc[1],
                            pj->x[2] - c->loc[2]};
      float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
419

420
421
      const int doi = pi_active && ((r2 < hig2) || (r2 < hjg2));
      const int doj = pj_active && ((r2 < hig2) || (r2 < hjg2));
422

423
424
425
426
427
428
429
#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif
430

431
432
      /* Hit or miss? */
      if (doi && doj) {
433

434
435
        IACT(r2, dx, hi, hj, pi, pj);
      } else if (doi) {
436

437
438
        IACT_NONSYM(r2, dx, hi, hj, pi, pj);
      } else if (doj) {
439

440
441
442
        dx[0] = -dx[0];
        dx[1] = -dx[1];
        dx[2] = -dx[2];
443

444
445
446
447
        IACT_NONSYM(r2, dx, hj, hi, pj, pi);
      }
    } /* loop over the parts in cj. */
  }   /* loop over the parts in ci. */
448
449
450

  TIMER_TOC(TIMER_DOSELF);
}
451

452
453
454
455
/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
Matthieu Schaller's avatar
Matthieu Schaller committed
456
457
 * Version using a brute-force algorithm.
 *
458
459
 * @param r The #runner.
 * @param ci The first #cell.
460
 * @param parts_i The #part to interact with @c cj.
461
462
463
464
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
 */
465
466
467
468
void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
                         struct part *restrict parts_i, int *restrict ind,
                         int count, struct cell *restrict cj) {

469
  const struct engine *e = r->e;
470

Matthieu Schaller's avatar
Matthieu Schaller committed
471
  TIMER_TIC;
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494

  const int count_j = cj->count;
  struct part *restrict parts_j = cj->parts;

  /* Get the relative distance between the pairs, wrapping. */
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts_i. */
  for (int pid = 0; pid < count; pid++) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts_i[ind[pid]];
    double pix[3];
    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;

495
496
497
498
499
500
#ifdef SWIFT_DEBUG_CHECKS
    if (!part_is_active(pi, e))
      error("Trying to correct smoothing length of inactive particle !");

#endif

501
502
503
504
505
506
507
508
509
510
511
512
513
    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts_j[pjd];

      /* Compute the pairwise distance. */
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }
514

515
516
517
518
519
520
521
522
#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif

523
524
525
526
527
528
      /* Hit or miss? */
      if (r2 < hig2) {

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
      }
    } /* loop over the parts in cj. */
529
  }   /* loop over the parts in ci. */
530

531
  TIMER_TOC(timer_dopair_subset_naive);
532
533
534
535
536
537
538
539
540
541
542
543
544
}

/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param parts_i The #part to interact with @c cj.
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
 */
545
546
547
548
void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts_i, int *restrict ind, int count,
                   struct cell *restrict cj) {

549
  const struct engine *e = r->e;
550

551
552
553
554
555
#ifdef SWIFT_USE_NAIVE_INTERACTIONS
  DOPAIR_SUBSET_NAIVE(r, ci, parts_i, ind, count, cj);
  return;
#endif

Matthieu Schaller's avatar
Matthieu Schaller committed
556
  TIMER_TIC;
557

558
559
560
  const int count_j = cj->count;
  struct part *restrict parts_j = cj->parts;

561
  /* Get the relative distance between the pairs, wrapping. */
562
563
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
564
565
566
567
568
569
570
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Get the sorting index. */
571
572
  int sid = 0;
  for (int k = 0; k < 3; k++)
573
574
575
576
577
    sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
                         ? 0
                         : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);

  /* Switch the cells around? */
578
  const int flipped = runner_flip[sid];
579
580
  sid = sortlistID[sid];

581
582
583
584
  /* Has the cell cj been sorted? */
  if (!(cj->sorted & (1 << sid)) ||
      cj->dx_max_sort_old > space_maxreldx * cj->dmin)
    error("Interacting unsorted cells.");
Pedro Gonnet's avatar
Pedro Gonnet committed
585

586
  /* Pick-out the sorted lists. */
587
  const struct entry *restrict sort_j = cj->sort[sid];
588
  const float dxj = cj->dx_max_sort;
589
590
591
592
593

  /* Parts are on the left? */
  if (!flipped) {

    /* Loop over the parts_i. */
594
    for (int pid = 0; pid < count; pid++) {
595
596

      /* Get a hold of the ith part in ci. */
597
      struct part *restrict pi = &parts_i[ind[pid]];
598
599
600
      const double pix = pi->x[0] - (shift[0]);
      const double piy = pi->x[1] - (shift[1]);
      const double piz = pi->x[2] - (shift[2]);
601
602
      const float hi = pi->h;
      const float hig2 = hi * hi * kernel_gamma2;
603
      const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] +
604
                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
605
606

      /* Loop over the parts in cj. */
607
      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
608
609

        /* Get a pointer to the jth particle. */
610
        struct part *restrict pj = &parts_j[sort_j[pjd].i];
611
        const float hj = pj->h;
612
613
614
        const double pjx = pj->x[0];
        const double pjy = pj->x[1];
        const double pjz = pj->x[2];
615

616
        /* Compute the pairwise distance. */
617
618
        float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
619

620
621
622
623
624
625
626
627
#ifdef SWIFT_DEBUG_CHECKS
        /* Check that particles have been drifted to the current time */
        if (pi->ti_drift != e->ti_current)
          error("Particle pi not drifted to current time");
        if (pj->ti_drift != e->ti_current)
          error("Particle pj not drifted to current time");
#endif

628
629
630
        /* Hit or miss? */
        if (r2 < hig2) {

631
          IACT_NONSYM(r2, dx, hi, hj, pi, pj);
632
        }
633
      } /* loop over the parts in cj. */
634
    }   /* loop over the parts in ci. */
635
636
637
638
639
640
  }

  /* Parts are on the right. */
  else {

    /* Loop over the parts_i. */
641
    for (int pid = 0; pid < count; pid++) {
642
643

      /* Get a hold of the ith part in ci. */
644
      struct part *restrict pi = &parts_i[ind[pid]];
645
646
647
      const double pix = pi->x[0] - (shift[0]);
      const double piy = pi->x[1] - (shift[1]);
      const double piz = pi->x[2] - (shift[2]);
648
649
      const float hi = pi->h;
      const float hig2 = hi * hi * kernel_gamma2;
650
651
      const double di = -hi * kernel_gamma - dxj + pix * runner_shift[sid][0] +
                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
652
653

      /* Loop over the parts in cj. */
654
      for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
655
656

        /* Get a pointer to the jth particle. */
657
        struct part *restrict pj = &parts_j[sort_j[pjd].i];
658
        const float hj = pj->h;
659
660
661
        const double pjx = pj->x[0];
        const double pjy = pj->x[1];
        const double pjz = pj->x[2];
662

663
        /* Compute the pairwise distance. */
664
665
        float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
666

667
668
669
670
671
672
673
674
#ifdef SWIFT_DEBUG_CHECKS
        /* Check that particles have been drifted to the current time */
        if (pi->ti_drift != e->ti_current)
          error("Particle pi not drifted to current time");
        if (pj->ti_drift != e->ti_current)
          error("Particle pj not drifted to current time");
#endif

675
676
        /* Hit or miss? */
        if (r2 < hig2) {
677

678
          IACT_NONSYM(r2, dx, hi, hj, pi, pj);
679
680
        }
      } /* loop over the parts in cj. */
681
    }   /* loop over the parts in ci. */
682
683
684
685
  }

  TIMER_TOC(timer_dopair_subset);
}
Pedro Gonnet's avatar
Pedro Gonnet committed
686

687
688
689
690
691
692
/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
693
 * @param parts The #part to interact.
694
695
696
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 */
697
698
699
void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts, int *restrict ind, int count) {

700
#ifdef SWIFT_DEBUG_CHECKS
701
  const struct engine *e = r->e;
702
#endif
703

Matthieu Schaller's avatar
Matthieu Schaller committed
704
  TIMER_TIC;
705

706
707
  const int count_i = ci->count;
  struct part *restrict parts_j = ci->parts;
708
709

  /* Loop over the parts in ci. */
710
  for (int pid = 0; pid < count; pid++) {
711
712

    /* Get a hold of the ith part in ci. */
713
    struct part *pi = &parts[ind[pid]];
Matthieu Schaller's avatar
Matthieu Schaller committed
714
715
    const float pix[3] = {pi->x[0] - ci->loc[0], pi->x[1] - ci->loc[1],
                          pi->x[2] - ci->loc[2]};
716
717
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
718

719
720
721
722
#ifdef SWIFT_DEBUG_CHECKS
    if (!part_is_active(pi, e)) error("Inactive particle in subset function!");
#endif

723
    /* Loop over the parts in cj. */
724
    for (int pjd = 0; pjd < count_i; pjd++) {
725
726

      /* Get a pointer to the jth particle. */
727
      struct part *restrict pj = &parts_j[pjd];
728

729
      /* Compute the pairwise distance. */
Matthieu Schaller's avatar
Matthieu Schaller committed
730
731
      const float pjx[3] = {pj->x[0] - ci->loc[0], pj->x[1] - ci->loc[1],
                            pj->x[2] - ci->loc[2]};
732
733
      float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
734

735
736
737
738
739
740
741
742
#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif

743
      /* Hit or miss? */
744
      if (r2 > 0.f && r2 < hig2) {
745
746
747
748

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
      }
    } /* loop over the parts in cj. */
749
  }   /* loop over the parts in ci. */
750

751
  TIMER_TOC(timer_doself_subset);
752
}
753

754
/**
755
 * @brief Compute the interactions between a cell pair (non-symmetric).
756
757
758
759
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
760
761
 * @param sid The direction of the pair
 * @param shift The shift vector to apply to the particles in ci.
762
 */
763
764
void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid,
             const double *shift) {
765

766
  const struct engine *restrict e = r->e;
767

768
769
770
771
#ifdef SWIFT_USE_NAIVE_INTERACTIONS
  DOPAIR1_NAIVE(r, ci, cj);
  return;
#endif
772

Matthieu Schaller's avatar
Matthieu Schaller committed
773
  TIMER_TIC;
774
775

  /* Get the cutoff shift. */
776
777
  double rshift = 0.0;
  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
778
779

  /* Pick-out the sorted lists. */
780
781
  const struct entry *restrict sort_i = ci->sort[sid];
  const struct entry *restrict sort_j = cj->sort[sid];
782

783
#ifdef SWIFT_DEBUG_CHECKS
784
785
  /* Some constants used to checks that the parts are in the right frame */
  const float shift_threshold_x =
786
      2. * ci->width[0] + 2. * max(ci->dx_max_part, cj->dx_max_part);
787
  const float shift_threshold_y =
788
      2. * ci->width[1] + 2. * max(ci->dx_max_part, cj->dx_max_part);
789
  const float shift_threshold_z =
790
      2. * ci->width[2] + 2. * max(ci->dx_max_part, cj->dx_max_part);
791
792
#endif /* SWIFT_DEBUG_CHECKS */

793
  /* Get some other useful values. */
794
795
796
797
798
799
800
801
  const double hi_max = ci->h_max * kernel_gamma - rshift;
  const double hj_max = cj->h_max * kernel_gamma;
  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;
  const double di_max = sort_i[count_i - 1].d - rshift;
  const double dj_min = sort_j[0].d;
802
  const float dx_max = (ci->dx_max_sort + cj->dx_max_sort);
803

804
  if (cell_is_active(ci, e)) {
805

806
807
808
    /* Loop over the parts in ci. */
    for (int pid = count_i - 1;
         pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
809

810
811
812
      /* Get a hold of the ith part in ci. */
      struct part *restrict pi = &parts_i[sort_i[pid].i];
      const float hi = pi->h;
813
814
815
816
817

      /* Skip inactive particles */
      if (!part_is_active(pi, e)) continue;

      /* Is there anything we need to interact with ? */
818
819
      const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
      if (di < dj_min) continue;
820

821
      /* Get some additional information about pi */
822
      const float hig2 = hi * hi * kernel_gamma2;
823
824
825
      const float pix = pi->x[0] - (cj->loc[0] + shift[0]);
      const float piy = pi->x[1] - (cj->loc[1] + shift[1]);
      const float piz = pi->x[2] - (cj->loc[2] + shift[2]);
826

827
828
      /* Loop over the parts in cj. */
      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
829

830
831
832
        /* Recover pj */
        struct part *pj = &parts_j[sort_j[pjd].i];
        const float hj = pj->h;
833
834
835
        const float pjx = pj->x[0] - cj->loc[0];
        const float pjy = pj->x[1] - cj->loc[1];
        const float pjz = pj->x[2] - cj->loc[2];
836
837

        /* Compute the pairwise distance. */
838
839
        float dx[3] = {pix - pjx, piy - pjy, piz - pjz};
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
840

841
#ifdef SWIFT_DEBUG_CHECKS
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
        /* Check that particles are in the correct frame after the shifts */
        if (pix > shift_threshold_x || pix < -shift_threshold_x)
          error(
              "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)",
              pix, ci->width[0]);
        if (piy > shift_threshold_y || piy < -shift_threshold_y)
          error(
              "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)",
              piy, ci->width[1]);
        if (piz > shift_threshold_z || piz < -shift_threshold_z)
          error(
              "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)",
              piz, ci->width[2]);
        if (pjx > shift_threshold_x || pjx < -shift_threshold_x)
          error(
              "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)",
              pjx, ci->width[0]);
        if (pjy > shift_threshold_y || pjy < -shift_threshold_y)
          error(
              "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)",
              pjy, ci->width[1]);
        if (pjz > shift_threshold_z || pjz < -shift_threshold_z)
          error(
              "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)",
              pjz, ci->width[2]);

868
869
870
871
872
        /* Check that particles have been drifted to the current time */
        if (pi->ti_drift != e->ti_current)
          error("Particle pi not drifted to current time");
        if (pj->ti_drift != e->ti_current)
          error("Particle pj not drifted to current time");
873
874
#endif

875
876
        /* Hit or miss? */
        if (r2 < hig2) {
877

878
          IACT_NONSYM(r2, dx, hi, hj, pi, pj);
879
880
        }
      } /* loop over the parts in cj. */
881
882
    }   /* loop over the parts in ci. */
  }     /* Cell ci is active */
883

884
  if (cell_is_active(cj, e)) {
885

886
887
888
    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
         pjd++) {
889

890
      /* Get a hold of the jth part in cj. */
891
      struct part *pj = &parts_j[sort_j[pjd].i];
892
      const float hj = pj->h;
893
894
895
896
897

      /* Skip inactive particles */
      if (!part_is_active(pj, e)) continue;

      /* Is there anything we need to interact with ? */
898
899
      const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift;
      if (dj - rshift > di_max) continue;
Matthieu Schaller's avatar
Matthieu Schaller committed
900

901
      /* Get some additional information about pj */
902
      const float hjg2 = hj * hj * kernel_gamma2;
903
904
905
      const float pjx = pj->x[0] - cj->loc[0];
      const float pjy = pj->x[1] - cj->loc[1];
      const float pjz = pj->x[2] - cj->loc[2];
906

907
908
909
      /* Loop over the parts in ci. */
      for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {

910
911
912
        /* Recover pi */
        struct part *pi = &parts_i[sort_i[pid].i];
        const float hi = pi->h;
913
914
915
        const float pix = pi->x[0] - (cj->loc[0] + shift[0]);
        const float piy = pi->x[1] - (cj->loc[1] + shift[1]);
        const float piz = pi->x[2] - (cj->loc[2] + shift[2]);
916
917

        /* Compute the pairwise distance. */
918
919
        float dx[3] = {pjx - pix, pjy - piy, pjz - piz};
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
920

921
#ifdef SWIFT_DEBUG_CHECKS
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
        /* Check that particles are in the correct frame after the shifts */
        if (pix > shift_threshold_x || pix < -shift_threshold_x)
          error(
              "Invalid particle position in X for pi (pix=%e ci->width[0]=%e)",
              pix, ci->width[0]);
        if (piy > shift_threshold_y || piy < -shift_threshold_y)
          error(
              "Invalid particle position in Y for pi (piy=%e ci->width[1]=%e)",
              piy, ci->width[1]);
        if (piz > shift_threshold_z || piz < -shift_threshold_z)
          error(
              "Invalid particle position in Z for pi (piz=%e ci->width[2]=%e)",
              piz, ci->width[2]);
        if (pjx > shift_threshold_x || pjx < -shift_threshold_x)
          error(
              "Invalid particle position in X for pj (pjx=%e ci->width[0]=%e)",
              pjx, ci->width[0]);
        if (pjy > shift_threshold_y || pjy < -shift_threshold_y)
          error(
              "Invalid particle position in Y for pj (pjy=%e ci->width[1]=%e)",
              pjy, ci->width[1]);
        if (pjz > shift_threshold_z || pjz < -shift_threshold_z)
          error(
              "Invalid particle position in Z for pj (pjz=%e ci->width[2]=%e)",
              pjz, ci->width[2]);

948
949
950
951
952
        /* Check that particles have been drifted to the current time */
        if (pi->ti_drift != e->ti_current)
          error("Particle pi not drifted to current time");
        if (pj->ti_drift != e->ti_current)
          error("Particle pj not drifted to current time");
953
954
#endif

955
956
        /* Hit or miss? */
        if (r2 < hjg2) {
957