runner_doiact.h 124 KB
Newer Older
1
/*******************************************************************************
2
 * This file is part of SWIFT.
3
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4
 *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
5
 *
6
7
8
9
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
10
 *
11
12
13
14
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
15
 *
16
17
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
 *
19
20
21
22
23
24
25
26
 ******************************************************************************/

/* Before including this file, define FUNCTION, which is the
   name of the interaction function. This creates the interaction functions
   runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION,
   and runner_dosub_FUNCTION calling the pairwise interaction function
   runner_iact_FUNCTION. */

27
#define PASTE(x, y) x##_##y
28

29
30
31
#define _DOPAIR1_BRANCH(f) PASTE(runner_dopair1_branch, f)
#define DOPAIR1_BRANCH _DOPAIR1_BRANCH(FUNCTION)

32
#define _DOPAIR1(f) PASTE(runner_dopair1, f)
33
#define DOPAIR1 _DOPAIR1(FUNCTION)
34

35
36
37
#define _DOPAIR2_BRANCH(f) PASTE(runner_dopair2_branch, f)
#define DOPAIR2_BRANCH _DOPAIR2_BRANCH(FUNCTION)

38
#define _DOPAIR2(f) PASTE(runner_dopair2, f)
39
#define DOPAIR2 _DOPAIR2(FUNCTION)
40

41
#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f)
42
#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)
43

44
45
46
#define _DOPAIR_SUBSET_BRANCH(f) PASTE(runner_dopair_subset_branch, f)
#define DOPAIR_SUBSET_BRANCH _DOPAIR_SUBSET_BRANCH(FUNCTION)

47
48
49
#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f)
#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION)

50
#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f)
Pedro Gonnet's avatar
Pedro Gonnet committed
51
52
#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION)

53
54
55
56
57
#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f)
#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION)

#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f)
#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION)
58

59
60
61
#define _DOSELF1_NAIVE(f) PASTE(runner_doself1_naive, f)
#define DOSELF1_NAIVE _DOSELF1_NAIVE(FUNCTION)

Matthieu Schaller's avatar
Matthieu Schaller committed
62
63
#define _DOSELF2_NAIVE(f) PASTE(runner_doself2_naive, f)
#define DOSELF2_NAIVE _DOSELF2_NAIVE(FUNCTION)
64

65
66
67
#define _DOSELF1_BRANCH(f) PASTE(runner_doself1_branch, f)
#define DOSELF1_BRANCH _DOSELF1_BRANCH(FUNCTION)

68
#define _DOSELF1(f) PASTE(runner_doself1, f)
69
#define DOSELF1 _DOSELF1(FUNCTION)
70

71
72
73
#define _DOSELF2_BRANCH(f) PASTE(runner_doself2_branch, f)
#define DOSELF2_BRANCH _DOSELF2_BRANCH(FUNCTION)

74
#define _DOSELF2(f) PASTE(runner_doself2, f)
75
#define DOSELF2 _DOSELF2(FUNCTION)
76

77
#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f)
78
#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)
79

80
81
82
#define _DOSELF_SUBSET_BRANCH(f) PASTE(runner_doself_subset_branch, f)
#define DOSELF_SUBSET_BRANCH _DOSELF_SUBSET_BRANCH(FUNCTION)

83
84
85
86
87
88
89
90
#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f)
#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION)

#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f)
#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION)

#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f)
#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION)
91

92
93
#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f)
#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION)
94

95
#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f)
96
#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)
97

98
#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f)
99
#define IACT_NONSYM _IACT_NONSYM(FUNCTION)
100

101
#define _IACT(f) PASTE(runner_iact, f)
102
#define IACT _IACT(FUNCTION)
103

104
105
106
107
108
109
#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)

#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
#define IACT_VEC _IACT_VEC(FUNCTION)

110
#define _TIMER_DOSELF(f) PASTE(timer_doself, f)
111
#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)
112

113
#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f)
114
#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)
Pedro Gonnet's avatar
Pedro Gonnet committed
115

116
117
118
119
120
#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f)
#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION)

#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f)
#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION)
121

122
#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f)
123
124
#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)

125
#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
126
127
#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)

128
129
130
/* Local headers. */
#include "space_getsid.h"

131
/**
Matthieu Schaller's avatar
Matthieu Schaller committed
132
133
134
 * @brief Compute the interactions between a cell pair (non-symmetric case).
 *
 * Inefficient version using a brute-force algorithm.
135
136
137
138
139
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
140
void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci,
141
                   struct cell *restrict cj) {
142
143

  const struct engine *e = r->e;
144
  const struct cosmology *cosmo = e->cosmology;
145
146
147
148

  TIMER_TIC;

  /* Anything to do here? */
149
  if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return;
150
151
152
153
154
155

  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;

156
157
158
159
  /* Cosmological terms */
  const float a = cosmo->a;
  const float H = cosmo->H;

160
161
162
163
164
165
166
167
168
169
170
171
172
173
  /* Get the relative distance between the pairs, wrapping. */
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts in ci. */
  for (int pid = 0; pid < count_i; pid++) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts_i[pid];
174
    const int pi_active = part_is_active(pi, e);
175
176
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
177
178
179
    const float pix[3] = {(float)(pi->x[0] - (cj->loc[0] + shift[0])),
                          (float)(pi->x[1] - (cj->loc[1] + shift[1])),
                          (float)(pi->x[2] - (cj->loc[2] + shift[2]))};
180
181
182
183
184
185

    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts_j[pjd];
186
      const float hj = pj->h;
187
      const float hjg2 = hj * hj * kernel_gamma2;
188
      const int pj_active = part_is_active(pj, e);
189
190

      /* Compute the pairwise distance. */
191
192
193
      const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]),
                            (float)(pj->x[1] - cj->loc[1]),
                            (float)(pj->x[2] - cj->loc[2])};
194
195
      float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
196

197
198
199
200
201
202
203
204
#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif

205
      /* Hit or miss? */
206
      if (r2 < hig2 && pi_active) {
207

208
        IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
209
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
210
        runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H);
lhausamm's avatar
lhausamm committed
211
#endif
212
      }
213
      if (r2 < hjg2 && pj_active) {
214

215
216
217
        dx[0] = -dx[0];
        dx[1] = -dx[1];
        dx[2] = -dx[2];
218

219
        IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H);
220
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
221
        runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H);
lhausamm's avatar
lhausamm committed
222
#endif
223
224
225
      }

    } /* loop over the parts in cj. */
226
  }   /* loop over the parts in ci. */
227
228
229
230

  TIMER_TOC(TIMER_DOPAIR);
}

Matthieu Schaller's avatar
Matthieu Schaller committed
231
232
233
234
235
236
237
238
239
/**
 * @brief Compute the interactions between a cell pair (symmetric case).
 *
 * Inefficient version using a brute-force algorithm.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
240
void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci,
241
                   struct cell *restrict cj) {
242

243
  const struct engine *e = r->e;
244
  const struct cosmology *cosmo = e->cosmology;
245

Matthieu Schaller's avatar
Matthieu Schaller committed
246
  TIMER_TIC;
247
248

  /* Anything to do here? */
249
  if (!cell_is_active_hydro(ci, e) && !cell_is_active_hydro(cj, e)) return;
250

251
252
253
254
255
  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;

256
257
258
259
  /* Cosmological terms */
  const float a = cosmo->a;
  const float H = cosmo->H;

260
  /* Get the relative distance between the pairs, wrapping. */
261
262
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
263
264
265
266
267
268
269
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts in ci. */
270
  for (int pid = 0; pid < count_i; pid++) {
271
272

    /* Get a hold of the ith part in ci. */
273
    struct part *restrict pi = &parts_i[pid];
274
    const int pi_active = part_is_active(pi, e);
275
276
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
277
278
279
    const float pix[3] = {(float)(pi->x[0] - (cj->loc[0] + shift[0])),
                          (float)(pi->x[1] - (cj->loc[1] + shift[1])),
                          (float)(pi->x[2] - (cj->loc[2] + shift[2]))};
280
281

    /* Loop over the parts in cj. */
282
    for (int pjd = 0; pjd < count_j; pjd++) {
283
284

      /* Get a pointer to the jth particle. */
285
      struct part *restrict pj = &parts_j[pjd];
286
      const int pj_active = part_is_active(pj, e);
287
      const float hj = pj->h;
288
      const float hjg2 = hj * hj * kernel_gamma2;
289
290

      /* Compute the pairwise distance. */
291
292
293
      const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]),
                            (float)(pj->x[1] - cj->loc[1]),
                            (float)(pj->x[2] - cj->loc[2])};
294
295
      float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
296

297
298
299
300
301
302
303
304
#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif

305
      /* Hit or miss? */
306
      if (r2 < hig2 || r2 < hjg2) {
307

308
        if (pi_active && pj_active) {
309

310
          IACT(r2, dx, hi, hj, pi, pj, a, H);
311
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
312
          runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H);
lhausamm's avatar
lhausamm committed
313
#endif
314
        } else if (pi_active) {
315

316
          IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
317
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
318
          runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H);
lhausamm's avatar
lhausamm committed
319
#endif
320
        } else if (pj_active) {
321

322
323
324
          dx[0] = -dx[0];
          dx[1] = -dx[1];
          dx[2] = -dx[2];
325

326
          IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H);
327
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
328
          runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H);
lhausamm's avatar
lhausamm committed
329
#endif
330
        }
331
332
      }
    } /* loop over the parts in cj. */
333
  }   /* loop over the parts in ci. */
334
335
336
337

  TIMER_TOC(TIMER_DOPAIR);
}

Matthieu Schaller's avatar
Matthieu Schaller committed
338
/**
339
 * @brief Compute the interactions within a cell (non-symmetric case).
Matthieu Schaller's avatar
Matthieu Schaller committed
340
341
342
343
344
345
 *
 * Inefficient version using a brute-force algorithm.
 *
 * @param r The #runner.
 * @param c The #cell.
 */
346
void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) {
347

348
  const struct engine *e = r->e;
349
  const struct cosmology *cosmo = e->cosmology;
350

Matthieu Schaller's avatar
Matthieu Schaller committed
351
  TIMER_TIC;
352
353

  /* Anything to do here? */
354
  if (!cell_is_active_hydro(c, e)) return;
355

356
357
358
359
  /* Cosmological terms */
  const float a = cosmo->a;
  const float H = cosmo->H;

360
361
  const int count = c->count;
  struct part *restrict parts = c->parts;
362
363

  /* Loop over the parts in ci. */
364
  for (int pid = 0; pid < count; pid++) {
365
366

    /* Get a hold of the ith part in ci. */
367
    struct part *restrict pi = &parts[pid];
368
    const int pi_active = part_is_active(pi, e);
369
370
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
371
372
373
    const float pix[3] = {(float)(pi->x[0] - c->loc[0]),
                          (float)(pi->x[1] - c->loc[1]),
                          (float)(pi->x[2] - c->loc[2])};
374

375
    /* Loop over the parts in cj. */
376
    for (int pjd = pid + 1; pjd < count; pjd++) {
377
378

      /* Get a pointer to the jth particle. */
379
      struct part *restrict pj = &parts[pjd];
380
      const float hj = pj->h;
381
      const float hjg2 = hj * hj * kernel_gamma2;
382
      const int pj_active = part_is_active(pj, e);
383
384

      /* Compute the pairwise distance. */
385
386
387
      const float pjx[3] = {(float)(pj->x[0] - c->loc[0]),
                            (float)(pj->x[1] - c->loc[1]),
                            (float)(pj->x[2] - c->loc[2])};
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
      float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];

      const int doi = pi_active && (r2 < hig2);
      const int doj = pj_active && (r2 < hjg2);

#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif

      /* Hit or miss? */
      if (doi && doj) {

405
        IACT(r2, dx, hi, hj, pi, pj, a, H);
406
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
407
        runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H);
lhausamm's avatar
lhausamm committed
408
#endif
409
410
      } else if (doi) {

411
        IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
412
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
413
        runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H);
lhausamm's avatar
lhausamm committed
414
#endif
415
416
417
418
419
420
      } else if (doj) {

        dx[0] = -dx[0];
        dx[1] = -dx[1];
        dx[2] = -dx[2];

421
        IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H);
422
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
423
        runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H);
lhausamm's avatar
lhausamm committed
424
#endif
425
      }
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
    } /* loop over the parts in cj. */
  }   /* loop over the parts in ci. */

  TIMER_TOC(TIMER_DOSELF);
}

/**
 * @brief Compute the interactions within a cell (symmetric case).
 *
 * Inefficient version using a brute-force algorithm.
 *
 * @param r The #runner.
 * @param c The #cell.
 */
void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) {

  const struct engine *e = r->e;
443
  const struct cosmology *cosmo = e->cosmology;
444
445
446
447

  TIMER_TIC;

  /* Anything to do here? */
448
  if (!cell_is_active_hydro(c, e)) return;
449

450
451
452
453
  /* Cosmological terms */
  const float a = cosmo->a;
  const float H = cosmo->H;

454
455
456
457
458
459
460
461
462
463
464
  const int count = c->count;
  struct part *restrict parts = c->parts;

  /* Loop over the parts in ci. */
  for (int pid = 0; pid < count; pid++) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts[pid];
    const int pi_active = part_is_active(pi, e);
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
465
466
467
    const float pix[3] = {(float)(pi->x[0] - c->loc[0]),
                          (float)(pi->x[1] - c->loc[1]),
                          (float)(pi->x[2] - c->loc[2])};
468
469
470
471
472
473
474

    /* Loop over the parts in cj. */
    for (int pjd = pid + 1; pjd < count; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts[pjd];
      const float hj = pj->h;
475
      const float hjg2 = hj * hj * kernel_gamma2;
476
      const int pj_active = part_is_active(pj, e);
477

478
      /* Compute the pairwise distance. */
479
480
481
      const float pjx[3] = {(float)(pj->x[0] - c->loc[0]),
                            (float)(pj->x[1] - c->loc[1]),
                            (float)(pj->x[2] - c->loc[2])};
482
483
      float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
484

485
486
      const int doi = pi_active && ((r2 < hig2) || (r2 < hjg2));
      const int doj = pj_active && ((r2 < hig2) || (r2 < hjg2));
487

488
489
490
491
492
493
494
#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif
495

496
497
      /* Hit or miss? */
      if (doi && doj) {
498

499
        IACT(r2, dx, hi, hj, pi, pj, a, H);
500
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
501
        runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H);
lhausamm's avatar
lhausamm committed
502
#endif
503
      } else if (doi) {
504

505
        IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
506
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
507
        runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H);
lhausamm's avatar
lhausamm committed
508
#endif
509
      } else if (doj) {
510

511
512
513
        dx[0] = -dx[0];
        dx[1] = -dx[1];
        dx[2] = -dx[2];
514

515
        IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H);
516
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
517
        runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H);
lhausamm's avatar
lhausamm committed
518
#endif
519
520
521
      }
    } /* loop over the parts in cj. */
  }   /* loop over the parts in ci. */
522
523
524

  TIMER_TOC(TIMER_DOSELF);
}
525

526
527
528
529
/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
Matthieu Schaller's avatar
Matthieu Schaller committed
530
531
 * Version using a brute-force algorithm.
 *
532
533
 * @param r The #runner.
 * @param ci The first #cell.
534
 * @param parts_i The #part to interact with @c cj.
535
536
537
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
538
 * @param shift The shift vector to apply to the particles in ci.
539
 */
540
541
void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
                         struct part *restrict parts_i, int *restrict ind,
James Willis's avatar
James Willis committed
542
543
                         int count, struct cell *restrict cj,
                         const double *shift) {
544

545
546
547
  const struct engine *e = r->e;
  const struct cosmology *cosmo = e->cosmology;

Matthieu Schaller's avatar
Matthieu Schaller committed
548
  TIMER_TIC;
549
550
551
552

  const int count_j = cj->count;
  struct part *restrict parts_j = cj->parts;

553
554
555
556
  /* Cosmological terms */
  const float a = cosmo->a;
  const float H = cosmo->H;

557
558
559
560
561
562
563
564
565
566
  /* Loop over the parts_i. */
  for (int pid = 0; pid < count; pid++) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts_i[ind[pid]];
    double pix[3];
    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;

567
568
569
570
571
#ifdef SWIFT_DEBUG_CHECKS
    if (!part_is_active(pi, e))
      error("Trying to correct smoothing length of inactive particle !");
#endif

572
573
574
575
576
577
578
579
580
581
582
583
584
    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts_j[pjd];

      /* Compute the pairwise distance. */
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }
585

586
587
588
589
590
591
592
593
#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif

594
595
596
      /* Hit or miss? */
      if (r2 < hig2) {

597
        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj, a, H);
598
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
599
        runner_iact_nonsym_chemistry(r2, dx, hi, pj->h, pi, pj, a, H);
lhausamm's avatar
lhausamm committed
600
#endif
601
602
      }
    } /* loop over the parts in cj. */
603
  }   /* loop over the parts in ci. */
604

605
  TIMER_TOC(timer_dopair_subset_naive);
606
607
608
609
610
611
612
613
614
615
616
617
}

/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param parts_i The #part to interact with @c cj.
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
618
619
620
 * @param sid The direction of the pair.
 * @param flipped Flag to check whether the cells have been flipped or not.
 * @param shift The shift vector to apply to the particles in ci.
621
 */
622
623
void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts_i, int *restrict ind, int count,
James Willis's avatar
James Willis committed
624
625
                   struct cell *restrict cj, const int sid, const int flipped,
                   const double *shift) {
626

627
628
629
  const struct engine *e = r->e;
  const struct cosmology *cosmo = e->cosmology;

Matthieu Schaller's avatar
Matthieu Schaller committed
630
  TIMER_TIC;
631

632
633
  const int count_j = cj->count;
  struct part *restrict parts_j = cj->parts;
James Willis's avatar
James Willis committed
634

635
636
637
638
  /* Cosmological terms */
  const float a = cosmo->a;
  const float H = cosmo->H;

639
  /* Pick-out the sorted lists. */
640
  const struct entry *restrict sort_j = cj->sort[sid];
641
  const float dxj = cj->dx_max_sort;
642
643
644
645
646

  /* Parts are on the left? */
  if (!flipped) {

    /* Loop over the parts_i. */
647
    for (int pid = 0; pid < count; pid++) {
648
649

      /* Get a hold of the ith part in ci. */
650
      struct part *restrict pi = &parts_i[ind[pid]];
651
652
653
      const double pix = pi->x[0] - (shift[0]);
      const double piy = pi->x[1] - (shift[1]);
      const double piz = pi->x[2] - (shift[2]);
654
655
      const float hi = pi->h;
      const float hig2 = hi * hi * kernel_gamma2;
656
      const double di = hi * kernel_gamma + dxj + pix * runner_shift[sid][0] +
657
                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
658
659

      /* Loop over the parts in cj. */
660
      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
661
662

        /* Get a pointer to the jth particle. */
663
        struct part *restrict pj = &parts_j[sort_j[pjd].i];
664
        const float hj = pj->h;
665
666
667
        const double pjx = pj->x[0];
        const double pjy = pj->x[1];
        const double pjz = pj->x[2];
668

669
        /* Compute the pairwise distance. */
670
671
        float dx[3] = {(float)(pix - pjx), (float)(piy - pjy),
                       (float)(piz - pjz)};
672
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
673

674
675
676
677
678
679
680
681
#ifdef SWIFT_DEBUG_CHECKS
        /* Check that particles have been drifted to the current time */
        if (pi->ti_drift != e->ti_current)
          error("Particle pi not drifted to current time");
        if (pj->ti_drift != e->ti_current)
          error("Particle pj not drifted to current time");
#endif

682
683
684
        /* Hit or miss? */
        if (r2 < hig2) {

685
          IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
686
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
687
          runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H);
lhausamm's avatar
lhausamm committed
688
#endif
689
        }
690
      } /* loop over the parts in cj. */
691
    }   /* loop over the parts in ci. */
692
693
694
695
696
697
  }

  /* Parts are on the right. */
  else {

    /* Loop over the parts_i. */
698
    for (int pid = 0; pid < count; pid++) {
699
700

      /* Get a hold of the ith part in ci. */
701
      struct part *restrict pi = &parts_i[ind[pid]];
702
703
704
      const double pix = pi->x[0] - (shift[0]);
      const double piy = pi->x[1] - (shift[1]);
      const double piz = pi->x[2] - (shift[2]);
705
706
      const float hi = pi->h;
      const float hig2 = hi * hi * kernel_gamma2;
707
708
      const double di = -hi * kernel_gamma - dxj + pix * runner_shift[sid][0] +
                        piy * runner_shift[sid][1] + piz * runner_shift[sid][2];
709
710

      /* Loop over the parts in cj. */
711
      for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
712
713

        /* Get a pointer to the jth particle. */
714
        struct part *restrict pj = &parts_j[sort_j[pjd].i];
715
        const float hj = pj->h;
716
717
718
        const double pjx = pj->x[0];
        const double pjy = pj->x[1];
        const double pjz = pj->x[2];
719

720
        /* Compute the pairwise distance. */
721
722
        float dx[3] = {(float)(pix - pjx), (float)(piy - pjy),
                       (float)(piz - pjz)};
723
        const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
724

725
726
727
728
729
730
731
732
#ifdef SWIFT_DEBUG_CHECKS
        /* Check that particles have been drifted to the current time */
        if (pi->ti_drift != e->ti_current)
          error("Particle pi not drifted to current time");
        if (pj->ti_drift != e->ti_current)
          error("Particle pj not drifted to current time");
#endif

733
734
        /* Hit or miss? */
        if (r2 < hig2) {
735

736
          IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
737
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
738
          runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H);
lhausamm's avatar
lhausamm committed
739
#endif
740
741
        }
      } /* loop over the parts in cj. */
742
    }   /* loop over the parts in ci. */
743
744
745
746
  }

  TIMER_TOC(timer_dopair_subset);
}
Pedro Gonnet's avatar
Pedro Gonnet committed
747

748
/**
James Willis's avatar
James Willis committed
749
750
 * @brief Determine which version of DOPAIR_SUBSET needs to be called depending
 * on the
751
752
753
754
755
756
757
758
759
760
 * orientation of the cells or whether DOPAIR_SUBSET needs to be called at all.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param parts_i The #part to interact with @c cj.
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
 */
void DOPAIR_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci,
James Willis's avatar
James Willis committed
761
762
                          struct part *restrict parts_i, int *restrict ind,
                          int count, struct cell *restrict cj) {
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796

  const struct engine *e = r->e;

  /* Get the relative distance between the pairs, wrapping. */
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

#if !defined(SWIFT_USE_NAIVE_INTERACTIONS)
  /* Get the sorting index. */
  int sid = 0;
  for (int k = 0; k < 3; k++)
    sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
                         ? 0
                         : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);

  /* Switch the cells around? */
  const int flipped = runner_flip[sid];
  sid = sortlistID[sid];

  /* Has the cell cj been sorted? */
  if (!(cj->sorted & (1 << sid)) ||
      cj->dx_max_sort_old > space_maxreldx * cj->dmin)
    error("Interacting unsorted cells.");
#endif

#if defined(SWIFT_USE_NAIVE_INTERACTIONS)
  DOPAIR_SUBSET_NAIVE(r, ci, parts_i, ind, count, cj, shift);
#elif defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
  if (sort_is_face(sid))
James Willis's avatar
James Willis committed
797
798
    runner_dopair_subset_density_vec(r, ci, parts_i, ind, count, cj, sid,
                                     flipped, shift);
799
800
801
802
803
804
805
  else
    DOPAIR_SUBSET(r, ci, parts_i, ind, count, cj, sid, flipped, shift);
#else
  DOPAIR_SUBSET(r, ci, parts_i, ind, count, cj, sid, flipped, shift);
#endif
}

806
807
808
809
810
811
/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
812
 * @param parts The #part to interact.
813
814
815
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 */
816
817
818
void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts, int *restrict ind, int count) {

819
  const struct engine *e = r->e;
820
  const struct cosmology *cosmo = e->cosmology;
821

Matthieu Schaller's avatar
Matthieu Schaller committed
822
  TIMER_TIC;
823

824
825
826
827
  /* Cosmological terms */
  const float a = cosmo->a;
  const float H = cosmo->H;

828
829
  const int count_i = ci->count;
  struct part *restrict parts_j = ci->parts;
830
831

  /* Loop over the parts in ci. */
832
  for (int pid = 0; pid < count; pid++) {
833
834

    /* Get a hold of the ith part in ci. */
835
    struct part *pi = &parts[ind[pid]];
836
837
838
    const float pix[3] = {(float)(pi->x[0] - ci->loc[0]),
                          (float)(pi->x[1] - ci->loc[1]),
                          (float)(pi->x[2] - ci->loc[2])};
839
840
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
841

842
843
844
845
#ifdef SWIFT_DEBUG_CHECKS
    if (!part_is_active(pi, e)) error("Inactive particle in subset function!");
#endif

846
    /* Loop over the parts in cj. */
847
    for (int pjd = 0; pjd < count_i; pjd++) {
848
849

      /* Get a pointer to the jth particle. */
850
      struct part *restrict pj = &parts_j[pjd];
851
      const float hj = pj->h;
852

853
      /* Compute the pairwise distance. */
854
855
856
      const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]),
                            (float)(pj->x[1] - ci->loc[1]),
                            (float)(pj->x[2] - ci->loc[2])};
857
858
      float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]};
      const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
859

860
861
862
863
864
865
866
867
#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif

868
      /* Hit or miss? */
869
      if (r2 > 0.f && r2 < hig2) {
870

871
        IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H);
872
#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY)
873
        runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H);
lhausamm's avatar
lhausamm committed
874
#endif
875
876
      }
    } /* loop over the parts in cj. */
877
  }   /* loop over the parts in ci. */
878

879
  TIMER_TOC(timer_doself_subset);
880
}
881

882
883
884
/**
 * @brief Determine which version of DOSELF_SUBSET needs to be called depending
 * on the optimisation level.
Matthieu Schaller's avatar
Matthieu Schaller committed
885

886
887
888
889
890
891
892
893
894
895
896
 * @param r The #runner.
 * @param ci The first #cell.
 * @param parts The #part to interact.
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 */
void DOSELF_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci,
                          struct part *restrict parts, int *restrict ind,
                          int count) {

#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH)
Matthieu Schaller's avatar
Matthieu Schaller committed
897
  runner_doself_subset_density_vec(r, ci, parts, ind, count);
898
#else
Matthieu Schaller's avatar
Matthieu Schaller committed
899
  DOSELF_SUBSET(r, ci, parts, ind, count);
900
901
902
#endif
}