/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

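/* Before including this file, define FUNCTION, which is the
   name of the interaction function. The macros below paste that name onto
   fixed prefixes, so this file creates the interaction functions
   runner_dopair1_FUNCTION, runner_dopair2_FUNCTION,
   runner_dopair_naive_FUNCTION, runner_doself1_FUNCTION,
   runner_doself2_FUNCTION, runner_dosub_*_FUNCTION, etc., which call the
   pairwise interaction functions runner_iact_FUNCTION and
   runner_iact_nonsym_FUNCTION. */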

27
/* Token-pasting helper: PASTE(a, b) yields the single identifier a_b. */
#define PASTE(x, y) x##_##y

/* Each public macro below is defined through a function-like helper (leading
   underscore) so that FUNCTION is macro-expanded *before* it reaches the ##
   operator inside PASTE. */

/* Pair interactions between two cells. */
#define _DOPAIR1_BRANCH(f) PASTE(runner_dopair1_branch, f)
#define DOPAIR1_BRANCH _DOPAIR1_BRANCH(FUNCTION)

#define _DOPAIR1(f) PASTE(runner_dopair1, f)
#define DOPAIR1 _DOPAIR1(FUNCTION)

#define _DOPAIR2(f) PASTE(runner_dopair2, f)
#define DOPAIR2 _DOPAIR2(FUNCTION)

#define _DOPAIR1_NOSORT(f) PASTE(runner_dopair1_nosort, f)
#define DOPAIR1_NOSORT _DOPAIR1_NOSORT(FUNCTION)

#define _DOPAIR2_NOSORT(f) PASTE(runner_dopair2_nosort, f)
#define DOPAIR2_NOSORT _DOPAIR2_NOSORT(FUNCTION)

#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f)
#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)

#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f)
#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION)

#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f)
#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION)

#define _DOPAIR_NAIVE(f) PASTE(runner_dopair_naive, f)
#define DOPAIR_NAIVE _DOPAIR_NAIVE(FUNCTION)

/* Self interactions within one cell. */
#define _DOSELF_NAIVE(f) PASTE(runner_doself_naive, f)
#define DOSELF_NAIVE _DOSELF_NAIVE(FUNCTION)

#define _DOSELF1(f) PASTE(runner_doself1, f)
#define DOSELF1 _DOSELF1(FUNCTION)

#define _DOSELF2(f) PASTE(runner_doself2, f)
#define DOSELF2 _DOSELF2(FUNCTION)

#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f)
#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)

/* Sub-cell (recursive) variants. */
#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f)
#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION)

#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f)
#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION)

#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f)
#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION)

#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f)
#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION)

#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f)
#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)

/* Particle-particle interaction kernels (scalar and vectorised). */
#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f)
#define IACT_NONSYM _IACT_NONSYM(FUNCTION)

#define _IACT(f) PASTE(runner_iact, f)
#define IACT _IACT(FUNCTION)

#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)

#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
#define IACT_VEC _IACT_VEC(FUNCTION)

/* Timer identifiers matching the functions above. */
#define _TIMER_DOSELF(f) PASTE(timer_doself, f)
#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)

#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f)
#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)

#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f)
#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION)

#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f)
#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION)

#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f)
#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)

#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)

113
114
#include "runner_doiact_nosort.h"

115
116
117
118
119
120
121
/**
 * @brief Compute the interactions between a cell pair.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
122
123
124
void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci,
                  struct cell *restrict cj) {

125
126
127
128
  const struct engine *e = r->e;

  error("Don't use in actual runs ! Slow code !");

129
#ifdef WITH_OLD_VECTORIZATION
130
131
132
133
134
135
136
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
Matthieu Schaller's avatar
Matthieu Schaller committed
137
  TIMER_TIC;
138
139

  /* Anything to do here? */
140
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
141

142
143
144
145
146
  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;

147
  /* Get the relative distance between the pairs, wrapping. */
148
149
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
150
151
152
153
154
155
156
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts in ci. */
157
  for (int pid = 0; pid < count_i; pid++) {
158
159

    /* Get a hold of the ith part in ci. */
160
161
162
163
164
165
    struct part *restrict pi = &parts_i[pid];
    const float hi = pi->h;

    double pix[3];
    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    const float hig2 = hi * hi * kernel_gamma2;
166
167

    /* Loop over the parts in cj. */
168
    for (int pjd = 0; pjd < count_j; pjd++) {
169
170

      /* Get a pointer to the jth particle. */
171
      struct part *restrict pj = &parts_j[pjd];
172
173

      /* Compute the pairwise distance. */
174
175
176
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
177
178
179
180
181
182
183
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {

184
#ifndef WITH_OLD_VECTORIZATION
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204

        IACT(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
205
206
        }

207
208
209
210
211
212
213
#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */

214
#ifdef WITH_OLD_VECTORIZATION
215
216
  /* Pick up any leftovers. */
  if (icount > 0)
217
    for (int k = 0; k < icount; k++)
218
219
220
221
222
223
224
225
      IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

  TIMER_TOC(TIMER_DOPAIR);
}

void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {

226
  const struct engine *e = r->e;
227
228
229

  error("Don't use in actual runs ! Slow code !");

230
#ifdef WITH_OLD_VECTORIZATION
231
232
233
234
235
236
237
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
238

Matthieu Schaller's avatar
Matthieu Schaller committed
239
  TIMER_TIC;
240
241

  /* Anything to do here? */
242
  if (!cell_is_active(c, e)) return;
243

244
245
  const int count = c->count;
  struct part *restrict parts = c->parts;
246
247

  /* Loop over the parts in ci. */
248
  for (int pid = 0; pid < count; pid++) {
249
250

    /* Get a hold of the ith part in ci. */
251
252
253
254
    struct part *restrict pi = &parts[pid];
    const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]};
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
255

256
    /* Loop over the parts in cj. */
257
    for (int pjd = pid + 1; pjd < count; pjd++) {
258
259

      /* Get a pointer to the jth particle. */
260
      struct part *restrict pj = &parts[pjd];
261
262

      /* Compute the pairwise distance. */
263
264
265
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
266
267
268
269
270
271
272
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {

273
#ifndef WITH_OLD_VECTORIZATION
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294

        IACT(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }
295

296
297
#endif
      }
298

299
    } /* loop over the parts in cj. */
300

301
302
  } /* loop over the parts in ci. */

303
#ifdef WITH_OLD_VECTORIZATION
304
305
  /* Pick up any leftovers. */
  if (icount > 0)
306
    for (int k = 0; k < icount; k++)
307
308
      IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif
309

310
311
  TIMER_TOC(TIMER_DOSELF);
}
312

313
314
315
316
317
318
/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
319
 * @param parts_i The #part to interact with @c cj.
320
321
322
323
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
 */
324
325
326
327
328
329
330
331
void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
                         struct part *restrict parts_i, int *restrict ind,
                         int count, struct cell *restrict cj) {

  struct engine *e = r->e;

  error("Don't use in actual runs ! Slow code !");

332
#ifdef WITH_OLD_VECTORIZATION
333
334
335
336
337
338
339
340
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif

Matthieu Schaller's avatar
Matthieu Schaller committed
341
  TIMER_TIC;
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377

  const int count_j = cj->count;
  struct part *restrict parts_j = cj->parts;

  /* Get the relative distance between the pairs, wrapping. */
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts_i. */
  for (int pid = 0; pid < count; pid++) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts_i[ind[pid]];
    double pix[3];
    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;

    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts_j[pjd];

      /* Compute the pairwise distance. */
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }
378

379
380
381
      /* Hit or miss? */
      if (r2 < hig2) {

382
#ifndef WITH_OLD_VECTORIZATION
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */
411

412
#ifdef WITH_OLD_VECTORIZATION
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
  /* Pick up any leftovers. */
  if (icount > 0)
    for (int k = 0; k < icount; k++)
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

  TIMER_TOC(timer_dopair_subset);
}

/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param parts_i The #part to interact with @c cj.
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
 */
433
434
435
436
437
void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts_i, int *restrict ind, int count,
                   struct cell *restrict cj) {

  struct engine *e = r->e;
438

439
#ifdef WITH_MPI
Matthieu Schaller's avatar
Matthieu Schaller committed
440
  if (ci->nodeID != cj->nodeID) {
441
442
443
444
445
    DOPAIR_SUBSET_NOSORT(r, ci, parts_i, ind, count, cj);
    return;
  }
#endif

446
#ifdef WITH_OLD_VECTORIZATION
447
448
449
450
451
452
453
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
454

Matthieu Schaller's avatar
Matthieu Schaller committed
455
  TIMER_TIC;
456

457
458
459
  const int count_j = cj->count;
  struct part *restrict parts_j = cj->parts;

460
  /* Get the relative distance between the pairs, wrapping. */
461
462
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
463
464
465
466
467
468
469
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Get the sorting index. */
470
471
  int sid = 0;
  for (int k = 0; k < 3; k++)
472
473
474
475
476
    sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
                         ? 0
                         : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);

  /* Switch the cells around? */
477
  const int flipped = runner_flip[sid];
478
479
480
481
482
483
  sid = sortlistID[sid];

  /* Have the cells been sorted? */
  if (!(cj->sorted & (1 << sid))) error("Trying to interact unsorted cells.");

  /* Pick-out the sorted lists. */
484
485
  const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
  const float dxj = cj->dx_max;
486
487
488
489
490

  /* Parts are on the left? */
  if (!flipped) {

    /* Loop over the parts_i. */
491
    for (int pid = 0; pid < count; pid++) {
492
493

      /* Get a hold of the ith part in ci. */
494
495
496
497
498
499
500
501
502
      struct part *restrict pi = &parts_i[ind[pid]];
      double pix[3];
      for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];

      const float hi = pi->h;
      const float hig2 = hi * hi * kernel_gamma2;
      const float di = hi * kernel_gamma + dxj + pix[0] * runner_shift[sid][0] +
                       pix[1] * runner_shift[sid][1] +
                       pix[2] * runner_shift[sid][2];
503
504

      /* Loop over the parts in cj. */
505
      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
506
507

        /* Get a pointer to the jth particle. */
508
        struct part *restrict pj = &parts_j[sort_j[pjd].i];
509
510

        /* Compute the pairwise distance. */
511
512
513
        float r2 = 0.0f;
        float dx[3];
        for (int k = 0; k < 3; k++) {
514
515
          dx[k] = pix[k] - pj->x[k];
          r2 += dx[k] * dx[k];
516
        }
517
518
519
520

        /* Hit or miss? */
        if (r2 < hig2) {

521
#ifndef WITH_OLD_VECTORIZATION
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544

          IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

          /* Add this interaction to the queue. */
          r2q[icount] = r2;
          dxq[3 * icount + 0] = dx[0];
          dxq[3 * icount + 1] = dx[1];
          dxq[3 * icount + 2] = dx[2];
          hiq[icount] = hi;
          hjq[icount] = pj->h;
          piq[icount] = pi;
          pjq[icount] = pj;
          icount += 1;

          /* Flush? */
          if (icount == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
            icount = 0;
          }

#endif
545
        }
546
547
548
549
550
551
552
553
554
555
556

      } /* loop over the parts in cj. */

    } /* loop over the parts in ci. */

  }

  /* Parts are on the right. */
  else {

    /* Loop over the parts_i. */
557
    for (int pid = 0; pid < count; pid++) {
558
559

      /* Get a hold of the ith part in ci. */
560
561
562
563
564
565
566
567
      struct part *restrict pi = &parts_i[ind[pid]];
      double pix[3];
      for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
      const float hi = pi->h;
      const float hig2 = hi * hi * kernel_gamma2;
      const float di =
          -hi * kernel_gamma - dxj + pix[0] * runner_shift[sid][0] +
          pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2];
568
569

      /* Loop over the parts in cj. */
570
      for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
571
572

        /* Get a pointer to the jth particle. */
573
        struct part *restrict pj = &parts_j[sort_j[pjd].i];
574
575

        /* Compute the pairwise distance. */
576
577
578
        float r2 = 0.0f;
        float dx[3];
        for (int k = 0; k < 3; k++) {
579
580
          dx[k] = pix[k] - pj->x[k];
          r2 += dx[k] * dx[k];
581
        }
582

583
584
        /* Hit or miss? */
        if (r2 < hig2) {
585

586
#ifndef WITH_OLD_VECTORIZATION
587
588

          IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
589

590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
#else

          /* Add this interaction to the queue. */
          r2q[icount] = r2;
          dxq[3 * icount + 0] = dx[0];
          dxq[3 * icount + 1] = dx[1];
          dxq[3 * icount + 2] = dx[2];
          hiq[icount] = hi;
          hjq[icount] = pj->h;
          piq[icount] = pi;
          pjq[icount] = pj;
          icount += 1;

          /* Flush? */
          if (icount == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
            icount = 0;
          }

#endif
        }

      } /* loop over the parts in cj. */

    } /* loop over the parts in ci. */
  }

617
#ifdef WITH_OLD_VECTORIZATION
618
619
  /* Pick up any leftovers. */
  if (icount > 0)
620
    for (int k = 0; k < icount; k++)
621
622
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif
Pedro Gonnet's avatar
Pedro Gonnet committed
623

624
625
  TIMER_TOC(timer_dopair_subset);
}
Pedro Gonnet's avatar
Pedro Gonnet committed
626

627
628
629
630
631
632
/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
633
 * @param parts The #part to interact.
634
635
636
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 */
637
638
639
void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts, int *restrict ind, int count) {

640
#ifdef WITH_OLD_VECTORIZATION
641
642
643
644
645
646
647
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
648

Matthieu Schaller's avatar
Matthieu Schaller committed
649
  TIMER_TIC;
650

651
652
  const int count_i = ci->count;
  struct part *restrict parts_j = ci->parts;
653
654

  /* Loop over the parts in ci. */
655
  for (int pid = 0; pid < count; pid++) {
656
657

    /* Get a hold of the ith part in ci. */
658
659
660
661
    struct part *restrict pi = &parts[ind[pid]];
    const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]};
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
662

663
    /* Loop over the parts in cj. */
664
    for (int pjd = 0; pjd < count_i; pjd++) {
665
666

      /* Get a pointer to the jth particle. */
667
      struct part *restrict pj = &parts_j[pjd];
668
669

      /* Compute the pairwise distance. */
670
671
672
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
673
674
675
676
677
678
679
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 > 0.0f && r2 < hig2) {

680
#ifndef WITH_OLD_VECTORIZATION
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */
709

710
#ifdef WITH_OLD_VECTORIZATION
711
712
  /* Pick up any leftovers. */
  if (icount > 0)
713
    for (int k = 0; k < icount; k++)
714
715
716
717
718
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

  TIMER_TOC(timer_dopair_subset);
}
719

720
/**
721
 * @brief Compute the interactions between a cell pair (non-symmetric).
722
723
724
725
726
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
727
728
void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {

729
  const struct engine *restrict e = r->e;
730

731
#ifdef WITH_MPI
Matthieu Schaller's avatar
Matthieu Schaller committed
732
  if (ci->nodeID != cj->nodeID) {
733
734
735
736
737
    DOPAIR1_NOSORT(r, ci, cj);
    return;
  }
#endif

738
#ifdef WITH_OLD_VECTORIZATION
739
740
741
742
743
744
745
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
746

Matthieu Schaller's avatar
Matthieu Schaller committed
747
  TIMER_TIC;
748
749

  /* Anything to do here? */
750
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
751

752
753
  if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e);
  if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e);
754

755
  /* Get the sort ID. */
756
757
  double shift[3] = {0.0, 0.0, 0.0};
  const int sid = space_getsid(e->s, &ci, &cj, shift);
758
759
760
761
762
763

  /* Have the cells been sorted? */
  if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
    error("Trying to interact unsorted cells.");

  /* Get the cutoff shift. */
764
765
  double rshift = 0.0;
  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
766
767

  /* Pick-out the sorted lists. */
768
769
  const struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)];
  const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
770
771

  /* Get some other useful values. */
772
773
774
775
776
777
778
779
780
  const double hi_max = ci->h_max * kernel_gamma - rshift;
  const double hj_max = cj->h_max * kernel_gamma;
  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;
  const double di_max = sort_i[count_i - 1].d - rshift;
  const double dj_min = sort_j[0].d;
  const float dx_max = (ci->dx_max + cj->dx_max);
781

782
  if (cell_is_active(ci, e)) {
783

784
785
786
    /* Loop over the parts in ci. */
    for (int pid = count_i - 1;
         pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
787

788
789
790
791
792
793
      /* Get a hold of the ith part in ci. */
      struct part *restrict pi = &parts_i[sort_i[pid].i];
      if (!part_is_active(pi, e)) continue;
      const float hi = pi->h;
      const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
      if (di < dj_min) continue;
794

795
796
797
      double pix[3];
      for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
      const float hig2 = hi * hi * kernel_gamma2;
798

799
800
      /* Loop over the parts in cj. */
      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
801

802
803
804
805
806
807
808
809
810
811
        /* Get a pointer to the jth particle. */
        struct part *restrict pj = &parts_j[sort_j[pjd].i];

        /* Compute the pairwise distance. */
        float r2 = 0.0f;
        float dx[3];
        for (int k = 0; k < 3; k++) {
          dx[k] = pix[k] - pj->x[k];
          r2 += dx[k] * dx[k];
        }
812

813
#ifdef SWIFT_DEBUG_CHECKS
814
815
816
817
818
        /* Check that particles have been drifted to the current time */
        if (pi->ti_drift != e->ti_current)
          error("Particle pi not drifted to current time");
        if (pj->ti_drift != e->ti_current)
          error("Particle pj not drifted to current time");
819
820
#endif

821
822
        /* Hit or miss? */
        if (r2 < hig2) {
823

824
#ifndef WITH_OLD_VECTORIZATION
825

826
          IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
827
828
829

#else

830
831
832
833
834
835
836
837
838
839
          /* Add this interaction to the queue. */
          r2q[icount] = r2;
          dxq[3 * icount + 0] = dx[0];
          dxq[3 * icount + 1] = dx[1];
          dxq[3 * icount + 2] = dx[2];
          hiq[icount] = hi;
          hjq[icount] = pj->h;
          piq[icount] = pi;
          pjq[icount] = pj;
          icount += 1;
840

841
842
843
844
845
          /* Flush? */
          if (icount == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
            icount = 0;
          }
846
847

#endif
848
        }
849

850
      } /* loop over the parts in cj. */
851

852
    } /* loop over the parts in ci. */
853

854
  } /* Cell ci is active */
855

856
  if (cell_is_active(cj, e)) {
857

858
859
860
    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max;
         pjd++) {
861

862
863
864
865
866
867
      /* Get a hold of the jth part in cj. */
      struct part *restrict pj = &parts_j[sort_j[pjd].i];
      if (!part_is_active(pj, e)) continue;
      const float hj = pj->h;
      const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max - rshift;
      if (dj > di_max) continue;
868

869
870
871
      double pjx[3];
      for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k];
      const float hjg2 = hj * hj * kernel_gamma2;
872

873
874
875
876
877
878
879
880
881
882
883
884
885
      /* Loop over the parts in ci. */
      for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d > dj; pid--) {

        /* Get a pointer to the jth particle. */
        struct part *restrict pi = &parts_i[sort_i[pid].i];

        /* Compute the pairwise distance. */
        float r2 = 0.0f;
        float dx[3];
        for (int k = 0; k < 3; k++) {
          dx[k] = pjx[k] - pi->x[k];
          r2 += dx[k] * dx[k];
        }
886

887
#ifdef SWIFT_DEBUG_CHECKS
888
889
890
891
892
        /* Check that particles have been drifted to the current time */
        if (pi->ti_drift != e->ti_current)
          error("Particle pi not drifted to current time");
        if (pj->ti_drift != e->ti_current)
          error("Particle pj not drifted to current time");
893
894
#endif

895
896
        /* Hit or miss? */
        if (r2 < hjg2) {
897

898
#ifndef WITH_OLD_VECTORIZATION
899

900
          IACT_NONSYM(r2, dx, hj, pi->h, pj, pi);
901
902
903

#else

904
905
906
907
908
909
910
911
912
913
          /* Add this interaction to the queue. */
          r2q[icount] = r2;
          dxq[3 * icount + 0] = dx[0];
          dxq[3 * icount + 1] = dx[1];
          dxq[3 * icount + 2] = dx[2];
          hiq[icount] = hj;
          hjq[icount] = pi->h;
          piq[icount] = pj;
          pjq[icount] = pi;
          icount += 1;
914

915
916
917
918
919
          /* Flush? */
          if (icount == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
            icount = 0;
          }
920
921

#endif
922
923
924
        }

      } /* loop over the parts in ci. */
925
926
927

    } /* loop over the parts in cj. */

928
  } /* Cell cj is active */
929

930
#ifdef WITH_OLD_VECTORIZATION
931
932
  /* Pick up any leftovers. */
  if (icount > 0)
933
    for (int k = 0; k < icount; k++)
934
935
936
937
938
939
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

  TIMER_TOC(TIMER_DOPAIR);
}

940
941
942
943
944
945
946
/**
 * @brief Compute the interactions between a cell pair (symmetric)
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
947
948
949
void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {

  struct engine *restrict e = r->e;
950

951
#ifdef WITH_MPI
Matthieu Schaller's avatar
Matthieu Schaller committed
952
  if (ci->nodeID != cj->nodeID) {
953
954
955
956
957
    DOPAIR2_NOSORT(r, ci, cj);
    return;
  }
#endif

958
#ifdef WITH_OLD_VECTORIZATION
959
960
961
962
963
964
965
966
967
968
969
970
971
  int icount1 = 0;
  float r2q1[VEC_SIZE] __attribute__((aligned(16)));
  float hiq1[VEC_SIZE] __attribute__((aligned(16)));
  float hjq1[VEC_SIZE] __attribute__((aligned(16)));
  float dxq1[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq1[VEC_SIZE], *pjq1[VEC_SIZE];
  int icount2 = 0;
  float r2q2[VEC_SIZE] __attribute__((aligned(16)));
  float hiq2[VEC_SIZE] __attribute__((aligned(16)));
  float hjq2[VEC_SIZE] __attribute__((aligned(16)));
  float dxq2[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq2[VEC_SIZE], *pjq2[VEC_SIZE];
#endif
972

Matthieu Schaller's avatar
Matthieu Schaller committed
973
  TIMER_TIC;
974
975

  /* Anything to do here? */
976
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
977

978
979
980
  if (!cell_is_drifted(ci, e)) error("Cell ci not drifted");
  if (!cell_is_drifted(cj, e)) error("Cell cj not drifted");

981
  /* Get the shift ID. */
982
983
  double shift[3] = {0.0, 0.0, 0.0};
  const int sid = space_getsid(e->s, &ci, &cj, shift);
984
985
986
987
988
989

  /* Have the cells been sorted? */
  if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
    error("Trying to interact unsorted cells.");

  /* Get the cutoff shift. */
990
991
  double rshift = 0.0;
  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
992
993

  /* Pick-out the sorted lists. */
994
995
  struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)];
  struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
996
997

  /* Get some other useful values. */
998
999
1000
  const double hi_max = ci->h_max * kernel_gamma - rshift;
  const double hj_max = cj->h_max * kernel_gamma;
  const int count_i = ci->count;
For faster browsing, not all history is shown. View entire blame