/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Before including this file, define FUNCTION, which is the
   name of the interaction function. This creates the interaction functions
   runner_dopair_FUNCTION, runner_dopair_FUNCTION_naive, runner_doself_FUNCTION,
   and runner_dosub_FUNCTION calling the pairwise interaction function
   runner_iact_FUNCTION. */

/* Token-pasting helper: joins x and y with an underscore. The two-level
   pattern below (_NAME(f) + NAME) ensures FUNCTION is macro-expanded
   before it is pasted onto the prefix. */
#define PASTE(x, y) x##_##y

/* Names of the pair interaction functions generated for FUNCTION. */
#define _DOPAIR1(f) PASTE(runner_dopair1, f)
#define DOPAIR1 _DOPAIR1(FUNCTION)

#define _DOPAIR2(f) PASTE(runner_dopair2, f)
#define DOPAIR2 _DOPAIR2(FUNCTION)

#define _DOPAIR1_NOSORT(f) PASTE(runner_dopair1_nosort, f)
#define DOPAIR1_NOSORT _DOPAIR1_NOSORT(FUNCTION)

#define _DOPAIR2_NOSORT(f) PASTE(runner_dopair2_nosort, f)
#define DOPAIR2_NOSORT _DOPAIR2_NOSORT(FUNCTION)

#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f)
#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)

#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f)
#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION)

#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f)
#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION)

#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f)
#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION)

#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f)
#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION)

/* Names of the self interaction functions generated for FUNCTION. */
#define _DOSELF_NAIVE(f) PASTE(runner_doself_naive, f)
#define DOSELF_NAIVE _DOSELF_NAIVE(FUNCTION)

#define _DOSELF1(f) PASTE(runner_doself1, f)
#define DOSELF1 _DOSELF1(FUNCTION)

#define _DOSELF2(f) PASTE(runner_doself2, f)
#define DOSELF2 _DOSELF2(FUNCTION)

#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f)
#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)

/* Names of the sub-cell (recursive) interaction functions. */
#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f)
#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION)

#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f)
#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION)

#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f)
#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION)

#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f)
#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION)

#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f)
#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)

/* Names of the particle-particle interaction kernels called by the loops. */
#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f)
#define IACT_NONSYM _IACT_NONSYM(FUNCTION)

#define _IACT(f) PASTE(runner_iact, f)
#define IACT _IACT(FUNCTION)

#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)

#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
#define IACT_VEC _IACT_VEC(FUNCTION)

/* Names of the timers associated with each generated function. */
#define _TIMER_DOSELF(f) PASTE(timer_doself, f)
#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)

#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f)
#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)

#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f)
#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION)

#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f)
#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION)

#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f)
#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)

#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)

113
114
#include "runner_doiact_nosort.h"

115
/**
116
 * @brief Compute the interactions between a cell pair (non-symmetric).
117
118
119
120
121
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
122
void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci,
123
                   struct cell *restrict cj) {
124
125
126
127

  const struct engine *e = r->e;

#ifndef SWIFT_DEBUG_CHECKS
128
// error("Don't use in actual runs ! Slow code !");
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#endif

#ifdef WITH_VECTORIZATION
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
  TIMER_TIC;

  /* Anything to do here? */
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;

  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;

  /* Get the relative distance between the pairs, wrapping. */
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts in ci. */
  for (int pid = 0; pid < count_i; pid++) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts_i[pid];
    const float hi = pi->h;

    double pix[3];
    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    const float hig2 = hi * hi * kernel_gamma2;

    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts_j[pjd];

      /* Compute the pairwise distance. */
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hig2) {

#ifndef WITH_VECTORIZATION

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }
      if (r2 < pj->h * pj->h * kernel_gamma2) {

#ifndef WITH_VECTORIZATION
214

215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
        for (int k = 0; k < 3; k++) dx[k] = -dx[k];
        IACT_NONSYM(r2, dx, pj->h, hi, pj, pi);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = -dx[0];
        dxq[3 * icount + 1] = -dx[1];
        dxq[3 * icount + 2] = -dx[2];
        hiq[icount] = pj->h;
        hjq[icount] = hi;
        piq[icount] = pj;
        pjq[icount] = pi;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */

#ifdef WITH_VECTORIZATION
  /* Pick up any leftovers. */
  if (icount > 0)
    for (int k = 0; k < icount; k++)
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

  TIMER_TOC(TIMER_DOPAIR);
}

void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci,
255
                   struct cell *restrict cj) {
256

257
258
  const struct engine *e = r->e;

259
#ifndef SWIFT_DEBUG_CHECKS
260
// error("Don't use in actual runs ! Slow code !");
261
#endif
262

263
#ifdef WITH_OLD_VECTORIZATION
264
265
266
267
268
269
270
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
Matthieu Schaller's avatar
Matthieu Schaller committed
271
  TIMER_TIC;
272
273

  /* Anything to do here? */
274
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
275

276
277
278
279
280
  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;

281
  /* Get the relative distance between the pairs, wrapping. */
282
283
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
284
285
286
287
288
289
290
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts in ci. */
291
  for (int pid = 0; pid < count_i; pid++) {
292
293

    /* Get a hold of the ith part in ci. */
294
295
296
297
298
299
    struct part *restrict pi = &parts_i[pid];
    const float hi = pi->h;

    double pix[3];
    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    const float hig2 = hi * hi * kernel_gamma2;
300
301

    /* Loop over the parts in cj. */
302
    for (int pjd = 0; pjd < count_j; pjd++) {
303
304

      /* Get a pointer to the jth particle. */
305
      struct part *restrict pj = &parts_j[pjd];
306
307

      /* Compute the pairwise distance. */
308
309
310
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
311
312
313
314
315
316
317
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {

318
#ifndef WITH_OLD_VECTORIZATION
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338

        IACT(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
339
340
        }

341
342
343
344
345
346
347
#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */

348
#ifdef WITH_OLD_VECTORIZATION
349
350
  /* Pick up any leftovers. */
  if (icount > 0)
351
    for (int k = 0; k < icount; k++)
352
353
354
355
356
357
358
359
      IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

  TIMER_TOC(TIMER_DOPAIR);
}

void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {

360
  const struct engine *e = r->e;
361

362
#ifndef SWIFT_DEBUG_CHECKS
363
// error("Don't use in actual runs ! Slow code !");
364
#endif
365

366
#ifdef WITH_OLD_VECTORIZATION
367
368
369
370
371
372
373
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
374

Matthieu Schaller's avatar
Matthieu Schaller committed
375
  TIMER_TIC;
376
377

  /* Anything to do here? */
378
  if (!cell_is_active(c, e)) return;
379

380
381
  const int count = c->count;
  struct part *restrict parts = c->parts;
382
383

  /* Loop over the parts in ci. */
384
  for (int pid = 0; pid < count; pid++) {
385
386

    /* Get a hold of the ith part in ci. */
387
388
389
390
    struct part *restrict pi = &parts[pid];
    const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]};
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
391

392
    /* Loop over the parts in cj. */
393
    for (int pjd = pid + 1; pjd < count; pjd++) {
394
395

      /* Get a pointer to the jth particle. */
396
      struct part *restrict pj = &parts[pjd];
397
398

      /* Compute the pairwise distance. */
399
400
401
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
402
403
404
405
406
407
408
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {

409
#ifndef WITH_OLD_VECTORIZATION
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430

        IACT(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }
431

432
433
#endif
      }
434

435
    } /* loop over the parts in cj. */
436

437
438
  } /* loop over the parts in ci. */

439
#ifdef WITH_OLD_VECTORIZATION
440
441
  /* Pick up any leftovers. */
  if (icount > 0)
442
    for (int k = 0; k < icount; k++)
443
444
      IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif
445

446
447
  TIMER_TOC(TIMER_DOSELF);
}
448

449
450
451
452
453
454
/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
455
 * @param parts_i The #part to interact with @c cj.
456
457
458
459
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
 */
460
461
462
463
464
465
466
467
void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
                         struct part *restrict parts_i, int *restrict ind,
                         int count, struct cell *restrict cj) {

  struct engine *e = r->e;

  error("Don't use in actual runs ! Slow code !");

468
#ifdef WITH_OLD_VECTORIZATION
469
470
471
472
473
474
475
476
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif

Matthieu Schaller's avatar
Matthieu Schaller committed
477
  TIMER_TIC;
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513

  const int count_j = cj->count;
  struct part *restrict parts_j = cj->parts;

  /* Get the relative distance between the pairs, wrapping. */
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts_i. */
  for (int pid = 0; pid < count; pid++) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts_i[ind[pid]];
    double pix[3];
    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;

    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts_j[pjd];

      /* Compute the pairwise distance. */
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }
514

515
516
517
      /* Hit or miss? */
      if (r2 < hig2) {

518
#ifndef WITH_OLD_VECTORIZATION
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */
547

548
#ifdef WITH_OLD_VECTORIZATION
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
  /* Pick up any leftovers. */
  if (icount > 0)
    for (int k = 0; k < icount; k++)
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

  TIMER_TOC(timer_dopair_subset);
}

/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param parts_i The #part to interact with @c cj.
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
 */
569
570
571
572
573
void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts_i, int *restrict ind, int count,
                   struct cell *restrict cj) {

  struct engine *e = r->e;
574

575
#ifdef WITH_MPI
Matthieu Schaller's avatar
Matthieu Schaller committed
576
  if (ci->nodeID != cj->nodeID) {
577
578
579
580
581
    DOPAIR_SUBSET_NOSORT(r, ci, parts_i, ind, count, cj);
    return;
  }
#endif

582
#ifdef WITH_OLD_VECTORIZATION
583
584
585
586
587
588
589
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
590

Matthieu Schaller's avatar
Matthieu Schaller committed
591
  TIMER_TIC;
592

593
594
595
  const int count_j = cj->count;
  struct part *restrict parts_j = cj->parts;

596
  /* Get the relative distance between the pairs, wrapping. */
597
598
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
599
600
601
602
603
604
605
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Get the sorting index. */
606
607
  int sid = 0;
  for (int k = 0; k < 3; k++)
608
609
610
611
612
    sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
                         ? 0
                         : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);

  /* Switch the cells around? */
613
  const int flipped = runner_flip[sid];
614
615
616
  sid = sortlistID[sid];

  /* Have the cells been sorted? */
617
618
619
620
  if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin)
    runner_do_sort(r, cj, (1 << sid), 1);
  // if (!(cj->sorted & (1 << sid))) error("Trying to interact unsorted
  // cells.");
621
622

  /* Pick-out the sorted lists. */
623
  const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
624
  const float dxj = cj->dx_max_sort;
625
626
627
628
629

  /* Parts are on the left? */
  if (!flipped) {

    /* Loop over the parts_i. */
630
    for (int pid = 0; pid < count; pid++) {
631
632

      /* Get a hold of the ith part in ci. */
633
634
635
636
637
638
639
640
641
      struct part *restrict pi = &parts_i[ind[pid]];
      double pix[3];
      for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];

      const float hi = pi->h;
      const float hig2 = hi * hi * kernel_gamma2;
      const float di = hi * kernel_gamma + dxj + pix[0] * runner_shift[sid][0] +
                       pix[1] * runner_shift[sid][1] +
                       pix[2] * runner_shift[sid][2];
642
643

      /* Loop over the parts in cj. */
644
      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
645
646

        /* Get a pointer to the jth particle. */
647
        struct part *restrict pj = &parts_j[sort_j[pjd].i];
648
649

        /* Compute the pairwise distance. */
650
651
652
        float r2 = 0.0f;
        float dx[3];
        for (int k = 0; k < 3; k++) {
653
654
          dx[k] = pix[k] - pj->x[k];
          r2 += dx[k] * dx[k];
655
        }
656
657
658
659

        /* Hit or miss? */
        if (r2 < hig2) {

660
#ifndef WITH_OLD_VECTORIZATION
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683

          IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

          /* Add this interaction to the queue. */
          r2q[icount] = r2;
          dxq[3 * icount + 0] = dx[0];
          dxq[3 * icount + 1] = dx[1];
          dxq[3 * icount + 2] = dx[2];
          hiq[icount] = hi;
          hjq[icount] = pj->h;
          piq[icount] = pi;
          pjq[icount] = pj;
          icount += 1;

          /* Flush? */
          if (icount == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
            icount = 0;
          }

#endif
684
        }
685
686
687
688
689
690
691
692
693
694
695

      } /* loop over the parts in cj. */

    } /* loop over the parts in ci. */

  }

  /* Parts are on the right. */
  else {

    /* Loop over the parts_i. */
696
    for (int pid = 0; pid < count; pid++) {
697
698

      /* Get a hold of the ith part in ci. */
699
700
701
702
703
704
705
706
      struct part *restrict pi = &parts_i[ind[pid]];
      double pix[3];
      for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
      const float hi = pi->h;
      const float hig2 = hi * hi * kernel_gamma2;
      const float di =
          -hi * kernel_gamma - dxj + pix[0] * runner_shift[sid][0] +
          pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2];
707
708

      /* Loop over the parts in cj. */
709
      for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
710
711

        /* Get a pointer to the jth particle. */
712
        struct part *restrict pj = &parts_j[sort_j[pjd].i];
713
714

        /* Compute the pairwise distance. */
715
716
717
        float r2 = 0.0f;
        float dx[3];
        for (int k = 0; k < 3; k++) {
718
719
          dx[k] = pix[k] - pj->x[k];
          r2 += dx[k] * dx[k];
720
        }
721

722
723
        /* Hit or miss? */
        if (r2 < hig2) {
724

725
#ifndef WITH_OLD_VECTORIZATION
726
727

          IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
728

729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
#else

          /* Add this interaction to the queue. */
          r2q[icount] = r2;
          dxq[3 * icount + 0] = dx[0];
          dxq[3 * icount + 1] = dx[1];
          dxq[3 * icount + 2] = dx[2];
          hiq[icount] = hi;
          hjq[icount] = pj->h;
          piq[icount] = pi;
          pjq[icount] = pj;
          icount += 1;

          /* Flush? */
          if (icount == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
            icount = 0;
          }

#endif
        }

      } /* loop over the parts in cj. */

    } /* loop over the parts in ci. */
  }

756
#ifdef WITH_OLD_VECTORIZATION
757
758
  /* Pick up any leftovers. */
  if (icount > 0)
759
    for (int k = 0; k < icount; k++)
760
761
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif
Pedro Gonnet's avatar
Pedro Gonnet committed
762

763
764
  TIMER_TOC(timer_dopair_subset);
}
Pedro Gonnet's avatar
Pedro Gonnet committed
765

766
767
768
769
770
771
/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
772
 * @param parts The #part to interact.
773
774
775
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 */
776
777
778
void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts, int *restrict ind, int count) {

779
#ifdef WITH_OLD_VECTORIZATION
780
781
782
783
784
785
786
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
787

Matthieu Schaller's avatar
Matthieu Schaller committed
788
  TIMER_TIC;
789

790
791
  const int count_i = ci->count;
  struct part *restrict parts_j = ci->parts;
792
793

  /* Loop over the parts in ci. */
794
  for (int pid = 0; pid < count; pid++) {
795
796

    /* Get a hold of the ith part in ci. */
797
798
799
800
    struct part *restrict pi = &parts[ind[pid]];
    const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]};
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
801

802
    /* Loop over the parts in cj. */
803
    for (int pjd = 0; pjd < count_i; pjd++) {
804
805

      /* Get a pointer to the jth particle. */
806
      struct part *restrict pj = &parts_j[pjd];
807
808

      /* Compute the pairwise distance. */
809
810
811
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
812
813
814
815
816
817
818
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 > 0.0f && r2 < hig2) {

819
#ifndef WITH_OLD_VECTORIZATION
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */
848

849
#ifdef WITH_OLD_VECTORIZATION
850
851
  /* Pick up any leftovers. */
  if (icount > 0)
852
    for (int k = 0; k < icount; k++)
853
854
855
856
857
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

  TIMER_TOC(timer_dopair_subset);
}
858

859
/**
860
 * @brief Compute the interactions between a cell pair (non-symmetric).
861
862
863
864
865
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
866
867
void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {

868
  const struct engine *restrict e = r->e;
869

870
#ifdef WITH_MPI
Matthieu Schaller's avatar
Matthieu Schaller committed
871
  if (ci->nodeID != cj->nodeID) {
872
873
874
875
876
    DOPAIR1_NOSORT(r, ci, cj);
    return;
  }
#endif

877
#ifdef WITH_OLD_VECTORIZATION
878
879
880
881
882
883
884
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
885

Matthieu Schaller's avatar
Matthieu Schaller committed
886
  TIMER_TIC;
887
888

  /* Anything to do here? */
889
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
890

891
892
  if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e))
    error("Interacting undrifted cells.");
893

894
  /* Get the sort ID. */
895
896
  double shift[3] = {0.0, 0.0, 0.0};
  const int sid = space_getsid(e->s, &ci, &cj, shift);
897
898

  /* Have the cells been sorted? */
899
900
901
902
903
904
  if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin)
    runner_do_sort(r, ci, (1 << sid), 1);
  if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin)
    runner_do_sort(r, cj, (1 << sid), 1);
  // if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid)))
  //   error("Trying to interact unsorted cells.");
905
906

  /* Get the cutoff shift. */
907
908
  double rshift = 0.0;
  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
909
910

  /* Pick-out the sorted lists. */
911
912
  const struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)];
  const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
913

914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
#ifdef SWIFT_DEBUG_CHECKS
  /* Check that the dx_max_sort values in the cell are indeed an upper
     bound on particle movement. */
  for (int pid = 0; pid < ci->count; pid++) {
    const struct part *p = &ci->parts[sort_i[pid].i];
    const float d = p->x[0] * runner_shift[sid][0] +
                    p->x[1] * runner_shift[sid][1] +
                    p->x[2] * runner_shift[sid][2];
    if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > 1.0e-6)
      error("particle shift diff exceeds dx_max_sort.");
  }
  for (int pjd = 0; pjd < cj->count; pjd++) {
    const struct part *p = &cj->parts[sort_j[pjd].i];
    const float d = p->x[0] * runner_shift[sid][0] +
                    p->x[1] * runner_shift[sid][1] +
                    p->x[2] * runner_shift[sid][2];
    if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort > 1.0e-6)
      error("particle shift diff exceeds dx_max_sort.");
  }
#endif /* SWIFT_DEBUG_CHECKS */

935
  /* Get some other useful values. */
936
937
938
939
940
941
942
943
  const double hi_max = ci->h_max * kernel_gamma - rshift;
  const double hj_max = cj->h_max * kernel_gamma;
  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;
  const double di_max = sort_i[count_i - 1].d - rshift;
  const double dj_min = sort_j[0].d;
944
  const float dx_max = (ci->dx_max_sort + cj->dx_max_sort);
945
946

  /* Loop over the parts in ci. */
947
948
  for (int pid = count_i - 1;
       pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
949
950

    /* Get a hold of the ith part in ci. */
951
    struct part *restrict pi = &parts_i[sort_i[pid].i];
952
    if (!part_is_active(pi, e)) continue;
953
954
    const float hi = pi->h;
    const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
955
956
    if (di < dj_min) continue;

957
958
959
    double pix[3];
    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    const float hig2 = hi * hi * kernel_gamma2;
960
961

    /* Loop over the parts in cj. */
962
    for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
963
964

      /* Get a pointer to the jth particle. */
965
      struct part *restrict pj = &parts_j[sort_j[pjd].i];
966
967

      /* Compute the pairwise distance. */
968
969
970
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
971
972
973
974
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

975
976
977
978
979
980
981
982
#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif

983
984
985
      /* Hit or miss? */
      if (r2 < hig2) {

986
#ifndef WITH_OLD_VECTORIZATION
987
988
989
990
991
992
993
994
995
996
997
998
999
1000

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
For faster browsing, not all history is shown. View entire blame