/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *               2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Before including this file, define FUNCTION, which is the name of the
   interaction function. The macros below paste FUNCTION onto fixed prefixes,
   so this file then creates interaction functions such as
   runner_dopair1_FUNCTION, runner_dopair1_naive_FUNCTION,
   runner_doself_naive_FUNCTION and runner_dopair_subset_FUNCTION, which call
   the pairwise interaction functions runner_iact_FUNCTION and
   runner_iact_nonsym_FUNCTION. */

/* Token-pasting helper: PASTE(a, b) yields the identifier a_b. The two-level
   pattern used below (_NAME(f) called with FUNCTION) makes sure FUNCTION is
   macro-expanded before it is pasted. */
#define PASTE(x, y) x##_##y

/* Pair-interaction function names. */
#define _DOPAIR1(f) PASTE(runner_dopair1, f)
#define DOPAIR1 _DOPAIR1(FUNCTION)

#define _DOPAIR2(f) PASTE(runner_dopair2, f)
#define DOPAIR2 _DOPAIR2(FUNCTION)

#define _DOPAIR1_NOSORT(f) PASTE(runner_dopair1_nosort, f)
#define DOPAIR1_NOSORT _DOPAIR1_NOSORT(FUNCTION)

#define _DOPAIR2_NOSORT(f) PASTE(runner_dopair2_nosort, f)
#define DOPAIR2_NOSORT _DOPAIR2_NOSORT(FUNCTION)

#define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f)
#define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION)

#define _DOPAIR_SUBSET_NOSORT(f) PASTE(runner_dopair_subset_nosort, f)
#define DOPAIR_SUBSET_NOSORT _DOPAIR_SUBSET_NOSORT(FUNCTION)

#define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f)
#define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION)

#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f)
#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION)

#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f)
#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION)

/* Self-interaction function names. */
#define _DOSELF_NAIVE(f) PASTE(runner_doself_naive, f)
#define DOSELF_NAIVE _DOSELF_NAIVE(FUNCTION)

#define _DOSELF1(f) PASTE(runner_doself1, f)
#define DOSELF1 _DOSELF1(FUNCTION)

#define _DOSELF2(f) PASTE(runner_doself2, f)
#define DOSELF2 _DOSELF2(FUNCTION)

#define _DOSELF_SUBSET(f) PASTE(runner_doself_subset, f)
#define DOSELF_SUBSET _DOSELF_SUBSET(FUNCTION)

/* Sub-cell function names. */
#define _DOSUB_SELF1(f) PASTE(runner_dosub_self1, f)
#define DOSUB_SELF1 _DOSUB_SELF1(FUNCTION)

#define _DOSUB_PAIR1(f) PASTE(runner_dosub_pair1, f)
#define DOSUB_PAIR1 _DOSUB_PAIR1(FUNCTION)

#define _DOSUB_SELF2(f) PASTE(runner_dosub_self2, f)
#define DOSUB_SELF2 _DOSUB_SELF2(FUNCTION)

#define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f)
#define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION)

#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f)
#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION)

/* Particle-particle interaction kernels (scalar and queued/vector forms). */
#define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f)
#define IACT_NONSYM _IACT_NONSYM(FUNCTION)

#define _IACT(f) PASTE(runner_iact, f)
#define IACT _IACT(FUNCTION)

#define _IACT_NONSYM_VEC(f) PASTE(runner_iact_nonsym_vec, f)
#define IACT_NONSYM_VEC _IACT_NONSYM_VEC(FUNCTION)

#define _IACT_VEC(f) PASTE(runner_iact_vec, f)
#define IACT_VEC _IACT_VEC(FUNCTION)

/* Timer identifiers. */
#define _TIMER_DOSELF(f) PASTE(timer_doself, f)
#define TIMER_DOSELF _TIMER_DOSELF(FUNCTION)

#define _TIMER_DOPAIR(f) PASTE(timer_dopair, f)
#define TIMER_DOPAIR _TIMER_DOPAIR(FUNCTION)

#define _TIMER_DOSUB_SELF(f) PASTE(timer_dosub_self, f)
#define TIMER_DOSUB_SELF _TIMER_DOSUB_SELF(FUNCTION)

#define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f)
#define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION)

#define _TIMER_DOSELF_SUBSET(f) PASTE(timer_doself_subset, f)
#define TIMER_DOSELF_SUBSET _TIMER_DOSELF_SUBSET(FUNCTION)

#define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f)
#define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION)

/**
114
 * @brief Compute the interactions between a cell pair (non-symmetric).
115
116
117
118
119
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
120
void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci,
121
                   struct cell *restrict cj) {
122
123
124
125

  const struct engine *e = r->e;

#ifndef SWIFT_DEBUG_CHECKS
126
  error("Don't use in actual runs ! Slow code !");
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#endif

#ifdef WITH_VECTORIZATION
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
  TIMER_TIC;

  /* Anything to do here? */
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;

  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;

  /* Get the relative distance between the pairs, wrapping. */
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts in ci. */
  for (int pid = 0; pid < count_i; pid++) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts_i[pid];
    const float hi = pi->h;

    double pix[3];
    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    const float hig2 = hi * hi * kernel_gamma2;

    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts_j[pjd];

      /* Compute the pairwise distance. */
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hig2) {

#ifndef WITH_VECTORIZATION

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }
      if (r2 < pj->h * pj->h * kernel_gamma2) {

#ifndef WITH_VECTORIZATION
212

213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
        for (int k = 0; k < 3; k++) dx[k] = -dx[k];
        IACT_NONSYM(r2, dx, pj->h, hi, pj, pi);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = -dx[0];
        dxq[3 * icount + 1] = -dx[1];
        dxq[3 * icount + 2] = -dx[2];
        hiq[icount] = pj->h;
        hjq[icount] = hi;
        piq[icount] = pj;
        pjq[icount] = pi;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */

#ifdef WITH_VECTORIZATION
  /* Pick up any leftovers. */
  if (icount > 0)
    for (int k = 0; k < icount; k++)
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

  TIMER_TOC(TIMER_DOPAIR);
}

void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci,
253
                   struct cell *restrict cj) {
254

255
256
  const struct engine *e = r->e;

257
#ifndef SWIFT_DEBUG_CHECKS
258
  error("Don't use in actual runs ! Slow code !");
259
#endif
260

261
#ifdef WITH_OLD_VECTORIZATION
262
263
264
265
266
267
268
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
Matthieu Schaller's avatar
Matthieu Schaller committed
269
  TIMER_TIC;
270
271

  /* Anything to do here? */
272
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
273

274
275
276
277
278
  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;

279
  /* Get the relative distance between the pairs, wrapping. */
280
281
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
282
283
284
285
286
287
288
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts in ci. */
289
  for (int pid = 0; pid < count_i; pid++) {
290
291

    /* Get a hold of the ith part in ci. */
292
293
294
295
296
297
    struct part *restrict pi = &parts_i[pid];
    const float hi = pi->h;

    double pix[3];
    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    const float hig2 = hi * hi * kernel_gamma2;
298
299

    /* Loop over the parts in cj. */
300
    for (int pjd = 0; pjd < count_j; pjd++) {
301
302

      /* Get a pointer to the jth particle. */
303
      struct part *restrict pj = &parts_j[pjd];
304
305

      /* Compute the pairwise distance. */
306
307
308
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
309
310
311
312
313
314
315
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {

316
#ifndef WITH_OLD_VECTORIZATION
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336

        IACT(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
337
338
        }

339
340
341
342
343
344
345
#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */

346
#ifdef WITH_OLD_VECTORIZATION
347
348
  /* Pick up any leftovers. */
  if (icount > 0)
349
    for (int k = 0; k < icount; k++)
350
351
352
353
354
355
356
357
      IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

  TIMER_TOC(TIMER_DOPAIR);
}

void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) {

358
  const struct engine *e = r->e;
359

360
#ifndef SWIFT_DEBUG_CHECKS
361
  error("Don't use in actual runs ! Slow code !");
362
#endif
363

364
#ifdef WITH_OLD_VECTORIZATION
365
366
367
368
369
370
371
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
372

Matthieu Schaller's avatar
Matthieu Schaller committed
373
  TIMER_TIC;
374
375

  /* Anything to do here? */
376
  if (!cell_is_active(c, e)) return;
377

378
379
  const int count = c->count;
  struct part *restrict parts = c->parts;
380
381

  /* Loop over the parts in ci. */
382
  for (int pid = 0; pid < count; pid++) {
383
384

    /* Get a hold of the ith part in ci. */
385
386
387
388
    struct part *restrict pi = &parts[pid];
    const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]};
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
389

390
    /* Loop over the parts in cj. */
391
    for (int pjd = pid + 1; pjd < count; pjd++) {
392
393

      /* Get a pointer to the jth particle. */
394
      struct part *restrict pj = &parts[pjd];
395
396

      /* Compute the pairwise distance. */
397
398
399
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
400
401
402
403
404
405
406
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 < hig2 || r2 < pj->h * pj->h * kernel_gamma2) {

407
#ifndef WITH_OLD_VECTORIZATION
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428

        IACT(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }
429

430
431
#endif
      }
432

433
    } /* loop over the parts in cj. */
434

435
436
  } /* loop over the parts in ci. */

437
#ifdef WITH_OLD_VECTORIZATION
438
439
  /* Pick up any leftovers. */
  if (icount > 0)
440
    for (int k = 0; k < icount; k++)
441
442
      IACT(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif
443

444
445
  TIMER_TOC(TIMER_DOSELF);
}
446

447
448
449
450
451
452
/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
453
 * @param parts_i The #part to interact with @c cj.
454
455
456
457
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
 */
458
459
460
461
void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci,
                         struct part *restrict parts_i, int *restrict ind,
                         int count, struct cell *restrict cj) {

462
  const struct engine *e = r->e;
463

464
#ifdef WITH_OLD_VECTORIZATION
465
466
467
468
469
470
471
472
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif

Matthieu Schaller's avatar
Matthieu Schaller committed
473
  TIMER_TIC;
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496

  const int count_j = cj->count;
  struct part *restrict parts_j = cj->parts;

  /* Get the relative distance between the pairs, wrapping. */
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Loop over the parts_i. */
  for (int pid = 0; pid < count; pid++) {

    /* Get a hold of the ith part in ci. */
    struct part *restrict pi = &parts_i[ind[pid]];
    double pix[3];
    for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;

497
498
499
500
501
502
#ifdef SWIFT_DEBUG_CHECKS
    if (!part_is_active(pi, e))
      error("Trying to correct smoothing length of inactive particle !");

#endif

503
504
505
506
507
508
509
510
511
512
513
514
515
    /* Loop over the parts in cj. */
    for (int pjd = 0; pjd < count_j; pjd++) {

      /* Get a pointer to the jth particle. */
      struct part *restrict pj = &parts_j[pjd];

      /* Compute the pairwise distance. */
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }
516

517
518
519
520
521
522
523
524
#ifdef SWIFT_DEBUG_CHECKS
      /* Check that particles have been drifted to the current time */
      if (pi->ti_drift != e->ti_current)
        error("Particle pi not drifted to current time");
      if (pj->ti_drift != e->ti_current)
        error("Particle pj not drifted to current time");
#endif

525
526
527
      /* Hit or miss? */
      if (r2 < hig2) {

528
#ifndef WITH_OLD_VECTORIZATION
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */
557

558
#ifdef WITH_OLD_VECTORIZATION
559
560
561
562
563
564
  /* Pick up any leftovers. */
  if (icount > 0)
    for (int k = 0; k < icount; k++)
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

565
  TIMER_TOC(timer_dopair_subset_naive);
566
567
568
569
570
571
572
573
574
575
576
577
578
}

/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param parts_i The #part to interact with @c cj.
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 * @param cj The second #cell.
 */
579
580
581
582
583
void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts_i, int *restrict ind, int count,
                   struct cell *restrict cj) {

  struct engine *e = r->e;
584

585
#ifdef WITH_OLD_VECTORIZATION
586
587
588
589
590
591
592
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
593

Matthieu Schaller's avatar
Matthieu Schaller committed
594
  TIMER_TIC;
595

596
597
598
  const int count_j = cj->count;
  struct part *restrict parts_j = cj->parts;

599
  /* Get the relative distance between the pairs, wrapping. */
600
601
  double shift[3] = {0.0, 0.0, 0.0};
  for (int k = 0; k < 3; k++) {
602
603
604
605
606
607
608
    if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
      shift[k] = e->s->dim[k];
    else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
      shift[k] = -e->s->dim[k];
  }

  /* Get the sorting index. */
609
610
  int sid = 0;
  for (int k = 0; k < 3; k++)
611
612
613
614
615
    sid = 3 * sid + ((cj->loc[k] - ci->loc[k] + shift[k] < 0)
                         ? 0
                         : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1);

  /* Switch the cells around? */
616
  const int flipped = runner_flip[sid];
617
618
619
  sid = sortlistID[sid];

  /* Have the cells been sorted? */
Pedro Gonnet's avatar
Pedro Gonnet committed
620
621
  if (!(cj->sorted & (1 << sid)) ||
      cj->dx_max_sort > space_maxreldx * cj->dmin) {
622
623
624
    DOPAIR_SUBSET_NAIVE(r, ci, parts_i, ind, count, cj);
    return;
  }
Pedro Gonnet's avatar
Pedro Gonnet committed
625

626
  /* Pick-out the sorted lists. */
627
  const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
628
  const float dxj = cj->dx_max_sort;
629
630
631
632
633

  /* Parts are on the left? */
  if (!flipped) {

    /* Loop over the parts_i. */
634
    for (int pid = 0; pid < count; pid++) {
635
636

      /* Get a hold of the ith part in ci. */
637
638
639
640
641
642
643
644
645
      struct part *restrict pi = &parts_i[ind[pid]];
      double pix[3];
      for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];

      const float hi = pi->h;
      const float hig2 = hi * hi * kernel_gamma2;
      const float di = hi * kernel_gamma + dxj + pix[0] * runner_shift[sid][0] +
                       pix[1] * runner_shift[sid][1] +
                       pix[2] * runner_shift[sid][2];
646
647

      /* Loop over the parts in cj. */
648
      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
649
650

        /* Get a pointer to the jth particle. */
651
        struct part *restrict pj = &parts_j[sort_j[pjd].i];
652
653

        /* Compute the pairwise distance. */
654
655
656
        float r2 = 0.0f;
        float dx[3];
        for (int k = 0; k < 3; k++) {
657
658
          dx[k] = pix[k] - pj->x[k];
          r2 += dx[k] * dx[k];
659
        }
660
661
662
663

        /* Hit or miss? */
        if (r2 < hig2) {

664
#ifndef WITH_OLD_VECTORIZATION
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687

          IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

          /* Add this interaction to the queue. */
          r2q[icount] = r2;
          dxq[3 * icount + 0] = dx[0];
          dxq[3 * icount + 1] = dx[1];
          dxq[3 * icount + 2] = dx[2];
          hiq[icount] = hi;
          hjq[icount] = pj->h;
          piq[icount] = pi;
          pjq[icount] = pj;
          icount += 1;

          /* Flush? */
          if (icount == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
            icount = 0;
          }

#endif
688
        }
689
690
691
692
693
694
695
696
697
698
699

      } /* loop over the parts in cj. */

    } /* loop over the parts in ci. */

  }

  /* Parts are on the right. */
  else {

    /* Loop over the parts_i. */
700
    for (int pid = 0; pid < count; pid++) {
701
702

      /* Get a hold of the ith part in ci. */
703
704
705
706
707
708
709
710
      struct part *restrict pi = &parts_i[ind[pid]];
      double pix[3];
      for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
      const float hi = pi->h;
      const float hig2 = hi * hi * kernel_gamma2;
      const float di =
          -hi * kernel_gamma - dxj + pix[0] * runner_shift[sid][0] +
          pix[1] * runner_shift[sid][1] + pix[2] * runner_shift[sid][2];
711
712

      /* Loop over the parts in cj. */
713
      for (int pjd = count_j - 1; pjd >= 0 && di < sort_j[pjd].d; pjd--) {
714
715

        /* Get a pointer to the jth particle. */
716
        struct part *restrict pj = &parts_j[sort_j[pjd].i];
717
718

        /* Compute the pairwise distance. */
719
720
721
        float r2 = 0.0f;
        float dx[3];
        for (int k = 0; k < 3; k++) {
722
723
          dx[k] = pix[k] - pj->x[k];
          r2 += dx[k] * dx[k];
724
        }
725

726
727
        /* Hit or miss? */
        if (r2 < hig2) {
728

729
#ifndef WITH_OLD_VECTORIZATION
730
731

          IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
732

733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
#else

          /* Add this interaction to the queue. */
          r2q[icount] = r2;
          dxq[3 * icount + 0] = dx[0];
          dxq[3 * icount + 1] = dx[1];
          dxq[3 * icount + 2] = dx[2];
          hiq[icount] = hi;
          hjq[icount] = pj->h;
          piq[icount] = pi;
          pjq[icount] = pj;
          icount += 1;

          /* Flush? */
          if (icount == VEC_SIZE) {
            IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
            icount = 0;
          }

#endif
        }

      } /* loop over the parts in cj. */

    } /* loop over the parts in ci. */
  }

760
#ifdef WITH_OLD_VECTORIZATION
761
762
  /* Pick up any leftovers. */
  if (icount > 0)
763
    for (int k = 0; k < icount; k++)
764
765
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif
Pedro Gonnet's avatar
Pedro Gonnet committed
766

767
768
  TIMER_TOC(timer_dopair_subset);
}
Pedro Gonnet's avatar
Pedro Gonnet committed
769

770
771
772
773
774
775
/**
 * @brief Compute the interactions between a cell pair, but only for the
 *      given indices in ci.
 *
 * @param r The #runner.
 * @param ci The first #cell.
776
 * @param parts The #part to interact.
777
778
779
 * @param ind The list of indices of particles in @c ci to interact with.
 * @param count The number of particles in @c ind.
 */
780
781
782
void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci,
                   struct part *restrict parts, int *restrict ind, int count) {

783
#ifdef WITH_OLD_VECTORIZATION
784
785
786
787
788
789
790
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
791

Matthieu Schaller's avatar
Matthieu Schaller committed
792
  TIMER_TIC;
793

794
795
  const int count_i = ci->count;
  struct part *restrict parts_j = ci->parts;
796
797

  /* Loop over the parts in ci. */
798
  for (int pid = 0; pid < count; pid++) {
799
800

    /* Get a hold of the ith part in ci. */
801
802
803
804
    struct part *restrict pi = &parts[ind[pid]];
    const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]};
    const float hi = pi->h;
    const float hig2 = hi * hi * kernel_gamma2;
805

806
    /* Loop over the parts in cj. */
807
    for (int pjd = 0; pjd < count_i; pjd++) {
808
809

      /* Get a pointer to the jth particle. */
810
      struct part *restrict pj = &parts_j[pjd];
811
812

      /* Compute the pairwise distance. */
813
814
815
      float r2 = 0.0f;
      float dx[3];
      for (int k = 0; k < 3; k++) {
816
817
818
819
820
821
822
        dx[k] = pix[k] - pj->x[k];
        r2 += dx[k] * dx[k];
      }

      /* Hit or miss? */
      if (r2 > 0.0f && r2 < hig2) {

823
#ifndef WITH_OLD_VECTORIZATION
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851

        IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);

#else

        /* Add this interaction to the queue. */
        r2q[icount] = r2;
        dxq[3 * icount + 0] = dx[0];
        dxq[3 * icount + 1] = dx[1];
        dxq[3 * icount + 2] = dx[2];
        hiq[icount] = hi;
        hjq[icount] = pj->h;
        piq[icount] = pi;
        pjq[icount] = pj;
        icount += 1;

        /* Flush? */
        if (icount == VEC_SIZE) {
          IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq);
          icount = 0;
        }

#endif
      }

    } /* loop over the parts in cj. */

  } /* loop over the parts in ci. */
852

853
#ifdef WITH_OLD_VECTORIZATION
854
855
  /* Pick up any leftovers. */
  if (icount > 0)
856
    for (int k = 0; k < icount; k++)
857
858
859
      IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]);
#endif

860
  TIMER_TOC(timer_doself_subset);
861
}
862

863
/**
864
 * @brief Compute the interactions between a cell pair (non-symmetric).
865
866
867
868
869
 *
 * @param r The #runner.
 * @param ci The first #cell.
 * @param cj The second #cell.
 */
870
871
void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) {

872
  const struct engine *restrict e = r->e;
873

874
#ifdef WITH_OLD_VECTORIZATION
875
876
877
878
879
880
881
  int icount = 0;
  float r2q[VEC_SIZE] __attribute__((aligned(16)));
  float hiq[VEC_SIZE] __attribute__((aligned(16)));
  float hjq[VEC_SIZE] __attribute__((aligned(16)));
  float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
  struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
882

Matthieu Schaller's avatar
Matthieu Schaller committed
883
  TIMER_TIC;
884
885

  /* Anything to do here? */
886
  if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return;
887

888
889
  if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e))
    error("Interacting undrifted cells.");
890

891
  /* Get the sort ID. */
892
893
  double shift[3] = {0.0, 0.0, 0.0};
  const int sid = space_getsid(e->s, &ci, &cj, shift);
894
895

  /* Have the cells been sorted? */
896
897
898
899
  if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin)
    runner_do_sort(r, ci, (1 << sid), 1);
  if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin)
    runner_do_sort(r, cj, (1 << sid), 1);
900
901

  /* Get the cutoff shift. */
902
903
  double rshift = 0.0;
  for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k];
904
905

  /* Pick-out the sorted lists. */
906
907
  const struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)];
  const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)];
908

909
910
911
912
913
914
915
916
#ifdef SWIFT_DEBUG_CHECKS
  /* Check that the dx_max_sort values in the cell are indeed an upper
     bound on particle movement. */
  for (int pid = 0; pid < ci->count; pid++) {
    const struct part *p = &ci->parts[sort_i[pid].i];
    const float d = p->x[0] * runner_shift[sid][0] +
                    p->x[1] * runner_shift[sid][1] +
                    p->x[2] * runner_shift[sid][2];
917
918
    if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort >
        1.0e-6 * max(fabsf(d), ci->dx_max_sort))
919
920
921
922
923
924
925
      error("particle shift diff exceeds dx_max_sort.");
  }
  for (int pjd = 0; pjd < cj->count; pjd++) {
    const struct part *p = &cj->parts[sort_j[pjd].i];
    const float d = p->x[0] * runner_shift[sid][0] +
                    p->x[1] * runner_shift[sid][1] +
                    p->x[2] * runner_shift[sid][2];
926
    if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort >
927
        1.0e-6 * max(fabsf(d), cj->dx_max_sort))
928
929
930
931
      error("particle shift diff exceeds dx_max_sort.");
  }
#endif /* SWIFT_DEBUG_CHECKS */

932
  /* Get some other useful values. */
933
934
935
936
937
938
939
940
  const double hi_max = ci->h_max * kernel_gamma - rshift;
  const double hj_max = cj->h_max * kernel_gamma;
  const int count_i = ci->count;
  const int count_j = cj->count;
  struct part *restrict parts_i = ci->parts;
  struct part *restrict parts_j = cj->parts;
  const double di_max = sort_i[count_i - 1].d - rshift;
  const double dj_min = sort_j[0].d;
941
  const float dx_max = (ci->dx_max_sort + cj->dx_max_sort);
942

943
  if (cell_is_active(ci, e)) {
944

945
946
947
    /* Loop over the parts in ci. */
    for (int pid = count_i - 1;
         pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) {
948

949
950
951
952
953
954
      /* Get a hold of the ith part in ci. */
      struct part *restrict pi = &parts_i[sort_i[pid].i];
      if (!part_is_active(pi, e)) continue;
      const float hi = pi->h;
      const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift;
      if (di < dj_min) continue;
955

956
957
958
      double pix[3];
      for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
      const float hig2 = hi * hi * kernel_gamma2;
959

960
961
      /* Loop over the parts in cj. */
      for (int pjd = 0; pjd < count_j && sort_j[pjd].d < di; pjd++) {
962

963
964
965
966
967
968
969
970
971
972
        /* Get a pointer to the jth particle. */
        struct part *restrict pj = &parts_j[sort_j[pjd].i];

        /* Compute the pairwise distance. */
        float r2 = 0.0f;
        float dx[3];
        for (int k = 0; k < 3; k++) {
          dx[k] = pix[k] - pj->x[k];
          r2 += dx[k] * dx[k];
        }
973

974
#ifdef SWIFT_DEBUG_CHECKS
975
976
977
978
979
        /* Check that particles have been drifted to the current time */
        if (pi->ti_drift != e->ti_current)
          error("Particle pi not drifted to current time");
        if (pj->ti_drift != e->ti_current)
          error("Particle pj not drifted to current time");
980
981
#endif

982
983
        /* Hit or miss? */
        if (r2 < hig2) {
984

985
#ifndef WITH_OLD_VECTORIZATION
986

987
          IACT_NONSYM(r2, dx, hi, pj->h, pi, pj);
988
989
990

#else

991
992
993
994
995
996
997
998
999
1000
          /* Add this interaction to the queue. */
          r2q[icount] = r2;
          dxq[3 * icount + 0] = dx[0];
          dxq[3 * icount + 1] = dx[1];
          dxq[3 * icount + 2] = dx[2];
          hiq[icount] = hi;
          hjq[icount] = pj->h;
          piq[icount] = pi;
          pjq[icount] = pj;
          icount += 1;
For faster browsing, not all history is shown. View entire blame