runner.c 34.7 KB
Newer Older
1
/*******************************************************************************
2
 * This file is part of SWIFT.
3
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4
5
6
7
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
8
 *
9
10
11
12
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
13
 *
14
15
16
17
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
18
 *
19
20
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 *
22
 ******************************************************************************/
Pedro Gonnet's avatar
Pedro Gonnet committed
23

Pedro Gonnet's avatar
Pedro Gonnet committed
24
25
/* Config parameters. */
#include "../config.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
26
27
28
29

/* Some standard headers. */
#include <float.h>
#include <limits.h>
30
#include <stdlib.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
31

32
33
/* MPI headers. */
#ifdef WITH_MPI
34
#include <mpi.h>
35
36
#endif

37
38
39
/* This object's header. */
#include "runner.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
40
/* Local headers. */
Matthieu Schaller's avatar
Matthieu Schaller committed
41
#include "approx_math.h"
42
#include "atomic.h"
43
#include "cell.h"
44
#include "const.h"
45
#include "debug.h"
Matthieu Schaller's avatar
Matthieu Schaller committed
46
#include "drift.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
47
#include "engine.h"
48
#include "error.h"
49
50
#include "gravity.h"
#include "hydro.h"
Matthieu Schaller's avatar
Matthieu Schaller committed
51
#include "hydro_properties.h"
52
#include "kick.h"
53
#include "minmax.h"
54
55
56
57
#include "scheduler.h"
#include "space.h"
#include "task.h"
#include "timers.h"
58
#include "timestep.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
59

60
/* Orientation of the cell pairs */
Matthieu Schaller's avatar
Matthieu Schaller committed
61
62
const double runner_shift[13][3] = {
    {5.773502691896258e-01, 5.773502691896258e-01, 5.773502691896258e-01},
63
    {7.071067811865475e-01, 7.071067811865475e-01, 0.0},
Matthieu Schaller's avatar
Matthieu Schaller committed
64
65
66
67
68
69
70
71
72
73
74
    {5.773502691896258e-01, 5.773502691896258e-01, -5.773502691896258e-01},
    {7.071067811865475e-01, 0.0, 7.071067811865475e-01},
    {1.0, 0.0, 0.0},
    {7.071067811865475e-01, 0.0, -7.071067811865475e-01},
    {5.773502691896258e-01, -5.773502691896258e-01, 5.773502691896258e-01},
    {7.071067811865475e-01, -7.071067811865475e-01, 0.0},
    {5.773502691896258e-01, -5.773502691896258e-01, -5.773502691896258e-01},
    {0.0, 7.071067811865475e-01, 7.071067811865475e-01},
    {0.0, 1.0, 0.0},
    {0.0, 7.071067811865475e-01, -7.071067811865475e-01},
    {0.0, 0.0, 1.0},
Matthieu Schaller's avatar
Matthieu Schaller committed
75
};
76
77

/* Does the axis need flipping ? */
78
79
const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
                              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
Pedro Gonnet's avatar
Pedro Gonnet committed
80

81
/* Import the density loop functions. */
82
83
84
#define FUNCTION density
#include "runner_doiact.h"

85
86
87
88
89
90
91
#ifdef EXTRA_HYDRO_LOOP
/* Import the gradient loop functions. */
#undef FUNCTION
#define FUNCTION gradient
#include "runner_doiact.h"
#endif

92
/* Import the force loop functions. */
93
94
95
96
#undef FUNCTION
#define FUNCTION force
#include "runner_doiact.h"

97
/* Import the gravity loop functions. */
98
#include "runner_doiact_fft.h"
Matthieu Schaller's avatar
Matthieu Schaller committed
99
#include "runner_doiact_grav.h"
100

Tom Theuns's avatar
Tom Theuns committed
101
102
103
/**
 * @brief Calculate gravity acceleration from external potential
 *
Matthieu Schaller's avatar
Matthieu Schaller committed
104
105
106
 * @param r runner task
 * @param c cell
 * @param timer 1 if the time is to be recorded.
Tom Theuns's avatar
Tom Theuns committed
107
 */
108
void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
Tom Theuns's avatar
Tom Theuns committed
109

Matthieu Schaller's avatar
Matthieu Schaller committed
110
111
  struct gpart *restrict gparts = c->gparts;
  const int gcount = c->gcount;
112
  const int ti_current = r->e->ti_current;
Matthieu Schaller's avatar
Matthieu Schaller committed
113
  const struct external_potential *potential = r->e->external_potential;
114
  const struct phys_const *constants = r->e->physical_constants;
115
  const double time = r->e->time;
116
  TIMER_TIC;
Tom Theuns's avatar
Tom Theuns committed
117
118
119

  /* Recurse? */
  if (c->split) {
Matthieu Schaller's avatar
Matthieu Schaller committed
120
    for (int k = 0; k < 8; k++)
121
      if (c->progeny[k] != NULL) runner_do_grav_external(r, c->progeny[k], 0);
Tom Theuns's avatar
Tom Theuns committed
122
123
124
125
    return;
  }

#ifdef TASK_VERBOSE
Matthieu Schaller's avatar
Matthieu Schaller committed
126
  OUT;
Tom Theuns's avatar
Tom Theuns committed
127
#endif
Matthieu Schaller's avatar
Matthieu Schaller committed
128

129
  /* Loop over the gparts in this cell. */
Matthieu Schaller's avatar
Matthieu Schaller committed
130
  for (int i = 0; i < gcount; i++) {
131

132
    /* Get a direct pointer on the part. */
Matthieu Schaller's avatar
Matthieu Schaller committed
133
    struct gpart *restrict g = &gparts[i];
134
135
136

    /* Is this part within the time step? */
    if (g->ti_end <= ti_current) {
Matthieu Schaller's avatar
Matthieu Schaller committed
137

138
      external_gravity(time, potential, constants, g);
139
    }
140
  }
Matthieu Schaller's avatar
Matthieu Schaller committed
141

142
  if (timer) TIMER_TOC(timer_dograv_external);
Tom Theuns's avatar
Tom Theuns committed
143
144
}

Pedro Gonnet's avatar
Pedro Gonnet committed
145
146
147
148
149
150
/**
 * @brief Sort the entries in ascending order using QuickSort.
 *
 * @param sort The entries
 * @param N The number of entries.
 */
151
void runner_do_sort_ascending(struct entry *sort, int N) {
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205

  struct {
    short int lo, hi;
  } qstack[10];
  int qpos, i, j, lo, hi, imin;
  struct entry temp;
  float pivot;

  /* Sort parts in cell_i in decreasing order with quicksort */
  qstack[0].lo = 0;
  qstack[0].hi = N - 1;
  qpos = 0;
  while (qpos >= 0) {
    lo = qstack[qpos].lo;
    hi = qstack[qpos].hi;
    qpos -= 1;
    if (hi - lo < 15) {
      for (i = lo; i < hi; i++) {
        imin = i;
        for (j = i + 1; j <= hi; j++)
          if (sort[j].d < sort[imin].d) imin = j;
        if (imin != i) {
          temp = sort[imin];
          sort[imin] = sort[i];
          sort[i] = temp;
        }
      }
    } else {
      pivot = sort[(lo + hi) / 2].d;
      i = lo;
      j = hi;
      while (i <= j) {
        while (sort[i].d < pivot) i++;
        while (sort[j].d > pivot) j--;
        if (i <= j) {
          if (i < j) {
            temp = sort[i];
            sort[i] = sort[j];
            sort[j] = temp;
          }
          i += 1;
          j -= 1;
        }
      }
      if (j > (lo + hi) / 2) {
        if (lo < j) {
          qpos += 1;
          qstack[qpos].lo = lo;
          qstack[qpos].hi = j;
        }
        if (i < hi) {
          qpos += 1;
          qstack[qpos].lo = i;
          qstack[qpos].hi = hi;
Pedro Gonnet's avatar
Pedro Gonnet committed
206
        }
207
208
209
210
211
212
213
214
215
216
217
218
      } else {
        if (i < hi) {
          qpos += 1;
          qstack[qpos].lo = i;
          qstack[qpos].hi = hi;
        }
        if (lo < j) {
          qpos += 1;
          qstack[qpos].lo = lo;
          qstack[qpos].hi = j;
        }
      }
Pedro Gonnet's avatar
Pedro Gonnet committed
219
    }
220
221
222
  }
}

Pedro Gonnet's avatar
Pedro Gonnet committed
223
224
225
226
227
/**
 * @brief Sort the particles in the given cell along all cardinal directions.
 *
 * @param r The #runner.
 * @param c The #cell.
228
 * @param flags Cell flag.
229
230
 * @param clock Flag indicating whether to record the timing or not, needed
 *      for recursive calls.
Pedro Gonnet's avatar
Pedro Gonnet committed
231
 */
232
void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) {
233
234
235
236
237
238
239

  struct entry *finger;
  struct entry *fingers[8];
  struct part *parts = c->parts;
  struct entry *sort;
  int j, k, count = c->count;
  int i, ind, off[8], inds[8], temp_i, missing;
Matthieu Schaller's avatar
Matthieu Schaller committed
240
241
  float buff[8];
  double px[3];
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265

  TIMER_TIC

  /* Clean-up the flags, i.e. filter out what's already been sorted. */
  flags &= ~c->sorted;
  if (flags == 0) return;

  /* start by allocating the entry arrays. */
  if (c->sort == NULL || c->sortsize < count) {
    if (c->sort != NULL) free(c->sort);
    c->sortsize = count * 1.1;
    if ((c->sort = (struct entry *)malloc(sizeof(struct entry) *
                                          (c->sortsize + 1) * 13)) == NULL)
      error("Failed to allocate sort memory.");
  }
  sort = c->sort;

  /* Does this cell have any progeny? */
  if (c->split) {

    /* Fill in the gaps within the progeny. */
    for (k = 0; k < 8; k++) {
      if (c->progeny[k] == NULL) continue;
      missing = flags & ~c->progeny[k]->sorted;
266
      if (missing) runner_do_sort(r, c->progeny[k], missing, 0);
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
    }

    /* Loop over the 13 different sort arrays. */
    for (j = 0; j < 13; j++) {

      /* Has this sort array been flagged? */
      if (!(flags & (1 << j))) continue;

      /* Init the particle index offsets. */
      for (off[0] = 0, k = 1; k < 8; k++)
        if (c->progeny[k - 1] != NULL)
          off[k] = off[k - 1] + c->progeny[k - 1]->count;
        else
          off[k] = off[k - 1];

      /* Init the entries and indices. */
      for (k = 0; k < 8; k++) {
        inds[k] = k;
        if (c->progeny[k] != NULL && c->progeny[k]->count > 0) {
          fingers[k] = &c->progeny[k]->sort[j * (c->progeny[k]->count + 1)];
          buff[k] = fingers[k]->d;
          off[k] = off[k];
        } else
          buff[k] = FLT_MAX;
      }

      /* Sort the buffer. */
      for (i = 0; i < 7; i++)
        for (k = i + 1; k < 8; k++)
          if (buff[inds[k]] < buff[inds[i]]) {
            temp_i = inds[i];
            inds[i] = inds[k];
            inds[k] = temp_i;
          }

      /* For each entry in the new sort list. */
      finger = &sort[j * (count + 1)];
      for (ind = 0; ind < count; ind++) {

        /* Copy the minimum into the new sort array. */
        finger[ind].d = buff[inds[0]];
        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];

        /* Update the buffer. */
        fingers[inds[0]] += 1;
        buff[inds[0]] = fingers[inds[0]]->d;

        /* Find the smallest entry. */
        for (k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
          temp_i = inds[k - 1];
          inds[k - 1] = inds[k];
          inds[k] = temp_i;
Pedro Gonnet's avatar
Pedro Gonnet committed
319
        }
320

321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
      } /* Merge. */

      /* Add a sentinel. */
      sort[j * (count + 1) + count].d = FLT_MAX;
      sort[j * (count + 1) + count].i = 0;

      /* Mark as sorted. */
      c->sorted |= (1 << j);

    } /* loop over sort arrays. */

  } /* progeny? */

  /* Otherwise, just sort. */
  else {

    /* Fill the sort array. */
    for (k = 0; k < count; k++) {
      px[0] = parts[k].x[0];
      px[1] = parts[k].x[1];
      px[2] = parts[k].x[2];
      for (j = 0; j < 13; j++)
        if (flags & (1 << j)) {
          sort[j * (count + 1) + k].i = k;
Matthieu Schaller's avatar
Matthieu Schaller committed
345
346
347
          sort[j * (count + 1) + k].d = px[0] * runner_shift[j][0] +
                                        px[1] * runner_shift[j][1] +
                                        px[2] * runner_shift[j][2];
348
        }
349
    }
350
351
352
353
354
355

    /* Add the sentinel and sort. */
    for (j = 0; j < 13; j++)
      if (flags & (1 << j)) {
        sort[j * (count + 1) + count].d = FLT_MAX;
        sort[j * (count + 1) + count].i = 0;
356
        runner_do_sort_ascending(&sort[j * (count + 1)], count);
357
358
359
360
        c->sorted |= (1 << j);
      }
  }

361
#ifdef SWIFT_DEBUG_CHECKS
Matthieu Schaller's avatar
Matthieu Schaller committed
362
  /* Verify the sorting. */
363
364
365
366
367
368
369
370
371
372
  for (j = 0; j < 13; j++) {
    if (!(flags & (1 << j))) continue;
    finger = &sort[j * (count + 1)];
    for (k = 1; k < count; k++) {
      if (finger[k].d < finger[k - 1].d)
        error("Sorting failed, ascending array.");
      if (finger[k].i >= count) error("Sorting failed, indices borked.");
    }
  }
#endif
373
374
375
376

  if (clock) TIMER_TOC(timer_dosort);
}

377
378
379
380
381
/**
 * @brief Initialize the particles before the density calculation
 *
 * @param r The runner thread.
 * @param c The cell.
Matthieu Schaller's avatar
Matthieu Schaller committed
382
 * @param timer 1 if the time is to be recorded.
383
 */
384
void runner_do_init(struct runner *r, struct cell *c, int timer) {
385

Matthieu Schaller's avatar
Matthieu Schaller committed
386
387
  struct part *restrict parts = c->parts;
  struct gpart *restrict gparts = c->gparts;
Matthieu Schaller's avatar
Matthieu Schaller committed
388
  const int count = c->count;
389
  const int gcount = c->gcount;
390
  const int ti_current = r->e->ti_current;
391
392

  TIMER_TIC;
393

394
395
  /* Recurse? */
  if (c->split) {
Matthieu Schaller's avatar
Matthieu Schaller committed
396
    for (int k = 0; k < 8; k++)
397
      if (c->progeny[k] != NULL) runner_do_init(r, c->progeny[k], 0);
398
    return;
399
400
  } else {

401
402
    /* Loop over the parts in this cell. */
    for (int i = 0; i < count; i++) {
403

404
      /* Get a direct pointer on the part. */
405
      struct part *restrict p = &parts[i];
406

407
      if (p->ti_end <= ti_current) {
Matthieu Schaller's avatar
Matthieu Schaller committed
408

409
410
        /* Get ready for a density calculation */
        hydro_init_part(p);
411
      }
412
    }
413
414
415
416
417

    /* Loop over the gparts in this cell. */
    for (int i = 0; i < gcount; i++) {

      /* Get a direct pointer on the part. */
418
      struct gpart *restrict gp = &gparts[i];
419
420
421
422

      if (gp->ti_end <= ti_current) {

        /* Get ready for a density calculation */
423
        gravity_init_gpart(gp);
424
425
      }
    }
426
  }
427

Peter W. Draper's avatar
Peter W. Draper committed
428
  if (timer) TIMER_TOC(timer_init);
429
430
}

431
432
433
434
435
436
437
438
/**
 * @brief Intermediate task after the gradient loop that does final operations
 * on the gradient quantities and optionally slope limits the gradients
 *
 * @param r The runner thread.
 * @param c The cell.
 */
void runner_do_extra_ghost(struct runner *r, struct cell *c) {
439
#ifdef EXTRA_HYDRO_LOOP
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
  struct part *restrict parts = c->parts;
  const int count = c->count;
  const int ti_current = r->e->ti_current;

  /* Recurse? */
  if (c->split) {
    for (int k = 0; k < 8; k++)
      if (c->progeny[k] != NULL) runner_do_extra_ghost(r, c->progeny[k]);
    return;
  } else {

    /* Loop over the parts in this cell. */
    for (int i = 0; i < count; i++) {

      /* Get a direct pointer on the part. */
      struct part *restrict p = &parts[i];

      if (p->ti_end <= ti_current) {

        /* Get ready for a force calculation */
        hydro_end_gradient(p);
      }
    }
  }
464
#endif
465
}
466

467
/**
468
469
 * @brief Intermediate task after the density to check that the smoothing
 * lengths are correct.
470
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
471
 * @param r The runner thread.
472
 * @param c The cell.
473
 */
Matthieu Schaller's avatar
Matthieu Schaller committed
474
void runner_do_ghost(struct runner *r, struct cell *c) {
475

Matthieu Schaller's avatar
Matthieu Schaller committed
476
477
  struct part *restrict parts = c->parts;
  struct xpart *restrict xparts = c->xparts;
Matthieu Schaller's avatar
Matthieu Schaller committed
478
  int redo, count = c->count;
479
480
  const int ti_current = r->e->ti_current;
  const double timeBase = r->e->timeBase;
481
482
483
484
485
486
487
  const float target_wcount = r->e->hydro_properties->target_neighbours;
  const float max_wcount =
      target_wcount + r->e->hydro_properties->delta_neighbours;
  const float min_wcount =
      target_wcount - r->e->hydro_properties->delta_neighbours;
  const int max_smoothing_iter =
      r->e->hydro_properties->max_smoothing_iterations;
488

489
490
  TIMER_TIC;

491
492
  /* Recurse? */
  if (c->split) {
Matthieu Schaller's avatar
Matthieu Schaller committed
493
    for (int k = 0; k < 8; k++)
Matthieu Schaller's avatar
Matthieu Schaller committed
494
      if (c->progeny[k] != NULL) runner_do_ghost(r, c->progeny[k]);
495
496
497
498
    return;
  }

  /* Init the IDs that have to be updated. */
499
500
501
  int *pid = NULL;
  if ((pid = malloc(sizeof(int) * count)) == NULL)
    error("Can't allocate memory for pid.");
Matthieu Schaller's avatar
Matthieu Schaller committed
502
  for (int k = 0; k < count; k++) pid[k] = k;
503
504

  /* While there are particles that need to be updated... */
505
  for (int num_reruns = 0; count > 0 && num_reruns < max_smoothing_iter;
Matthieu Schaller's avatar
Matthieu Schaller committed
506
       num_reruns++) {
507
508
509

    /* Reset the redo-count. */
    redo = 0;
510

511
    /* Loop over the parts in this cell. */
Matthieu Schaller's avatar
Matthieu Schaller committed
512
    for (int i = 0; i < count; i++) {
513
514

      /* Get a direct pointer on the part. */
515
516
      struct part *restrict p = &parts[pid[i]];
      struct xpart *restrict xp = &xparts[pid[i]];
517
518

      /* Is this part within the timestep? */
519
      if (p->ti_end <= ti_current) {
520

521
        /* Finish the density calculation */
522
        hydro_end_density(p, ti_current);
523

Matthieu Schaller's avatar
Matthieu Schaller committed
524
525
        float h_corr = 0.f;

526
        /* If no derivative, double the smoothing length. */
527
        if (p->density.wcount_dh == 0.0f) h_corr = p->h;
528
529
530

        /* Otherwise, compute the smoothing length update (Newton step). */
        else {
531
          h_corr = (target_wcount - p->density.wcount) / p->density.wcount_dh;
532
533

          /* Truncate to the range [ -p->h/2 , p->h ]. */
534
535
          h_corr = (h_corr < p->h) ? h_corr : p->h;
          h_corr = (h_corr > -0.5f * p->h) ? h_corr : -0.5f * p->h;
Pedro Gonnet's avatar
Pedro Gonnet committed
536
        }
537
538

        /* Did we get the right number density? */
539
        if (p->density.wcount > max_wcount || p->density.wcount < min_wcount) {
540
541
542
543

          /* Ok, correct then */
          p->h += h_corr;

544
          /* Flag for another round of fun */
545
546
          pid[redo] = pid[i];
          redo += 1;
547

548
549
          /* Re-initialise everything */
          hydro_init_part(p);
550

551
          /* Off we go ! */
552
          continue;
553
554
        }

Matthieu Schaller's avatar
Matthieu Schaller committed
555
        /* We now have a particle whose smoothing length has converged */
Matthieu Schaller's avatar
Matthieu Schaller committed
556

557
        /* As of here, particle force variables will be set. */
558

559
        /* Compute variables required for the force loop */
560
        hydro_prepare_force(p, xp, ti_current, timeBase);
561

Matthieu Schaller's avatar
Matthieu Schaller committed
562
        /* The particle force values are now set.  Do _NOT_
563
           try to read any particle density variables! */
Matthieu Schaller's avatar
Matthieu Schaller committed
564

565
566
        /* Prepare the particle for the force loop over neighbours */
        hydro_reset_acceleration(p);
567
568
569
      }
    }

570
571
572
    /* We now need to treat the particles whose smoothing length had not
     * converged again */

573
574
575
576
577
    /* Re-set the counter for the next loop (potentially). */
    count = redo;
    if (count > 0) {

      /* Climb up the cell hierarchy. */
Matthieu Schaller's avatar
Matthieu Schaller committed
578
      for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
Matthieu Schaller's avatar
Matthieu Schaller committed
579

580
581
        /* Run through this cell's density interactions. */
        for (struct link *l = finger->density; l != NULL; l = l->next) {
Matthieu Schaller's avatar
Matthieu Schaller committed
582

583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
          /* Self-interaction? */
          if (l->t->type == task_type_self)
            runner_doself_subset_density(r, finger, parts, pid, count);

          /* Otherwise, pair interaction? */
          else if (l->t->type == task_type_pair) {

            /* Left or right? */
            if (l->t->ci == finger)
              runner_dopair_subset_density(r, finger, parts, pid, count,
                                           l->t->cj);
            else
              runner_dopair_subset_density(r, finger, parts, pid, count,
                                           l->t->ci);

          }

600
601
602
603
604
605
606
          /* Otherwise, sub-self interaction? */
          else if (l->t->type == task_type_sub_self)
            runner_dosub_subset_density(r, finger, parts, pid, count, NULL, -1,
                                        1);

          /* Otherwise, sub-pair interaction? */
          else if (l->t->type == task_type_sub_pair) {
607
608
609
610
611
612
613
614
615
616
617

            /* Left or right? */
            if (l->t->ci == finger)
              runner_dosub_subset_density(r, finger, parts, pid, count,
                                          l->t->cj, -1, 1);
            else
              runner_dosub_subset_density(r, finger, parts, pid, count,
                                          l->t->ci, -1, 1);
          }
        }
      }
618
    }
619
  }
620

Matthieu Schaller's avatar
Matthieu Schaller committed
621
622
  if (count)
    message("Smoothing length failed to converge on %i particles.", count);
623

624
625
626
  /* Be clean */
  free(pid);

Matthieu Schaller's avatar
Matthieu Schaller committed
627
  TIMER_TOC(timer_do_ghost);
628
629
}

630
/**
631
 * @brief Drift particles and g-particles in a cell forward in time
632
633
 *
 * @param c The cell.
634
 * @param e The engine.
635
 */
636
static void runner_do_drift(struct cell *c, struct engine *e) {
637

638
639
640
641
642
  const double timeBase = e->timeBase;
  const double dt = (e->ti_current - e->ti_old) * timeBase;
  const int ti_old = e->ti_old;
  const int ti_current = e->ti_current;

643
644
645
  struct part *const parts = c->parts;
  struct xpart *const xparts = c->xparts;
  struct gpart *const gparts = c->gparts;
646
  float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f;
647

648
  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, entropy = 0.0, mass = 0.0;
649
650
651
  double mom[3] = {0.0, 0.0, 0.0};
  double ang_mom[3] = {0.0, 0.0, 0.0};

652
653
  /* No children? */
  if (!c->split) {
654

655
    /* Loop over all the g-particles in the cell */
656
    const size_t nr_gparts = c->gcount;
Peter W. Draper's avatar
Peter W. Draper committed
657
658
    for (size_t k = 0; k < nr_gparts; k++) {

659
      /* Get a handle on the gpart. */
660
      struct gpart *const gp = &gparts[k];
661
662

      /* Drift... */
663
      drift_gpart(gp, dt, timeBase, ti_old, ti_current);
664
665
666
667
668

      /* Compute (square of) motion since last cell construction */
      const float dx2 = gp->x_diff[0] * gp->x_diff[0] +
                        gp->x_diff[1] * gp->x_diff[1] +
                        gp->x_diff[2] * gp->x_diff[2];
669
      dx2_max = (dx2_max > dx2) ? dx2_max : dx2;
670
671
672
    }

    /* Loop over all the particles in the cell (more work for these !) */
Peter W. Draper's avatar
Peter W. Draper committed
673
674
    const size_t nr_parts = c->count;
    for (size_t k = 0; k < nr_parts; k++) {
675

676
      /* Get a handle on the part. */
677
678
      struct part *const p = &parts[k];
      struct xpart *const xp = &xparts[k];
679

680
      /* Drift... */
681
      drift_part(p, xp, dt, timeBase, ti_old, ti_current);
682

683
      /* Compute (square of) motion since last cell construction */
684
685
686
      const float dx2 = xp->x_diff[0] * xp->x_diff[0] +
                        xp->x_diff[1] * xp->x_diff[1] +
                        xp->x_diff[2] * xp->x_diff[2];
687
      dx2_max = (dx2_max > dx2) ? dx2_max : dx2;
Matthieu Schaller's avatar
Matthieu Schaller committed
688
689

      /* Maximal smoothing length */
690
      h_max = (h_max > p->h) ? h_max : p->h;
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717

      /* Now collect quantities for statistics */

      const float half_dt =
          (ti_current - (p->ti_begin + p->ti_end) / 2) * timeBase;
      const double x[3] = {p->x[0], p->x[1], p->x[2]};
      const float v[3] = {xp->v_full[0] + p->a_hydro[0] * half_dt,
                          xp->v_full[1] + p->a_hydro[1] * half_dt,
                          xp->v_full[2] + p->a_hydro[2] * half_dt};
      const float m = p->mass;

      /* Collect mass */
      mass += m;

      /* Collect momentum */
      mom[0] += m * v[0];
      mom[1] += m * v[1];
      mom[2] += m * v[2];

      /* Collect angular momentum */
      ang_mom[0] += m * (x[1] * v[2] - x[2] * v[1]);
      ang_mom[1] += m * (x[2] * v[0] - x[0] * v[2]);
      ang_mom[2] += m * (x[0] * v[1] - x[1] * v[0]);

      /* Collect energies. */
      e_kin += 0.5 * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
      e_pot += 0.;
718
      e_int += m * hydro_get_internal_energy(p, half_dt);
719
720

      /* Collect entropy */
721
      entropy += m * hydro_get_entropy(p, half_dt);
722
    }
723
724

    /* Now, get the maximal particle motion from its square */
725
    dx_max = sqrtf(dx2_max);
726
  }
727

Matthieu Schaller's avatar
Matthieu Schaller committed
728
729
730
  /* Otherwise, aggregate data from children. */
  else {

731
    /* Loop over the progeny and collect their data. */
Matthieu Schaller's avatar
Matthieu Schaller committed
732
733
    for (int k = 0; k < 8; k++)
      if (c->progeny[k] != NULL) {
734
        struct cell *cp = c->progeny[k];
735

736
737
        /* Recurse. */
        runner_do_drift(cp, e);
Matthieu Schaller's avatar
Matthieu Schaller committed
738

739
740
        dx_max = fmaxf(dx_max, cp->dx_max);
        h_max = fmaxf(h_max, cp->h_max);
741
742
743
744
        mass += cp->mass;
        e_kin += cp->e_kin;
        e_int += cp->e_int;
        e_pot += cp->e_pot;
745
        entropy += cp->entropy;
746
747
748
749
750
751
        mom[0] += cp->mom[0];
        mom[1] += cp->mom[1];
        mom[2] += cp->mom[2];
        ang_mom[0] += cp->ang_mom[0];
        ang_mom[1] += cp->ang_mom[1];
        ang_mom[2] += cp->ang_mom[2];
Matthieu Schaller's avatar
Matthieu Schaller committed
752
753
754
755
756
757
      }
  }

  /* Store the values */
  c->h_max = h_max;
  c->dx_max = dx_max;
758
759
760
761
  c->mass = mass;
  c->e_kin = e_kin;
  c->e_int = e_int;
  c->e_pot = e_pot;
762
  c->entropy = entropy;
763
764
765
766
767
768
  c->mom[0] = mom[0];
  c->mom[1] = mom[1];
  c->mom[2] = mom[2];
  c->ang_mom[0] = ang_mom[0];
  c->ang_mom[1] = ang_mom[1];
  c->ang_mom[2] = ang_mom[2];
769
}
770

771
772
773
774
775
776
777
778
779
780
/**
 * @brief Mapper function to drift particles and g-particles forward in time.
 *
 * @param map_data An array of #cell%s.
 * @param num_elements Chunk size.
 * @param extra_data Pointer to an #engine.
 */

void runner_do_drift_mapper(void *map_data, int num_elements,
                            void *extra_data) {
781

782
783
784
785
786
787
788
  struct engine *e = (struct engine *)extra_data;
  struct cell *cells = (struct cell *)map_data;

  for (int ind = 0; ind < num_elements; ind++) {
    struct cell *c = &cells[ind];

    /* Only drift local particles. */
Peter W. Draper's avatar
Peter W. Draper committed
789
    if (c != NULL && c->nodeID == e->nodeID) runner_do_drift(c, e);
790
  }
791
}
792

793
/**
Matthieu Schaller's avatar
Matthieu Schaller committed
794
795
 * @brief Kick particles in momentum space and collect statistics (fixed
 * time-step case)
796
797
798
 *
 * @param r The runner thread.
 * @param c The cell.
799
 * @param timer Are we timing this ?
800
 */
Matthieu Schaller's avatar
Matthieu Schaller committed
801
void runner_do_kick_fixdt(struct runner *r, struct cell *c, int timer) {
802

Matthieu Schaller's avatar
Matthieu Schaller committed
803
  const double global_dt = r->e->dt_max;
804
  const double timeBase = r->e->timeBase;
Matthieu Schaller's avatar
Matthieu Schaller committed
805
  const int count = c->count;
806
  const int gcount = c->gcount;
Matthieu Schaller's avatar
Matthieu Schaller committed
807
808
809
  struct part *restrict parts = c->parts;
  struct xpart *restrict xparts = c->xparts;
  struct gpart *restrict gparts = c->gparts;
Matthieu Schaller's avatar
Matthieu Schaller committed
810
  const double const_G = r->e->physical_constants->const_newton_G;
Matthieu Schaller's avatar
Matthieu Schaller committed
811

812
  int updated = 0, g_updated = 0;
813
  int ti_end_min = max_nr_timesteps, ti_end_max = 0;
814
815
816

  TIMER_TIC

Tom Theuns's avatar
Tom Theuns committed
817
#ifdef TASK_VERBOSE
Matthieu Schaller's avatar
Matthieu Schaller committed
818
  OUT;
Tom Theuns's avatar
Tom Theuns committed
819
820
#endif

Matthieu Schaller's avatar
Matthieu Schaller committed
821
822
823
  /* The new time-step */
  const int new_dti = global_dt / timeBase;

824
825
826
  /* No children? */
  if (!c->split) {

827
    /* Loop over the g-particles and kick everyone. */
828
829
830
    for (int k = 0; k < gcount; k++) {

      /* Get a handle on the part. */
831
      struct gpart *restrict gp = &gparts[k];
832

833
      /* If the g-particle has no counterpart */
834
      if (gp->id_or_neg_offset > 0) {
835

Matthieu Schaller's avatar
Matthieu Schaller committed
836
        /* First, finish the force calculation */
837
        gravity_end_force(gp, const_G);
838

839
840
        /* Kick the g-particle forward */
        kick_gpart(gp, new_dti, timeBase);
841

Matthieu Schaller's avatar
Matthieu Schaller committed
842
843
        /* Number of updated g-particles */
        g_updated++;
844

Matthieu Schaller's avatar
Matthieu Schaller committed
845
846
847
848
849
        /* Minimal time for next end of time-step */
        ti_end_min = min(gp->ti_end, ti_end_min);
        ti_end_max = max(gp->ti_end, ti_end_max);
      }
    }
850

Matthieu Schaller's avatar
Matthieu Schaller committed
851
    /* Now do the hydro ones... */
852

853
    /* Loop over the particles and kick everyone. */
Matthieu Schaller's avatar
Matthieu Schaller committed
854
    for (int k = 0; k < count; k++) {
855

Matthieu Schaller's avatar
Matthieu Schaller committed
856
      /* Get a handle on the part. */
857
858
      struct part *restrict p = &parts[k];
      struct xpart *restrict xp = &xparts[k];
859

Matthieu Schaller's avatar
Matthieu Schaller committed
860
861
      /* First, finish the force loop */
      hydro_end_force(p);
862
      if (p->gpart != NULL) gravity_end_force(p->gpart, const_G);
863

864
865
      /* Kick the particle forward */
      kick_part(p, xp, new_dti, timeBase);
866

Matthieu Schaller's avatar
Matthieu Schaller committed
867
868
869
      /* Number of updated particles */
      updated++;
      if (p->gpart != NULL) g_updated++;
870

Matthieu Schaller's avatar
Matthieu Schaller committed
871
872
873
      /* Minimal time for next end of time-step */
      ti_end_min = min(p->ti_end, ti_end_min);
      ti_end_max = max(p->ti_end, ti_end_max);
Matthieu Schaller's avatar
Matthieu Schaller committed
874
875
    }
  }
876

Matthieu Schaller's avatar
Matthieu Schaller committed
877
878
  /* Otherwise, aggregate data from children. */
  else {
879

Matthieu Schaller's avatar
Matthieu Schaller committed
880
881
882
    /* Loop over the progeny. */
    for (int k = 0; k < 8; k++)
      if (c->progeny[k] != NULL) {
883
        struct cell *restrict cp = c->progeny[k];
884

Matthieu Schaller's avatar
Matthieu Schaller committed
885
886
        /* Recurse */
        runner_do_kick_fixdt(r, cp, 0);
887

Matthieu Schaller's avatar
Matthieu Schaller committed
888
889
890
891
892
893
894
        /* And aggregate */
        updated += cp->updated;
        g_updated += cp->g_updated;
        ti_end_min = min(cp->ti_end_min, ti_end_min);
        ti_end_max = max(cp->ti_end_max, ti_end_max);
      }
  }
895

Matthieu Schaller's avatar
Matthieu Schaller committed
896
897
898
899
900
  /* Store the values. */
  c->updated = updated;
  c->g_updated = g_updated;
  c->ti_end_min = ti_end_min;
  c->ti_end_max = ti_end_max;
901

Matthieu Schaller's avatar
Matthieu Schaller committed
902
903
  if (timer) TIMER_TOC(timer_kick);
}
904

Matthieu Schaller's avatar
Matthieu Schaller committed
905
906
907
/**
 * @brief Kick particles in momentum space and collect statistics (floating
 * time-step case)
908
909
910
 *
 * @param r The runner thread.
 * @param c The cell.
911
 * @param timer Are we timing this ?
912
 */
913
void runner_do_kick(struct runner *r, struct cell *c, int timer) {
914

915
916
  const struct engine *e = r->e;
  const double timeBase = e->timeBase;
917
  const int ti_current = r->e->ti_current;
Matthieu Schaller's avatar
Matthieu Schaller committed
918
  const int count = c->count;
919
  const int gcount = c->gcount;
Matthieu Schaller's avatar
Matthieu Schaller committed
920
921
922
  struct part *restrict parts = c->parts;
  struct xpart *restrict xparts = c->xparts;
  struct gpart *restrict gparts = c->gparts;
Matthieu Schaller's avatar
Matthieu Schaller committed
923
  const double const_G = r->e->physical_constants->const_newton_G;
Matthieu Schaller's avatar
Matthieu Schaller committed
924

925
  int updated = 0, g_updated = 0;
926
  int ti_end_min = max_nr_timesteps, ti_end_max = 0;
927
928

  TIMER_TIC
929

Tom Theuns's avatar
Tom Theuns committed
930
#ifdef TASK_VERBOSE
Matthieu Schaller's avatar
Matthieu Schaller committed
931
  OUT;
Tom Theuns's avatar
Tom Theuns committed
932
933
#endif

934
935
936
  /* No children? */
  if (!c->split) {

937
938
939
940
    /* Loop over the g-particles and kick the active ones. */
    for (int k = 0; k < gcount; k++) {

      /* Get a handle on the part. */
941
      struct gpart *restrict gp = &gparts[k];
942
943

      /* If the g-particle has no counterpart and needs to be kicked */
944
      if (gp->id_or_neg_offset > 0) {
945

Matthieu Schaller's avatar
Matthieu Schaller committed
946
        if (gp->ti_end <= ti_current) {
947

Matthieu Schaller's avatar
Matthieu Schaller committed
948
          /* First, finish the force calculation */
Matthieu Schaller's avatar
Matthieu Schaller committed
949
          gravity_end_force(gp, const_G);
950

951
          /* Compute the next timestep */
952
          const int new_dti = get_gpart_timestep(gp, e);
953

Matthieu Schaller's avatar
Matthieu Schaller committed
954
          /* Now we have a time step, proceed with the kick */
955
          kick_gpart(gp, new_dti, timeBase);
Matthieu Schaller's avatar
Matthieu Schaller committed
956
957
958
959
960
961
962
963

          /* Number of updated g-particles */
          g_updated++;
        }

        /* Minimal time for next end of time-step */
        ti_end_min = min(gp->ti_end, ti_end_min);
        ti_end_max = max(gp->ti_end, ti_end_max);
964
965
966
967
968
      }
    }

    /* Now do the hydro ones... */

969
    /* Loop over the particles and kick the active ones. */
Matthieu Schaller's avatar
Matthieu Schaller committed
970
    for (int k = 0; k < count; k++) {
971
972

      /* Get a handle on the part. */
973
974
      struct part *restrict p = &parts[k];
      struct xpart *restrict xp = &xparts[k];
975
976

      /* If particle needs to be kicked */
Matthieu Schaller's avatar
Matthieu Schaller committed
977
      if (p->ti_end <= ti_current) {
978
<