/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
22
23
24
25

/* Some standard headers. */
#include <float.h>
#include <limits.h>
26
#include <stdlib.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
27

28
29
/* MPI headers. */
#ifdef WITH_MPI
30
#include <mpi.h>
31
32
#endif

33
34
35
/* This object's header. */
#include "runner.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
36
/* Local headers. */
Matthieu Schaller's avatar
Matthieu Schaller committed
37
#include "approx_math.h"
38
#include "atomic.h"
39
#include "const.h"
40
#include "debug.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
41
#include "engine.h"
42
#include "error.h"
43
44
45
46
#include "scheduler.h"
#include "space.h"
#include "task.h"
#include "timers.h"
47
48
#include "hydro.h"
#include "gravity.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
49

50
/* Orientation of the cell pairs */
51
52
53
54
55
56
57
58
59
60
61
62
63
64
const float runner_shift[13 * 3] = {
    5.773502691896258e-01, 5.773502691896258e-01,  5.773502691896258e-01,
    7.071067811865475e-01, 7.071067811865475e-01,  0.0,
    5.773502691896258e-01, 5.773502691896258e-01,  -5.773502691896258e-01,
    7.071067811865475e-01, 0.0,                    7.071067811865475e-01,
    1.0,                   0.0,                    0.0,
    7.071067811865475e-01, 0.0,                    -7.071067811865475e-01,
    5.773502691896258e-01, -5.773502691896258e-01, 5.773502691896258e-01,
    7.071067811865475e-01, -7.071067811865475e-01, 0.0,
    5.773502691896258e-01, -5.773502691896258e-01, -5.773502691896258e-01,
    0.0,                   7.071067811865475e-01,  7.071067811865475e-01,
    0.0,                   1.0,                    0.0,
    0.0,                   7.071067811865475e-01,  -7.071067811865475e-01,
    0.0,                   0.0,                    1.0, };
65
66

/* Does the axis need flipping ? */
67
68
const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
                              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
Pedro Gonnet's avatar
Pedro Gonnet committed
69

70
/* Import the density loop functions. */
71
72
73
#define FUNCTION density
#include "runner_doiact.h"

74
/* Import the force loop functions. */
75
76
77
78
#undef FUNCTION
#define FUNCTION force
#include "runner_doiact.h"

79
80
81
/* Import the gravity loop functions. */
#include "runner_doiact_grav.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
82
83
84
85
86
87
/**
 * @brief Sort the entries in ascending order using QuickSort.
 *
 * @param sort The entries
 * @param N The number of entries.
 */
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143

void runner_dosort_ascending(struct entry *sort, int N) {

  struct {
    short int lo, hi;
  } qstack[10];
  int qpos, i, j, lo, hi, imin;
  struct entry temp;
  float pivot;

  /* Sort parts in cell_i in decreasing order with quicksort */
  qstack[0].lo = 0;
  qstack[0].hi = N - 1;
  qpos = 0;
  while (qpos >= 0) {
    lo = qstack[qpos].lo;
    hi = qstack[qpos].hi;
    qpos -= 1;
    if (hi - lo < 15) {
      for (i = lo; i < hi; i++) {
        imin = i;
        for (j = i + 1; j <= hi; j++)
          if (sort[j].d < sort[imin].d) imin = j;
        if (imin != i) {
          temp = sort[imin];
          sort[imin] = sort[i];
          sort[i] = temp;
        }
      }
    } else {
      pivot = sort[(lo + hi) / 2].d;
      i = lo;
      j = hi;
      while (i <= j) {
        while (sort[i].d < pivot) i++;
        while (sort[j].d > pivot) j--;
        if (i <= j) {
          if (i < j) {
            temp = sort[i];
            sort[i] = sort[j];
            sort[j] = temp;
          }
          i += 1;
          j -= 1;
        }
      }
      if (j > (lo + hi) / 2) {
        if (lo < j) {
          qpos += 1;
          qstack[qpos].lo = lo;
          qstack[qpos].hi = j;
        }
        if (i < hi) {
          qpos += 1;
          qstack[qpos].lo = i;
          qstack[qpos].hi = hi;
Pedro Gonnet's avatar
Pedro Gonnet committed
144
        }
145
146
147
148
149
150
151
152
153
154
155
156
      } else {
        if (i < hi) {
          qpos += 1;
          qstack[qpos].lo = i;
          qstack[qpos].hi = hi;
        }
        if (lo < j) {
          qpos += 1;
          qstack[qpos].lo = lo;
          qstack[qpos].hi = j;
        }
      }
Pedro Gonnet's avatar
Pedro Gonnet committed
157
    }
158
159
160
  }
}

Pedro Gonnet's avatar
Pedro Gonnet committed
161
162
163
164
165
/**
 * @brief Sort the particles in the given cell along all cardinal directions.
 *
 * @param r The #runner.
 * @param c The #cell.
166
 * @param flags Cell flag.
167
168
 * @param clock Flag indicating whether to record the timing or not, needed
 *      for recursive calls.
Pedro Gonnet's avatar
Pedro Gonnet committed
169
 */
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257

void runner_dosort(struct runner *r, struct cell *c, int flags, int clock) {

  struct entry *finger;
  struct entry *fingers[8];
  struct part *parts = c->parts;
  struct entry *sort;
  int j, k, count = c->count;
  int i, ind, off[8], inds[8], temp_i, missing;
  // float shift[3];
  float buff[8], px[3];

  TIMER_TIC

  /* Clean-up the flags, i.e. filter out what's already been sorted. */
  flags &= ~c->sorted;
  if (flags == 0) return;

  /* start by allocating the entry arrays. */
  if (c->sort == NULL || c->sortsize < count) {
    if (c->sort != NULL) free(c->sort);
    c->sortsize = count * 1.1;
    if ((c->sort = (struct entry *)malloc(sizeof(struct entry) *
                                          (c->sortsize + 1) * 13)) == NULL)
      error("Failed to allocate sort memory.");
  }
  sort = c->sort;

  /* Does this cell have any progeny? */
  if (c->split) {

    /* Fill in the gaps within the progeny. */
    for (k = 0; k < 8; k++) {
      if (c->progeny[k] == NULL) continue;
      missing = flags & ~c->progeny[k]->sorted;
      if (missing) runner_dosort(r, c->progeny[k], missing, 0);
    }

    /* Loop over the 13 different sort arrays. */
    for (j = 0; j < 13; j++) {

      /* Has this sort array been flagged? */
      if (!(flags & (1 << j))) continue;

      /* Init the particle index offsets. */
      for (off[0] = 0, k = 1; k < 8; k++)
        if (c->progeny[k - 1] != NULL)
          off[k] = off[k - 1] + c->progeny[k - 1]->count;
        else
          off[k] = off[k - 1];

      /* Init the entries and indices. */
      for (k = 0; k < 8; k++) {
        inds[k] = k;
        if (c->progeny[k] != NULL && c->progeny[k]->count > 0) {
          fingers[k] = &c->progeny[k]->sort[j * (c->progeny[k]->count + 1)];
          buff[k] = fingers[k]->d;
          off[k] = off[k];
        } else
          buff[k] = FLT_MAX;
      }

      /* Sort the buffer. */
      for (i = 0; i < 7; i++)
        for (k = i + 1; k < 8; k++)
          if (buff[inds[k]] < buff[inds[i]]) {
            temp_i = inds[i];
            inds[i] = inds[k];
            inds[k] = temp_i;
          }

      /* For each entry in the new sort list. */
      finger = &sort[j * (count + 1)];
      for (ind = 0; ind < count; ind++) {

        /* Copy the minimum into the new sort array. */
        finger[ind].d = buff[inds[0]];
        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];

        /* Update the buffer. */
        fingers[inds[0]] += 1;
        buff[inds[0]] = fingers[inds[0]]->d;

        /* Find the smallest entry. */
        for (k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
          temp_i = inds[k - 1];
          inds[k - 1] = inds[k];
          inds[k] = temp_i;
Pedro Gonnet's avatar
Pedro Gonnet committed
258
        }
259

260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
      } /* Merge. */

      /* Add a sentinel. */
      sort[j * (count + 1) + count].d = FLT_MAX;
      sort[j * (count + 1) + count].i = 0;

      /* Mark as sorted. */
      c->sorted |= (1 << j);

    } /* loop over sort arrays. */

  } /* progeny? */

  /* Otherwise, just sort. */
  else {

    /* Fill the sort array. */
    for (k = 0; k < count; k++) {
      px[0] = parts[k].x[0];
      px[1] = parts[k].x[1];
      px[2] = parts[k].x[2];
      for (j = 0; j < 13; j++)
        if (flags & (1 << j)) {
          sort[j * (count + 1) + k].i = k;
          sort[j * (count + 1) + k].d = px[0] * runner_shift[3 * j + 0] +
                                        px[1] * runner_shift[3 * j + 1] +
                                        px[2] * runner_shift[3 * j + 2];
        }
288
    }
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309

    /* Add the sentinel and sort. */
    for (j = 0; j < 13; j++)
      if (flags & (1 << j)) {
        sort[j * (count + 1) + count].d = FLT_MAX;
        sort[j * (count + 1) + count].i = 0;
        runner_dosort_ascending(&sort[j * (count + 1)], count);
        c->sorted |= (1 << j);
      }
  }

/* Verify the sorting. */
/* for ( j = 0 ; j < 13 ; j++ ) {
    if ( !( flags & (1 << j) ) )
        continue;
    finger = &sort[ j*(count + 1) ];
    for ( k = 1 ; k < count ; k++ ) {
        if ( finger[k].d < finger[k-1].d )
            error( "Sorting failed, ascending array." );
        if ( finger[k].i >= count )
            error( "Sorting failed, indices borked." );
310
        }
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
    } */

#ifdef TIMER_VERBOSE
  message(
      "runner %02i: %i parts at depth %i (flags = %i%i%i%i%i%i%i%i%i%i%i%i%i) "
      "took %.3f ms.",
      r->id, count, c->depth, (flags & 0x1000) >> 12, (flags & 0x800) >> 11,
      (flags & 0x400) >> 10, (flags & 0x200) >> 9, (flags & 0x100) >> 8,
      (flags & 0x80) >> 7, (flags & 0x40) >> 6, (flags & 0x20) >> 5,
      (flags & 0x10) >> 4, (flags & 0x8) >> 3, (flags & 0x4) >> 2,
      (flags & 0x2) >> 1, (flags & 0x1) >> 0,
      ((double)TIMER_TOC(timer_dosort)) / CPU_TPS * 1000);
  fflush(stdout);
#else
  if (clock) TIMER_TOC(timer_dosort);
#endif
}

/**
 * @brief Sort the gravity particles in the given cell along all cardinal
 *        directions.
 *
 * Counterpart of the particle sort operating on @c c->gparts: for every
 * direction j (0..12) whose bit is set in @c flags and not yet set in
 * @c c->gsorted, fills the j-th array in @c c->gsort with @c c->gcount
 * entries ordered by increasing @c d followed by one sentinel entry with
 * d = FLT_MAX. Split cells merge the sorted arrays of their progeny; leaf
 * cells project the positions onto #runner_shift and sort the result.
 *
 * @param r The #runner.
 * @param c The #cell.
 * @param flags Cell flag: bit j requests the sort along direction j.
 * @param clock Flag indicating whether to record the timing or not, needed
 *      for recursive calls.
 */
void runner_dogsort(struct runner *r, struct cell *c, int flags, int clock) {

  struct entry *finger;
  struct entry *fingers[8];
  struct gpart *gparts = c->gparts;
  struct entry *gsort;
  int j, k, count = c->gcount;
  int i, ind, off[8], inds[8], temp_i, missing;
  float buff[8], px[3];

  TIMER_TIC

  /* Clean-up the flags, i.e. filter out what's already been sorted. */
  flags &= ~c->gsorted;
  if (flags == 0) return;

  /* Start by allocating the entry arrays (13 arrays, each with room for the
     entries plus one sentinel), with 10% head-room on the particle count. */
  if (c->gsort == NULL || c->gsortsize < count) {
    free(c->gsort);
    c->gsortsize = count * 1.1;
    if ((c->gsort = malloc(sizeof(struct entry) * (c->gsortsize + 1) * 13)) ==
        NULL)
      error("Failed to allocate sort memory.");
  }
  gsort = c->gsort;

  /* Does this cell have any progeny? */
  if (c->split) {

    /* Fill in the gaps within the progeny. */
    for (k = 0; k < 8; k++) {
      if (c->progeny[k] == NULL) continue;
      missing = flags & ~c->progeny[k]->gsorted;
      if (missing) runner_dogsort(r, c->progeny[k], missing, 0);
    }

    /* Init the particle index offsets: off[k] is the number of gparts in
       the progeny preceding k. These do not depend on the direction. */
    off[0] = 0;
    for (k = 1; k < 8; k++) {
      off[k] = off[k - 1];
      if (c->progeny[k - 1] != NULL) off[k] += c->progeny[k - 1]->gcount;
    }

    /* Loop over the 13 different sort arrays. */
    for (j = 0; j < 13; j++) {

      /* Has this sort array been flagged? */
      if (!(flags & (1 << j))) continue;

      /* Init the entries and indices: buff[k] holds the smallest remaining
         key of progeny k, or FLT_MAX if it has nothing to contribute. */
      for (k = 0; k < 8; k++) {
        inds[k] = k;
        if (c->progeny[k] != NULL && c->progeny[k]->gcount > 0) {
          fingers[k] = &c->progeny[k]->gsort[j * (c->progeny[k]->gcount + 1)];
          buff[k] = fingers[k]->d;
        } else {
          fingers[k] = NULL;
          buff[k] = FLT_MAX;
        }
      }

      /* Sort the buffer. */
      for (i = 0; i < 7; i++)
        for (k = i + 1; k < 8; k++)
          if (buff[inds[k]] < buff[inds[i]]) {
            temp_i = inds[i];
            inds[i] = inds[k];
            inds[k] = temp_i;
          }

      /* For each entry in the new sort list. */
      finger = &gsort[j * (count + 1)];
      for (ind = 0; ind < count; ind++) {

        /* Copy the minimum into the new sort array. */
        finger[ind].d = buff[inds[0]];
        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];

        /* Update the buffer; past the last entry of a progeny this reads
           that progeny's FLT_MAX sentinel. */
        fingers[inds[0]] += 1;
        buff[inds[0]] = fingers[inds[0]]->d;

        /* Find the smallest entry by sifting the new key into place. */
        for (k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
          temp_i = inds[k - 1];
          inds[k - 1] = inds[k];
          inds[k] = temp_i;
        }

      } /* Merge. */

      /* Add a sentinel. */
      gsort[j * (count + 1) + count].d = FLT_MAX;
      gsort[j * (count + 1) + count].i = 0;

      /* Mark as sorted. */
      c->gsorted |= (1 << j);

    } /* loop over sort arrays. */

  } /* progeny? */

  /* Otherwise, just sort. */
  else {

    /* Fill the sort array. */
    for (k = 0; k < count; k++) {
      px[0] = gparts[k].x[0];
      px[1] = gparts[k].x[1];
      px[2] = gparts[k].x[2];
      for (j = 0; j < 13; j++)
        if (flags & (1 << j)) {
          gsort[j * (count + 1) + k].i = k;
          gsort[j * (count + 1) + k].d = px[0] * runner_shift[3 * j + 0] +
                                         px[1] * runner_shift[3 * j + 1] +
                                         px[2] * runner_shift[3 * j + 2];
        }
    }

    /* Add the sentinel and sort. */
    for (j = 0; j < 13; j++)
      if (flags & (1 << j)) {
        gsort[j * (count + 1) + count].d = FLT_MAX;
        gsort[j * (count + 1) + count].i = 0;
        runner_dosort_ascending(&gsort[j * (count + 1)], count);
        c->gsorted |= (1 << j);
      }
  }

  /* Only the outermost call records the time; recursive calls pass
     clock = 0 so that their time is not counted twice. */
  if (clock) {
#ifdef TIMER_VERBOSE
    message(
        "runner %02i: %i parts at depth %i (flags = %i%i%i%i%i%i%i%i%i%i%i%i%i) "
        "took %.3f ms.",
        r->id, count, c->depth, (flags & 0x1000) >> 12, (flags & 0x800) >> 11,
        (flags & 0x400) >> 10, (flags & 0x200) >> 9, (flags & 0x100) >> 8,
        (flags & 0x80) >> 7, (flags & 0x40) >> 6, (flags & 0x20) >> 5,
        (flags & 0x10) >> 4, (flags & 0x8) >> 3, (flags & 0x4) >> 2,
        (flags & 0x2) >> 1, (flags & 0x1) >> 0,
        ((double)TIMER_TOC(timer_dosort)) / CPU_TPS * 1000);
    fflush(stdout);
#else
    TIMER_TOC(timer_dosort);
#endif
  }
}

487
488
489
490
491
/**
 * @brief Initialize the particles before the density calculation
 *
 * @param r The runner thread.
 * @param c The cell.
Matthieu Schaller's avatar
Matthieu Schaller committed
492
 * @param timer 1 if the time is to be recorded.
493
494
 */

495
void runner_doinit(struct runner *r, struct cell *c, int timer) {
496
497

  struct part *p, *parts = c->parts;
Matthieu Schaller's avatar
Matthieu Schaller committed
498
499
  const int count = c->count;
  const float t_end = r->e->time;
500
501

  TIMER_TIC;
502

503
504
  /* Recurse? */
  if (c->split) {
Matthieu Schaller's avatar
Matthieu Schaller committed
505
    for (int k = 0; k < 8; k++)
506
      if (c->progeny[k] != NULL) runner_doinit(r, c->progeny[k], 0);
507
    return;
508
509
  } else {

510
511
    /* Loop over the parts in this cell. */
    for (int i = 0; i < count; i++) {
512

513
514
      /* Get a direct pointer on the part. */
      p = &parts[i];
515

Matthieu Schaller's avatar
Matthieu Schaller committed
516
517
      if (p->t_end <= t_end) {

518
519
        /* Get ready for a density calculation */
        hydro_init_part(p);
520
      }
521
522
    }
  }
523
524
525
526
527
528
529
530
531
532

  if (timer) {
#ifdef TIMER_VERBOSE
    message("runner %02i: %i parts at depth %i took %.3f ms.", r->id, c->count,
            c->depth, ((double)TIMER_TOC(timer_init)) / CPU_TPS * 1000);
    fflush(stdout);
#else
    TIMER_TOC(timer_init);
#endif
  }
533
534
}

535
536
537
/**
 * @brief Intermediate task between density and force
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
538
 * @param r The runner thread.
539
 * @param c The cell.
540
 */
541
542
543
544

void runner_doghost(struct runner *r, struct cell *c) {

  struct part *p, *parts = c->parts;
545
  struct xpart *xp, *xparts = c->xparts;
546
  struct cell *finger;
Matthieu Schaller's avatar
Matthieu Schaller committed
547
  int redo, count = c->count;
548
  int *pid;
549
  float h_corr;
550
551
  float t_end = r->e->time;

552
553
  TIMER_TIC;

554
555
  /* Recurse? */
  if (c->split) {
Matthieu Schaller's avatar
Matthieu Schaller committed
556
    for (int k = 0; k < 8; k++)
557
558
559
560
561
562
563
      if (c->progeny[k] != NULL) runner_doghost(r, c->progeny[k]);
    return;
  }

  /* Init the IDs that have to be updated. */
  if ((pid = (int *)alloca(sizeof(int) * count)) == NULL)
    error("Call to alloca failed.");
Matthieu Schaller's avatar
Matthieu Schaller committed
564
  for (int k = 0; k < count; k++) pid[k] = k;
565
566

  /* While there are particles that need to be updated... */
Matthieu Schaller's avatar
Matthieu Schaller committed
567
  for (int num_reruns = 0; count > 0 && num_reruns < const_smoothing_max_iter;
Matthieu Schaller's avatar
Matthieu Schaller committed
568
       num_reruns++) {
569
570
571

    /* Reset the redo-count. */
    redo = 0;
572

573
    /* Loop over the parts in this cell. */
Matthieu Schaller's avatar
Matthieu Schaller committed
574
    for (int i = 0; i < count; i++) {
575
576
577

      /* Get a direct pointer on the part. */
      p = &parts[pid[i]];
578
      xp = &xparts[pid[i]];
579
580

      /* Is this part within the timestep? */
581
      if (p->t_end <= t_end) {
582

583
584
        /* Finish the density calculation */
        hydro_end_density(p, t_end);
585
586

        /* If no derivative, double the smoothing length. */
587
        if (p->density.wcount_dh == 0.0f) h_corr = p->h;
588
589
590

        /* Otherwise, compute the smoothing length update (Newton step). */
        else {
591
          h_corr = (kernel_nwneigh - p->density.wcount) / p->density.wcount_dh;
592
593

          /* Truncate to the range [ -p->h/2 , p->h ]. */
594
595
          h_corr = fminf(h_corr, p->h);
          h_corr = fmaxf(h_corr, -p->h * 0.5f);
Pedro Gonnet's avatar
Pedro Gonnet committed
596
        }
597
598

        /* Did we get the right number density? */
599
        if (p->density.wcount > kernel_nwneigh + const_delta_nwneigh ||
600
            p->density.wcount < kernel_nwneigh - const_delta_nwneigh) {
601
602
603
604

          /* Ok, correct then */
          p->h += h_corr;

605
          /* Flag for another round of fun */
606
607
          pid[redo] = pid[i];
          redo += 1;
608

609
610
          /* Re-initialise everything */
          hydro_init_part(p);
611

612
          /* Off we go ! */
613
          continue;
614
615
        }

Matthieu Schaller's avatar
Matthieu Schaller committed
616
        /* We now have a particle whose smoothing length has converged */
Matthieu Schaller's avatar
Matthieu Schaller committed
617

618
        /* As of here, particle force variables will be set. */
619

620
        /* Compute variables required for the force loop */
621
        hydro_prepare_force(p, xp, t_end);
622

Matthieu Schaller's avatar
Matthieu Schaller committed
623
        /* The particle force values are now set.  Do _NOT_
624
           try to read any particle density variables! */
Matthieu Schaller's avatar
Matthieu Schaller committed
625

626
627
        /* Prepare the particle for the force loop over neighbours */
        hydro_reset_acceleration(p);
628
629
630
      }
    }

631
632
633
    /* We now need to treat the particles whose smoothing length had not
     * converged again */

634
635
636
637
638
639
    /* Re-set the counter for the next loop (potentially). */
    count = redo;
    if (count > 0) {

      /* Climb up the cell hierarchy. */
      for (finger = c; finger != NULL; finger = finger->parent) {
Matthieu Schaller's avatar
Matthieu Schaller committed
640

641
642
        /* Run through this cell's density interactions. */
        for (struct link *l = finger->density; l != NULL; l = l->next) {
Matthieu Schaller's avatar
Matthieu Schaller committed
643

644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
          /* Self-interaction? */
          if (l->t->type == task_type_self)
            runner_doself_subset_density(r, finger, parts, pid, count);

          /* Otherwise, pair interaction? */
          else if (l->t->type == task_type_pair) {

            /* Left or right? */
            if (l->t->ci == finger)
              runner_dopair_subset_density(r, finger, parts, pid, count,
                                           l->t->cj);
            else
              runner_dopair_subset_density(r, finger, parts, pid, count,
                                           l->t->ci);

          }

          /* Otherwise, sub interaction? */
          else if (l->t->type == task_type_sub) {

            /* Left or right? */
            if (l->t->ci == finger)
              runner_dosub_subset_density(r, finger, parts, pid, count,
                                          l->t->cj, -1, 1);
            else
              runner_dosub_subset_density(r, finger, parts, pid, count,
                                          l->t->ci, -1, 1);
          }
        }
      }
674
    }
675
  }
676

Matthieu Schaller's avatar
Matthieu Schaller committed
677
678
  if (count)
    message("Smoothing length failed to converge on %i particles.", count);
679
680
681
682
683
684
685
686
687
688

#ifdef TIMER_VERBOSE
  message("runner %02i: %i parts at depth %i took %.3f ms.", r->id, c->count,
          c->depth, ((double)TIMER_TOC(timer_doghost)) / CPU_TPS * 1000);
  fflush(stdout);
#else
  TIMER_TOC(timer_doghost);
#endif
}

689
/**
690
 * @brief Drift particles forward in time
691
692
693
 *
 * @param r The runner thread.
 * @param c The cell.
694
 * @param timer Are we timing this ?
695
 */
696
void runner_dodrift(struct runner *r, struct cell *c, int timer) {
697

698
699
  const int nr_parts = c->count;
  const float dt = r->e->time - r->e->timeOld;
700
701
  struct part *restrict p, *restrict parts = c->parts;
  struct xpart *restrict xp, *restrict xparts = c->xparts;
Matthieu Schaller's avatar
Matthieu Schaller committed
702
  float dx_max = 0.f, h_max = 0.f;
Matthieu Schaller's avatar
Matthieu Schaller committed
703
  float w;
704

705
706
  TIMER_TIC

707
708
  /* No children? */
  if (!c->split) {
709

710
    /* Loop over all the particles in the cell */
711
    for (int k = 0; k < nr_parts; k++) {
712

713
714
715
      /* Get a handle on the part. */
      p = &parts[k];
      xp = &xparts[k];
716

Matthieu Schaller's avatar
Matthieu Schaller committed
717
718
719
      /* Useful quantity */
      const float h_inv = 1.0f / p->h;

720
721
722
723
      /* Drift... */
      p->x[0] += xp->v_full[0] * dt;
      p->x[1] += xp->v_full[1] * dt;
      p->x[2] += xp->v_full[2] * dt;
724

Matthieu Schaller's avatar
Matthieu Schaller committed
725
      /* Predict velocities (for hydro terms) */
Matthieu Schaller's avatar
Matthieu Schaller committed
726
727
728
      p->v[0] += p->a_hydro[0] * dt;
      p->v[1] += p->a_hydro[1] * dt;
      p->v[2] += p->a_hydro[2] * dt;
729

Matthieu Schaller's avatar
Matthieu Schaller committed
730
      /* Predict smoothing length */
731
      w = p->h_dt * h_inv * dt;
Matthieu Schaller's avatar
Matthieu Schaller committed
732
733
734
735
736
737
      if (fabsf(w) < 0.2f)
        p->h *= approx_expf(w); /* 4th order expansion of exp(w) */
      else
        p->h *= expf(w);

      /* Predict density */
738
      w = -3.0f * p->h_dt * h_inv * dt;
Matthieu Schaller's avatar
Matthieu Schaller committed
739
740
741
742
      if (fabsf(w) < 0.2f)
        p->rho *= approx_expf(w); /* 4th order expansion of exp(w) */
      else
        p->rho *= expf(w);
743

744
      /* Predict the values of the extra fields */
745
      hydro_predict_extra(p, xp, r->e->timeOld, r->e->time);
746

Matthieu Schaller's avatar
Matthieu Schaller committed
747
      /* Compute motion since last cell construction */
748
749
750
751
      const float dx =
          sqrtf((p->x[0] - xp->x_old[0]) * (p->x[0] - xp->x_old[0]) +
                (p->x[1] - xp->x_old[1]) * (p->x[1] - xp->x_old[1]) +
                (p->x[2] - xp->x_old[2]) * (p->x[2] - xp->x_old[2]));
Matthieu Schaller's avatar
Matthieu Schaller committed
752
753
754
755
      dx_max = fmaxf(dx_max, dx);

      /* Maximal smoothing length */
      h_max = fmaxf(p->h, h_max);
756
    }
757
  }
758

Matthieu Schaller's avatar
Matthieu Schaller committed
759
760
761
762
763
764
765
766
767
  /* Otherwise, aggregate data from children. */
  else {

    /* Loop over the progeny. */
    for (int k = 0; k < 8; k++)
      if (c->progeny[k] != NULL) {
        struct cell *cp = c->progeny[k];
        runner_dodrift(r, cp, 0);

768
769
        dx_max = fmaxf(dx_max, cp->dx_max);
        h_max = fmaxf(h_max, cp->h_max);
Matthieu Schaller's avatar
Matthieu Schaller committed
770
771
772
773
774
775
      }
  }

  /* Store the values */
  c->h_max = h_max;
  c->dx_max = dx_max;
776

777
778
779
780
781
782
783
784
  if (timer) {
#ifdef TIMER_VERBOSE
    message("runner %02i: %i parts at depth %i took %.3f ms.", r->id, c->count,
            c->depth, ((double)TIMER_TOC(timer_drift)) / CPU_TPS * 1000);
    fflush(stdout);
#else
    TIMER_TOC(timer_drift);
#endif
785
786
  }
}
787

788
789
790
791
792
/**
 * @brief Combined second and first kick for fixed dt.
 *
 * @param r The runner thread.
 * @param c The cell.
793
 * @param timer The timer
794
795
 */

796
797
void runner_dokick(struct runner *r, struct cell *c, int timer) {

798
799
800
  const float dt_max_timeline = r->e->timeEnd - r->e->timeBegin;
  const float global_dt_min = r->e->dt_min, global_dt_max = r->e->dt_max;
  const float t_current = r->e->time;
Matthieu Schaller's avatar
Matthieu Schaller committed
801
  const int count = c->count;
802
803
  const int is_fixdt =
      (r->e->policy & engine_policy_fixdt) == engine_policy_fixdt;
Matthieu Schaller's avatar
Matthieu Schaller committed
804
805

  float new_dt;
806
  float dt_timeline;
Matthieu Schaller's avatar
Matthieu Schaller committed
807
808
809

  int updated = 0;
  float t_end_min = FLT_MAX, t_end_max = 0.f;
810
811
812
813
  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, mass = 0.0;
  float mom[3] = {0.0f, 0.0f, 0.0f};
  float ang[3] = {0.0f, 0.0f, 0.0f};
  float x[3], v_full[3];
814
815
816
817
818
819
820
821
  struct part *restrict p, *restrict parts = c->parts;
  struct xpart *restrict xp, *restrict xparts = c->xparts;

  TIMER_TIC

  /* No children? */
  if (!c->split) {

822
    /* Loop over the particles and kick the active ones. */
Matthieu Schaller's avatar
Matthieu Schaller committed
823
    for (int k = 0; k < count; k++) {
824
825
826
827
828

      /* Get a handle on the part. */
      p = &parts[k];
      xp = &xparts[k];

829
      const float m = p->mass;
Matthieu Schaller's avatar
Matthieu Schaller committed
830
831
832
      x[0] = p->x[0];
      x[1] = p->x[1];
      x[2] = p->x[2];
833
834

      /* If particle needs to be kicked */
835
      if (is_fixdt || p->t_end <= t_current) {
836
837

        /* First, finish the force loop */
838
        p->h_dt *= p->h * 0.333333333f;
839
840

        /* And do the same of the extra variable */
841
842
        hydro_end_force(p);

843
844
        /* Now we are ready to compute the next time-step size */

845
846
847
848
849
850
851
852
853
854
855
856
        if (is_fixdt) {

          /* Now we have a time step, proceed with the kick */
          new_dt = global_dt_max;

        } else {

          /* Compute the next timestep */
          const float new_dt_hydro = hydro_compute_timestep(p, xp);
          const float new_dt_grav = gravity_compute_timestep(p, xp);

          new_dt = fminf(new_dt_hydro, new_dt_grav);
Matthieu Schaller's avatar
Matthieu Schaller committed
857
858
859
860
861
862
863
864

          /* Limit change in h */
          const float dt_h_change =
              (p->h_dt != 0.0f) ? fabsf(const_ln_max_h_change * p->h / p->h_dt)
                                : FLT_MAX;

          new_dt = fminf(new_dt, dt_h_change);

865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
          /* Recover the current timestep */
          const float current_dt = p->t_end - p->t_begin;

          /* Limit timestep increase */
          if (current_dt > 0.0f) new_dt = fminf(new_dt, 2.0f * current_dt);

          /* Limit timestep within the allowed range */
          new_dt = fminf(new_dt, global_dt_max);
          new_dt = fmaxf(new_dt, global_dt_min);

          /* Put this timestep on the time line */
          dt_timeline = dt_max_timeline;
          while (new_dt < dt_timeline) dt_timeline /= 2.;

          /* Now we have a time step, proceed with the kick */
          new_dt = dt_timeline;
        }
882

883
        /* Compute the time step for this kick */
Matthieu Schaller's avatar
Matthieu Schaller committed
884
885
886
        const float t_start = 0.5f * (p->t_begin + p->t_end);
        const float t_end = p->t_end + 0.5f * new_dt;
        const float dt = t_end - t_start;
887
        const float half_dt = t_end - p->t_end;
888
889
890

        /* Move particle forward in time */
        p->t_begin = p->t_end;
891
        p->t_end = p->t_begin + new_dt;
892

893
        /* Kick particles in momentum space */
Matthieu Schaller's avatar
Matthieu Schaller committed
894
895
896
        xp->v_full[0] += p->a_hydro[0] * dt;
        xp->v_full[1] += p->a_hydro[1] * dt;
        xp->v_full[2] += p->a_hydro[2] * dt;
897

Matthieu Schaller's avatar
Matthieu Schaller committed
898
899
900
        p->v[0] = xp->v_full[0] - half_dt * p->a_hydro[0];
        p->v[1] = xp->v_full[1] - half_dt * p->a_hydro[1];
        p->v[2] = xp->v_full[2] - half_dt * p->a_hydro[2];
Matthieu Schaller's avatar
Matthieu Schaller committed
901

902
        /* Extra kick work */
903
        hydro_kick_extra(p, xp, dt, half_dt);
904
905
906
907
      }

      /* Now collect quantities for statistics */

908
909
      v_full[0] = xp->v_full[0];
      v_full[1] = xp->v_full[1];
910
      v_full[2] = xp->v_full[2];
911

912
913
914
      /* Collect mass */
      mass += m;

915
      /* Collect momentum */
916
917
918
      mom[0] += m * v_full[0];
      mom[1] += m * v_full[1];
      mom[2] += m * v_full[2];
919
920

      /* Collect angular momentum */
921
922
923
      ang[0] += m * (x[1] * v_full[2] - x[2] * v_full[1]);
      ang[1] += m * (x[2] * v_full[0] - x[0] * v_full[2]);
      ang[2] += m * (x[0] * v_full[1] - x[1] * v_full[0]);
924
925

      /* Collect total energy. */
926
927
928
929
      e_kin += 0.5 * m * (v_full[0] * v_full[0] + v_full[1] * v_full[1] +
                          v_full[2] * v_full[2]);
      e_pot += 0.f; /* No gravitational potential thus far */
      e_int += hydro_get_internal_energy(p);
930
931

      /* Minimal time for next end of time-step */
Matthieu Schaller's avatar
Matthieu Schaller committed
932
933
      t_end_min = fminf(p->t_end, t_end_min);
      t_end_max = fmaxf(p->t_end, t_end_max);
934

Matthieu Schaller's avatar
Matthieu Schaller committed
935
936
      /* Number of updated particles */
      updated++;
937
938
    }

939
940
  }

941
  /* Otherwise, aggregate data from children. */
942
943
944
  else {

    /* Loop over the progeny. */
945
    for (int k = 0; k < 8; k++)
946
947
      if (c->progeny[k] != NULL) {
        struct cell *cp = c->progeny[k];
948
949

        /* Recurse */
950
        runner_dokick(r, cp, 0);
Matthieu Schaller's avatar
Matthieu Schaller committed
951

952
        /* And aggregate */
953
        updated += cp->updated;
954
955
956
957
        e_kin += cp->e_kin;
        e_int += cp->e_int;
        e_pot += cp->e_pot;
        mass += cp->mass;
958
959
960
961
962
963
        mom[0] += cp->mom[0];
        mom[1] += cp->mom[1];
        mom[2] += cp->mom[2];
        ang[0] += cp->ang[0];
        ang[1] += cp->ang[1];
        ang[2] += cp->ang[2];
Matthieu Schaller's avatar
Matthieu Schaller committed
964
965
        t_end_min = fminf(cp->t_end_min, t_end_min);
        t_end_max = fmaxf(cp->t_end_max, t_end_max);
966
967
968
969
      }
  }

  /* Store the values. */
970
  c->updated = updated;
971
972
973
974
  c->e_kin = e_kin;
  c->e_int = e_int;
  c->e_pot = e_pot;
  c->mass = mass;
975
976
977
978
979
980
  c->mom[0] = mom[0];
  c->mom[1] = mom[1];
  c->mom[2] = mom[2];
  c->ang[0] = ang[0];
  c->ang[1] = ang[1];
  c->ang[2] = ang[2];
981
982
983
  c->t_end_min = t_end_min;
  c->t_end_max = t_end_max;

984
985
986
  if (timer) {
#ifdef TIMER_VERBOSE
    message("runner %02i: %i parts at depth %i took %.3f ms.", r->id, c->count,
987
            c->depth, ((double)TIMER_TOC(timer_kick)) / CPU_TPS * 1000);
988
989
    fflush(stdout);
#else
990
    TIMER_TOC(timer_kick);
991
992
993
#endif
  }
}
994

Pedro Gonnet's avatar
Pedro Gonnet committed
995
996
997
998
999
1000
/**
 * @brief The #runner main thread routine.
 *
 * @param data A pointer to this thread's data.
 */