runner.c 30.3 KB
Newer Older
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
Pedro Gonnet's avatar
Pedro Gonnet committed
19

Pedro Gonnet's avatar
Pedro Gonnet committed
20
21
/* Config parameters. */
#include "../config.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
22
23
24
25

/* Some standard headers. */
#include <float.h>
#include <limits.h>
26
#include <stdlib.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
27

28
29
/* MPI headers. */
#ifdef WITH_MPI
30
#include <mpi.h>
31
32
#endif

33
34
35
/* This object's header. */
#include "runner.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
36
/* Local headers. */
Matthieu Schaller's avatar
Matthieu Schaller committed
37
#include "approx_math.h"
38
#include "atomic.h"
39
#include "const.h"
40
#include "debug.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
41
#include "engine.h"
42
#include "error.h"
43
44
45
#include "gravity.h"
#include "hydro.h"
#include "minmax.h"
46
47
48
49
#include "scheduler.h"
#include "space.h"
#include "task.h"
#include "timers.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
50

51
/* Orientation of the cell pairs */
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/* Thirteen unit direction vectors stored as consecutive (x, y, z) triplets:
 * direction j occupies runner_shift[3 * j + 0 .. 3 * j + 2]. The non-zero
 * components are 1/sqrt(3), 1/sqrt(2) or 1, so every vector has unit length.
 * The sorting routines in this file use the dot product of a particle
 * position with direction j as the sort key of the j-th sorted list. */
const float runner_shift[13 * 3] = {
    5.773502691896258e-01, 5.773502691896258e-01,  5.773502691896258e-01,
    7.071067811865475e-01, 7.071067811865475e-01,  0.0,
    5.773502691896258e-01, 5.773502691896258e-01,  -5.773502691896258e-01,
    7.071067811865475e-01, 0.0,                    7.071067811865475e-01,
    1.0,                   0.0,                    0.0,
    7.071067811865475e-01, 0.0,                    -7.071067811865475e-01,
    5.773502691896258e-01, -5.773502691896258e-01, 5.773502691896258e-01,
    7.071067811865475e-01, -7.071067811865475e-01, 0.0,
    5.773502691896258e-01, -5.773502691896258e-01, -5.773502691896258e-01,
    0.0,                   7.071067811865475e-01,  7.071067811865475e-01,
    0.0,                   1.0,                    0.0,
    0.0,                   7.071067811865475e-01,  -7.071067811865475e-01,
    0.0,                   0.0,                    1.0, };
66
67

/* Does the axis need flipping ? */
68
69
/* Table of 27 flags: the first 13 entries are 1 and the remaining 14 are 0.
 * NOTE(review): presumably indexed by the relative offset of two neighbouring
 * cells (3 x 3 x 3 possibilities), a 1 meaning the pair has to be swapped to
 * map onto one of the 13 directions of runner_shift -- confirm against the
 * code that reads this table, which is not part of this file section. */
const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
                              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
Pedro Gonnet's avatar
Pedro Gonnet committed
70

71
/* Import the density loop functions. */
72
73
74
#define FUNCTION density
#include "runner_doiact.h"

75
/* Import the force loop functions. */
76
77
78
79
#undef FUNCTION
#define FUNCTION force
#include "runner_doiact.h"

80
81
82
/* Import the gravity loop functions. */
#include "runner_doiact_grav.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
83
84
85
86
87
88
/**
 * @brief Sort the entries in ascending order using QuickSort.
 *
 * @param sort The entries
 * @param N The number of entries.
 */
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144

void runner_dosort_ascending(struct entry *sort, int N) {

  struct {
    short int lo, hi;
  } qstack[10];
  int qpos, i, j, lo, hi, imin;
  struct entry temp;
  float pivot;

  /* Sort parts in cell_i in decreasing order with quicksort */
  qstack[0].lo = 0;
  qstack[0].hi = N - 1;
  qpos = 0;
  while (qpos >= 0) {
    lo = qstack[qpos].lo;
    hi = qstack[qpos].hi;
    qpos -= 1;
    if (hi - lo < 15) {
      for (i = lo; i < hi; i++) {
        imin = i;
        for (j = i + 1; j <= hi; j++)
          if (sort[j].d < sort[imin].d) imin = j;
        if (imin != i) {
          temp = sort[imin];
          sort[imin] = sort[i];
          sort[i] = temp;
        }
      }
    } else {
      pivot = sort[(lo + hi) / 2].d;
      i = lo;
      j = hi;
      while (i <= j) {
        while (sort[i].d < pivot) i++;
        while (sort[j].d > pivot) j--;
        if (i <= j) {
          if (i < j) {
            temp = sort[i];
            sort[i] = sort[j];
            sort[j] = temp;
          }
          i += 1;
          j -= 1;
        }
      }
      if (j > (lo + hi) / 2) {
        if (lo < j) {
          qpos += 1;
          qstack[qpos].lo = lo;
          qstack[qpos].hi = j;
        }
        if (i < hi) {
          qpos += 1;
          qstack[qpos].lo = i;
          qstack[qpos].hi = hi;
Pedro Gonnet's avatar
Pedro Gonnet committed
145
        }
146
147
148
149
150
151
152
153
154
155
156
157
      } else {
        if (i < hi) {
          qpos += 1;
          qstack[qpos].lo = i;
          qstack[qpos].hi = hi;
        }
        if (lo < j) {
          qpos += 1;
          qstack[qpos].lo = lo;
          qstack[qpos].hi = j;
        }
      }
Pedro Gonnet's avatar
Pedro Gonnet committed
158
    }
159
160
161
  }
}

Pedro Gonnet's avatar
Pedro Gonnet committed
162
163
164
165
166
/**
 * @brief Sort the particles in the given cell along all cardinal directions.
 *
 * @param r The #runner.
 * @param c The #cell.
167
 * @param flags Cell flag.
168
169
 * @param clock Flag indicating whether to record the timing or not, needed
 *      for recursive calls.
Pedro Gonnet's avatar
Pedro Gonnet committed
170
 */
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258

void runner_dosort(struct runner *r, struct cell *c, int flags, int clock) {

  struct entry *finger;
  struct entry *fingers[8];
  struct part *parts = c->parts;
  struct entry *sort;
  int j, k, count = c->count;
  int i, ind, off[8], inds[8], temp_i, missing;
  // float shift[3];
  float buff[8], px[3];

  TIMER_TIC

  /* Clean-up the flags, i.e. filter out what's already been sorted. */
  flags &= ~c->sorted;
  if (flags == 0) return;

  /* start by allocating the entry arrays. */
  if (c->sort == NULL || c->sortsize < count) {
    if (c->sort != NULL) free(c->sort);
    c->sortsize = count * 1.1;
    if ((c->sort = (struct entry *)malloc(sizeof(struct entry) *
                                          (c->sortsize + 1) * 13)) == NULL)
      error("Failed to allocate sort memory.");
  }
  sort = c->sort;

  /* Does this cell have any progeny? */
  if (c->split) {

    /* Fill in the gaps within the progeny. */
    for (k = 0; k < 8; k++) {
      if (c->progeny[k] == NULL) continue;
      missing = flags & ~c->progeny[k]->sorted;
      if (missing) runner_dosort(r, c->progeny[k], missing, 0);
    }

    /* Loop over the 13 different sort arrays. */
    for (j = 0; j < 13; j++) {

      /* Has this sort array been flagged? */
      if (!(flags & (1 << j))) continue;

      /* Init the particle index offsets. */
      for (off[0] = 0, k = 1; k < 8; k++)
        if (c->progeny[k - 1] != NULL)
          off[k] = off[k - 1] + c->progeny[k - 1]->count;
        else
          off[k] = off[k - 1];

      /* Init the entries and indices. */
      for (k = 0; k < 8; k++) {
        inds[k] = k;
        if (c->progeny[k] != NULL && c->progeny[k]->count > 0) {
          fingers[k] = &c->progeny[k]->sort[j * (c->progeny[k]->count + 1)];
          buff[k] = fingers[k]->d;
          off[k] = off[k];
        } else
          buff[k] = FLT_MAX;
      }

      /* Sort the buffer. */
      for (i = 0; i < 7; i++)
        for (k = i + 1; k < 8; k++)
          if (buff[inds[k]] < buff[inds[i]]) {
            temp_i = inds[i];
            inds[i] = inds[k];
            inds[k] = temp_i;
          }

      /* For each entry in the new sort list. */
      finger = &sort[j * (count + 1)];
      for (ind = 0; ind < count; ind++) {

        /* Copy the minimum into the new sort array. */
        finger[ind].d = buff[inds[0]];
        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];

        /* Update the buffer. */
        fingers[inds[0]] += 1;
        buff[inds[0]] = fingers[inds[0]]->d;

        /* Find the smallest entry. */
        for (k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
          temp_i = inds[k - 1];
          inds[k - 1] = inds[k];
          inds[k] = temp_i;
Pedro Gonnet's avatar
Pedro Gonnet committed
259
        }
260

261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
      } /* Merge. */

      /* Add a sentinel. */
      sort[j * (count + 1) + count].d = FLT_MAX;
      sort[j * (count + 1) + count].i = 0;

      /* Mark as sorted. */
      c->sorted |= (1 << j);

    } /* loop over sort arrays. */

  } /* progeny? */

  /* Otherwise, just sort. */
  else {

    /* Fill the sort array. */
    for (k = 0; k < count; k++) {
      px[0] = parts[k].x[0];
      px[1] = parts[k].x[1];
      px[2] = parts[k].x[2];
      for (j = 0; j < 13; j++)
        if (flags & (1 << j)) {
          sort[j * (count + 1) + k].i = k;
          sort[j * (count + 1) + k].d = px[0] * runner_shift[3 * j + 0] +
                                        px[1] * runner_shift[3 * j + 1] +
                                        px[2] * runner_shift[3 * j + 2];
        }
289
    }
290
291
292
293
294
295
296
297
298
299
300

    /* Add the sentinel and sort. */
    for (j = 0; j < 13; j++)
      if (flags & (1 << j)) {
        sort[j * (count + 1) + count].d = FLT_MAX;
        sort[j * (count + 1) + count].i = 0;
        runner_dosort_ascending(&sort[j * (count + 1)], count);
        c->sorted |= (1 << j);
      }
  }

Matthieu Schaller's avatar
Matthieu Schaller committed
301
302
303
304
305
306
307
308
309
310
311
312
  /* Verify the sorting. */
  /* for ( j = 0 ; j < 13 ; j++ ) {
      if ( !( flags & (1 << j) ) )
          continue;
      finger = &sort[ j*(count + 1) ];
      for ( k = 1 ; k < count ; k++ ) {
          if ( finger[k].d < finger[k-1].d )
              error( "Sorting failed, ascending array." );
          if ( finger[k].i >= count )
              error( "Sorting failed, indices borked." );
          }
      } */
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403

  if (clock) TIMER_TOC(timer_dosort);
}

void runner_dogsort(struct runner *r, struct cell *c, int flags, int clock) {

  struct entry *finger;
  struct entry *fingers[8];
  struct gpart *gparts = c->gparts;
  struct entry *gsort;
  int j, k, count = c->gcount;
  int i, ind, off[8], inds[8], temp_i, missing;
  // float shift[3];
  float buff[8], px[3];

  TIMER_TIC

  /* Clean-up the flags, i.e. filter out what's already been sorted. */
  flags &= ~c->gsorted;
  if (flags == 0) return;

  /* start by allocating the entry arrays. */
  if (c->gsort == NULL || c->gsortsize < count) {
    if (c->gsort != NULL) free(c->gsort);
    c->gsortsize = count * 1.1;
    if ((c->gsort = (struct entry *)malloc(sizeof(struct entry) *
                                           (c->gsortsize + 1) * 13)) == NULL)
      error("Failed to allocate sort memory.");
  }
  gsort = c->gsort;

  /* Does this cell have any progeny? */
  if (c->split) {

    /* Fill in the gaps within the progeny. */
    for (k = 0; k < 8; k++) {
      if (c->progeny[k] == NULL) continue;
      missing = flags & ~c->progeny[k]->gsorted;
      if (missing) runner_dogsort(r, c->progeny[k], missing, 0);
    }

    /* Loop over the 13 different sort arrays. */
    for (j = 0; j < 13; j++) {

      /* Has this sort array been flagged? */
      if (!(flags & (1 << j))) continue;

      /* Init the particle index offsets. */
      for (off[0] = 0, k = 1; k < 8; k++)
        if (c->progeny[k - 1] != NULL)
          off[k] = off[k - 1] + c->progeny[k - 1]->gcount;
        else
          off[k] = off[k - 1];

      /* Init the entries and indices. */
      for (k = 0; k < 8; k++) {
        inds[k] = k;
        if (c->progeny[k] != NULL && c->progeny[k]->gcount > 0) {
          fingers[k] = &c->progeny[k]->gsort[j * (c->progeny[k]->gcount + 1)];
          buff[k] = fingers[k]->d;
          off[k] = off[k];
        } else
          buff[k] = FLT_MAX;
      }

      /* Sort the buffer. */
      for (i = 0; i < 7; i++)
        for (k = i + 1; k < 8; k++)
          if (buff[inds[k]] < buff[inds[i]]) {
            temp_i = inds[i];
            inds[i] = inds[k];
            inds[k] = temp_i;
          }

      /* For each entry in the new sort list. */
      finger = &gsort[j * (count + 1)];
      for (ind = 0; ind < count; ind++) {

        /* Copy the minimum into the new sort array. */
        finger[ind].d = buff[inds[0]];
        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];

        /* Update the buffer. */
        fingers[inds[0]] += 1;
        buff[inds[0]] = fingers[inds[0]]->d;

        /* Find the smallest entry. */
        for (k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
          temp_i = inds[k - 1];
          inds[k - 1] = inds[k];
          inds[k] = temp_i;
404
        }
Pedro Gonnet's avatar
Pedro Gonnet committed
405

406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
      } /* Merge. */

      /* Add a sentinel. */
      gsort[j * (count + 1) + count].d = FLT_MAX;
      gsort[j * (count + 1) + count].i = 0;

      /* Mark as sorted. */
      c->gsorted |= (1 << j);

    } /* loop over sort arrays. */

  } /* progeny? */

  /* Otherwise, just sort. */
  else {

    /* Fill the sort array. */
    for (k = 0; k < count; k++) {
      px[0] = gparts[k].x[0];
      px[1] = gparts[k].x[1];
      px[2] = gparts[k].x[2];
      for (j = 0; j < 13; j++)
        if (flags & (1 << j)) {
          gsort[j * (count + 1) + k].i = k;
          gsort[j * (count + 1) + k].d = px[0] * runner_shift[3 * j + 0] +
                                         px[1] * runner_shift[3 * j + 1] +
                                         px[2] * runner_shift[3 * j + 2];
        }
Pedro Gonnet's avatar
Pedro Gonnet committed
434
    }
435
436
437
438
439
440
441
442
443
444
445

    /* Add the sentinel and sort. */
    for (j = 0; j < 13; j++)
      if (flags & (1 << j)) {
        gsort[j * (count + 1) + count].d = FLT_MAX;
        gsort[j * (count + 1) + count].i = 0;
        runner_dosort_ascending(&gsort[j * (count + 1)], count);
        c->gsorted |= (1 << j);
      }
  }

Matthieu Schaller's avatar
Matthieu Schaller committed
446
447
448
449
450
451
452
453
454
455
456
457
  /* Verify the sorting. */
  /* for ( j = 0 ; j < 13 ; j++ ) {
      if ( !( flags & (1 << j) ) )
          continue;
      finger = &c->gsort[ j*(count + 1) ];
      for ( k = 1 ; k < count ; k++ ) {
          if ( finger[k].d < finger[k-1].d )
              error( "Sorting failed, ascending array." );
          if ( finger[k].i < 0 || finger[k].i >= count )
              error( "Sorting failed, indices borked." );
          }
      } */
458
459
460
461

  if (clock) TIMER_TOC(timer_dosort);
}

462
463
464
465
466
/**
 * @brief Initialize the particles before the density calculation
 *
 * @param r The runner thread.
 * @param c The cell.
Matthieu Schaller's avatar
Matthieu Schaller committed
467
 * @param timer 1 if the time is to be recorded.
468
469
 */

470
void runner_doinit(struct runner *r, struct cell *c, int timer) {
471
472

  struct part *p, *parts = c->parts;
Matthieu Schaller's avatar
Matthieu Schaller committed
473
  const int count = c->count;
474
  const int ti_current = r->e->ti_current;
475
476

  TIMER_TIC;
477

478
479
  /* Recurse? */
  if (c->split) {
Matthieu Schaller's avatar
Matthieu Schaller committed
480
    for (int k = 0; k < 8; k++)
481
      if (c->progeny[k] != NULL) runner_doinit(r, c->progeny[k], 0);
482
    return;
483
484
  } else {

485
486
    /* Loop over the parts in this cell. */
    for (int i = 0; i < count; i++) {
487

488
489
      /* Get a direct pointer on the part. */
      p = &parts[i];
490

491
      if (p->ti_end <= ti_current) {
Matthieu Schaller's avatar
Matthieu Schaller committed
492

493
494
        /* Get ready for a density calculation */
        hydro_init_part(p);
495
      }
496
497
    }
  }
498

Peter W. Draper's avatar
Peter W. Draper committed
499
  if (timer) TIMER_TOC(timer_init);
500
501
}

502
503
504
/**
 * @brief Intermediate task between density and force
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
505
 * @param r The runner thread.
506
 * @param c The cell.
507
 */
508
509
510
511

void runner_doghost(struct runner *r, struct cell *c) {

  struct part *p, *parts = c->parts;
512
  struct xpart *xp, *xparts = c->xparts;
513
  struct cell *finger;
Matthieu Schaller's avatar
Matthieu Schaller committed
514
  int redo, count = c->count;
515
  int *pid;
516
  float h_corr;
517
518
  const int ti_current = r->e->ti_current;
  const double timeBase = r->e->timeBase;
519

520
521
  TIMER_TIC;

522
523
  /* Recurse? */
  if (c->split) {
Matthieu Schaller's avatar
Matthieu Schaller committed
524
    for (int k = 0; k < 8; k++)
525
526
527
528
529
530
531
      if (c->progeny[k] != NULL) runner_doghost(r, c->progeny[k]);
    return;
  }

  /* Init the IDs that have to be updated. */
  if ((pid = (int *)alloca(sizeof(int) * count)) == NULL)
    error("Call to alloca failed.");
Matthieu Schaller's avatar
Matthieu Schaller committed
532
  for (int k = 0; k < count; k++) pid[k] = k;
533
534

  /* While there are particles that need to be updated... */
Matthieu Schaller's avatar
Matthieu Schaller committed
535
  for (int num_reruns = 0; count > 0 && num_reruns < const_smoothing_max_iter;
Matthieu Schaller's avatar
Matthieu Schaller committed
536
       num_reruns++) {
537
538
539

    /* Reset the redo-count. */
    redo = 0;
540

541
    /* Loop over the parts in this cell. */
Matthieu Schaller's avatar
Matthieu Schaller committed
542
    for (int i = 0; i < count; i++) {
543
544
545

      /* Get a direct pointer on the part. */
      p = &parts[pid[i]];
546
      xp = &xparts[pid[i]];
547
548

      /* Is this part within the timestep? */
549
      if (p->ti_end <= ti_current) {
550

551
        /* Finish the density calculation */
552
        hydro_end_density(p, ti_current);
553
554

        /* If no derivative, double the smoothing length. */
555
        if (p->density.wcount_dh == 0.0f) h_corr = p->h;
556
557
558

        /* Otherwise, compute the smoothing length update (Newton step). */
        else {
559
          h_corr = (kernel_nwneigh - p->density.wcount) / p->density.wcount_dh;
560
561

          /* Truncate to the range [ -p->h/2 , p->h ]. */
562
563
          h_corr = fminf(h_corr, p->h);
          h_corr = fmaxf(h_corr, -p->h * 0.5f);
Pedro Gonnet's avatar
Pedro Gonnet committed
564
        }
565
566

        /* Did we get the right number density? */
567
        if (p->density.wcount > kernel_nwneigh + const_delta_nwneigh ||
568
            p->density.wcount < kernel_nwneigh - const_delta_nwneigh) {
569
570
571
572

          /* Ok, correct then */
          p->h += h_corr;

573
          /* Flag for another round of fun */
574
575
          pid[redo] = pid[i];
          redo += 1;
576

577
578
          /* Re-initialise everything */
          hydro_init_part(p);
579

580
          /* Off we go ! */
581
          continue;
582
583
        }

Matthieu Schaller's avatar
Matthieu Schaller committed
584
        /* We now have a particle whose smoothing length has converged */
Matthieu Schaller's avatar
Matthieu Schaller committed
585

586
        /* As of here, particle force variables will be set. */
587

588
        /* Compute variables required for the force loop */
589
        hydro_prepare_force(p, xp, ti_current, timeBase);
590

Matthieu Schaller's avatar
Matthieu Schaller committed
591
        /* The particle force values are now set.  Do _NOT_
592
           try to read any particle density variables! */
Matthieu Schaller's avatar
Matthieu Schaller committed
593

594
595
        /* Prepare the particle for the force loop over neighbours */
        hydro_reset_acceleration(p);
596
597
598
      }
    }

599
600
601
    /* We now need to treat the particles whose smoothing length had not
     * converged again */

602
603
604
605
606
607
    /* Re-set the counter for the next loop (potentially). */
    count = redo;
    if (count > 0) {

      /* Climb up the cell hierarchy. */
      for (finger = c; finger != NULL; finger = finger->parent) {
Matthieu Schaller's avatar
Matthieu Schaller committed
608

609
610
        /* Run through this cell's density interactions. */
        for (struct link *l = finger->density; l != NULL; l = l->next) {
Matthieu Schaller's avatar
Matthieu Schaller committed
611

612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
          /* Self-interaction? */
          if (l->t->type == task_type_self)
            runner_doself_subset_density(r, finger, parts, pid, count);

          /* Otherwise, pair interaction? */
          else if (l->t->type == task_type_pair) {

            /* Left or right? */
            if (l->t->ci == finger)
              runner_dopair_subset_density(r, finger, parts, pid, count,
                                           l->t->cj);
            else
              runner_dopair_subset_density(r, finger, parts, pid, count,
                                           l->t->ci);

          }

          /* Otherwise, sub interaction? */
          else if (l->t->type == task_type_sub) {

            /* Left or right? */
            if (l->t->ci == finger)
              runner_dosub_subset_density(r, finger, parts, pid, count,
                                          l->t->cj, -1, 1);
            else
              runner_dosub_subset_density(r, finger, parts, pid, count,
                                          l->t->ci, -1, 1);
          }
        }
      }
642
    }
643
  }
644

Matthieu Schaller's avatar
Matthieu Schaller committed
645
646
  if (count)
    message("Smoothing length failed to converge on %i particles.", count);
647
648
649
650

  TIMER_TOC(timer_doghost);
}

651
/**
652
 * @brief Drift particles forward in time
653
654
655
 *
 * @param r The runner thread.
 * @param c The cell.
656
 * @param timer Are we timing this ?
657
 */
658
void runner_dodrift(struct runner *r, struct cell *c, int timer) {
659

660
  const int nr_parts = c->count;
661
662
663
664
  const double timeBase = r->e->timeBase;
  const double dt = (r->e->ti_current - r->e->ti_old) * timeBase;
  const float ti_old = r->e->ti_old;
  const float ti_current = r->e->ti_current;
665
666
  struct part *restrict p, *restrict parts = c->parts;
  struct xpart *restrict xp, *restrict xparts = c->xparts;
Matthieu Schaller's avatar
Matthieu Schaller committed
667
  float dx_max = 0.f, h_max = 0.f;
Matthieu Schaller's avatar
Matthieu Schaller committed
668
  float w;
669

670
671
  TIMER_TIC

672
673
  /* No children? */
  if (!c->split) {
674

675
    /* Loop over all the particles in the cell */
676
    for (int k = 0; k < nr_parts; k++) {
677

678
679
680
      /* Get a handle on the part. */
      p = &parts[k];
      xp = &xparts[k];
681

Matthieu Schaller's avatar
Matthieu Schaller committed
682
683
684
      /* Useful quantity */
      const float h_inv = 1.0f / p->h;

685
686
687
688
      /* Drift... */
      p->x[0] += xp->v_full[0] * dt;
      p->x[1] += xp->v_full[1] * dt;
      p->x[2] += xp->v_full[2] * dt;
689

Matthieu Schaller's avatar
Matthieu Schaller committed
690
      /* Predict velocities (for hydro terms) */
Matthieu Schaller's avatar
Matthieu Schaller committed
691
692
693
      p->v[0] += p->a_hydro[0] * dt;
      p->v[1] += p->a_hydro[1] * dt;
      p->v[2] += p->a_hydro[2] * dt;
694

Matthieu Schaller's avatar
Matthieu Schaller committed
695
      /* Predict smoothing length */
696
      w = p->h_dt * h_inv * dt;
Matthieu Schaller's avatar
Matthieu Schaller committed
697
698
699
700
701
702
      if (fabsf(w) < 0.2f)
        p->h *= approx_expf(w); /* 4th order expansion of exp(w) */
      else
        p->h *= expf(w);

      /* Predict density */
703
      w = -3.0f * p->h_dt * h_inv * dt;
Matthieu Schaller's avatar
Matthieu Schaller committed
704
705
706
707
      if (fabsf(w) < 0.2f)
        p->rho *= approx_expf(w); /* 4th order expansion of exp(w) */
      else
        p->rho *= expf(w);
708

709
      /* Predict the values of the extra fields */
710
      hydro_predict_extra(p, xp, ti_old, ti_current, timeBase);
711

Matthieu Schaller's avatar
Matthieu Schaller committed
712
      /* Compute motion since last cell construction */
713
714
715
716
      const float dx =
          sqrtf((p->x[0] - xp->x_old[0]) * (p->x[0] - xp->x_old[0]) +
                (p->x[1] - xp->x_old[1]) * (p->x[1] - xp->x_old[1]) +
                (p->x[2] - xp->x_old[2]) * (p->x[2] - xp->x_old[2]));
Matthieu Schaller's avatar
Matthieu Schaller committed
717
718
719
720
      dx_max = fmaxf(dx_max, dx);

      /* Maximal smoothing length */
      h_max = fmaxf(p->h, h_max);
721
    }
722
  }
723

Matthieu Schaller's avatar
Matthieu Schaller committed
724
725
726
727
728
729
730
731
732
  /* Otherwise, aggregate data from children. */
  else {

    /* Loop over the progeny. */
    for (int k = 0; k < 8; k++)
      if (c->progeny[k] != NULL) {
        struct cell *cp = c->progeny[k];
        runner_dodrift(r, cp, 0);

733
734
        dx_max = fmaxf(dx_max, cp->dx_max);
        h_max = fmaxf(h_max, cp->h_max);
Matthieu Schaller's avatar
Matthieu Schaller committed
735
736
737
738
739
740
      }
  }

  /* Store the values */
  c->h_max = h_max;
  c->dx_max = dx_max;
741

Peter W. Draper's avatar
Peter W. Draper committed
742
  if (timer) TIMER_TOC(timer_drift);
743
}
744

745
746
747
748
749
/**
 * @brief Combined second and first kick for fixed dt.
 *
 * @param r The runner thread.
 * @param c The cell.
750
 * @param timer The timer
751
752
 */

753
754
void runner_dokick(struct runner *r, struct cell *c, int timer) {

755
756
  const float global_dt_min = r->e->dt_min;
  const float global_dt_max = r->e->dt_max;
757
  const int ti_current = r->e->ti_current;
758
759
  const double timeBase = r->e->timeBase;
  const double timeBase_inv = 1.0 / r->e->timeBase;
Matthieu Schaller's avatar
Matthieu Schaller committed
760
  const int count = c->count;
761
762
  const int is_fixdt =
      (r->e->policy & engine_policy_fixdt) == engine_policy_fixdt;
Matthieu Schaller's avatar
Matthieu Schaller committed
763

764
765
  int new_dti;
  int dti_timeline;
Matthieu Schaller's avatar
Matthieu Schaller committed
766
767

  int updated = 0;
768
  int ti_end_min = max_nr_timesteps, ti_end_max = 0;
769
770
771
772
  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, mass = 0.0;
  float mom[3] = {0.0f, 0.0f, 0.0f};
  float ang[3] = {0.0f, 0.0f, 0.0f};
  float x[3], v_full[3];
773
774
775
776
777
778
779
780
  struct part *restrict p, *restrict parts = c->parts;
  struct xpart *restrict xp, *restrict xparts = c->xparts;

  TIMER_TIC

  /* No children? */
  if (!c->split) {

781
    /* Loop over the particles and kick the active ones. */
Matthieu Schaller's avatar
Matthieu Schaller committed
782
    for (int k = 0; k < count; k++) {
783
784
785
786
787

      /* Get a handle on the part. */
      p = &parts[k];
      xp = &xparts[k];

788
      const float m = p->mass;
Matthieu Schaller's avatar
Matthieu Schaller committed
789
790
791
      x[0] = p->x[0];
      x[1] = p->x[1];
      x[2] = p->x[2];
792
793

      /* If particle needs to be kicked */
794
      if (is_fixdt || p->ti_end <= ti_current) {
795
796

        /* First, finish the force loop */
797
        p->h_dt *= p->h * 0.333333333f;
798
799

        /* And do the same of the extra variable */
800
801
        hydro_end_force(p);

802
803
        /* Now we are ready to compute the next time-step size */

804
805
806
        if (is_fixdt) {

          /* Now we have a time step, proceed with the kick */
807
          new_dti = global_dt_max * timeBase_inv;
808
809
810
811
812
813
814

        } else {

          /* Compute the next timestep */
          const float new_dt_hydro = hydro_compute_timestep(p, xp);
          const float new_dt_grav = gravity_compute_timestep(p, xp);

815
          float new_dt = fminf(new_dt_hydro, new_dt_grav);
Matthieu Schaller's avatar
Matthieu Schaller committed
816
817
818
819
820
821
822
823

          /* Limit change in h */
          const float dt_h_change =
              (p->h_dt != 0.0f) ? fabsf(const_ln_max_h_change * p->h / p->h_dt)
                                : FLT_MAX;

          new_dt = fminf(new_dt, dt_h_change);

824
825
826
827
          /* Limit timestep within the allowed range */
          new_dt = fminf(new_dt, global_dt_max);
          new_dt = fmaxf(new_dt, global_dt_min);

828
829
          /* Convert to integer time */
          new_dti = new_dt * timeBase_inv;
830
831

          /* Recover the current timestep */
832
          const int current_dti = p->ti_end - p->ti_begin;
833
834

          /* Limit timestep increase */
835
836
          if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti);

837
          /* Put this timestep on the time line */
838
839
          dti_timeline = max_nr_timesteps;
          while (new_dti < dti_timeline) dti_timeline /= 2;
840
841

          /* Now we have a time step, proceed with the kick */
842
          new_dti = dti_timeline;
843
        }
844

845
        /* Compute the time step for this kick */
846
        const int ti_start = (p->ti_begin + p->ti_end) / 2;
847
        const int ti_end = p->ti_end + new_dti / 2;
848
        const float dt = (ti_end - ti_start) * timeBase;
849
        const float half_dt = (ti_end - p->ti_end) * timeBase;
850
851

        /* Move particle forward in time */
852
853
        p->ti_begin = p->ti_end;
        p->ti_end = p->ti_begin + new_dti;
854

855
        /* Kick particles in momentum space */
Matthieu Schaller's avatar
Matthieu Schaller committed
856
857
858
        xp->v_full[0] += p->a_hydro[0] * dt;
        xp->v_full[1] += p->a_hydro[1] * dt;
        xp->v_full[2] += p->a_hydro[2] * dt;
859

Matthieu Schaller's avatar
Matthieu Schaller committed
860
861
862
        p->v[0] = xp->v_full[0] - half_dt * p->a_hydro[0];
        p->v[1] = xp->v_full[1] - half_dt * p->a_hydro[1];
        p->v[2] = xp->v_full[2] - half_dt * p->a_hydro[2];
Matthieu Schaller's avatar
Matthieu Schaller committed
863

864
        /* Extra kick work */
865
        hydro_kick_extra(p, xp, dt, half_dt);
866

Matthieu Schaller's avatar
Matthieu Schaller committed
867
868
        /* Number of updated particles */
        updated++;
869
870
871
872
      }

      /* Now collect quantities for statistics */

873
874
      v_full[0] = xp->v_full[0];
      v_full[1] = xp->v_full[1];
875
      v_full[2] = xp->v_full[2];
876

877
878
879
      /* Collect mass */
      mass += m;

880
      /* Collect momentum */
881
882
883
      mom[0] += m * v_full[0];
      mom[1] += m * v_full[1];
      mom[2] += m * v_full[2];
884
885

      /* Collect angular momentum */
886
887
888
      ang[0] += m * (x[1] * v_full[2] - x[2] * v_full[1]);
      ang[1] += m * (x[2] * v_full[0] - x[0] * v_full[2]);
      ang[2] += m * (x[0] * v_full[1] - x[1] * v_full[0]);
889
890

      /* Collect total energy. */
891
892
893
894
      e_kin += 0.5 * m * (v_full[0] * v_full[0] + v_full[1] * v_full[1] +
                          v_full[2] * v_full[2]);
      e_pot += 0.f; /* No gravitational potential thus far */
      e_int += hydro_get_internal_energy(p);
895
896

      /* Minimal time for next end of time-step */
897
898
      ti_end_min = min(p->ti_end, ti_end_min);
      ti_end_max = max(p->ti_end, ti_end_max);
899
900
    }

901
902
  }

903
  /* Otherwise, aggregate data from children. */
904
905
906
  else {

    /* Loop over the progeny. */
907
    for (int k = 0; k < 8; k++)
908
909
      if (c->progeny[k] != NULL) {
        struct cell *cp = c->progeny[k];
910
911

        /* Recurse */
912
        runner_dokick(r, cp, 0);
Matthieu Schaller's avatar
Matthieu Schaller committed
913

914
        /* And aggregate */
915
        updated += cp->updated;
916
917
918
919
        e_kin += cp->e_kin;
        e_int += cp->e_int;
        e_pot += cp->e_pot;
        mass += cp->mass;
920
921
922
923
924
925
        mom[0] += cp->mom[0];
        mom[1] += cp->mom[1];
        mom[2] += cp->mom[2];
        ang[0] += cp->ang[0];
        ang[1] += cp->ang[1];
        ang[2] += cp->ang[2];
926
927
        ti_end_min = min(cp->ti_end_min, ti_end_min);
        ti_end_max = max(cp->ti_end_max, ti_end_max);
928
929
930
931
      }
  }

  /* Store the values. */
932
  c->updated = updated;
933
934
935
936
  c->e_kin = e_kin;
  c->e_int = e_int;
  c->e_pot = e_pot;
  c->mass = mass;
937
938
939
940
941
942
  c->mom[0] = mom[0];
  c->mom[1] = mom[1];
  c->mom[2] = mom[2];
  c->ang[0] = ang[0];
  c->ang[1] = ang[1];
  c->ang[2] = ang[2];
943
944
  c->ti_end_min = ti_end_min;
  c->ti_end_max = ti_end_max;
945

Peter W. Draper's avatar
Peter W. Draper committed
946
  if (timer) TIMER_TOC(timer_kick);
947
}
948

Pedro Gonnet's avatar
Pedro Gonnet committed
949
950
951
952
953
954
/**
 * @brief The #runner main thread routine.
 *
 * @param data A pointer to this thread's data.
 */

955
956
957
958
959
960
void *runner_main(void *data) {

  struct runner *r = (struct runner *)data;
  struct engine *e = r->e;
  struct scheduler *sched = &e->sched;
  struct task *t = NULL;
961
  struct cell *ci, *cj;
962
963
964
965
966
967
968
969
970
  struct part *parts;
  int k, nr_parts;

  /* Main loop. */
  while (1) {

    /* Wait at the barrier. */
    engine_barrier(e, r->id);

971
    /* Re-set the pointer to the previous task, as there is none. */
972
    struct task *prev = NULL;
973
974
975
976
977
978
979
980
981

    /* Loop while there are tasks... */
    while (1) {

      /* If there's no old task, try to get a new one. */
      if (t == NULL) {

        /* Get the task. */
        TIMER_TIC
982
        t = scheduler_gettask(sched, r->qid, prev);
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
        TIMER_TOC(timer_gettask);

        /* Did I get anything? */
        if (t == NULL) break;
      }

      /* Get the cells. */
      ci = t->ci;
      cj = t->cj;
      t->rid = r->cpuid;

      /* Different types of tasks... */
      switch (t->type) {
        case task_type_self:
          if (t->subtype == task_subtype_density)
            runner_doself1_density(r, ci);
          else if (t->subtype == task_subtype_force)
1000
            runner_doself2_force(r, ci);
For faster browsing, not all history is shown. View entire blame