runner.c 39.1 KB
Newer Older
1
/*******************************************************************************
2
 * This file is part of SWIFT.
3
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4
5
6
7
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
8
 *
9
10
11
12
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
13
 *
14
15
16
17
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
18
 *
19
20
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 *
22
 ******************************************************************************/
Pedro Gonnet's avatar
Pedro Gonnet committed
23

Pedro Gonnet's avatar
Pedro Gonnet committed
24
25
/* Config parameters. */
#include "../config.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
26
27
28
29

/* Some standard headers. */
#include <float.h>
#include <limits.h>
30
#include <stdlib.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
31

32
33
/* MPI headers. */
#ifdef WITH_MPI
34
#include <mpi.h>
35
36
#endif

37
38
39
/* This object's header. */
#include "runner.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
40
/* Local headers. */
Matthieu Schaller's avatar
Matthieu Schaller committed
41
#include "approx_math.h"
42
#include "atomic.h"
43
#include "const.h"
44
#include "debug.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
45
#include "engine.h"
46
#include "error.h"
47
48
49
#include "gravity.h"
#include "hydro.h"
#include "minmax.h"
50
51
52
53
#include "scheduler.h"
#include "space.h"
#include "task.h"
#include "timers.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
54

55
/* Orientation of the cell pairs */
56
57
58
59
60
61
62
63
64
65
66
67
68
69
const float runner_shift[13 * 3] = {
    5.773502691896258e-01, 5.773502691896258e-01,  5.773502691896258e-01,
    7.071067811865475e-01, 7.071067811865475e-01,  0.0,
    5.773502691896258e-01, 5.773502691896258e-01,  -5.773502691896258e-01,
    7.071067811865475e-01, 0.0,                    7.071067811865475e-01,
    1.0,                   0.0,                    0.0,
    7.071067811865475e-01, 0.0,                    -7.071067811865475e-01,
    5.773502691896258e-01, -5.773502691896258e-01, 5.773502691896258e-01,
    7.071067811865475e-01, -7.071067811865475e-01, 0.0,
    5.773502691896258e-01, -5.773502691896258e-01, -5.773502691896258e-01,
    0.0,                   7.071067811865475e-01,  7.071067811865475e-01,
    0.0,                   1.0,                    0.0,
    0.0,                   7.071067811865475e-01,  -7.071067811865475e-01,
    0.0,                   0.0,                    1.0, };
70
71

/* Does the axis need flipping ? */
72
73
const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
                              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
Tom Theuns's avatar
Tom Theuns committed
74
#define MY_CELL 428428428
Matthieu Schaller's avatar
Matthieu Schaller committed
75
76
77
78
79
80
81
82
83
#define DX 0.1
#define NX 1000
#define CELL_ID                                                  \
  ((int)(c->loc[0] / DX) * NX *NX + (int)(c->loc[1] / DX) * NX + \
   (int)(c->loc[2] / DX))
#define OUT                                                                    \
  if (CELL_ID == MY_CELL) {                                                    \
    message(" cell= %d gcount=%d time=%f \n", CELL_ID, c->gcount, r->e->time); \
  }
Tom Theuns's avatar
Tom Theuns committed
84
//#define OUT  message(" cell %d %d %f \n",CELL_ID, c->count, r->e->time);
Matthieu Schaller's avatar
Matthieu Schaller committed
85
86
//#define OUT  if(CELL_ID == MY_CELL) message("\n cell %f %f %f %d %d
//%f\n",c->loc[0],c->loc[1],c->loc[2], CELL_ID, c->count, r->e->time);
Tom Theuns's avatar
Tom Theuns committed
87

88
/* Import the density loop functions. */
89
90
91
#define FUNCTION density
#include "runner_doiact.h"

92
/* Import the force loop functions. */
93
94
95
96
#undef FUNCTION
#define FUNCTION force
#include "runner_doiact.h"

97
98
99
/* Import the gravity loop functions. */
#include "runner_doiact_grav.h"

Tom Theuns's avatar
Tom Theuns committed
100
101
102
103
104
105
106
107
/**
 * @brief Calculate gravity acceleration from external potential
 *
 * @param runner task
 * @param cell
 */
void runner_dograv_external(struct runner *r, struct cell *c) {

108
  struct gpart *g, *gparts = c->gparts;
Tom Theuns's avatar
Tom Theuns committed
109
  float L[3], E;
110
111
  int i, k, gcount = c->gcount;
  const int ti_current = r->e->ti_current;
Matthieu Schaller's avatar
Matthieu Schaller committed
112
113
114

  // struct space *s = r->e->s;
  // double CentreOfPotential[3];
115
  TIMER_TIC;
Tom Theuns's avatar
Tom Theuns committed
116

Matthieu Schaller's avatar
Matthieu Schaller committed
117
118
  /* 	 /\* location of external gravity point mass - should pass in as
   * paraneter *\/ */
Tom Theuns's avatar
Tom Theuns committed
119
120
121
122
123
124
125
126
127
128
129
130
  /* CentreOfPotential[0] = 0.5 * s->dim[0]; */
  /* CentreOfPotential[1] = 0.5 * s->dim[1]; */
  /* CentreOfPotential[2] = 0.5 * s->dim[2]; */

  /* Recurse? */
  if (c->split) {
    for (k = 0; k < 8; k++)
      if (c->progeny[k] != NULL) runner_dograv_external(r, c->progeny[k]);
    return;
  }

#ifdef TASK_VERBOSE
Matthieu Schaller's avatar
Matthieu Schaller committed
131
  OUT;
Tom Theuns's avatar
Tom Theuns committed
132
#endif
133
  /* Loop over the parts in this cell. */
134
  for (i = 0; i < gcount; i++) {
135

136
137
138
139
140
    /* Get a direct pointer on the part. */
    g = &gparts[i];

    /* Is this part within the time step? */
    if (g->ti_end <= ti_current) {
Matthieu Schaller's avatar
Matthieu Schaller committed
141
142
      //		external_gravity_pointmass(e->physical_constants,
      // potential_constants, g);
143
      external_gravity(r->e->potential, r->e->physical_constants, g);
Matthieu Schaller's avatar
Matthieu Schaller committed
144
145
146

      /* check for energy and angular momentum conservation - begin by
       * synchronizing velocity*/
147
148
149
      const float dx = g->x[0] - r->e->potential->point_mass.x;
      const float dy = g->x[1] - r->e->potential->point_mass.y;
      const float dz = g->x[2] - r->e->potential->point_mass.z;
Matthieu Schaller's avatar
Matthieu Schaller committed
150
151
152
153
154
155
156
157
158
159
160
      const float dr = sqrtf((dx * dx) + (dy * dy) + (dz * dz));
      const float rinv = 1.f / sqrtf(dx * dx + dy * dy + dz * dz);

      const int current_dti = g->ti_end - g->ti_begin;
      const float dt = 0.5f * current_dti * r->e->timeBase;
      const float vx = g->v_full[0] + dt * g->a_grav[0];
      const float vy = g->v_full[1] + dt * g->a_grav[1];
      const float vz = g->v_full[2] + dt * g->a_grav[2];

      /* E/L */
      E = 0.5 * ((vx * vx) + (vy * vy) + (vz * vz)) -
Matthieu Schaller's avatar
Matthieu Schaller committed
161
162
          r->e->physical_constants->newton_gravity *
              r->e->potential->point_mass.mass * rinv;
163
164
165
      L[0] = dy * vz - dz * vy;
      L[1] = dz * vx - dx * vz;
      L[2] = dx * vy - dy * vx;
Matthieu Schaller's avatar
Matthieu Schaller committed
166
167
      if (abs(g->id) == 1) {
        float v2 = vx * vx + vy * vy + vz * vz;
Matthieu Schaller's avatar
Matthieu Schaller committed
168
169
        float fg = r->e->physical_constants->newton_gravity *
                   r->e->potential->point_mass.mass * rinv;
Matthieu Schaller's avatar
Matthieu Schaller committed
170
171
172
173
174
175
176
        float fga = sqrtf((g->a_grav[0] * g->a_grav[0]) +
                          (g->a_grav[1] * g->a_grav[1]) +
                          (g->a_grav[2] * g->a_grav[2])) *
                    dr;
        // message("grav_external time= %f\t V_c^2= %f GM/r= %f E= %f L[2]= %f
        // x= %f y= %f vx= %f vy= %f\n", r->e->time, v2, fg, E, L[2], g->x[0],
        // g->x[1], vx, vy);
Matthieu Schaller's avatar
Matthieu Schaller committed
177
178
179
180
181
182
        message("%f\t %f %f %f %f %f %f %f %f %f %f %f %f %f\n", r->e->time,
                g->tx, g->tv, dt, v2, fg, fga, dr, E, L[2], g->x[0], g->x[1],
                vx, vy);
        /* message(" G=%e M=%e\n", r->e->physical_constants->newton_gravity,
         * r->e->potential->point_mass.mass); */
        /* exit(-1); */
183
184
      }
    }
185
186
  }
  TIMER_TOC(timer_dograv_external);
Tom Theuns's avatar
Tom Theuns committed
187
188
}

Pedro Gonnet's avatar
Pedro Gonnet committed
189
190
191
192
193
194
/**
 * @brief Sort the entries in ascending order using QuickSort.
 *
 * @param sort The entries
 * @param N The number of entries.
 */
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250

void runner_dosort_ascending(struct entry *sort, int N) {

  struct {
    short int lo, hi;
  } qstack[10];
  int qpos, i, j, lo, hi, imin;
  struct entry temp;
  float pivot;

  /* Sort parts in cell_i in decreasing order with quicksort */
  qstack[0].lo = 0;
  qstack[0].hi = N - 1;
  qpos = 0;
  while (qpos >= 0) {
    lo = qstack[qpos].lo;
    hi = qstack[qpos].hi;
    qpos -= 1;
    if (hi - lo < 15) {
      for (i = lo; i < hi; i++) {
        imin = i;
        for (j = i + 1; j <= hi; j++)
          if (sort[j].d < sort[imin].d) imin = j;
        if (imin != i) {
          temp = sort[imin];
          sort[imin] = sort[i];
          sort[i] = temp;
        }
      }
    } else {
      pivot = sort[(lo + hi) / 2].d;
      i = lo;
      j = hi;
      while (i <= j) {
        while (sort[i].d < pivot) i++;
        while (sort[j].d > pivot) j--;
        if (i <= j) {
          if (i < j) {
            temp = sort[i];
            sort[i] = sort[j];
            sort[j] = temp;
          }
          i += 1;
          j -= 1;
        }
      }
      if (j > (lo + hi) / 2) {
        if (lo < j) {
          qpos += 1;
          qstack[qpos].lo = lo;
          qstack[qpos].hi = j;
        }
        if (i < hi) {
          qpos += 1;
          qstack[qpos].lo = i;
          qstack[qpos].hi = hi;
Pedro Gonnet's avatar
Pedro Gonnet committed
251
        }
252
253
254
255
256
257
258
259
260
261
262
263
      } else {
        if (i < hi) {
          qpos += 1;
          qstack[qpos].lo = i;
          qstack[qpos].hi = hi;
        }
        if (lo < j) {
          qpos += 1;
          qstack[qpos].lo = lo;
          qstack[qpos].hi = j;
        }
      }
Pedro Gonnet's avatar
Pedro Gonnet committed
264
    }
265
266
267
  }
}

Pedro Gonnet's avatar
Pedro Gonnet committed
268
269
270
271
272
/**
 * @brief Sort the particles in the given cell along all cardinal directions.
 *
 * @param r The #runner.
 * @param c The #cell.
273
 * @param flags Cell flag.
274
275
 * @param clock Flag indicating whether to record the timing or not, needed
 *      for recursive calls.
Pedro Gonnet's avatar
Pedro Gonnet committed
276
 */
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364

void runner_dosort(struct runner *r, struct cell *c, int flags, int clock) {

  struct entry *finger;
  struct entry *fingers[8];
  struct part *parts = c->parts;
  struct entry *sort;
  int j, k, count = c->count;
  int i, ind, off[8], inds[8], temp_i, missing;
  // float shift[3];
  float buff[8], px[3];

  TIMER_TIC

  /* Clean-up the flags, i.e. filter out what's already been sorted. */
  flags &= ~c->sorted;
  if (flags == 0) return;

  /* start by allocating the entry arrays. */
  if (c->sort == NULL || c->sortsize < count) {
    if (c->sort != NULL) free(c->sort);
    c->sortsize = count * 1.1;
    if ((c->sort = (struct entry *)malloc(sizeof(struct entry) *
                                          (c->sortsize + 1) * 13)) == NULL)
      error("Failed to allocate sort memory.");
  }
  sort = c->sort;

  /* Does this cell have any progeny? */
  if (c->split) {

    /* Fill in the gaps within the progeny. */
    for (k = 0; k < 8; k++) {
      if (c->progeny[k] == NULL) continue;
      missing = flags & ~c->progeny[k]->sorted;
      if (missing) runner_dosort(r, c->progeny[k], missing, 0);
    }

    /* Loop over the 13 different sort arrays. */
    for (j = 0; j < 13; j++) {

      /* Has this sort array been flagged? */
      if (!(flags & (1 << j))) continue;

      /* Init the particle index offsets. */
      for (off[0] = 0, k = 1; k < 8; k++)
        if (c->progeny[k - 1] != NULL)
          off[k] = off[k - 1] + c->progeny[k - 1]->count;
        else
          off[k] = off[k - 1];

      /* Init the entries and indices. */
      for (k = 0; k < 8; k++) {
        inds[k] = k;
        if (c->progeny[k] != NULL && c->progeny[k]->count > 0) {
          fingers[k] = &c->progeny[k]->sort[j * (c->progeny[k]->count + 1)];
          buff[k] = fingers[k]->d;
          off[k] = off[k];
        } else
          buff[k] = FLT_MAX;
      }

      /* Sort the buffer. */
      for (i = 0; i < 7; i++)
        for (k = i + 1; k < 8; k++)
          if (buff[inds[k]] < buff[inds[i]]) {
            temp_i = inds[i];
            inds[i] = inds[k];
            inds[k] = temp_i;
          }

      /* For each entry in the new sort list. */
      finger = &sort[j * (count + 1)];
      for (ind = 0; ind < count; ind++) {

        /* Copy the minimum into the new sort array. */
        finger[ind].d = buff[inds[0]];
        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];

        /* Update the buffer. */
        fingers[inds[0]] += 1;
        buff[inds[0]] = fingers[inds[0]]->d;

        /* Find the smallest entry. */
        for (k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
          temp_i = inds[k - 1];
          inds[k - 1] = inds[k];
          inds[k] = temp_i;
Pedro Gonnet's avatar
Pedro Gonnet committed
365
        }
366

367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
      } /* Merge. */

      /* Add a sentinel. */
      sort[j * (count + 1) + count].d = FLT_MAX;
      sort[j * (count + 1) + count].i = 0;

      /* Mark as sorted. */
      c->sorted |= (1 << j);

    } /* loop over sort arrays. */

  } /* progeny? */

  /* Otherwise, just sort. */
  else {

    /* Fill the sort array. */
    for (k = 0; k < count; k++) {
      px[0] = parts[k].x[0];
      px[1] = parts[k].x[1];
      px[2] = parts[k].x[2];
      for (j = 0; j < 13; j++)
        if (flags & (1 << j)) {
          sort[j * (count + 1) + k].i = k;
          sort[j * (count + 1) + k].d = px[0] * runner_shift[3 * j + 0] +
                                        px[1] * runner_shift[3 * j + 1] +
                                        px[2] * runner_shift[3 * j + 2];
        }
395
    }
396
397
398
399
400
401
402
403
404
405
406

    /* Add the sentinel and sort. */
    for (j = 0; j < 13; j++)
      if (flags & (1 << j)) {
        sort[j * (count + 1) + count].d = FLT_MAX;
        sort[j * (count + 1) + count].i = 0;
        runner_dosort_ascending(&sort[j * (count + 1)], count);
        c->sorted |= (1 << j);
      }
  }

Matthieu Schaller's avatar
Matthieu Schaller committed
407
408
409
410
411
412
413
414
415
416
417
418
  /* Verify the sorting. */
  /* for ( j = 0 ; j < 13 ; j++ ) {
      if ( !( flags & (1 << j) ) )
          continue;
      finger = &sort[ j*(count + 1) ];
      for ( k = 1 ; k < count ; k++ ) {
          if ( finger[k].d < finger[k-1].d )
              error( "Sorting failed, ascending array." );
          if ( finger[k].i >= count )
              error( "Sorting failed, indices borked." );
          }
      } */
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509

  if (clock) TIMER_TOC(timer_dosort);
}

void runner_dogsort(struct runner *r, struct cell *c, int flags, int clock) {

  struct entry *finger;
  struct entry *fingers[8];
  struct gpart *gparts = c->gparts;
  struct entry *gsort;
  int j, k, count = c->gcount;
  int i, ind, off[8], inds[8], temp_i, missing;
  // float shift[3];
  float buff[8], px[3];

  TIMER_TIC

  /* Clean-up the flags, i.e. filter out what's already been sorted. */
  flags &= ~c->gsorted;
  if (flags == 0) return;

  /* start by allocating the entry arrays. */
  if (c->gsort == NULL || c->gsortsize < count) {
    if (c->gsort != NULL) free(c->gsort);
    c->gsortsize = count * 1.1;
    if ((c->gsort = (struct entry *)malloc(sizeof(struct entry) *
                                           (c->gsortsize + 1) * 13)) == NULL)
      error("Failed to allocate sort memory.");
  }
  gsort = c->gsort;

  /* Does this cell have any progeny? */
  if (c->split) {

    /* Fill in the gaps within the progeny. */
    for (k = 0; k < 8; k++) {
      if (c->progeny[k] == NULL) continue;
      missing = flags & ~c->progeny[k]->gsorted;
      if (missing) runner_dogsort(r, c->progeny[k], missing, 0);
    }

    /* Loop over the 13 different sort arrays. */
    for (j = 0; j < 13; j++) {

      /* Has this sort array been flagged? */
      if (!(flags & (1 << j))) continue;

      /* Init the particle index offsets. */
      for (off[0] = 0, k = 1; k < 8; k++)
        if (c->progeny[k - 1] != NULL)
          off[k] = off[k - 1] + c->progeny[k - 1]->gcount;
        else
          off[k] = off[k - 1];

      /* Init the entries and indices. */
      for (k = 0; k < 8; k++) {
        inds[k] = k;
        if (c->progeny[k] != NULL && c->progeny[k]->gcount > 0) {
          fingers[k] = &c->progeny[k]->gsort[j * (c->progeny[k]->gcount + 1)];
          buff[k] = fingers[k]->d;
          off[k] = off[k];
        } else
          buff[k] = FLT_MAX;
      }

      /* Sort the buffer. */
      for (i = 0; i < 7; i++)
        for (k = i + 1; k < 8; k++)
          if (buff[inds[k]] < buff[inds[i]]) {
            temp_i = inds[i];
            inds[i] = inds[k];
            inds[k] = temp_i;
          }

      /* For each entry in the new sort list. */
      finger = &gsort[j * (count + 1)];
      for (ind = 0; ind < count; ind++) {

        /* Copy the minimum into the new sort array. */
        finger[ind].d = buff[inds[0]];
        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];

        /* Update the buffer. */
        fingers[inds[0]] += 1;
        buff[inds[0]] = fingers[inds[0]]->d;

        /* Find the smallest entry. */
        for (k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
          temp_i = inds[k - 1];
          inds[k - 1] = inds[k];
          inds[k] = temp_i;
510
        }
Pedro Gonnet's avatar
Pedro Gonnet committed
511

512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
      } /* Merge. */

      /* Add a sentinel. */
      gsort[j * (count + 1) + count].d = FLT_MAX;
      gsort[j * (count + 1) + count].i = 0;

      /* Mark as sorted. */
      c->gsorted |= (1 << j);

    } /* loop over sort arrays. */

  } /* progeny? */

  /* Otherwise, just sort. */
  else {

    /* Fill the sort array. */
    for (k = 0; k < count; k++) {
      px[0] = gparts[k].x[0];
      px[1] = gparts[k].x[1];
      px[2] = gparts[k].x[2];
      for (j = 0; j < 13; j++)
        if (flags & (1 << j)) {
          gsort[j * (count + 1) + k].i = k;
          gsort[j * (count + 1) + k].d = px[0] * runner_shift[3 * j + 0] +
                                         px[1] * runner_shift[3 * j + 1] +
                                         px[2] * runner_shift[3 * j + 2];
        }
Pedro Gonnet's avatar
Pedro Gonnet committed
540
    }
541
542
543
544
545
546
547
548
549
550
551

    /* Add the sentinel and sort. */
    for (j = 0; j < 13; j++)
      if (flags & (1 << j)) {
        gsort[j * (count + 1) + count].d = FLT_MAX;
        gsort[j * (count + 1) + count].i = 0;
        runner_dosort_ascending(&gsort[j * (count + 1)], count);
        c->gsorted |= (1 << j);
      }
  }

Matthieu Schaller's avatar
Matthieu Schaller committed
552
553
554
555
556
557
558
559
560
561
562
563
  /* Verify the sorting. */
  /* for ( j = 0 ; j < 13 ; j++ ) {
      if ( !( flags & (1 << j) ) )
          continue;
      finger = &c->gsort[ j*(count + 1) ];
      for ( k = 1 ; k < count ; k++ ) {
          if ( finger[k].d < finger[k-1].d )
              error( "Sorting failed, ascending array." );
          if ( finger[k].i < 0 || finger[k].i >= count )
              error( "Sorting failed, indices borked." );
          }
      } */
564
565
566
567

  if (clock) TIMER_TOC(timer_dosort);
}

568
569
570
571
572
/**
 * @brief Initialize the particles before the density calculation
 *
 * @param r The runner thread.
 * @param c The cell.
Matthieu Schaller's avatar
Matthieu Schaller committed
573
 * @param timer 1 if the time is to be recorded.
574
575
 */

576
void runner_doinit(struct runner *r, struct cell *c, int timer) {
577

578
579
  struct part *const parts = c->parts;
  struct gpart *const gparts = c->gparts;
Matthieu Schaller's avatar
Matthieu Schaller committed
580
  const int count = c->count;
581
  const int gcount = c->gcount;
582
  const int ti_current = r->e->ti_current;
583
584

  TIMER_TIC;
585

586
587
  /* Recurse? */
  if (c->split) {
Matthieu Schaller's avatar
Matthieu Schaller committed
588
    for (int k = 0; k < 8; k++)
589
      if (c->progeny[k] != NULL) runner_doinit(r, c->progeny[k], 0);
590
    return;
591
592
  } else {

593
594
    /* Loop over the parts in this cell. */
    for (int i = 0; i < count; i++) {
595

596
      /* Get a direct pointer on the part. */
597
      struct part *const p = &parts[i];
598

599
      if (p->ti_end <= ti_current) {
Matthieu Schaller's avatar
Matthieu Schaller committed
600

601
602
        /* Get ready for a density calculation */
        hydro_init_part(p);
603
      }
604
    }
605

606
    /* Loop over the gparts in this cell. */
607
608
609
    for (int i = 0; i < gcount; i++) {

      /* Get a direct pointer on the part. */
610
      struct gpart *const gp = &gparts[i];
611

612
      if (gp->ti_end <= ti_current) {
613
614

        /* Get ready for a density calculation */
615
        gravity_init_part(gp);
616
      }
617
618
    }
  }
619

Peter W. Draper's avatar
Peter W. Draper committed
620
  if (timer) TIMER_TOC(timer_init);
621
622
}

623
624
625
/**
 * @brief Intermediate task between density and force
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
626
 * @param r The runner thread.
627
 * @param c The cell.
628
 */
629
630
631
632

void runner_doghost(struct runner *r, struct cell *c) {

  struct part *p, *parts = c->parts;
633
  struct xpart *xp, *xparts = c->xparts;
634
  struct cell *finger;
Matthieu Schaller's avatar
Matthieu Schaller committed
635
  int redo, count = c->count;
636
  int *pid;
637
  float h_corr;
638
639
  const int ti_current = r->e->ti_current;
  const double timeBase = r->e->timeBase;
640

641
642
  TIMER_TIC;

643
644
  /* Recurse? */
  if (c->split) {
Matthieu Schaller's avatar
Matthieu Schaller committed
645
    for (int k = 0; k < 8; k++)
646
647
648
649
650
651
652
      if (c->progeny[k] != NULL) runner_doghost(r, c->progeny[k]);
    return;
  }

  /* Init the IDs that have to be updated. */
  if ((pid = (int *)alloca(sizeof(int) * count)) == NULL)
    error("Call to alloca failed.");
Matthieu Schaller's avatar
Matthieu Schaller committed
653
  for (int k = 0; k < count; k++) pid[k] = k;
654
655

  /* While there are particles that need to be updated... */
Matthieu Schaller's avatar
Matthieu Schaller committed
656
  for (int num_reruns = 0; count > 0 && num_reruns < const_smoothing_max_iter;
Matthieu Schaller's avatar
Matthieu Schaller committed
657
       num_reruns++) {
658
659
660

    /* Reset the redo-count. */
    redo = 0;
661

662
    /* Loop over the parts in this cell. */
Matthieu Schaller's avatar
Matthieu Schaller committed
663
    for (int i = 0; i < count; i++) {
664
665
666

      /* Get a direct pointer on the part. */
      p = &parts[pid[i]];
667
      xp = &xparts[pid[i]];
668
669

      /* Is this part within the timestep? */
670
      if (p->ti_end <= ti_current) {
671

672
        /* Finish the density calculation */
673
        hydro_end_density(p, ti_current);
674
675

        /* If no derivative, double the smoothing length. */
676
        if (p->density.wcount_dh == 0.0f) h_corr = p->h;
677
678
679

        /* Otherwise, compute the smoothing length update (Newton step). */
        else {
680
          h_corr = (kernel_nwneigh - p->density.wcount) / p->density.wcount_dh;
681
682

          /* Truncate to the range [ -p->h/2 , p->h ]. */
683
684
          h_corr = fminf(h_corr, p->h);
          h_corr = fmaxf(h_corr, -p->h * 0.5f);
Pedro Gonnet's avatar
Pedro Gonnet committed
685
        }
686
687

        /* Did we get the right number density? */
688
        if (p->density.wcount > kernel_nwneigh + const_delta_nwneigh ||
689
            p->density.wcount < kernel_nwneigh - const_delta_nwneigh) {
690
691
692
693

          /* Ok, correct then */
          p->h += h_corr;

694
          /* Flag for another round of fun */
695
696
          pid[redo] = pid[i];
          redo += 1;
697

698
699
          /* Re-initialise everything */
          hydro_init_part(p);
700

701
          /* Off we go ! */
702
          continue;
703
704
        }

Matthieu Schaller's avatar
Matthieu Schaller committed
705
        /* We now have a particle whose smoothing length has converged */
Matthieu Schaller's avatar
Matthieu Schaller committed
706

707
        /* As of here, particle force variables will be set. */
708

709
        /* Compute variables required for the force loop */
710
        hydro_prepare_force(p, xp, ti_current, timeBase);
711

Matthieu Schaller's avatar
Matthieu Schaller committed
712
        /* The particle force values are now set.  Do _NOT_
713
           try to read any particle density variables! */
Matthieu Schaller's avatar
Matthieu Schaller committed
714

715
716
        /* Prepare the particle for the force loop over neighbours */
        hydro_reset_acceleration(p);
717
718
719
      }
    }

720
721
722
    /* We now need to treat the particles whose smoothing length had not
     * converged again */

723
724
725
726
727
728
    /* Re-set the counter for the next loop (potentially). */
    count = redo;
    if (count > 0) {

      /* Climb up the cell hierarchy. */
      for (finger = c; finger != NULL; finger = finger->parent) {
Matthieu Schaller's avatar
Matthieu Schaller committed
729

730
731
        /* Run through this cell's density interactions. */
        for (struct link *l = finger->density; l != NULL; l = l->next) {
Matthieu Schaller's avatar
Matthieu Schaller committed
732

733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
          /* Self-interaction? */
          if (l->t->type == task_type_self)
            runner_doself_subset_density(r, finger, parts, pid, count);

          /* Otherwise, pair interaction? */
          else if (l->t->type == task_type_pair) {

            /* Left or right? */
            if (l->t->ci == finger)
              runner_dopair_subset_density(r, finger, parts, pid, count,
                                           l->t->cj);
            else
              runner_dopair_subset_density(r, finger, parts, pid, count,
                                           l->t->ci);

          }

          /* Otherwise, sub interaction? */
          else if (l->t->type == task_type_sub) {

            /* Left or right? */
            if (l->t->ci == finger)
              runner_dosub_subset_density(r, finger, parts, pid, count,
                                          l->t->cj, -1, 1);
            else
              runner_dosub_subset_density(r, finger, parts, pid, count,
                                          l->t->ci, -1, 1);
          }
        }
      }
763
    }
764
  }
765

Matthieu Schaller's avatar
Matthieu Schaller committed
766
767
  if (count)
    message("Smoothing length failed to converge on %i particles.", count);
768
769
770
771

  TIMER_TOC(timer_doghost);
}

772
/**
773
 * @brief Drift particles and g-particles forward in time
774
775
776
 *
 * @param r The runner thread.
 * @param c The cell.
777
 * @param timer Are we timing this ?
778
 */
779
void runner_dodrift(struct runner *r, struct cell *c, int timer) {
780

781
782
  const double timeBase = r->e->timeBase;
  const double dt = (r->e->ti_current - r->e->ti_old) * timeBase;
783
784
785
786
787
  const int ti_old = r->e->ti_old;
  const int ti_current = r->e->ti_current;
  struct part *const parts = c->parts;
  struct xpart *const xparts = c->xparts;
  struct gpart *const gparts = c->gparts;
788
  float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f;
789

790
  TIMER_TIC
Tom Theuns's avatar
Tom Theuns committed
791
#ifdef TASK_VERBOSE
Matthieu Schaller's avatar
Matthieu Schaller committed
792
  OUT;
Tom Theuns's avatar
Tom Theuns committed
793
#endif
794

795
796
  /* No children? */
  if (!c->split) {
797

798
    /* Loop over all the g-particles in the cell */
Peter W. Draper's avatar
Peter W. Draper committed
799
800
    const int nr_gparts = c->gcount;
    for (size_t k = 0; k < nr_gparts; k++) {
801
802
803
804
805
806
807
808
809
810
811

      /* Get a handle on the gpart. */
      struct gpart *const gp = &gparts[k];

      /* Drift... */
      gp->x[0] += gp->v_full[0] * dt;
      gp->x[1] += gp->v_full[1] * dt;
      gp->x[2] += gp->v_full[2] * dt;
    }

    /* Loop over all the particles in the cell (more work for these !) */
Peter W. Draper's avatar
Peter W. Draper committed
812
813
    const size_t nr_parts = c->count;
    for (size_t k = 0; k < nr_parts; k++) {
814

815
      /* Get a handle on the part. */
816
817
      struct part *const p = &parts[k];
      struct xpart *const xp = &xparts[k];
818

Matthieu Schaller's avatar
Matthieu Schaller committed
819
820
821
      /* Useful quantity */
      const float h_inv = 1.0f / p->h;

822
823
824
825
      /* Drift... */
      p->x[0] += xp->v_full[0] * dt;
      p->x[1] += xp->v_full[1] * dt;
      p->x[2] += xp->v_full[2] * dt;
826

Matthieu Schaller's avatar
Matthieu Schaller committed
827
      /* Predict velocities (for hydro terms) */
Matthieu Schaller's avatar
Matthieu Schaller committed
828
829
830
      p->v[0] += p->a_hydro[0] * dt;
      p->v[1] += p->a_hydro[1] * dt;
      p->v[2] += p->a_hydro[2] * dt;
831

Matthieu Schaller's avatar
Matthieu Schaller committed
832
      /* Predict smoothing length */
833
834
835
      const float w1 = p->h_dt * h_inv * dt;
      if (fabsf(w1) < 0.2f)
        p->h *= approx_expf(w1); /* 4th order expansion of exp(w) */
Matthieu Schaller's avatar
Matthieu Schaller committed
836
      else
837
        p->h *= expf(w1);
Matthieu Schaller's avatar
Matthieu Schaller committed
838
839

      /* Predict density */
840
841
842
      const float w2 = -3.0f * p->h_dt * h_inv * dt;
      if (fabsf(w2) < 0.2f)
        p->rho *= approx_expf(w2); /* 4th order expansion of exp(w) */
Matthieu Schaller's avatar
Matthieu Schaller committed
843
      else
844
        p->rho *= expf(w2);
845

846
      /* Predict the values of the extra fields */
847
      hydro_predict_extra(p, xp, ti_old, ti_current, timeBase);
848

849
      /* Compute (square of) motion since last cell construction */
850
851
852
853
      const float dx2 = (p->x[0] - xp->x_old[0]) * (p->x[0] - xp->x_old[0]) +
                        (p->x[1] - xp->x_old[1]) * (p->x[1] - xp->x_old[1]) +
                        (p->x[2] - xp->x_old[2]) * (p->x[2] - xp->x_old[2]);
      dx2_max = fmaxf(dx2_max, dx2);
Matthieu Schaller's avatar
Matthieu Schaller committed
854
855
856

      /* Maximal smoothing length */
      h_max = fmaxf(p->h, h_max);
857
    }
858

859
    /* Now, get the maximal particle motion from its square */
860
    dx_max = sqrtf(dx2_max);
861
  }
862

Matthieu Schaller's avatar
Matthieu Schaller committed
863
864
865
866
867
868
869
870
871
  /* Otherwise, aggregate data from children. */
  else {

    /* Loop over the progeny. */
    for (int k = 0; k < 8; k++)
      if (c->progeny[k] != NULL) {
        struct cell *cp = c->progeny[k];
        runner_dodrift(r, cp, 0);

872
873
        dx_max = fmaxf(dx_max, cp->dx_max);
        h_max = fmaxf(h_max, cp->h_max);
Matthieu Schaller's avatar
Matthieu Schaller committed
874
875
876
877
878
879
      }
  }

  /* Store the values */
  c->h_max = h_max;
  c->dx_max = dx_max;
880

Peter W. Draper's avatar
Peter W. Draper committed
881
  if (timer) TIMER_TOC(timer_drift);
882
}
883

884
885
886
887
888
/**
 * @brief Combined second and first kick for fixed dt.
 *
 * @param r The runner thread.
 * @param c The cell.
889
 * @param timer The timer
890
891
 */

892
893
void runner_dokick(struct runner *r, struct cell *c, int timer) {

894
895
  const float global_dt_min = r->e->dt_min;
  const float global_dt_max = r->e->dt_max;
896
  const int ti_current = r->e->ti_current;
897
898
  const double timeBase = r->e->timeBase;
  const double timeBase_inv = 1.0 / r->e->timeBase;
Matthieu Schaller's avatar
Matthieu Schaller committed
899
  const int count = c->count;
Tom Theuns's avatar
Tom Theuns committed
900
  const int gcount = c->gcount;
901
902
903
  struct part *const parts = c->parts;
  struct xpart *const xparts = c->xparts;
  struct gpart *const gparts = c->gparts;
904
905
  const int is_fixdt =
      (r->e->policy & engine_policy_fixdt) == engine_policy_fixdt;
Matthieu Schaller's avatar
Matthieu Schaller committed
906

907
  int updated = 0, g_updated = 0;
908
  int ti_end_min = max_nr_timesteps, ti_end_max = 0;
909
910
911
  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, mass = 0.0;
  float mom[3] = {0.0f, 0.0f, 0.0f};
  float ang[3] = {0.0f, 0.0f, 0.0f};
912
913
914

  TIMER_TIC

Tom Theuns's avatar
Tom Theuns committed
915
#ifdef TASK_VERBOSE
Matthieu Schaller's avatar
Matthieu Schaller committed
916
  OUT;
Tom Theuns's avatar
Tom Theuns committed
917
918
#endif

919
920
921
  /* No children? */
  if (!c->split) {

922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
    /* Loop over the g-particles and kick the active ones. */
    for (int k = 0; k < gcount; k++) {

      /* Get a handle on the part. */
      struct gpart *const gp = &gparts[k];

      /* If the g-particle has no counterpart and needs to be kicked */
      if (gp->id < 0 && (is_fixdt || gp->ti_end <= ti_current)) {

        /* First, finish the force calculation */
        gravity_end_force(gp);

        /* Now we are ready to compute the next time-step size */
        int new_dti;

        if (is_fixdt) {

          /* Now we have a time step, proceed with the kick */
          new_dti = global_dt_max * timeBase_inv;

        } else {

          /* Compute the next timestep (gravity condition) */
Matthieu Schaller's avatar
Matthieu Schaller committed
945
946
          float new_dt = gravity_compute_timestep(r->e->potential,
                                                  r->e->physical_constants, gp);
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974

          /* Limit timestep within the allowed range */
          new_dt = fminf(new_dt, global_dt_max);
          new_dt = fmaxf(new_dt, global_dt_min);

          /* Convert to integer time */
          new_dti = new_dt * timeBase_inv;

          /* Recover the current timestep */
          const int current_dti = gp->ti_end - gp->ti_begin;

          /* Limit timestep increase */
          if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti);

          /* Put this timestep on the time line */
          int dti_timeline = max_nr_timesteps;
          while (new_dti < dti_timeline) dti_timeline /= 2;

          /* Now we have a time step, proceed with the kick */
          new_dti = dti_timeline;
        }

        /* Compute the time step for this kick */
        const int ti_start = (gp->ti_begin + gp->ti_end) / 2;
        const int ti_end = gp->ti_end + new_dti / 2;
        const double dt = (ti_end - ti_start) * timeBase;
        const double half_dt = (ti_end - gp->ti_end) * timeBase;

Matthieu Schaller's avatar
Matthieu Schaller committed
975
        /* Move particle forward in time */
976
977
978
        gp->ti_begin = gp->ti_end;
        gp->ti_end = gp->ti_begin + new_dti;

979
980
981
982
983
984
985
986
987
988
989
        /* Kick particles in momentum space */
        gp->v_full[0] += gp->a_grav[0] * dt;
        gp->v_full[1] += gp->a_grav[1] * dt;
        gp->v_full[2] += gp->a_grav[2] * dt;

        /* Extra kick work */
        gravity_kick_extra(gp, dt, half_dt);

        /* Number of updated g-particles */
        g_updated++;
      }
990

Matthieu Schaller's avatar
Matthieu Schaller committed
991
992
993
      /* Minimal time for next end of time-step */
      ti_end_min = min(gp->ti_end, ti_end_min);
      ti_end_max = max(gp->ti_end, ti_end_max);
994
995
996
997
    }

    /* Now do the hydro ones... */

998
    /* Loop over the particles and kick the active ones. */
Matthieu Schaller's avatar
Matthieu Schaller committed
999
    for (int k = 0; k < count; k++) {
1000