runner.c 37.1 KB
Newer Older
1
/*******************************************************************************
2
 * This file is part of SWIFT.
3
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4
 *
5
6
7
8
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
9
 *
10
11
12
13
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
14
 *
15
16
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17
 *
18
 ******************************************************************************/
Pedro Gonnet's avatar
Pedro Gonnet committed
19

Pedro Gonnet's avatar
Pedro Gonnet committed
20
21
/* Config parameters. */
#include "../config.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
22
23
24
25

/* Some standard headers. */
#include <float.h>
#include <limits.h>
26
#include <stdlib.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
27

28
29
/* MPI headers. */
#ifdef WITH_MPI
30
#include <mpi.h>
31
32
#endif

33
34
35
/* This object's header. */
#include "runner.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
36
/* Local headers. */
Matthieu Schaller's avatar
Matthieu Schaller committed
37
#include "approx_math.h"
38
#include "atomic.h"
39
#include "const.h"
40
#include "debug.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
41
#include "engine.h"
42
#include "error.h"
43
44
45
#include "gravity.h"
#include "hydro.h"
#include "minmax.h"
46
47
48
49
#include "scheduler.h"
#include "space.h"
#include "task.h"
#include "timers.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
50

51
/* Orientation of the cell pairs */
52
53
54
55
56
57
58
59
60
61
62
63
64
65
const float runner_shift[13 * 3] = {
    5.773502691896258e-01, 5.773502691896258e-01,  5.773502691896258e-01,
    7.071067811865475e-01, 7.071067811865475e-01,  0.0,
    5.773502691896258e-01, 5.773502691896258e-01,  -5.773502691896258e-01,
    7.071067811865475e-01, 0.0,                    7.071067811865475e-01,
    1.0,                   0.0,                    0.0,
    7.071067811865475e-01, 0.0,                    -7.071067811865475e-01,
    5.773502691896258e-01, -5.773502691896258e-01, 5.773502691896258e-01,
    7.071067811865475e-01, -7.071067811865475e-01, 0.0,
    5.773502691896258e-01, -5.773502691896258e-01, -5.773502691896258e-01,
    0.0,                   7.071067811865475e-01,  7.071067811865475e-01,
    0.0,                   1.0,                    0.0,
    0.0,                   7.071067811865475e-01,  -7.071067811865475e-01,
    0.0,                   0.0,                    1.0, };
66
67

/* Does the axis need flipping ? */
68
69
const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
                              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
Tom Theuns's avatar
Tom Theuns committed
70
71
72
73
74
#define MY_CELL 428428428
#define DX     0.1
#define NX     1000
#define CELL_ID ( (int)(c->loc[0]/DX) * NX * NX + (int)(c->loc[1]/DX) * NX + (int)(c->loc[2]/DX))
#define OUT  if(CELL_ID == MY_CELL) {message(" cell= %d gcount=%d time=%f \n",CELL_ID, c->gcount, r->e->time);}
Tom Theuns's avatar
Tom Theuns committed
75
76
77
78
//#define OUT  message(" cell %d %d %f \n",CELL_ID, c->count, r->e->time);
//#define OUT  if(CELL_ID == MY_CELL) message("\n cell %f %f %f %d %d %f\n",c->loc[0],c->loc[1],c->loc[2], CELL_ID, c->count, r->e->time);


79
/* Import the density loop functions. */
80
81
82
#define FUNCTION density
#include "runner_doiact.h"

83
/* Import the force loop functions. */
84
85
86
87
#undef FUNCTION
#define FUNCTION force
#include "runner_doiact.h"

88
89
90
/* Import the gravity loop functions. */
#include "runner_doiact_grav.h"

Tom Theuns's avatar
Tom Theuns committed
91
92
93
94
95
96
97
98
99

/**
 * @brief Calculate gravity acceleration from external potential
 *
 * @param runner task
 * @param cell
 */
void runner_dograv_external(struct runner *r, struct cell *c) {

100
  struct gpart *g, *gparts = c->gparts;
Tom Theuns's avatar
Tom Theuns committed
101
102
  float rinv;
  float L[3], E;
103
  float dx, dy, dz;
104
105
106
  int i, k, gcount = c->gcount;
  const int ti_current = r->e->ti_current;

Tom Theuns's avatar
Tom Theuns committed
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
  //struct space *s = r->e->s;
  //double CentreOfPotential[3];
  TIMER_TIC

  /* 	 /\* location of external gravity point mass - should pass in as paraneter *\/ */
  /* CentreOfPotential[0] = 0.5 * s->dim[0]; */
  /* CentreOfPotential[1] = 0.5 * s->dim[1]; */
  /* CentreOfPotential[2] = 0.5 * s->dim[2]; */

  /* Recurse? */
  if (c->split) {
    for (k = 0; k < 8; k++)
      if (c->progeny[k] != NULL) runner_dograv_external(r, c->progeny[k]);
    return;
  }

#ifdef TASK_VERBOSE
  	 OUT
#endif
126
  /* Loop over the parts in this cell. */
127
  for (i = 0; i < gcount; i++) {
128
129

  	 /* Get a direct pointer on the part. */
130
  	 g = &gparts[i];
131
132

  	 /* Is this part within the time step? */
133
  	 if (g->ti_end <= ti_current) {
134
135
136
137
138
		dx  = g->x[0]-External_Potential_X;
		dy  = g->x[1]-External_Potential_Y;
		dz  = g->x[2]-External_Potential_Z;

  		rinv = 1 / sqrtf(dx*dx + dy*dy + dz*dz);
139
140

  		/* check for energy and angular momentum conservation */
141
  		E = 0.5 * ((g->v_full[0]*g->v_full[0]) + (g->v_full[1]*g->v_full[1]) + (g->v_full[2]*g->v_full[2])) - const_G *  External_Potential_Mass * rinv;
142
143
144
  		L[0] = dy * g->v_full[2] - dz * g->v_full[1];
  		L[1] = dz * g->v_full[0] - dx * g->v_full[2];
  		L[2] = dx * g->v_full[1] - dy * g->v_full[0];
145
  		if(g->id == 0) {
146
147
148
149
150
		  float v2 = g->v_full[0]*g->v_full[0] + g->v_full[1]*g->v_full[1] + g->v_full[2]*g->v_full[2];
		  float fg = const_G * External_Potential_Mass * rinv;
  		  //message("grav_external time= %f\t V_c^2= %f GM/r= %f E= %f L[2]= %f x= %f y= %f vx= %f vy= %f\n", r->e->time, v2, fg, E, L[2], g->x[0], g->x[1], g->v_full[0], g->v_full[1]);
		  message("%f\t %f %f %f %f %f %f %f %f %f %f\n", r->e->time, g->tx, g->tv, v2, fg, E, L[2], g->x[0], g->x[1], g->v_full[0], g->v_full[1]);
  		  // message(" G=%e M=%e\n", const_G, External_Potential_Mass);
151
  		}
152
153
154
  		g->a_grav_external[0] = - const_G *  External_Potential_Mass * dx * rinv * rinv * rinv;
  		g->a_grav_external[1] = - const_G *  External_Potential_Mass * dy * rinv * rinv * rinv;
  		g->a_grav_external[2] = - const_G *  External_Potential_Mass * dz * rinv * rinv * rinv;
Tom Theuns's avatar
Tom Theuns committed
155
		
156
157
158
  	 }
  }
  TIMER_TOC(timer_dograv_external);
Tom Theuns's avatar
Tom Theuns committed
159
160
161
}


Pedro Gonnet's avatar
Pedro Gonnet committed
162
/**
163
 * @Brief Sort the entries in ascending order using QuickSort.
Pedro Gonnet's avatar
Pedro Gonnet committed
164
165
166
167
 *
 * @param sort The entries
 * @param N The number of entries.
 */
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223

void runner_dosort_ascending(struct entry *sort, int N) {

  struct {
    short int lo, hi;
  } qstack[10];
  int qpos, i, j, lo, hi, imin;
  struct entry temp;
  float pivot;

  /* Sort parts in cell_i in decreasing order with quicksort */
  qstack[0].lo = 0;
  qstack[0].hi = N - 1;
  qpos = 0;
  while (qpos >= 0) {
    lo = qstack[qpos].lo;
    hi = qstack[qpos].hi;
    qpos -= 1;
    if (hi - lo < 15) {
      for (i = lo; i < hi; i++) {
        imin = i;
        for (j = i + 1; j <= hi; j++)
          if (sort[j].d < sort[imin].d) imin = j;
        if (imin != i) {
          temp = sort[imin];
          sort[imin] = sort[i];
          sort[i] = temp;
        }
      }
    } else {
      pivot = sort[(lo + hi) / 2].d;
      i = lo;
      j = hi;
      while (i <= j) {
        while (sort[i].d < pivot) i++;
        while (sort[j].d > pivot) j--;
        if (i <= j) {
          if (i < j) {
            temp = sort[i];
            sort[i] = sort[j];
            sort[j] = temp;
          }
          i += 1;
          j -= 1;
        }
      }
      if (j > (lo + hi) / 2) {
        if (lo < j) {
          qpos += 1;
          qstack[qpos].lo = lo;
          qstack[qpos].hi = j;
        }
        if (i < hi) {
          qpos += 1;
          qstack[qpos].lo = i;
          qstack[qpos].hi = hi;
Pedro Gonnet's avatar
Pedro Gonnet committed
224
        }
225
226
227
228
229
230
231
232
233
234
235
236
      } else {
        if (i < hi) {
          qpos += 1;
          qstack[qpos].lo = i;
          qstack[qpos].hi = hi;
        }
        if (lo < j) {
          qpos += 1;
          qstack[qpos].lo = lo;
          qstack[qpos].hi = j;
        }
      }
Pedro Gonnet's avatar
Pedro Gonnet committed
237
    }
238
239
240
  }
}

Pedro Gonnet's avatar
Pedro Gonnet committed
241
242
243
244
245
/**
 * @brief Sort the particles in the given cell along all cardinal directions.
 *
 * @param r The #runner.
 * @param c The #cell.
246
 * @param flags Cell flag.
247
248
 * @param clock Flag indicating whether to record the timing or not, needed
 *      for recursive calls.
Pedro Gonnet's avatar
Pedro Gonnet committed
249
 */
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337

void runner_dosort(struct runner *r, struct cell *c, int flags, int clock) {

  struct entry *finger;
  struct entry *fingers[8];
  struct part *parts = c->parts;
  struct entry *sort;
  int j, k, count = c->count;
  int i, ind, off[8], inds[8], temp_i, missing;
  // float shift[3];
  float buff[8], px[3];

  TIMER_TIC

  /* Clean-up the flags, i.e. filter out what's already been sorted. */
  flags &= ~c->sorted;
  if (flags == 0) return;

  /* start by allocating the entry arrays. */
  if (c->sort == NULL || c->sortsize < count) {
    if (c->sort != NULL) free(c->sort);
    c->sortsize = count * 1.1;
    if ((c->sort = (struct entry *)malloc(sizeof(struct entry) *
                                          (c->sortsize + 1) * 13)) == NULL)
      error("Failed to allocate sort memory.");
  }
  sort = c->sort;

  /* Does this cell have any progeny? */
  if (c->split) {

    /* Fill in the gaps within the progeny. */
    for (k = 0; k < 8; k++) {
      if (c->progeny[k] == NULL) continue;
      missing = flags & ~c->progeny[k]->sorted;
      if (missing) runner_dosort(r, c->progeny[k], missing, 0);
    }

    /* Loop over the 13 different sort arrays. */
    for (j = 0; j < 13; j++) {

      /* Has this sort array been flagged? */
      if (!(flags & (1 << j))) continue;

      /* Init the particle index offsets. */
      for (off[0] = 0, k = 1; k < 8; k++)
        if (c->progeny[k - 1] != NULL)
          off[k] = off[k - 1] + c->progeny[k - 1]->count;
        else
          off[k] = off[k - 1];

      /* Init the entries and indices. */
      for (k = 0; k < 8; k++) {
        inds[k] = k;
        if (c->progeny[k] != NULL && c->progeny[k]->count > 0) {
          fingers[k] = &c->progeny[k]->sort[j * (c->progeny[k]->count + 1)];
          buff[k] = fingers[k]->d;
          off[k] = off[k];
        } else
          buff[k] = FLT_MAX;
      }

      /* Sort the buffer. */
      for (i = 0; i < 7; i++)
        for (k = i + 1; k < 8; k++)
          if (buff[inds[k]] < buff[inds[i]]) {
            temp_i = inds[i];
            inds[i] = inds[k];
            inds[k] = temp_i;
          }

      /* For each entry in the new sort list. */
      finger = &sort[j * (count + 1)];
      for (ind = 0; ind < count; ind++) {

        /* Copy the minimum into the new sort array. */
        finger[ind].d = buff[inds[0]];
        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];

        /* Update the buffer. */
        fingers[inds[0]] += 1;
        buff[inds[0]] = fingers[inds[0]]->d;

        /* Find the smallest entry. */
        for (k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
          temp_i = inds[k - 1];
          inds[k - 1] = inds[k];
          inds[k] = temp_i;
Pedro Gonnet's avatar
Pedro Gonnet committed
338
        }
339

340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
      } /* Merge. */

      /* Add a sentinel. */
      sort[j * (count + 1) + count].d = FLT_MAX;
      sort[j * (count + 1) + count].i = 0;

      /* Mark as sorted. */
      c->sorted |= (1 << j);

    } /* loop over sort arrays. */

  } /* progeny? */

  /* Otherwise, just sort. */
  else {

    /* Fill the sort array. */
    for (k = 0; k < count; k++) {
      px[0] = parts[k].x[0];
      px[1] = parts[k].x[1];
      px[2] = parts[k].x[2];
      for (j = 0; j < 13; j++)
        if (flags & (1 << j)) {
          sort[j * (count + 1) + k].i = k;
          sort[j * (count + 1) + k].d = px[0] * runner_shift[3 * j + 0] +
                                        px[1] * runner_shift[3 * j + 1] +
                                        px[2] * runner_shift[3 * j + 2];
        }
368
    }
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389

    /* Add the sentinel and sort. */
    for (j = 0; j < 13; j++)
      if (flags & (1 << j)) {
        sort[j * (count + 1) + count].d = FLT_MAX;
        sort[j * (count + 1) + count].i = 0;
        runner_dosort_ascending(&sort[j * (count + 1)], count);
        c->sorted |= (1 << j);
      }
  }

/* Verify the sorting. */
/* for ( j = 0 ; j < 13 ; j++ ) {
    if ( !( flags & (1 << j) ) )
        continue;
    finger = &sort[ j*(count + 1) ];
    for ( k = 1 ; k < count ; k++ ) {
        if ( finger[k].d < finger[k-1].d )
            error( "Sorting failed, ascending array." );
        if ( finger[k].i >= count )
            error( "Sorting failed, indices borked." );
390
        }
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
    } */

#ifdef TIMER_VERBOSE
  message(
      "runner %02i: %i parts at depth %i (flags = %i%i%i%i%i%i%i%i%i%i%i%i%i) "
      "took %.3f ms.",
      r->id, count, c->depth, (flags & 0x1000) >> 12, (flags & 0x800) >> 11,
      (flags & 0x400) >> 10, (flags & 0x200) >> 9, (flags & 0x100) >> 8,
      (flags & 0x80) >> 7, (flags & 0x40) >> 6, (flags & 0x20) >> 5,
      (flags & 0x10) >> 4, (flags & 0x8) >> 3, (flags & 0x4) >> 2,
      (flags & 0x2) >> 1, (flags & 0x1) >> 0,
      ((double)TIMER_TOC(timer_dosort)) / CPU_TPS * 1000);
  fflush(stdout);
#else
  if (clock) TIMER_TOC(timer_dosort);
#endif
}

void runner_dogsort(struct runner *r, struct cell *c, int flags, int clock) {

  struct entry *finger;
  struct entry *fingers[8];
  struct gpart *gparts = c->gparts;
  struct entry *gsort;
  int j, k, count = c->gcount;
  int i, ind, off[8], inds[8], temp_i, missing;
  // float shift[3];
  float buff[8], px[3];

  TIMER_TIC

  /* Clean-up the flags, i.e. filter out what's already been sorted. */
  flags &= ~c->gsorted;
  if (flags == 0) return;

  /* start by allocating the entry arrays. */
  if (c->gsort == NULL || c->gsortsize < count) {
    if (c->gsort != NULL) free(c->gsort);
    c->gsortsize = count * 1.1;
    if ((c->gsort = (struct entry *)malloc(sizeof(struct entry) *
                                           (c->gsortsize + 1) * 13)) == NULL)
      error("Failed to allocate sort memory.");
  }
  gsort = c->gsort;

  /* Does this cell have any progeny? */
  if (c->split) {

    /* Fill in the gaps within the progeny. */
    for (k = 0; k < 8; k++) {
      if (c->progeny[k] == NULL) continue;
      missing = flags & ~c->progeny[k]->gsorted;
      if (missing) runner_dogsort(r, c->progeny[k], missing, 0);
    }

    /* Loop over the 13 different sort arrays. */
    for (j = 0; j < 13; j++) {

      /* Has this sort array been flagged? */
      if (!(flags & (1 << j))) continue;

      /* Init the particle index offsets. */
      for (off[0] = 0, k = 1; k < 8; k++)
        if (c->progeny[k - 1] != NULL)
          off[k] = off[k - 1] + c->progeny[k - 1]->gcount;
        else
          off[k] = off[k - 1];

      /* Init the entries and indices. */
      for (k = 0; k < 8; k++) {
        inds[k] = k;
        if (c->progeny[k] != NULL && c->progeny[k]->gcount > 0) {
          fingers[k] = &c->progeny[k]->gsort[j * (c->progeny[k]->gcount + 1)];
          buff[k] = fingers[k]->d;
          off[k] = off[k];
        } else
          buff[k] = FLT_MAX;
      }

      /* Sort the buffer. */
      for (i = 0; i < 7; i++)
        for (k = i + 1; k < 8; k++)
          if (buff[inds[k]] < buff[inds[i]]) {
            temp_i = inds[i];
            inds[i] = inds[k];
            inds[k] = temp_i;
          }

      /* For each entry in the new sort list. */
      finger = &gsort[j * (count + 1)];
      for (ind = 0; ind < count; ind++) {

        /* Copy the minimum into the new sort array. */
        finger[ind].d = buff[inds[0]];
        finger[ind].i = fingers[inds[0]]->i + off[inds[0]];

        /* Update the buffer. */
        fingers[inds[0]] += 1;
        buff[inds[0]] = fingers[inds[0]]->d;

        /* Find the smallest entry. */
        for (k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) {
          temp_i = inds[k - 1];
          inds[k - 1] = inds[k];
          inds[k] = temp_i;
496
        }
Pedro Gonnet's avatar
Pedro Gonnet committed
497

498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
      } /* Merge. */

      /* Add a sentinel. */
      gsort[j * (count + 1) + count].d = FLT_MAX;
      gsort[j * (count + 1) + count].i = 0;

      /* Mark as sorted. */
      c->gsorted |= (1 << j);

    } /* loop over sort arrays. */

  } /* progeny? */

  /* Otherwise, just sort. */
  else {

    /* Fill the sort array. */
    for (k = 0; k < count; k++) {
      px[0] = gparts[k].x[0];
      px[1] = gparts[k].x[1];
      px[2] = gparts[k].x[2];
      for (j = 0; j < 13; j++)
        if (flags & (1 << j)) {
          gsort[j * (count + 1) + k].i = k;
          gsort[j * (count + 1) + k].d = px[0] * runner_shift[3 * j + 0] +
                                         px[1] * runner_shift[3 * j + 1] +
                                         px[2] * runner_shift[3 * j + 2];
        }
Pedro Gonnet's avatar
Pedro Gonnet committed
526
    }
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566

    /* Add the sentinel and sort. */
    for (j = 0; j < 13; j++)
      if (flags & (1 << j)) {
        gsort[j * (count + 1) + count].d = FLT_MAX;
        gsort[j * (count + 1) + count].i = 0;
        runner_dosort_ascending(&gsort[j * (count + 1)], count);
        c->gsorted |= (1 << j);
      }
  }

/* Verify the sorting. */
/* for ( j = 0 ; j < 13 ; j++ ) {
    if ( !( flags & (1 << j) ) )
        continue;
    finger = &c->gsort[ j*(count + 1) ];
    for ( k = 1 ; k < count ; k++ ) {
        if ( finger[k].d < finger[k-1].d )
            error( "Sorting failed, ascending array." );
        if ( finger[k].i < 0 || finger[k].i >= count )
            error( "Sorting failed, indices borked." );
        }
    } */

#ifdef TIMER_VERBOSE
  message(
      "runner %02i: %i parts at depth %i (flags = %i%i%i%i%i%i%i%i%i%i%i%i%i) "
      "took %.3f ms.",
      r->id, count, c->depth, (flags & 0x1000) >> 12, (flags & 0x800) >> 11,
      (flags & 0x400) >> 10, (flags & 0x200) >> 9, (flags & 0x100) >> 8,
      (flags & 0x80) >> 7, (flags & 0x40) >> 6, (flags & 0x20) >> 5,
      (flags & 0x10) >> 4, (flags & 0x8) >> 3, (flags & 0x4) >> 2,
      (flags & 0x2) >> 1, (flags & 0x1) >> 0,
      ((double)TIMER_TOC(timer_dosort)) / CPU_TPS * 1000);
  fflush(stdout);
#else
  if (clock) TIMER_TOC(timer_dosort);
#endif
}

567
568
569
570
571
/**
 * @brief Initialize the particles before the density calculation
 *
 * @param r The runner thread.
 * @param c The cell.
Matthieu Schaller's avatar
Matthieu Schaller committed
572
 * @param timer 1 if the time is to be recorded.
573
574
 */

575
void runner_doinit(struct runner *r, struct cell *c, int timer) {
576
577

  struct part *p, *parts = c->parts;
Matthieu Schaller's avatar
Matthieu Schaller committed
578
  const int count = c->count;
579
  const int ti_current = r->e->ti_current;
580
581

  TIMER_TIC;
582

583
584
  /* Recurse? */
  if (c->split) {
Matthieu Schaller's avatar
Matthieu Schaller committed
585
    for (int k = 0; k < 8; k++)
586
      if (c->progeny[k] != NULL) runner_doinit(r, c->progeny[k], 0);
587
    return;
588
589
  } else {

590
591
    /* Loop over the parts in this cell. */
    for (int i = 0; i < count; i++) {
592

593
594
      /* Get a direct pointer on the part. */
      p = &parts[i];
595

596
      if (p->ti_end <= ti_current) {
Matthieu Schaller's avatar
Matthieu Schaller committed
597

598
599
        /* Get ready for a density calculation */
        hydro_init_part(p);
600
      }
601
602
    }
  }
603
604
605
606
607
608
609
610
611
612

  if (timer) {
#ifdef TIMER_VERBOSE
    message("runner %02i: %i parts at depth %i took %.3f ms.", r->id, c->count,
            c->depth, ((double)TIMER_TOC(timer_init)) / CPU_TPS * 1000);
    fflush(stdout);
#else
    TIMER_TOC(timer_init);
#endif
  }
613
614
}

615
616
617
/**
 * @brief Intermediate task between density and force
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
618
 * @param r The runner thread.
619
 * @param c The cell.
620
 */
621
622
623
624

void runner_doghost(struct runner *r, struct cell *c) {

  struct part *p, *parts = c->parts;
625
  struct xpart *xp, *xparts = c->xparts;
626
  struct cell *finger;
Matthieu Schaller's avatar
Matthieu Schaller committed
627
  int redo, count = c->count;
628
  int *pid;
629
  float h_corr;
630
631
  const int ti_current = r->e->ti_current;
  const double timeBase = r->e->timeBase;
632

633
634
  TIMER_TIC;

635
636
  /* Recurse? */
  if (c->split) {
Matthieu Schaller's avatar
Matthieu Schaller committed
637
    for (int k = 0; k < 8; k++)
638
639
640
641
642
643
644
      if (c->progeny[k] != NULL) runner_doghost(r, c->progeny[k]);
    return;
  }

  /* Init the IDs that have to be updated. */
  if ((pid = (int *)alloca(sizeof(int) * count)) == NULL)
    error("Call to alloca failed.");
Matthieu Schaller's avatar
Matthieu Schaller committed
645
  for (int k = 0; k < count; k++) pid[k] = k;
646
647

  /* While there are particles that need to be updated... */
Matthieu Schaller's avatar
Matthieu Schaller committed
648
  for (int num_reruns = 0; count > 0 && num_reruns < const_smoothing_max_iter;
Matthieu Schaller's avatar
Matthieu Schaller committed
649
       num_reruns++) {
650
651
652

    /* Reset the redo-count. */
    redo = 0;
653

654
    /* Loop over the parts in this cell. */
Matthieu Schaller's avatar
Matthieu Schaller committed
655
    for (int i = 0; i < count; i++) {
656
657
658

      /* Get a direct pointer on the part. */
      p = &parts[pid[i]];
659
      xp = &xparts[pid[i]];
660
661

      /* Is this part within the timestep? */
662
      if (p->ti_end <= ti_current) {
663

664
        /* Finish the density calculation */
665
        hydro_end_density(p, ti_current);
666
667

        /* If no derivative, double the smoothing length. */
668
        if (p->density.wcount_dh == 0.0f) h_corr = p->h;
669
670
671

        /* Otherwise, compute the smoothing length update (Newton step). */
        else {
672
          h_corr = (kernel_nwneigh - p->density.wcount) / p->density.wcount_dh;
673
674

          /* Truncate to the range [ -p->h/2 , p->h ]. */
675
676
          h_corr = fminf(h_corr, p->h);
          h_corr = fmaxf(h_corr, -p->h * 0.5f);
Pedro Gonnet's avatar
Pedro Gonnet committed
677
        }
678
679

        /* Did we get the right number density? */
680
        if (p->density.wcount > kernel_nwneigh + const_delta_nwneigh ||
681
            p->density.wcount < kernel_nwneigh - const_delta_nwneigh) {
682
683
684
685

          /* Ok, correct then */
          p->h += h_corr;

686
          /* Flag for another round of fun */
687
688
          pid[redo] = pid[i];
          redo += 1;
689

690
691
          /* Re-initialise everything */
          hydro_init_part(p);
692

693
          /* Off we go ! */
694
          continue;
695
696
        }

Matthieu Schaller's avatar
Matthieu Schaller committed
697
        /* We now have a particle whose smoothing length has converged */
Matthieu Schaller's avatar
Matthieu Schaller committed
698

699
        /* As of here, particle force variables will be set. */
700

701
        /* Compute variables required for the force loop */
702
        hydro_prepare_force(p, xp, ti_current, timeBase);
703

Matthieu Schaller's avatar
Matthieu Schaller committed
704
        /* The particle force values are now set.  Do _NOT_
705
           try to read any particle density variables! */
Matthieu Schaller's avatar
Matthieu Schaller committed
706

707
708
        /* Prepare the particle for the force loop over neighbours */
        hydro_reset_acceleration(p);
709
710
711
      }
    }

712
713
714
    /* We now need to treat the particles whose smoothing length had not
     * converged again */

715
716
717
718
719
720
    /* Re-set the counter for the next loop (potentially). */
    count = redo;
    if (count > 0) {

      /* Climb up the cell hierarchy. */
      for (finger = c; finger != NULL; finger = finger->parent) {
Matthieu Schaller's avatar
Matthieu Schaller committed
721

722
723
        /* Run through this cell's density interactions. */
        for (struct link *l = finger->density; l != NULL; l = l->next) {
Matthieu Schaller's avatar
Matthieu Schaller committed
724

725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
          /* Self-interaction? */
          if (l->t->type == task_type_self)
            runner_doself_subset_density(r, finger, parts, pid, count);

          /* Otherwise, pair interaction? */
          else if (l->t->type == task_type_pair) {

            /* Left or right? */
            if (l->t->ci == finger)
              runner_dopair_subset_density(r, finger, parts, pid, count,
                                           l->t->cj);
            else
              runner_dopair_subset_density(r, finger, parts, pid, count,
                                           l->t->ci);

          }

          /* Otherwise, sub interaction? */
          else if (l->t->type == task_type_sub) {

            /* Left or right? */
            if (l->t->ci == finger)
              runner_dosub_subset_density(r, finger, parts, pid, count,
                                          l->t->cj, -1, 1);
            else
              runner_dosub_subset_density(r, finger, parts, pid, count,
                                          l->t->ci, -1, 1);
          }
        }
      }
755
    }
756
  }
757

Matthieu Schaller's avatar
Matthieu Schaller committed
758
759
  if (count)
    message("Smoothing length failed to converge on %i particles.", count);
760
761
762
763
764
765
766
767
768
769

#ifdef TIMER_VERBOSE
  message("runner %02i: %i parts at depth %i took %.3f ms.", r->id, c->count,
          c->depth, ((double)TIMER_TOC(timer_doghost)) / CPU_TPS * 1000);
  fflush(stdout);
#else
  TIMER_TOC(timer_doghost);
#endif
}

770
/**
771
 * @brief Drift particles forward in time
772
773
774
 *
 * @param r The runner thread.
 * @param c The cell.
775
 * @param timer Are we timing this ?
776
 */
777
void runner_dodrift(struct runner *r, struct cell *c, int timer) {
778

779
  const int nr_parts = c->count;
Tom Theuns's avatar
Tom Theuns committed
780
  const int nr_gparts = c->gcount;
781
782
783
784
  const double timeBase = r->e->timeBase;
  const double dt = (r->e->ti_current - r->e->ti_old) * timeBase;
  const float ti_old = r->e->ti_old;
  const float ti_current = r->e->ti_current;
785
786
  struct part *restrict p, *restrict parts = c->parts;
  struct xpart *restrict xp, *restrict xparts = c->xparts;
787
788
  struct gpart *restrict g, 
  *restrict gparts = c->gparts;
Matthieu Schaller's avatar
Matthieu Schaller committed
789
  float dx_max = 0.f, h_max = 0.f;
Matthieu Schaller's avatar
Matthieu Schaller committed
790
  float w;
791

792
  TIMER_TIC
Tom Theuns's avatar
Tom Theuns committed
793
794
795
#ifdef TASK_VERBOSE
	 OUT
#endif
796

797
798
  /* No children? */
  if (!c->split) {
799

800
    /* Loop over all the particles in the cell */
801
    for (int k = 0; k < nr_parts; k++) {
802

803
804
805
      /* Get a handle on the part. */
      p = &parts[k];
      xp = &xparts[k];
806

Matthieu Schaller's avatar
Matthieu Schaller committed
807
808
809
      /* Useful quantity */
      const float h_inv = 1.0f / p->h;

810
811
812
813
      /* Drift... */
      p->x[0] += xp->v_full[0] * dt;
      p->x[1] += xp->v_full[1] * dt;
      p->x[2] += xp->v_full[2] * dt;
814

Matthieu Schaller's avatar
Matthieu Schaller committed
815
      /* Predict velocities (for hydro terms) */
Matthieu Schaller's avatar
Matthieu Schaller committed
816
817
818
      p->v[0] += p->a_hydro[0] * dt;
      p->v[1] += p->a_hydro[1] * dt;
      p->v[2] += p->a_hydro[2] * dt;
819

Matthieu Schaller's avatar
Matthieu Schaller committed
820
      /* Predict smoothing length */
821
      w = p->h_dt * h_inv * dt;
Matthieu Schaller's avatar
Matthieu Schaller committed
822
823
824
825
826
827
      if (fabsf(w) < 0.2f)
        p->h *= approx_expf(w); /* 4th order expansion of exp(w) */
      else
        p->h *= expf(w);

      /* Predict density */
828
      w = -3.0f * p->h_dt * h_inv * dt;
Matthieu Schaller's avatar
Matthieu Schaller committed
829
830
831
832
      if (fabsf(w) < 0.2f)
        p->rho *= approx_expf(w); /* 4th order expansion of exp(w) */
      else
        p->rho *= expf(w);
833

834
      /* Predict the values of the extra fields */
835
      hydro_predict_extra(p, xp, ti_old, ti_current, timeBase);
836

Matthieu Schaller's avatar
Matthieu Schaller committed
837
      /* Compute motion since last cell construction */
838
839
840
841
      const float dx =
          sqrtf((p->x[0] - xp->x_old[0]) * (p->x[0] - xp->x_old[0]) +
                (p->x[1] - xp->x_old[1]) * (p->x[1] - xp->x_old[1]) +
                (p->x[2] - xp->x_old[2]) * (p->x[2] - xp->x_old[2]));
Matthieu Schaller's avatar
Matthieu Schaller committed
842
843
844
845
      dx_max = fmaxf(dx_max, dx);

      /* Maximal smoothing length */
      h_max = fmaxf(p->h, h_max);
846
    }
Tom Theuns's avatar
Tom Theuns committed
847
848
849
850
	 
	 /* Loop over all gparts in the cell */
	 for (int k = 0; k < nr_gparts; k++)
		{
851
852
853
854
855
856
857
		  g        = &gparts[k];
		  if(g->id == 0)
			 message(" dt= %f tx= %f tv=%f \n",dt, g->tx, g->tv);
		  g->x[0] += g->v_full[0] * dt;
		  g->x[1] += g->v_full[1] * dt;
		  g->x[2] += g->v_full[2] * dt;
		  g->tx   += dt;
Tom Theuns's avatar
Tom Theuns committed
858
859
		}
	 
860
  }
861

Matthieu Schaller's avatar
Matthieu Schaller committed
862
863
864
865
866
867
868
869
870
  /* Otherwise, aggregate data from children. */
  else {

    /* Loop over the progeny. */
    for (int k = 0; k < 8; k++)
      if (c->progeny[k] != NULL) {
        struct cell *cp = c->progeny[k];
        runner_dodrift(r, cp, 0);

871
872
        dx_max = fmaxf(dx_max, cp->dx_max);
        h_max = fmaxf(h_max, cp->h_max);
Matthieu Schaller's avatar
Matthieu Schaller committed
873
874
875
876
877
878
      }
  }

  /* Store the values */
  c->h_max = h_max;
  c->dx_max = dx_max;
879

880
881
882
883
884
885
886
887
  if (timer) {
#ifdef TIMER_VERBOSE
    message("runner %02i: %i parts at depth %i took %.3f ms.", r->id, c->count,
            c->depth, ((double)TIMER_TOC(timer_drift)) / CPU_TPS * 1000);
    fflush(stdout);
#else
    TIMER_TOC(timer_drift);
#endif
888
889
  }
}
890

891
892
893
894
895
/**
 * @brief Combined second and first kick for fixed dt.
 *
 * @param r The runner thread.
 * @param c The cell.
896
 * @param timer The timer
897
898
 */

899
900
void runner_dokick(struct runner *r, struct cell *c, int timer) {

901
902
  const float global_dt_min = r->e->dt_min;
  const float global_dt_max = r->e->dt_max;
903
  const int ti_current = r->e->ti_current;
904
905
  const double timeBase = r->e->timeBase;
  const double timeBase_inv = 1.0 / r->e->timeBase;
Matthieu Schaller's avatar
Matthieu Schaller committed
906
  const int count = c->count;
Tom Theuns's avatar
Tom Theuns committed
907
  const int gcount = c->gcount;
908
909
  const int is_fixdt =
      (r->e->policy & engine_policy_fixdt) == engine_policy_fixdt;
Matthieu Schaller's avatar
Matthieu Schaller committed
910

911
912
  int new_dti;
  int dti_timeline;
Matthieu Schaller's avatar
Matthieu Schaller committed
913
914

  int updated = 0;
915
  int ti_end_min = max_nr_timesteps, ti_end_max = 0;
916
917
918
919
  double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, mass = 0.0;
  float mom[3] = {0.0f, 0.0f, 0.0f};
  float ang[3] = {0.0f, 0.0f, 0.0f};
  float x[3], v_full[3];
920
921
  struct part *restrict p, *restrict parts = c->parts;
  struct xpart *restrict xp, *restrict xparts = c->xparts;
Tom Theuns's avatar
Tom Theuns committed
922
  struct gpart *restrict g, *restrict gparts = c->gparts;
923
924
925

  TIMER_TIC

Tom Theuns's avatar
Tom Theuns committed
926
927
928
929
#ifdef TASK_VERBOSE
	 OUT
#endif

930
931
932
  /* No children? */
  if (!c->split) {

933
    /* Loop over the particles and kick the active ones. */
Matthieu Schaller's avatar
Matthieu Schaller committed
934
    for (int k = 0; k < count; k++) {
935
936
937
938
939

      /* Get a handle on the part. */
      p = &parts[k];
      xp = &xparts[k];

940
      const float m = p->mass;
Matthieu Schaller's avatar
Matthieu Schaller committed
941
942
943
      x[0] = p->x[0];
      x[1] = p->x[1];
      x[2] = p->x[2];
944
945

      /* If particle needs to be kicked */
946
      if (is_fixdt || p->ti_end <= ti_current) {
947
948

        /* First, finish the force loop */
949
        p->h_dt *= p->h * 0.333333333f;
950
951

        /* And do the same of the extra variable */
952
953
        hydro_end_force(p);

954
955
        /* Now we are ready to compute the next time-step size */

956
957
958
        if (is_fixdt) {

          /* Now we have a time step, proceed with the kick */
959
          new_dti = global_dt_max * timeBase_inv;
960
961
962
963
964
965
966

        } else {

          /* Compute the next timestep */
          const float new_dt_hydro = hydro_compute_timestep(p, xp);
          const float new_dt_grav = gravity_compute_timestep(p, xp);

967
          float new_dt = fminf(new_dt_hydro, new_dt_grav);
Matthieu Schaller's avatar
Matthieu Schaller committed
968
969
970
971
972
973
974
975

          /* Limit change in h */
          const float dt_h_change =
              (p->h_dt != 0.0f) ? fabsf(const_ln_max_h_change * p->h / p->h_dt)
                                : FLT_MAX;

          new_dt = fminf(new_dt, dt_h_change);

976
977
978
979
          /* Limit timestep within the allowed range */
          new_dt = fminf(new_dt, global_dt_max);
          new_dt = fmaxf(new_dt, global_dt_min);

980
981
          /* Convert to integer time */
          new_dti = new_dt * timeBase_inv;
982
983

          /* Recover the current timestep */
984
          const int current_dti = p->ti_end - p->ti_begin;
985
986

          /* Limit timestep increase */
987
988
          if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti);

989
          /* Put this timestep on the time line */
990
991
          dti_timeline = max_nr_timesteps;
          while (new_dti < dti_timeline) dti_timeline /= 2;
992
993

          /* Now we have a time step, proceed with the kick */
994
          new_dti = dti_timeline;
995
        }
996

997
        /* Compute the time step for this kick */
998
        const int ti_start = (p->ti_begin + p->ti_end) / 2;
999
        const int ti_end = p->ti_end + new_dti / 2;
1000
        const float dt = (ti_end - ti_start) * timeBase;
For faster browsing, not all history is shown. View entire blame